1{-# LANGUAGE GADTs #-} 2{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} 3module GHC.Cmm.Switch ( 4 SwitchTargets, 5 mkSwitchTargets, 6 switchTargetsCases, switchTargetsDefault, switchTargetsRange, switchTargetsSigned, 7 mapSwitchTargets, switchTargetsToTable, switchTargetsFallThrough, 8 switchTargetsToList, eqSwitchTargetWith, 9 10 SwitchPlan(..), 11 backendSupportsSwitch, 12 createSwitchPlan, 13 ) where 14 15import GHC.Prelude 16 17import GHC.Utils.Outputable 18import GHC.Driver.Backend 19import GHC.Utils.Panic 20import GHC.Cmm.Dataflow.Label (Label) 21 22import Data.Maybe 23import Data.List (groupBy) 24import Data.Function (on) 25import qualified Data.Map as M 26 27-- Note [Cmm Switches, the general plan] 28-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 29-- 30-- Compiling a high-level switch statement, as it comes out of a STG case 31-- expression, for example, allows for a surprising amount of design decisions. 32-- Therefore, we cleanly separated this from the Stg → Cmm transformation, as 33-- well as from the actual code generation. 34-- 35-- The overall plan is: 36-- * The Stg → Cmm transformation creates a single `SwitchTargets` in 37-- emitSwitch and emitCmmLitSwitch in GHC.StgToCmm.Utils. 38-- At this stage, they are unsuitable for code generation. 39-- * A dedicated Cmm transformation (GHC.Cmm.Switch.Implement) replaces these 40-- switch statements with code that is suitable for code generation, i.e. 41-- a nice balanced tree of decisions with dense jump tables in the leafs. 42-- The actual planning of this tree is performed in pure code in createSwitchPlan 43-- in this module. See Note [createSwitchPlan]. 44-- * The actual code generation will not do any further processing and 45-- implement each CmmSwitch with a jump tables. 46-- 47-- When compiling to LLVM or C, GHC.Cmm.Switch.Implement leaves the switch 48-- statements alone, as we can turn a SwitchTargets value into a nice 49-- switch-statement in LLVM resp. C, and leave the rest to the compiler. 50-- 51-- See Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement] why the two module are 52-- separated. 53 54----------------------------------------------------------------------------- 55-- Note [Magic Constants in GHC.Cmm.Switch] 56-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 57-- 58-- There are a lot of heuristics here that depend on magic values where it is 59-- hard to determine the "best" value (for whatever that means). These are the 60-- magic values: 61 62-- | Number of consecutive default values allowed in a jump table. If there are 63-- more of them, the jump tables are split. 64-- 65-- Currently 7, as it costs 7 words of additional code when a jump table is 66-- split (at least on x64, determined experimentally). 67maxJumpTableHole :: Integer 68maxJumpTableHole = 7 69 70-- | Minimum size of a jump table. If the number is smaller, the switch is 71-- implemented using conditionals. 72-- Currently 5, because an if-then-else tree of 4 values is nice and compact. 73minJumpTableSize :: Int 74minJumpTableSize = 5 75 76-- | Minimum non-zero offset for a jump table. See Note [Jump Table Offset]. 77minJumpTableOffset :: Integer 78minJumpTableOffset = 2 79 80 81----------------------------------------------------------------------------- 82-- Switch Targets 83 84-- Note [SwitchTargets] 85-- ~~~~~~~~~~~~~~~~~~~~ 86-- 87-- The branches of a switch are stored in a SwitchTargets, which consists of an 88-- (optional) default jump target, and a map from values to jump targets. 89-- 90-- If the default jump target is absent, the behaviour of the switch outside the 91-- values of the map is undefined. 92-- 93-- We use an Integer for the keys the map so that it can be used in switches on 94-- unsigned as well as signed integers. 95-- 96-- The map may be empty (we prune out-of-range branches here, so it could be us 97-- emptying it). 98-- 99-- Before code generation, the table needs to be brought into a form where all 100-- entries are non-negative, so that it can be compiled into a jump table. 101-- See switchTargetsToTable. 102 103 104-- | A value of type SwitchTargets contains the alternatives for a 'CmmSwitch' 105-- value, and knows whether the value is signed, the possible range, an 106-- optional default value and a map from values to jump labels. 107data SwitchTargets = 108 SwitchTargets 109 Bool -- Signed values 110 (Integer, Integer) -- Range 111 (Maybe Label) -- Default value 112 (M.Map Integer Label) -- The branches 113 deriving (Show, Eq) 114 115-- | The smart constructor mkSwitchTargets normalises the map a bit: 116-- * No entries outside the range 117-- * No entries equal to the default 118-- * No default if all elements have explicit values 119mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe Label -> M.Map Integer Label -> SwitchTargets 120mkSwitchTargets signed range@(lo,hi) mbdef ids 121 = SwitchTargets signed range mbdef' ids' 122 where 123 ids' = dropDefault $ restrict ids 124 mbdef' | defaultNeeded = mbdef 125 | otherwise = Nothing 126 127 -- Drop entries outside the range, if there is a range 128 restrict = restrictMap (lo,hi) 129 130 -- Drop entries that equal the default, if there is a default 131 dropDefault | Just l <- mbdef = M.filter (/= l) 132 | otherwise = id 133 134 -- Check if the default is still needed 135 defaultNeeded = fromIntegral (M.size ids') /= hi-lo+1 136 137 138-- | Changes all labels mentioned in the SwitchTargets value 139mapSwitchTargets :: (Label -> Label) -> SwitchTargets -> SwitchTargets 140mapSwitchTargets f (SwitchTargets signed range mbdef branches) 141 = SwitchTargets signed range (fmap f mbdef) (fmap f branches) 142 143-- | Returns the list of non-default branches of the SwitchTargets value 144switchTargetsCases :: SwitchTargets -> [(Integer, Label)] 145switchTargetsCases (SwitchTargets _ _ _ branches) = M.toList branches 146 147-- | Return the default label of the SwitchTargets value 148switchTargetsDefault :: SwitchTargets -> Maybe Label 149switchTargetsDefault (SwitchTargets _ _ mbdef _) = mbdef 150 151-- | Return the range of the SwitchTargets value 152switchTargetsRange :: SwitchTargets -> (Integer, Integer) 153switchTargetsRange (SwitchTargets _ range _ _) = range 154 155-- | Return whether this is used for a signed value 156switchTargetsSigned :: SwitchTargets -> Bool 157switchTargetsSigned (SwitchTargets signed _ _ _) = signed 158 159-- | switchTargetsToTable creates a dense jump table, usable for code generation. 160-- 161-- Also returns an offset to add to the value; the list is 0-based on the 162-- result of that addition. 163-- 164-- The conversion from Integer to Int is a bit of a wart, as the actual 165-- scrutinee might be an unsigned word, but it just works, due to wrap-around 166-- arithmetic (as verified by the CmmSwitchTest test case). 167switchTargetsToTable :: SwitchTargets -> (Int, [Maybe Label]) 168switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches) 169 = (fromIntegral (-start), [ labelFor i | i <- [start..hi] ]) 170 where 171 labelFor i = case M.lookup i branches of Just l -> Just l 172 Nothing -> mbdef 173 start | lo >= 0 && lo < minJumpTableOffset = 0 -- See Note [Jump Table Offset] 174 | otherwise = lo 175 176-- Note [Jump Table Offset] 177-- ~~~~~~~~~~~~~~~~~~~~~~~~ 178-- 179-- Usually, the code for a jump table starting at x will first subtract x from 180-- the value, to avoid a large amount of empty entries. But if x is very small, 181-- the extra entries are no worse than the subtraction in terms of code size, and 182-- not having to do the subtraction is quicker. 183-- 184-- I.e. instead of 185-- _u20N: 186-- leaq -1(%r14),%rax 187-- jmp *_n20R(,%rax,8) 188-- _n20R: 189-- .quad _c20p 190-- .quad _c20q 191-- do 192-- _u20N: 193-- jmp *_n20Q(,%r14,8) 194-- 195-- _n20Q: 196-- .quad 0 197-- .quad _c20p 198-- .quad _c20q 199-- .quad _c20r 200 201-- | The list of all labels occurring in the SwitchTargets value. 202switchTargetsToList :: SwitchTargets -> [Label] 203switchTargetsToList (SwitchTargets _ _ mbdef branches) 204 = maybeToList mbdef ++ M.elems branches 205 206-- | Groups cases with equal targets, suitable for pretty-printing to a 207-- c-like switch statement with fall-through semantics. 208switchTargetsFallThrough :: SwitchTargets -> ([([Integer], Label)], Maybe Label) 209switchTargetsFallThrough (SwitchTargets _ _ mbdef branches) = (groups, mbdef) 210 where 211 groups = map (\xs -> (map fst xs, snd (head xs))) $ 212 groupBy ((==) `on` snd) $ 213 M.toList branches 214 215-- | Custom equality helper, needed for "GHC.Cmm.CommonBlockElim" 216eqSwitchTargetWith :: (Label -> Label -> Bool) -> SwitchTargets -> SwitchTargets -> Bool 217eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets signed2 range2 mbdef2 ids2) = 218 signed1 == signed2 && range1 == range2 && goMB mbdef1 mbdef2 && goList (M.toList ids1) (M.toList ids2) 219 where 220 goMB Nothing Nothing = True 221 goMB (Just l1) (Just l2) = l1 `eq` l2 222 goMB _ _ = False 223 goList [] [] = True 224 goList ((i1,l1):ls1) ((i2,l2):ls2) = i1 == i2 && l1 `eq` l2 && goList ls1 ls2 225 goList _ _ = False 226 227----------------------------------------------------------------------------- 228-- Code generation for Switches 229 230 231-- | A SwitchPlan abstractly describes how a Switch statement ought to be 232-- implemented. See Note [createSwitchPlan] 233data SwitchPlan 234 = Unconditionally Label 235 | IfEqual Integer Label SwitchPlan 236 | IfLT Bool Integer SwitchPlan SwitchPlan 237 | JumpTable SwitchTargets 238 deriving Show 239-- 240-- Note [createSwitchPlan] 241-- ~~~~~~~~~~~~~~~~~~~~~~~ 242-- 243-- A SwitchPlan describes how a Switch statement is to be broken down into 244-- smaller pieces suitable for code generation. 245-- 246-- createSwitchPlan creates such a switch plan, in these steps: 247-- 1. It splits the switch statement at segments of non-default values that 248-- are too large. See splitAtHoles and Note [Magic Constants in GHC.Cmm.Switch] 249-- 2. Too small jump tables should be avoided, so we break up smaller pieces 250-- in breakTooSmall. 251-- 3. We fill in the segments between those pieces with a jump to the default 252-- label (if there is one), returning a SeparatedList in mkFlatSwitchPlan 253-- 4. We find and replace two less-than branches by a single equal-to-test in 254-- findSingleValues 255-- 5. The thus collected pieces are assembled to a balanced binary tree. 256 257{- 258 Note [Two alts + default] 259 ~~~~~~~~~~~~~~~~~~~~~~~~~ 260 261Discussion and a bit more info at #14644 262 263When dealing with a switch of the form: 264switch(e) { 265 case 1: goto l1; 266 case 3000: goto l2; 267 default: goto ldef; 268} 269 270If we treat it as a sparse jump table we would generate: 271 272if (e > 3000) //Check if value is outside of the jump table. 273 goto ldef; 274else { 275 if (e < 3000) { //Compare to upper value 276 if(e != 1) //Compare to remaining value 277 goto ldef; 278 else 279 goto l2; 280 } 281 else 282 goto l1; 283} 284 285Instead we special case this to : 286 287if (e==1) goto l1; 288else if (e==3000) goto l2; 289else goto l3; 290 291This means we have: 292* Less comparisons for: 1,<3000 293* Unchanged for 3000 294* One more for >3000 295 296This improves code in a few ways: 297* One comparison less means smaller code which helps with cache. 298* It exchanges a taken jump for two jumps no taken in the >range case. 299 Jumps not taken are cheaper (See Agner guides) making this about as fast. 300* For all other cases the first range check is removed making it faster. 301 302The end result is that the change is not measurably slower for the case 303>3000 and faster for the other cases. 304 305This makes running this kind of match in an inner loop cheaper by 10-20% 306depending on the data. 307In nofib this improves wheel-sieve1 by 4-9% depending on problem 308size. 309 310We could also add a second conditional jump after the comparison to 311keep the range check like this: 312 cmp 3000, rArgument 313 jg <default> 314 je <branch 2> 315While this is fairly cheap it made no big difference for the >3000 case 316and slowed down all other cases making it not worthwhile. 317-} 318 319 320-- | Does the backend support switch out of the box? Then leave this to the 321-- backend! 322backendSupportsSwitch :: Backend -> Bool 323backendSupportsSwitch ViaC = True 324backendSupportsSwitch LLVM = True 325backendSupportsSwitch _ = False 326 327-- | This function creates a SwitchPlan from a SwitchTargets value, breaking it 328-- down into smaller pieces suitable for code generation. 329createSwitchPlan :: SwitchTargets -> SwitchPlan 330-- Lets do the common case of a singleton map quickly and efficiently (#10677) 331createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m) 332 | [(x, l)] <- M.toList m 333 = IfEqual x l (Unconditionally defLabel) 334-- And another common case, matching "booleans" 335createSwitchPlan (SwitchTargets _signed (lo,hi) Nothing m) 336 | [(x1, l1), (_x2,l2)] <- M.toAscList m 337 --Checking If |range| = 2 is enough if we have two unique literals 338 , hi - lo == 1 339 = IfEqual x1 l1 (Unconditionally l2) 340-- See Note [Two alts + default] 341createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m) 342 | [(x1, l1), (x2,l2)] <- M.toAscList m 343 = IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel)) 344createSwitchPlan (SwitchTargets signed range mbdef m) = 345 -- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $ 346 plan 347 where 348 pieces = concatMap breakTooSmall $ splitAtHoles maxJumpTableHole m 349 flatPlan = findSingleValues $ mkFlatSwitchPlan signed mbdef range pieces 350 plan = buildTree signed $ flatPlan 351 352 353--- 354--- Step 1: Splitting at large holes 355--- 356splitAtHoles :: Integer -> M.Map Integer a -> [M.Map Integer a] 357splitAtHoles _ m | M.null m = [] 358splitAtHoles holeSize m = map (\range -> restrictMap range m) nonHoles 359 where 360 holes = filter (\(l,h) -> h - l > holeSize) $ zip (M.keys m) (tail (M.keys m)) 361 nonHoles = reassocTuples lo holes hi 362 363 (lo,_) = M.findMin m 364 (hi,_) = M.findMax m 365 366--- 367--- Step 2: Avoid small jump tables 368--- 369-- We do not want jump tables below a certain size. This breaks them up 370-- (into singleton maps, for now). 371breakTooSmall :: M.Map Integer a -> [M.Map Integer a] 372breakTooSmall m 373 | M.size m > minJumpTableSize = [m] 374 | otherwise = [M.singleton k v | (k,v) <- M.toList m] 375 376--- 377--- Step 3: Fill in the blanks 378--- 379 380-- | A FlatSwitchPlan is a list of SwitchPlans, with an integer inbetween every 381-- two entries, dividing the range. 382-- So if we have (abusing list syntax) [plan1,n,plan2], then we use plan1 if 383-- the expression is < n, and plan2 otherwise. 384 385type FlatSwitchPlan = SeparatedList Integer SwitchPlan 386 387mkFlatSwitchPlan :: Bool -> Maybe Label -> (Integer, Integer) -> [M.Map Integer Label] -> FlatSwitchPlan 388 389-- If we have no default (i.e. undefined where there is no entry), we can 390-- branch at the minimum of each map 391mkFlatSwitchPlan _ Nothing _ [] = pprPanic "mkFlatSwitchPlan with nothing left to do" empty 392mkFlatSwitchPlan signed Nothing _ (m:ms) 393 = (mkLeafPlan signed Nothing m , [ (fst (M.findMin m'), mkLeafPlan signed Nothing m') | m' <- ms ]) 394 395-- If we have a default, we have to interleave segments that jump 396-- to the default between the maps 397mkFlatSwitchPlan signed (Just l) r ms = let ((_,p1):ps) = go r ms in (p1, ps) 398 where 399 go (lo,hi) [] 400 | lo > hi = [] 401 | otherwise = [(lo, Unconditionally l)] 402 go (lo,hi) (m:ms) 403 | lo < min 404 = (lo, Unconditionally l) : go (min,hi) (m:ms) 405 | lo == min 406 = (lo, mkLeafPlan signed (Just l) m) : go (max+1,hi) ms 407 | otherwise 408 = pprPanic "mkFlatSwitchPlan" (integer lo <+> integer min) 409 where 410 min = fst (M.findMin m) 411 max = fst (M.findMax m) 412 413 414mkLeafPlan :: Bool -> Maybe Label -> M.Map Integer Label -> SwitchPlan 415mkLeafPlan signed mbdef m 416 | [(_,l)] <- M.toList m -- singleton map 417 = Unconditionally l 418 | otherwise 419 = JumpTable $ mkSwitchTargets signed (min,max) mbdef m 420 where 421 min = fst (M.findMin m) 422 max = fst (M.findMax m) 423 424--- 425--- Step 4: Reduce the number of branches using == 426--- 427 428-- A sequence of three unconditional jumps, with the outer two pointing to the 429-- same value and the bounds off by exactly one can be improved 430findSingleValues :: FlatSwitchPlan -> FlatSwitchPlan 431findSingleValues (Unconditionally l, (i, Unconditionally l2) : (i', Unconditionally l3) : xs) 432 | l == l3 && i + 1 == i' 433 = findSingleValues (IfEqual i l2 (Unconditionally l), xs) 434findSingleValues (p, (i,p'):xs) 435 = (p,i) `consSL` findSingleValues (p', xs) 436findSingleValues (p, []) 437 = (p, []) 438 439--- 440--- Step 5: Actually build the tree 441--- 442 443-- Build a balanced tree from a separated list 444buildTree :: Bool -> FlatSwitchPlan -> SwitchPlan 445buildTree _ (p,[]) = p 446buildTree signed sl = IfLT signed m (buildTree signed sl1) (buildTree signed sl2) 447 where 448 (sl1, m, sl2) = divideSL sl 449 450 451 452-- 453-- Utility data type: Non-empty lists with extra markers in between each 454-- element: 455-- 456 457type SeparatedList b a = (a, [(b,a)]) 458 459consSL :: (a, b) -> SeparatedList b a -> SeparatedList b a 460consSL (a, b) (a', xs) = (a, (b,a'):xs) 461 462divideSL :: SeparatedList b a -> (SeparatedList b a, b, SeparatedList b a) 463divideSL (_,[]) = error "divideSL: Singleton SeparatedList" 464divideSL (p,xs) = ((p, xs1), m, (p', xs2)) 465 where 466 (xs1, (m,p'):xs2) = splitAt (length xs `div` 2) xs 467 468-- 469-- Other Utilities 470-- 471 472restrictMap :: (Integer,Integer) -> M.Map Integer b -> M.Map Integer b 473restrictMap (lo,hi) m = mid 474 where (_, mid_hi) = M.split (lo-1) m 475 (mid, _) = M.split (hi+1) mid_hi 476 477-- for example: reassocTuples a [(b,c),(d,e)] f == [(a,b),(c,d),(e,f)] 478reassocTuples :: a -> [(a,a)] -> a -> [(a,a)] 479reassocTuples initial [] last 480 = [(initial,last)] 481reassocTuples initial ((a,b):tuples) last 482 = (initial,a) : reassocTuples b tuples last 483 484-- Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement] 485-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 486-- I (Joachim) separated the two somewhat closely related modules 487-- 488-- - GHC.Cmm.Switch, which provides the CmmSwitchTargets type and contains the strategy 489-- for implementing a Cmm switch (createSwitchPlan), and 490-- - GHC.Cmm.Switch.Implement, which contains the actual Cmm graph modification, 491-- 492-- for these reasons: 493-- 494-- * GHC.Cmm.Switch is very low in the dependency tree, i.e. does not depend on any 495-- GHC specific modules at all (with the exception of Output and 496-- GHC.Cmm.Dataflow (Literal)). 497-- * GHC.Cmm.Switch.Implement is the Cmm transformation and hence very high in 498-- the dependency tree. 499-- * GHC.Cmm.Switch provides the CmmSwitchTargets data type, which is abstract, but 500-- used in GHC.Cmm.Node. 501-- * Because GHC.Cmm.Switch is low in the dependency tree, the separation allows 502-- for more parallelism when building GHC. 503-- * The interaction between the modules is very explicit and easy to 504-- understand, due to the small and simple interface. 505