1{-# LANGUAGE GADTs #-} 2{-# LANGUAGE BangPatterns #-} 3{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} 4module CmmContFlowOpt 5 ( cmmCfgOpts 6 , cmmCfgOptsProc 7 , removeUnreachableBlocksProc 8 , replaceLabels 9 ) 10where 11 12import GhcPrelude hiding (succ, unzip, zip) 13 14import Hoopl.Block 15import Hoopl.Collections 16import Hoopl.Graph 17import Hoopl.Label 18import BlockId 19import Cmm 20import CmmUtils 21import CmmSwitch (mapSwitchTargets, switchTargetsToList) 22import Maybes 23import Panic 24import Util 25 26import Control.Monad 27 28 29-- Note [What is shortcutting] 30-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 31-- 32-- Consider this Cmm code: 33-- 34-- L1: ... 35-- goto L2; 36-- L2: goto L3; 37-- L3: ... 38-- 39-- Here L2 is an empty block and contains only an unconditional branch 40-- to L3. In this situation any block that jumps to L2 can jump 41-- directly to L3: 42-- 43-- L1: ... 44-- goto L3; 45-- L2: goto L3; 46-- L3: ... 47-- 48-- In this situation we say that we shortcut L2 to L3. One of 49-- consequences of shortcutting is that some blocks of code may become 50-- unreachable (in the example above this is true for L2). 51 52 53-- Note [Control-flow optimisations] 54-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 55-- 56-- This optimisation does three things: 57-- 58-- - If a block finishes in an unconditional branch to another block 59-- and that is the only jump to that block we concatenate the 60-- destination block at the end of the current one. 61-- 62-- - If a block finishes in a call whose continuation block is a 63-- goto, then we can shortcut the destination, making the 64-- continuation block the destination of the goto - but see Note 65-- [Shortcut call returns]. 66-- 67-- - For any block that is not a call we try to shortcut the 68-- destination(s). Additionally, if a block ends with a 69-- conditional branch we try to invert the condition. 70-- 71-- Blocks are processed using postorder DFS traversal. A side effect 72-- of determining traversal order with a graph search is elimination 73-- of any blocks that are unreachable. 74-- 75-- Transformations are improved by working from the end of the graph 76-- towards the beginning, because we may be able to perform many 77-- shortcuts in one go. 78 79 80-- Note [Shortcut call returns] 81-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 82-- 83-- We are going to maintain the "current" graph (LabelMap CmmBlock) as 84-- we go, and also a mapping from BlockId to BlockId, representing 85-- continuation labels that we have renamed. This latter mapping is 86-- important because we might shortcut a CmmCall continuation. For 87-- example: 88-- 89-- Sp[0] = L 90-- call g returns to L 91-- L: goto M 92-- M: ... 93-- 94-- So when we shortcut the L block, we need to replace not only 95-- the continuation of the call, but also references to L in the 96-- code (e.g. the assignment Sp[0] = L): 97-- 98-- Sp[0] = M 99-- call g returns to M 100-- M: ... 101-- 102-- So we keep track of which labels we have renamed and apply the mapping 103-- at the end with replaceLabels. 104 105 106-- Note [Shortcut call returns and proc-points] 107-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 108-- 109-- Consider this code that you might get from a recursive 110-- let-no-escape: 111-- 112-- goto L1 113-- L1: 114-- if (Hp > HpLim) then L2 else L3 115-- L2: 116-- call stg_gc_noregs returns to L4 117-- L4: 118-- goto L1 119-- L3: 120-- ... 121-- goto L1 122-- 123-- Then the control-flow optimiser shortcuts L4. But that turns L1 124-- into the call-return proc point, and every iteration of the loop 125-- has to shuffle variables to and from the stack. So we must *not* 126-- shortcut L4. 127-- 128-- Moreover not shortcutting call returns is probably fine. If L4 can 129-- concat with its branch target then it will still do so. And we 130-- save some compile time because we don't have to traverse all the 131-- code in replaceLabels. 132-- 133-- However, we probably do want to do this if we are splitting proc 134-- points, because L1 will be a proc-point anyway, so merging it with 135-- L4 reduces the number of proc points. Unfortunately recursive 136-- let-no-escapes won't generate very good code with proc-point 137-- splitting on - we should probably compile them to explicitly use 138-- the native calling convention instead. 139 140cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph 141cmmCfgOpts split g = fst (blockConcat split g) 142 143cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl 144cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g' 145 where (g', env) = blockConcat split g 146 info' = info{ info_tbls = new_info_tbls } 147 new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info))) 148 149 -- If we changed any labels, then we have to update the info tables 150 -- too, except for the top-level info table because that might be 151 -- referred to by other procs. 152 upd_info (k,info) 153 | Just k' <- mapLookup k env 154 = (k', if k' == g_entry g' 155 then info 156 else info{ cit_lbl = infoTblLbl k' }) 157 | otherwise 158 = (k,info) 159cmmCfgOptsProc _ top = top 160 161 162blockConcat :: Bool -> CmmGraph -> (CmmGraph, LabelMap BlockId) 163blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } 164 = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map') 165 where 166 -- We might be able to shortcut the entry BlockId itself. 167 -- Remember to update the shortcut_map, since we also have to 168 -- update the info_tbls mapping now. 169 (new_entry, shortcut_map') 170 | Just entry_blk <- mapLookup entry_id new_blocks 171 , Just dest <- canShortcut entry_blk 172 = (dest, mapInsert entry_id dest shortcut_map) 173 | otherwise 174 = (entry_id, shortcut_map) 175 176 -- blocks are sorted in reverse postorder, but we want to go from the exit 177 -- towards beginning, so we use foldr below. 178 blocks = revPostorder g 179 blockmap = foldl' (flip addBlock) emptyBody blocks 180 181 -- Accumulator contains three components: 182 -- * map of blocks in a graph 183 -- * map of shortcut labels. See Note [Shortcut call returns] 184 -- * map containing number of predecessors for each block. We discard 185 -- it after we process all blocks. 186 (new_blocks, shortcut_map, _) = 187 foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks 188 189 -- Map of predecessors for initial graph. We increase number of 190 -- predecessors for entry block by one to denote that it is 191 -- target of a jump, even if no block in the current graph jumps 192 -- to it. 193 initialBackEdges = incPreds entry_id (predMap blocks) 194 195 maybe_concat :: CmmBlock 196 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int) 197 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int) 198 maybe_concat block (!blocks, !shortcut_map, !backEdges) 199 -- If: 200 -- (1) current block ends with unconditional branch to b' and 201 -- (2) it has exactly one predecessor (namely, current block) 202 -- 203 -- Then: 204 -- (1) append b' block at the end of current block 205 -- (2) remove b' from the map of blocks 206 -- (3) remove information about b' from predecessors map 207 -- 208 -- Since we know that the block has only one predecessor we call 209 -- mapDelete directly instead of calling decPreds. 210 -- 211 -- Note that we always maintain an up-to-date list of predecessors, so 212 -- we can ignore the contents of shortcut_map 213 | CmmBranch b' <- last 214 , hasOnePredecessor b' 215 , Just blk' <- mapLookup b' blocks 216 = let bid' = entryLabel blk' 217 in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks 218 , shortcut_map 219 , mapDelete b' backEdges ) 220 221 -- If: 222 -- (1) we are splitting proc points (see Note 223 -- [Shortcut call returns and proc-points]) and 224 -- (2) current block is a CmmCall or CmmForeignCall with 225 -- continuation b' and 226 -- (3) we can shortcut that continuation to dest 227 -- Then: 228 -- (1) we change continuation to point to b' 229 -- (2) create mapping from b' to dest 230 -- (3) increase number of predecessors of dest by 1 231 -- (4) decrease number of predecessors of b' by 1 232 -- 233 -- Later we will use replaceLabels to substitute all occurrences of b' 234 -- with dest. 235 | splitting_procs 236 , Just b' <- callContinuation_maybe last 237 , Just blk' <- mapLookup b' blocks 238 , Just dest <- canShortcut blk' 239 = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks 240 , mapInsert b' dest shortcut_map 241 , decPreds b' $ incPreds dest backEdges ) 242 243 -- If: 244 -- (1) a block does not end with a call 245 -- Then: 246 -- (1) if it ends with a conditional attempt to invert the 247 -- conditional 248 -- (2) attempt to shortcut all destination blocks 249 -- (3) if new successors of a block are different from the old ones 250 -- update the of predecessors accordingly 251 -- 252 -- A special case of this is a situation when a block ends with an 253 -- unconditional jump to a block that can be shortcut. 254 | Nothing <- callContinuation_maybe last 255 = let oldSuccs = successors last 256 newSuccs = successors rewrite_last 257 in ( mapInsert bid (blockJoinTail head rewrite_last) blocks 258 , shortcut_map 259 , if oldSuccs == newSuccs 260 then backEdges 261 else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs ) 262 263 -- Otherwise don't do anything 264 | otherwise 265 = ( blocks, shortcut_map, backEdges ) 266 where 267 (head, last) = blockSplitTail block 268 bid = entryLabel block 269 270 -- Changes continuation of a call to a specified label 271 update_cont dest = 272 case last of 273 CmmCall{} -> last { cml_cont = Just dest } 274 CmmForeignCall{} -> last { succ = dest } 275 _ -> panic "Can't shortcut continuation." 276 277 -- Attempts to shortcut successors of last node 278 shortcut_last = mapSuccessors shortcut last 279 where 280 shortcut l = 281 case mapLookup l blocks of 282 Just b | Just dest <- canShortcut b -> dest 283 _otherwise -> l 284 285 rewrite_last 286 -- Sometimes we can get rid of the conditional completely. 287 | CmmCondBranch _cond t f _l <- shortcut_last 288 , t == f 289 = CmmBranch t 290 291 -- See Note [Invert Cmm conditionals] 292 | CmmCondBranch cond t f l <- shortcut_last 293 , hasOnePredecessor t -- inverting will make t a fallthrough 294 , likelyTrue l || (numPreds f > 1) 295 , Just cond' <- maybeInvertCmmExpr cond 296 = CmmCondBranch cond' f t (invertLikeliness l) 297 298 -- If all jump destinations of a switch go to the 299 -- same target eliminate the switch. 300 | CmmSwitch _expr targets <- shortcut_last 301 , (t:ts) <- switchTargetsToList targets 302 , all (== t) ts 303 = CmmBranch t 304 305 | otherwise 306 = shortcut_last 307 308 likelyTrue (Just True) = True 309 likelyTrue _ = False 310 311 invertLikeliness :: Maybe Bool -> Maybe Bool 312 invertLikeliness = fmap not 313 314 -- Number of predecessors for a block 315 numPreds bid = mapLookup bid backEdges `orElse` 0 316 317 hasOnePredecessor b = numPreds b == 1 318 319{- 320 Note [Invert Cmm conditionals] 321 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 322 The native code generator always produces jumps to the true branch. 323 Falling through to the false branch is however faster. So we try to 324 arrange for that to happen. 325 This means we invert the condition if: 326 * The likely path will become a fallthrough. 327 * We can't guarantee a fallthrough for the false branch but for the 328 true branch. 329 330 In some cases it's faster to avoid inverting when the false branch is likely. 331 However determining when that is the case is neither easy nor cheap so for 332 now we always invert as this produces smaller binaries and code that is 333 equally fast on average. (On an i7-6700K) 334 335 TODO: 336 There is also the edge case when both branches have multiple predecessors. 337 In this case we could assume that we will end up with a jump for BOTH 338 branches. In this case it might be best to put the likely path in the true 339 branch especially if there are large numbers of predecessors as this saves 340 us the jump thats not taken. However I haven't tested this and as of early 341 2018 we almost never generate cmm where this would apply. 342-} 343 344-- Functions for incrementing and decrementing number of predecessors. If 345-- decrementing would set the predecessor count to 0, we remove entry from the 346-- map. 347-- Invariant: if a block has no predecessors it should be dropped from the 348-- graph because it is unreachable. maybe_concat is constructed to maintain 349-- that invariant, but calling replaceLabels may introduce unreachable blocks. 350-- We rely on subsequent passes in the Cmm pipeline to remove unreachable 351-- blocks. 352incPreds, decPreds :: BlockId -> LabelMap Int -> LabelMap Int 353incPreds bid edges = mapInsertWith (+) bid 1 edges 354decPreds bid edges = case mapLookup bid edges of 355 Just preds | preds > 1 -> mapInsert bid (preds - 1) edges 356 Just _ -> mapDelete bid edges 357 _ -> edges 358 359 360-- Checks if a block consists only of "goto dest". If it does than we return 361-- "Just dest" label. See Note [What is shortcutting] 362canShortcut :: CmmBlock -> Maybe BlockId 363canShortcut block 364 | (_, middle, CmmBranch dest) <- blockSplit block 365 , all dont_care $ blockToList middle 366 = Just dest 367 | otherwise 368 = Nothing 369 where dont_care CmmComment{} = True 370 dont_care CmmTick{} = True 371 dont_care _other = False 372 373-- Concatenates two blocks. First one is assumed to be open on exit, the second 374-- is assumed to be closed on entry (i.e. it has a label attached to it, which 375-- the splice function removes by calling snd on result of blockSplitHead). 376splice :: Block CmmNode C O -> CmmBlock -> CmmBlock 377splice head rest = entry `blockJoinHead` code0 `blockAppend` code1 378 where (CmmEntry lbl sc0, code0) = blockSplitHead head 379 (CmmEntry _ sc1, code1) = blockSplitHead rest 380 entry = CmmEntry lbl (combineTickScopes sc0 sc1) 381 382-- If node is a call with continuation call return Just label of that 383-- continuation. Otherwise return Nothing. 384callContinuation_maybe :: CmmNode O C -> Maybe BlockId 385callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b 386callContinuation_maybe (CmmForeignCall { succ = b }) = Just b 387callContinuation_maybe _ = Nothing 388 389 390-- Map over the CmmGraph, replacing each label with its mapping in the 391-- supplied LabelMap. 392replaceLabels :: LabelMap BlockId -> CmmGraph -> CmmGraph 393replaceLabels env g 394 | mapNull env = g 395 | otherwise = replace_eid $ mapGraphNodes1 txnode g 396 where 397 replace_eid g = g {g_entry = lookup (g_entry g)} 398 lookup id = mapLookup id env `orElse` id 399 400 txnode :: CmmNode e x -> CmmNode e x 401 txnode (CmmBranch bid) = CmmBranch (lookup bid) 402 txnode (CmmCondBranch p t f l) = 403 mkCmmCondBranch (exp p) (lookup t) (lookup f) l 404 txnode (CmmSwitch e ids) = 405 CmmSwitch (exp e) (mapSwitchTargets lookup ids) 406 txnode (CmmCall t k rg a res r) = 407 CmmCall (exp t) (liftM lookup k) rg a res r 408 txnode fc@CmmForeignCall{} = 409 fc{ args = map exp (args fc), succ = lookup (succ fc) } 410 txnode other = mapExpDeep exp other 411 412 exp :: CmmExpr -> CmmExpr 413 exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid)) 414 exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i 415 exp e = e 416 417mkCmmCondBranch :: CmmExpr -> Label -> Label -> Maybe Bool -> CmmNode O C 418mkCmmCondBranch p t f l = 419 if t == f then CmmBranch t else CmmCondBranch p t f l 420 421-- Build a map from a block to its set of predecessors. 422predMap :: [CmmBlock] -> LabelMap Int 423predMap blocks = foldr add_preds mapEmpty blocks 424 where 425 add_preds block env = foldr add env (successors block) 426 where add lbl env = mapInsertWith (+) lbl 1 env 427 428-- Removing unreachable blocks 429removeUnreachableBlocksProc :: CmmDecl -> CmmDecl 430removeUnreachableBlocksProc proc@(CmmProc info lbl live g) 431 | used_blocks `lengthLessThan` mapSize (toBlockMap g) 432 = CmmProc info' lbl live g' 433 | otherwise 434 = proc 435 where 436 g' = ofBlockList (g_entry g) used_blocks 437 info' = info { info_tbls = keep_used (info_tbls info) } 438 -- Remove any info_tbls for unreachable 439 440 keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable 441 keep_used bs = mapFoldlWithKey keep mapEmpty bs 442 443 keep :: LabelMap CmmInfoTable -> Label -> CmmInfoTable -> LabelMap CmmInfoTable 444 keep env l i | l `setMember` used_lbls = mapInsert l i env 445 | otherwise = env 446 447 used_blocks :: [CmmBlock] 448 used_blocks = revPostorder g 449 450 used_lbls :: LabelSet 451 used_lbls = setFromList $ map entryLabel used_blocks 452