1{-# LANGUAGE GADTs, BangPatterns, ScopedTypeVariables #-} 2 3module CmmCommonBlockElim 4 ( elimCommonBlocks 5 ) 6where 7 8 9import GhcPrelude hiding (iterate, succ, unzip, zip) 10 11import BlockId 12import Cmm 13import CmmUtils 14import CmmSwitch (eqSwitchTargetWith) 15import CmmContFlowOpt 16 17import Hoopl.Block 18import Hoopl.Graph 19import Hoopl.Label 20import Hoopl.Collections 21import Data.Bits 22import Data.Maybe (mapMaybe) 23import qualified Data.List as List 24import Data.Word 25import qualified Data.Map as M 26import Outputable 27import qualified TrieMap as TM 28import UniqFM 29import Unique 30import Control.Arrow (first, second) 31 32-- ----------------------------------------------------------------------------- 33-- Eliminate common blocks 34 35-- If two blocks are identical except for the label on the first node, 36-- then we can eliminate one of the blocks. To ensure that the semantics 37-- of the program are preserved, we have to rewrite each predecessor of the 38-- eliminated block to proceed with the block we keep. 39 40-- The algorithm iterates over the blocks in the graph, 41-- checking whether it has seen another block that is equal modulo labels. 42-- If so, then it adds an entry in a map indicating that the new block 43-- is made redundant by the old block. 44-- Otherwise, it is added to the useful blocks. 45 46-- To avoid comparing every block with every other block repeatedly, we group 47-- them by 48-- * a hash of the block, ignoring labels (explained below) 49-- * the list of outgoing labels 50-- The hash is invariant under relabeling, so we only ever compare within 51-- the same group of blocks. 52-- 53-- The list of outgoing labels is updated as we merge blocks (that is why they 54-- are not included in the hash, which we want to calculate only once). 55-- 56-- All in all, two blocks should never be compared if they have different 57-- hashes, and at most once otherwise. Previously, we were slower, and people 58-- rightfully complained: #10397 59 60-- TODO: Use optimization fuel 61elimCommonBlocks :: CmmGraph -> CmmGraph 62elimCommonBlocks g = replaceLabels env $ copyTicks env g 63 where 64 env = iterate mapEmpty blocks_with_key 65 -- The order of blocks doesn't matter here. While we could use 66 -- revPostorder which drops unreachable blocks this is done in 67 -- ContFlowOpt already which runs before this pass. So we use 68 -- toBlockList since it is faster. 69 groups = groupByInt hash_block (toBlockList g) :: [[CmmBlock]] 70 blocks_with_key = [ [ (successors b, [b]) | b <- bs] | bs <- groups] 71 72-- Invariant: The blocks in the list are pairwise distinct 73-- (so avoid comparing them again) 74type DistinctBlocks = [CmmBlock] 75type Key = [Label] 76type Subst = LabelMap BlockId 77 78-- The outer list groups by hash. We retain this grouping throughout. 79iterate :: Subst -> [[(Key, DistinctBlocks)]] -> Subst 80iterate subst blocks 81 | mapNull new_substs = subst 82 | otherwise = iterate subst' updated_blocks 83 where 84 grouped_blocks :: [[(Key, [DistinctBlocks])]] 85 grouped_blocks = map groupByLabel blocks 86 87 merged_blocks :: [[(Key, DistinctBlocks)]] 88 (new_substs, merged_blocks) = List.mapAccumL (List.mapAccumL go) mapEmpty grouped_blocks 89 where 90 go !new_subst1 (k,dbs) = (new_subst1 `mapUnion` new_subst2, (k,db)) 91 where 92 (new_subst2, db) = mergeBlockList subst dbs 93 94 subst' = subst `mapUnion` new_substs 95 updated_blocks = map (map (first (map (lookupBid subst')))) merged_blocks 96 97-- Combine two lists of blocks. 98-- While they are internally distinct they can still share common blocks. 99mergeBlocks :: Subst -> DistinctBlocks -> DistinctBlocks -> (Subst, DistinctBlocks) 100mergeBlocks subst existing new = go new 101 where 102 go [] = (mapEmpty, existing) 103 go (b:bs) = case List.find (eqBlockBodyWith (eqBid subst) b) existing of 104 -- This block is a duplicate. Drop it, and add it to the substitution 105 Just b' -> first (mapInsert (entryLabel b) (entryLabel b')) $ go bs 106 -- This block is not a duplicate, keep it. 107 Nothing -> second (b:) $ go bs 108 109mergeBlockList :: Subst -> [DistinctBlocks] -> (Subst, DistinctBlocks) 110mergeBlockList _ [] = pprPanic "mergeBlockList" empty 111mergeBlockList subst (b:bs) = go mapEmpty b bs 112 where 113 go !new_subst1 b [] = (new_subst1, b) 114 go !new_subst1 b1 (b2:bs) = go new_subst b bs 115 where 116 (new_subst2, b) = mergeBlocks subst b1 b2 117 new_subst = new_subst1 `mapUnion` new_subst2 118 119 120-- ----------------------------------------------------------------------------- 121-- Hashing and equality on blocks 122 123-- Below here is mostly boilerplate: hashing blocks ignoring labels, 124-- and comparing blocks modulo a label mapping. 125 126-- To speed up comparisons, we hash each basic block modulo jump labels. 127-- The hashing is a bit arbitrary (the numbers are completely arbitrary), 128-- but it should be fast and good enough. 129 130-- We want to get as many small buckets as possible, as comparing blocks is 131-- expensive. So include as much as possible in the hash. Ideally everything 132-- that is compared with (==) in eqBlockBodyWith. 133 134type HashCode = Int 135 136hash_block :: CmmBlock -> HashCode 137hash_block block = 138 fromIntegral (foldBlockNodesB3 (hash_fst, hash_mid, hash_lst) block (0 :: Word32) .&. (0x7fffffff :: Word32)) 139 -- UniqFM doesn't like negative Ints 140 where hash_fst _ h = h 141 hash_mid m h = hash_node m + h `shiftL` 1 142 hash_lst m h = hash_node m + h `shiftL` 1 143 144 hash_node :: CmmNode O x -> Word32 145 hash_node n | dont_care n = 0 -- don't care 146 hash_node (CmmAssign r e) = hash_reg r + hash_e e 147 hash_node (CmmStore e e') = hash_e e + hash_e e' 148 hash_node (CmmUnsafeForeignCall t _ as) = hash_tgt t + hash_list hash_e as 149 hash_node (CmmBranch _) = 23 -- NB. ignore the label 150 hash_node (CmmCondBranch p _ _ _) = hash_e p 151 hash_node (CmmCall e _ _ _ _ _) = hash_e e 152 hash_node (CmmForeignCall t _ _ _ _ _ _) = hash_tgt t 153 hash_node (CmmSwitch e _) = hash_e e 154 hash_node _ = error "hash_node: unknown Cmm node!" 155 156 hash_reg :: CmmReg -> Word32 157 hash_reg (CmmLocal localReg) = hash_unique localReg -- important for performance, see #10397 158 hash_reg (CmmGlobal _) = 19 159 160 hash_e :: CmmExpr -> Word32 161 hash_e (CmmLit l) = hash_lit l 162 hash_e (CmmLoad e _) = 67 + hash_e e 163 hash_e (CmmReg r) = hash_reg r 164 hash_e (CmmMachOp _ es) = hash_list hash_e es -- pessimal - no operator check 165 hash_e (CmmRegOff r i) = hash_reg r + cvt i 166 hash_e (CmmStackSlot _ _) = 13 167 168 hash_lit :: CmmLit -> Word32 169 hash_lit (CmmInt i _) = fromInteger i 170 hash_lit (CmmFloat r _) = truncate r 171 hash_lit (CmmVec ls) = hash_list hash_lit ls 172 hash_lit (CmmLabel _) = 119 -- ugh 173 hash_lit (CmmLabelOff _ i) = cvt $ 199 + i 174 hash_lit (CmmLabelDiffOff _ _ i _) = cvt $ 299 + i 175 hash_lit (CmmBlock _) = 191 -- ugh 176 hash_lit (CmmHighStackMark) = cvt 313 177 178 hash_tgt (ForeignTarget e _) = hash_e e 179 hash_tgt (PrimTarget _) = 31 -- lots of these 180 181 hash_list f = foldl' (\z x -> f x + z) (0::Word32) 182 183 cvt = fromInteger . toInteger 184 185 hash_unique :: Uniquable a => a -> Word32 186 hash_unique = cvt . getKey . getUnique 187 188-- | Ignore these node types for equality 189dont_care :: CmmNode O x -> Bool 190dont_care CmmComment {} = True 191dont_care CmmTick {} = True 192dont_care CmmUnwind {} = True 193dont_care _other = False 194 195-- Utilities: equality and substitution on the graph. 196 197-- Given a map ``subst'' from BlockID -> BlockID, we define equality. 198eqBid :: LabelMap BlockId -> BlockId -> BlockId -> Bool 199eqBid subst bid bid' = lookupBid subst bid == lookupBid subst bid' 200lookupBid :: LabelMap BlockId -> BlockId -> BlockId 201lookupBid subst bid = case mapLookup bid subst of 202 Just bid -> lookupBid subst bid 203 Nothing -> bid 204 205-- Middle nodes and expressions can contain BlockIds, in particular in 206-- CmmStackSlot and CmmBlock, so we have to use a special equality for 207-- these. 208-- 209eqMiddleWith :: (BlockId -> BlockId -> Bool) 210 -> CmmNode O O -> CmmNode O O -> Bool 211eqMiddleWith eqBid (CmmAssign r1 e1) (CmmAssign r2 e2) 212 = r1 == r2 && eqExprWith eqBid e1 e2 213eqMiddleWith eqBid (CmmStore l1 r1) (CmmStore l2 r2) 214 = eqExprWith eqBid l1 l2 && eqExprWith eqBid r1 r2 215eqMiddleWith eqBid (CmmUnsafeForeignCall t1 r1 a1) 216 (CmmUnsafeForeignCall t2 r2 a2) 217 = t1 == t2 && r1 == r2 && eqListWith (eqExprWith eqBid) a1 a2 218eqMiddleWith _ _ _ = False 219 220eqExprWith :: (BlockId -> BlockId -> Bool) 221 -> CmmExpr -> CmmExpr -> Bool 222eqExprWith eqBid = eq 223 where 224 CmmLit l1 `eq` CmmLit l2 = eqLit l1 l2 225 CmmLoad e1 _ `eq` CmmLoad e2 _ = e1 `eq` e2 226 CmmReg r1 `eq` CmmReg r2 = r1==r2 227 CmmRegOff r1 i1 `eq` CmmRegOff r2 i2 = r1==r2 && i1==i2 228 CmmMachOp op1 es1 `eq` CmmMachOp op2 es2 = op1==op2 && es1 `eqs` es2 229 CmmStackSlot a1 i1 `eq` CmmStackSlot a2 i2 = eqArea a1 a2 && i1==i2 230 _e1 `eq` _e2 = False 231 232 xs `eqs` ys = eqListWith eq xs ys 233 234 eqLit (CmmBlock id1) (CmmBlock id2) = eqBid id1 id2 235 eqLit l1 l2 = l1 == l2 236 237 eqArea Old Old = True 238 eqArea (Young id1) (Young id2) = eqBid id1 id2 239 eqArea _ _ = False 240 241-- Equality on the body of a block, modulo a function mapping block 242-- IDs to block IDs. 243eqBlockBodyWith :: (BlockId -> BlockId -> Bool) -> CmmBlock -> CmmBlock -> Bool 244eqBlockBodyWith eqBid block block' 245 {- 246 | equal = pprTrace "equal" (vcat [ppr block, ppr block']) True 247 | otherwise = pprTrace "not equal" (vcat [ppr block, ppr block']) False 248 -} 249 = equal 250 where (_,m,l) = blockSplit block 251 nodes = filter (not . dont_care) (blockToList m) 252 (_,m',l') = blockSplit block' 253 nodes' = filter (not . dont_care) (blockToList m') 254 255 equal = eqListWith (eqMiddleWith eqBid) nodes nodes' && 256 eqLastWith eqBid l l' 257 258 259eqLastWith :: (BlockId -> BlockId -> Bool) -> CmmNode O C -> CmmNode O C -> Bool 260eqLastWith eqBid (CmmBranch bid1) (CmmBranch bid2) = eqBid bid1 bid2 261eqLastWith eqBid (CmmCondBranch c1 t1 f1 l1) (CmmCondBranch c2 t2 f2 l2) = 262 c1 == c2 && l1 == l2 && eqBid t1 t2 && eqBid f1 f2 263eqLastWith eqBid (CmmCall t1 c1 g1 a1 r1 u1) (CmmCall t2 c2 g2 a2 r2 u2) = 264 t1 == t2 && eqMaybeWith eqBid c1 c2 && a1 == a2 && r1 == r2 && u1 == u2 && g1 == g2 265eqLastWith eqBid (CmmSwitch e1 ids1) (CmmSwitch e2 ids2) = 266 e1 == e2 && eqSwitchTargetWith eqBid ids1 ids2 267eqLastWith _ _ _ = False 268 269eqMaybeWith :: (a -> b -> Bool) -> Maybe a -> Maybe b -> Bool 270eqMaybeWith eltEq (Just e) (Just e') = eltEq e e' 271eqMaybeWith _ Nothing Nothing = True 272eqMaybeWith _ _ _ = False 273 274eqListWith :: (a -> b -> Bool) -> [a] -> [b] -> Bool 275eqListWith f (a : as) (b : bs) = f a b && eqListWith f as bs 276eqListWith _ [] [] = True 277eqListWith _ _ _ = False 278 279-- | Given a block map, ensure that all "target" blocks are covered by 280-- the same ticks as the respective "source" blocks. This not only 281-- means copying ticks, but also adjusting tick scopes where 282-- necessary. 283copyTicks :: LabelMap BlockId -> CmmGraph -> CmmGraph 284copyTicks env g 285 | mapNull env = g 286 | otherwise = ofBlockMap (g_entry g) $ mapMap copyTo blockMap 287 where -- Reverse block merge map 288 blockMap = toBlockMap g 289 revEnv = mapFoldlWithKey insertRev M.empty env 290 insertRev m k x = M.insertWith (const (k:)) x [k] m 291 -- Copy ticks and scopes into the given block 292 copyTo block = case M.lookup (entryLabel block) revEnv of 293 Nothing -> block 294 Just ls -> foldr copy block $ mapMaybe (flip mapLookup blockMap) ls 295 copy from to = 296 let ticks = blockTicks from 297 CmmEntry _ scp0 = firstNode from 298 (CmmEntry lbl scp1, code) = blockSplitHead to 299 in CmmEntry lbl (combineTickScopes scp0 scp1) `blockJoinHead` 300 foldr blockCons code (map CmmTick ticks) 301 302-- Group by [Label] 303-- See Note [Compressed TrieMap] in coreSyn/TrieMap about the usage of GenMap. 304groupByLabel :: [(Key, DistinctBlocks)] -> [(Key, [DistinctBlocks])] 305groupByLabel = 306 go (TM.emptyTM :: TM.ListMap (TM.GenMap LabelMap) (Key, [DistinctBlocks])) 307 where 308 go !m [] = TM.foldTM (:) m [] 309 go !m ((k,v) : entries) = go (TM.alterTM k adjust m) entries 310 where --k' = map (getKey . getUnique) k 311 adjust Nothing = Just (k,[v]) 312 adjust (Just (_,vs)) = Just (k,v:vs) 313 314groupByInt :: (a -> Int) -> [a] -> [[a]] 315groupByInt f xs = nonDetEltsUFM $ List.foldl' go emptyUFM xs 316 -- See Note [Unique Determinism and code generation] 317 where 318 go m x = alterUFM addEntry m (f x) 319 where 320 addEntry xs = Just $! maybe [x] (x:) xs 321