1----------------------------------------------------------------------------- 2-- 3-- Cmm optimisation 4-- 5-- (c) The University of Glasgow 2006 6-- 7----------------------------------------------------------------------------- 8 9module CmmOpt ( 10 constantFoldNode, 11 constantFoldExpr, 12 cmmMachOpFold, 13 cmmMachOpFoldM 14 ) where 15 16import GhcPrelude 17 18import CmmUtils 19import Cmm 20import DynFlags 21import Util 22 23import Outputable 24import GHC.Platform 25 26import Data.Bits 27import Data.Maybe 28 29 30constantFoldNode :: DynFlags -> CmmNode e x -> CmmNode e x 31constantFoldNode dflags = mapExp (constantFoldExpr dflags) 32 33constantFoldExpr :: DynFlags -> CmmExpr -> CmmExpr 34constantFoldExpr dflags = wrapRecExp f 35 where f (CmmMachOp op args) = cmmMachOpFold dflags op args 36 f (CmmRegOff r 0) = CmmReg r 37 f e = e 38 39-- ----------------------------------------------------------------------------- 40-- MachOp constant folder 41 42-- Now, try to constant-fold the MachOps. The arguments have already 43-- been optimized and folded. 44 45cmmMachOpFold 46 :: DynFlags 47 -> MachOp -- The operation from an CmmMachOp 48 -> [CmmExpr] -- The optimized arguments 49 -> CmmExpr 50 51cmmMachOpFold dflags op args = fromMaybe (CmmMachOp op args) (cmmMachOpFoldM dflags op args) 52 53-- Returns Nothing if no changes, useful for Hoopl, also reduces 54-- allocation! 55cmmMachOpFoldM 56 :: DynFlags 57 -> MachOp 58 -> [CmmExpr] 59 -> Maybe CmmExpr 60 61cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)] 62 = Just $ case op of 63 MO_S_Neg _ -> CmmLit (CmmInt (-x) rep) 64 MO_Not _ -> CmmLit (CmmInt (complement x) rep) 65 66 -- these are interesting: we must first narrow to the 67 -- "from" type, in order to truncate to the correct size. 68 -- The final narrow/widen to the destination type 69 -- is implicit in the CmmLit. 70 MO_SF_Conv _from to -> CmmLit (CmmFloat (fromInteger x) to) 71 MO_SS_Conv from to -> CmmLit (CmmInt (narrowS from x) to) 72 MO_UU_Conv from to -> CmmLit (CmmInt (narrowU from x) to) 73 74 _ -> panic $ "cmmMachOpFoldM: unknown unary op: " ++ show op 75 76 77-- Eliminate conversion NOPs 78cmmMachOpFoldM _ (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = Just x 79cmmMachOpFoldM _ (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = Just x 80 81-- Eliminate nested conversions where possible 82cmmMachOpFoldM dflags conv_outer [CmmMachOp conv_inner [x]] 83 | Just (rep1,rep2,signed1) <- isIntConversion conv_inner, 84 Just (_, rep3,signed2) <- isIntConversion conv_outer 85 = case () of 86 -- widen then narrow to the same size is a nop 87 _ | rep1 < rep2 && rep1 == rep3 -> Just x 88 -- Widen then narrow to different size: collapse to single conversion 89 -- but remember to use the signedness from the widening, just in case 90 -- the final conversion is a widen. 91 | rep1 < rep2 && rep2 > rep3 -> 92 Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x] 93 -- Nested widenings: collapse if the signedness is the same 94 | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 -> 95 Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x] 96 -- Nested narrowings: collapse 97 | rep1 > rep2 && rep2 > rep3 -> 98 Just $ cmmMachOpFold dflags (MO_UU_Conv rep1 rep3) [x] 99 | otherwise -> 100 Nothing 101 where 102 isIntConversion (MO_UU_Conv rep1 rep2) 103 = Just (rep1,rep2,False) 104 isIntConversion (MO_SS_Conv rep1 rep2) 105 = Just (rep1,rep2,True) 106 isIntConversion _ = Nothing 107 108 intconv True = MO_SS_Conv 109 intconv False = MO_UU_Conv 110 111-- ToDo: a narrow of a load can be collapsed into a narrow load, right? 112-- but what if the architecture only supports word-sized loads, should 113-- we do the transformation anyway? 114 115cmmMachOpFoldM dflags mop [CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)] 116 = case mop of 117 -- for comparisons: don't forget to narrow the arguments before 118 -- comparing, since they might be out of range. 119 MO_Eq _ -> Just $ CmmLit (CmmInt (if x_u == y_u then 1 else 0) (wordWidth dflags)) 120 MO_Ne _ -> Just $ CmmLit (CmmInt (if x_u /= y_u then 1 else 0) (wordWidth dflags)) 121 122 MO_U_Gt _ -> Just $ CmmLit (CmmInt (if x_u > y_u then 1 else 0) (wordWidth dflags)) 123 MO_U_Ge _ -> Just $ CmmLit (CmmInt (if x_u >= y_u then 1 else 0) (wordWidth dflags)) 124 MO_U_Lt _ -> Just $ CmmLit (CmmInt (if x_u < y_u then 1 else 0) (wordWidth dflags)) 125 MO_U_Le _ -> Just $ CmmLit (CmmInt (if x_u <= y_u then 1 else 0) (wordWidth dflags)) 126 127 MO_S_Gt _ -> Just $ CmmLit (CmmInt (if x_s > y_s then 1 else 0) (wordWidth dflags)) 128 MO_S_Ge _ -> Just $ CmmLit (CmmInt (if x_s >= y_s then 1 else 0) (wordWidth dflags)) 129 MO_S_Lt _ -> Just $ CmmLit (CmmInt (if x_s < y_s then 1 else 0) (wordWidth dflags)) 130 MO_S_Le _ -> Just $ CmmLit (CmmInt (if x_s <= y_s then 1 else 0) (wordWidth dflags)) 131 132 MO_Add r -> Just $ CmmLit (CmmInt (x + y) r) 133 MO_Sub r -> Just $ CmmLit (CmmInt (x - y) r) 134 MO_Mul r -> Just $ CmmLit (CmmInt (x * y) r) 135 MO_U_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `quot` y_u) r) 136 MO_U_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `rem` y_u) r) 137 MO_S_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x `quot` y) r) 138 MO_S_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x `rem` y) r) 139 140 MO_And r -> Just $ CmmLit (CmmInt (x .&. y) r) 141 MO_Or r -> Just $ CmmLit (CmmInt (x .|. y) r) 142 MO_Xor r -> Just $ CmmLit (CmmInt (x `xor` y) r) 143 144 MO_Shl r -> Just $ CmmLit (CmmInt (x `shiftL` fromIntegral y) r) 145 MO_U_Shr r -> Just $ CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r) 146 MO_S_Shr r -> Just $ CmmLit (CmmInt (x `shiftR` fromIntegral y) r) 147 148 _ -> Nothing 149 150 where 151 x_u = narrowU xrep x 152 y_u = narrowU xrep y 153 x_s = narrowS xrep x 154 y_s = narrowS xrep y 155 156 157-- When possible, shift the constants to the right-hand side, so that we 158-- can match for strength reductions. Note that the code generator will 159-- also assume that constants have been shifted to the right when 160-- possible. 161 162cmmMachOpFoldM dflags op [x@(CmmLit _), y] 163 | not (isLit y) && isCommutableMachOp op 164 = Just (cmmMachOpFold dflags op [y, x]) 165 166-- Turn (a+b)+c into a+(b+c) where possible. Because literals are 167-- moved to the right, it is more likely that we will find 168-- opportunities for constant folding when the expression is 169-- right-associated. 170-- 171-- ToDo: this appears to introduce a quadratic behaviour due to the 172-- nested cmmMachOpFold. Can we fix this? 173-- 174-- Why do we check isLit arg1? If arg1 is a lit, it means that arg2 175-- is also a lit (otherwise arg1 would be on the right). If we 176-- put arg1 on the left of the rearranged expression, we'll get into a 177-- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ... 178-- 179-- Also don't do it if arg1 is PicBaseReg, so that we don't separate the 180-- PicBaseReg from the corresponding label (or label difference). 181-- 182cmmMachOpFoldM dflags mop1 [CmmMachOp mop2 [arg1,arg2], arg3] 183 | mop2 `associates_with` mop1 184 && not (isLit arg1) && not (isPicReg arg1) 185 = Just (cmmMachOpFold dflags mop2 [arg1, cmmMachOpFold dflags mop1 [arg2,arg3]]) 186 where 187 MO_Add{} `associates_with` MO_Sub{} = True 188 mop1 `associates_with` mop2 = 189 mop1 == mop2 && isAssociativeMachOp mop1 190 191-- special case: (a - b) + c ==> a + (c - b) 192cmmMachOpFoldM dflags mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], arg3] 193 | not (isLit arg1) && not (isPicReg arg1) 194 = Just (cmmMachOpFold dflags mop1 [arg1, cmmMachOpFold dflags mop2 [arg3,arg2]]) 195 196-- special case: (PicBaseReg + lit) + N ==> PicBaseReg + (lit+N) 197-- 198-- this is better because lit+N is a single link-time constant (e.g. a 199-- CmmLabelOff), so the right-hand expression needs only one 200-- instruction, whereas the left needs two. This happens when pointer 201-- tagging gives us label+offset, and PIC turns the label into 202-- PicBaseReg + label. 203-- 204cmmMachOpFoldM _ MO_Add{} [ CmmMachOp op@MO_Add{} [pic, CmmLit lit] 205 , CmmLit (CmmInt n rep) ] 206 | isPicReg pic 207 = Just $ CmmMachOp op [pic, CmmLit $ cmmOffsetLit lit off ] 208 where off = fromIntegral (narrowS rep n) 209 210-- Make a RegOff if we can 211cmmMachOpFoldM _ (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)] 212 = Just $ cmmRegOff reg (fromIntegral (narrowS rep n)) 213cmmMachOpFoldM _ (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)] 214 = Just $ cmmRegOff reg (off + fromIntegral (narrowS rep n)) 215cmmMachOpFoldM _ (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)] 216 = Just $ cmmRegOff reg (- fromIntegral (narrowS rep n)) 217cmmMachOpFoldM _ (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)] 218 = Just $ cmmRegOff reg (off - fromIntegral (narrowS rep n)) 219 220-- Fold label(+/-)offset into a CmmLit where possible 221 222cmmMachOpFoldM _ (MO_Add _) [CmmLit lit, CmmLit (CmmInt i rep)] 223 = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i))) 224cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmInt i rep), CmmLit lit] 225 = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i))) 226cmmMachOpFoldM _ (MO_Sub _) [CmmLit lit, CmmLit (CmmInt i rep)] 227 = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (negate (narrowU rep i)))) 228 229 230-- Comparison of literal with widened operand: perform the comparison 231-- at the smaller width, as long as the literal is within range. 232 233-- We can't do the reverse trick, when the operand is narrowed: 234-- narrowing throws away bits from the operand, there's no way to do 235-- the same comparison at the larger size. 236 237cmmMachOpFoldM dflags cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)] 238 | -- powerPC NCG has a TODO for I8/I16 comparisons, so don't try 239 platformArch (targetPlatform dflags) `elem` [ArchX86, ArchX86_64], 240 -- if the operand is widened: 241 Just (rep, signed, narrow_fn) <- maybe_conversion conv, 242 -- and this is a comparison operation: 243 Just narrow_cmp <- maybe_comparison cmp rep signed, 244 -- and the literal fits in the smaller size: 245 i == narrow_fn rep i 246 -- then we can do the comparison at the smaller size 247 = Just (cmmMachOpFold dflags narrow_cmp [x, CmmLit (CmmInt i rep)]) 248 where 249 maybe_conversion (MO_UU_Conv from to) 250 | to > from 251 = Just (from, False, narrowU) 252 maybe_conversion (MO_SS_Conv from to) 253 | to > from 254 = Just (from, True, narrowS) 255 256 -- don't attempt to apply this optimisation when the source 257 -- is a float; see #1916 258 maybe_conversion _ = Nothing 259 260 -- careful (#2080): if the original comparison was signed, but 261 -- we were doing an unsigned widen, then we must do an 262 -- unsigned comparison at the smaller size. 263 maybe_comparison (MO_U_Gt _) rep _ = Just (MO_U_Gt rep) 264 maybe_comparison (MO_U_Ge _) rep _ = Just (MO_U_Ge rep) 265 maybe_comparison (MO_U_Lt _) rep _ = Just (MO_U_Lt rep) 266 maybe_comparison (MO_U_Le _) rep _ = Just (MO_U_Le rep) 267 maybe_comparison (MO_Eq _) rep _ = Just (MO_Eq rep) 268 maybe_comparison (MO_S_Gt _) rep True = Just (MO_S_Gt rep) 269 maybe_comparison (MO_S_Ge _) rep True = Just (MO_S_Ge rep) 270 maybe_comparison (MO_S_Lt _) rep True = Just (MO_S_Lt rep) 271 maybe_comparison (MO_S_Le _) rep True = Just (MO_S_Le rep) 272 maybe_comparison (MO_S_Gt _) rep False = Just (MO_U_Gt rep) 273 maybe_comparison (MO_S_Ge _) rep False = Just (MO_U_Ge rep) 274 maybe_comparison (MO_S_Lt _) rep False = Just (MO_U_Lt rep) 275 maybe_comparison (MO_S_Le _) rep False = Just (MO_U_Le rep) 276 maybe_comparison _ _ _ = Nothing 277 278-- We can often do something with constants of 0 and 1 ... 279-- See Note [Comparison operators] 280 281cmmMachOpFoldM dflags mop [x, y@(CmmLit (CmmInt 0 _))] 282 = case mop of 283 -- Arithmetic 284 MO_Add _ -> Just x -- x + 0 = x 285 MO_Sub _ -> Just x -- x - 0 = x 286 MO_Mul _ -> Just y -- x * 0 = 0 287 288 -- Logical operations 289 MO_And _ -> Just y -- x & 0 = 0 290 MO_Or _ -> Just x -- x | 0 = x 291 MO_Xor _ -> Just x -- x `xor` 0 = x 292 293 -- Shifts 294 MO_Shl _ -> Just x -- x << 0 = x 295 MO_S_Shr _ -> Just x -- ditto shift-right 296 MO_U_Shr _ -> Just x 297 298 -- Comparisons; these ones are trickier 299 -- See Note [Comparison operators] 300 MO_Ne _ | isComparisonExpr x -> Just x -- (x > y) != 0 = x > y 301 MO_Eq _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x > y) == 0 = x <= y 302 MO_U_Gt _ | isComparisonExpr x -> Just x -- (x > y) > 0 = x > y 303 MO_S_Gt _ | isComparisonExpr x -> Just x -- ditto 304 MO_U_Lt _ | isComparisonExpr x -> Just zero -- (x > y) < 0 = 0 305 MO_S_Lt _ | isComparisonExpr x -> Just zero 306 MO_U_Ge _ | isComparisonExpr x -> Just one -- (x > y) >= 0 = 1 307 MO_S_Ge _ | isComparisonExpr x -> Just one 308 309 MO_U_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x > y) <= 0 = x <= y 310 MO_S_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x' 311 _ -> Nothing 312 where 313 zero = CmmLit (CmmInt 0 (wordWidth dflags)) 314 one = CmmLit (CmmInt 1 (wordWidth dflags)) 315 316cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt 1 rep))] 317 = case mop of 318 -- Arithmetic: x*1 = x, etc 319 MO_Mul _ -> Just x 320 MO_S_Quot _ -> Just x 321 MO_U_Quot _ -> Just x 322 MO_S_Rem _ -> Just $ CmmLit (CmmInt 0 rep) 323 MO_U_Rem _ -> Just $ CmmLit (CmmInt 0 rep) 324 325 -- Comparisons; trickier 326 -- See Note [Comparison operators] 327 MO_Ne _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x>y) != 1 = x<=y 328 MO_Eq _ | isComparisonExpr x -> Just x -- (x>y) == 1 = x>y 329 MO_U_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x>y) < 1 = x<=y 330 MO_S_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- ditto 331 MO_U_Gt _ | isComparisonExpr x -> Just zero -- (x>y) > 1 = 0 332 MO_S_Gt _ | isComparisonExpr x -> Just zero 333 MO_U_Le _ | isComparisonExpr x -> Just one -- (x>y) <= 1 = 1 334 MO_S_Le _ | isComparisonExpr x -> Just one 335 MO_U_Ge _ | isComparisonExpr x -> Just x -- (x>y) >= 1 = x>y 336 MO_S_Ge _ | isComparisonExpr x -> Just x 337 _ -> Nothing 338 where 339 zero = CmmLit (CmmInt 0 (wordWidth dflags)) 340 one = CmmLit (CmmInt 1 (wordWidth dflags)) 341 342-- Now look for multiplication/division by powers of 2 (integers). 343 344cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt n _))] 345 = case mop of 346 MO_Mul rep 347 | Just p <- exactLog2 n -> 348 Just (cmmMachOpFold dflags (MO_Shl rep) [x, CmmLit (CmmInt p rep)]) 349 MO_U_Quot rep 350 | Just p <- exactLog2 n -> 351 Just (cmmMachOpFold dflags (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)]) 352 MO_U_Rem rep 353 | Just _ <- exactLog2 n -> 354 Just (cmmMachOpFold dflags (MO_And rep) [x, CmmLit (CmmInt (n - 1) rep)]) 355 MO_S_Quot rep 356 | Just p <- exactLog2 n, 357 CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require 358 -- it is a reg. FIXME: remove this restriction. 359 Just (cmmMachOpFold dflags (MO_S_Shr rep) 360 [signedQuotRemHelper rep p, CmmLit (CmmInt p rep)]) 361 MO_S_Rem rep 362 | Just p <- exactLog2 n, 363 CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require 364 -- it is a reg. FIXME: remove this restriction. 365 -- We replace (x `rem` 2^p) by (x - (x `quot` 2^p) * 2^p). 366 -- Moreover, we fuse MO_S_Shr (last operation of MO_S_Quot) 367 -- and MO_S_Shl (multiplication by 2^p) into a single MO_And operation. 368 Just (cmmMachOpFold dflags (MO_Sub rep) 369 [x, cmmMachOpFold dflags (MO_And rep) 370 [signedQuotRemHelper rep p, CmmLit (CmmInt (- n) rep)]]) 371 _ -> Nothing 372 where 373 -- In contrast with unsigned integers, for signed ones 374 -- shift right is not the same as quot, because it rounds 375 -- to minus infinity, whereas quot rounds toward zero. 376 -- To fix this up, we add one less than the divisor to the 377 -- dividend if it is a negative number. 378 -- 379 -- to avoid a test/jump, we use the following sequence: 380 -- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve) 381 -- x2 = y & (divisor-1) 382 -- result = x + x2 383 -- this could be done a bit more simply using conditional moves, 384 -- but we're processor independent here. 385 -- 386 -- we optimise the divide by 2 case slightly, generating 387 -- x1 = x >> word_size-1 (unsigned) 388 -- return = x + x1 389 signedQuotRemHelper :: Width -> Integer -> CmmExpr 390 signedQuotRemHelper rep p = CmmMachOp (MO_Add rep) [x, x2] 391 where 392 bits = fromIntegral (widthInBits rep) - 1 393 shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep 394 x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)] 395 x2 = if p == 1 then x1 else 396 CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)] 397 398-- ToDo (#7116): optimise floating-point multiplication, e.g. x*2.0 -> x+x 399-- Unfortunately this needs a unique supply because x might not be a 400-- register. See #2253 (program 6) for an example. 401 402 403-- Anything else is just too hard. 404 405cmmMachOpFoldM _ _ _ = Nothing 406 407{- Note [Comparison operators] 408~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 409If we have 410 CmmCondBranch ((x>#y) == 1) t f 411we really want to convert to 412 CmmCondBranch (x>#y) t f 413 414That's what the constant-folding operations on comparison operators do above. 415-} 416 417 418-- ----------------------------------------------------------------------------- 419-- Utils 420 421isPicReg :: CmmExpr -> Bool 422isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True 423isPicReg _ = False 424