1-----------------------------------------------------------------------------
2--
3-- Cmm optimisation
4--
5-- (c) The University of Glasgow 2006
6--
7-----------------------------------------------------------------------------
8
9module CmmOpt (
10        constantFoldNode,
11        constantFoldExpr,
12        cmmMachOpFold,
13        cmmMachOpFoldM
14 ) where
15
16import GhcPrelude
17
18import CmmUtils
19import Cmm
20import DynFlags
21import Util
22
23import Outputable
24import GHC.Platform
25
26import Data.Bits
27import Data.Maybe
28
29
30constantFoldNode :: DynFlags -> CmmNode e x -> CmmNode e x
31constantFoldNode dflags = mapExp (constantFoldExpr dflags)
32
33constantFoldExpr :: DynFlags -> CmmExpr -> CmmExpr
34constantFoldExpr dflags = wrapRecExp f
35  where f (CmmMachOp op args) = cmmMachOpFold dflags op args
36        f (CmmRegOff r 0) = CmmReg r
37        f e = e
38
39-- -----------------------------------------------------------------------------
40-- MachOp constant folder
41
42-- Now, try to constant-fold the MachOps.  The arguments have already
43-- been optimized and folded.
44
45cmmMachOpFold
46    :: DynFlags
47    -> MachOp       -- The operation from an CmmMachOp
48    -> [CmmExpr]    -- The optimized arguments
49    -> CmmExpr
50
51cmmMachOpFold dflags op args = fromMaybe (CmmMachOp op args) (cmmMachOpFoldM dflags op args)
52
53-- Returns Nothing if no changes, useful for Hoopl, also reduces
54-- allocation!
55cmmMachOpFoldM
56    :: DynFlags
57    -> MachOp
58    -> [CmmExpr]
59    -> Maybe CmmExpr
60
61cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)]
62  = Just $ case op of
63      MO_S_Neg _ -> CmmLit (CmmInt (-x) rep)
64      MO_Not _   -> CmmLit (CmmInt (complement x) rep)
65
66        -- these are interesting: we must first narrow to the
67        -- "from" type, in order to truncate to the correct size.
68        -- The final narrow/widen to the destination type
69        -- is implicit in the CmmLit.
70      MO_SF_Conv _from to -> CmmLit (CmmFloat (fromInteger x) to)
71      MO_SS_Conv  from to -> CmmLit (CmmInt (narrowS from x) to)
72      MO_UU_Conv  from to -> CmmLit (CmmInt (narrowU from x) to)
73
74      _ -> panic $ "cmmMachOpFoldM: unknown unary op: " ++ show op
75
76
77-- Eliminate conversion NOPs
78cmmMachOpFoldM _ (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
79cmmMachOpFoldM _ (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
80
81-- Eliminate nested conversions where possible
82cmmMachOpFoldM dflags conv_outer [CmmMachOp conv_inner [x]]
83  | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
84    Just (_,   rep3,signed2) <- isIntConversion conv_outer
85  = case () of
86        -- widen then narrow to the same size is a nop
87      _ | rep1 < rep2 && rep1 == rep3 -> Just x
88        -- Widen then narrow to different size: collapse to single conversion
89        -- but remember to use the signedness from the widening, just in case
90        -- the final conversion is a widen.
91        | rep1 < rep2 && rep2 > rep3 ->
92            Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x]
93        -- Nested widenings: collapse if the signedness is the same
94        | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
95            Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x]
96        -- Nested narrowings: collapse
97        | rep1 > rep2 && rep2 > rep3 ->
98            Just $ cmmMachOpFold dflags (MO_UU_Conv rep1 rep3) [x]
99        | otherwise ->
100            Nothing
101  where
102        isIntConversion (MO_UU_Conv rep1 rep2)
103          = Just (rep1,rep2,False)
104        isIntConversion (MO_SS_Conv rep1 rep2)
105          = Just (rep1,rep2,True)
106        isIntConversion _ = Nothing
107
108        intconv True  = MO_SS_Conv
109        intconv False = MO_UU_Conv
110
111-- ToDo: a narrow of a load can be collapsed into a narrow load, right?
112-- but what if the architecture only supports word-sized loads, should
113-- we do the transformation anyway?
114
115cmmMachOpFoldM dflags mop [CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
116  = case mop of
117        -- for comparisons: don't forget to narrow the arguments before
118        -- comparing, since they might be out of range.
119        MO_Eq _   -> Just $ CmmLit (CmmInt (if x_u == y_u then 1 else 0) (wordWidth dflags))
120        MO_Ne _   -> Just $ CmmLit (CmmInt (if x_u /= y_u then 1 else 0) (wordWidth dflags))
121
122        MO_U_Gt _ -> Just $ CmmLit (CmmInt (if x_u >  y_u then 1 else 0) (wordWidth dflags))
123        MO_U_Ge _ -> Just $ CmmLit (CmmInt (if x_u >= y_u then 1 else 0) (wordWidth dflags))
124        MO_U_Lt _ -> Just $ CmmLit (CmmInt (if x_u <  y_u then 1 else 0) (wordWidth dflags))
125        MO_U_Le _ -> Just $ CmmLit (CmmInt (if x_u <= y_u then 1 else 0) (wordWidth dflags))
126
127        MO_S_Gt _ -> Just $ CmmLit (CmmInt (if x_s >  y_s then 1 else 0) (wordWidth dflags))
128        MO_S_Ge _ -> Just $ CmmLit (CmmInt (if x_s >= y_s then 1 else 0) (wordWidth dflags))
129        MO_S_Lt _ -> Just $ CmmLit (CmmInt (if x_s <  y_s then 1 else 0) (wordWidth dflags))
130        MO_S_Le _ -> Just $ CmmLit (CmmInt (if x_s <= y_s then 1 else 0) (wordWidth dflags))
131
132        MO_Add r -> Just $ CmmLit (CmmInt (x + y) r)
133        MO_Sub r -> Just $ CmmLit (CmmInt (x - y) r)
134        MO_Mul r -> Just $ CmmLit (CmmInt (x * y) r)
135        MO_U_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `quot` y_u) r)
136        MO_U_Rem  r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `rem`  y_u) r)
137        MO_S_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x `quot` y) r)
138        MO_S_Rem  r | y /= 0 -> Just $ CmmLit (CmmInt (x `rem` y) r)
139
140        MO_And   r -> Just $ CmmLit (CmmInt (x .&. y) r)
141        MO_Or    r -> Just $ CmmLit (CmmInt (x .|. y) r)
142        MO_Xor   r -> Just $ CmmLit (CmmInt (x `xor` y) r)
143
144        MO_Shl   r -> Just $ CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
145        MO_U_Shr r -> Just $ CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
146        MO_S_Shr r -> Just $ CmmLit (CmmInt (x `shiftR` fromIntegral y) r)
147
148        _          -> Nothing
149
150   where
151        x_u = narrowU xrep x
152        y_u = narrowU xrep y
153        x_s = narrowS xrep x
154        y_s = narrowS xrep y
155
156
157-- When possible, shift the constants to the right-hand side, so that we
158-- can match for strength reductions.  Note that the code generator will
159-- also assume that constants have been shifted to the right when
160-- possible.
161
162cmmMachOpFoldM dflags op [x@(CmmLit _), y]
163   | not (isLit y) && isCommutableMachOp op
164   = Just (cmmMachOpFold dflags op [y, x])
165
166-- Turn (a+b)+c into a+(b+c) where possible.  Because literals are
167-- moved to the right, it is more likely that we will find
168-- opportunities for constant folding when the expression is
169-- right-associated.
170--
171-- ToDo: this appears to introduce a quadratic behaviour due to the
172-- nested cmmMachOpFold.  Can we fix this?
173--
174-- Why do we check isLit arg1?  If arg1 is a lit, it means that arg2
175-- is also a lit (otherwise arg1 would be on the right).  If we
176-- put arg1 on the left of the rearranged expression, we'll get into a
177-- loop:  (x1+x2)+x3 => x1+(x2+x3)  => (x2+x3)+x1 => x2+(x3+x1) ...
178--
179-- Also don't do it if arg1 is PicBaseReg, so that we don't separate the
180-- PicBaseReg from the corresponding label (or label difference).
181--
182cmmMachOpFoldM dflags mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
183   | mop2 `associates_with` mop1
184     && not (isLit arg1) && not (isPicReg arg1)
185   = Just (cmmMachOpFold dflags mop2 [arg1, cmmMachOpFold dflags mop1 [arg2,arg3]])
186   where
187     MO_Add{} `associates_with` MO_Sub{} = True
188     mop1 `associates_with` mop2 =
189        mop1 == mop2 && isAssociativeMachOp mop1
190
191-- special case: (a - b) + c  ==>  a + (c - b)
192cmmMachOpFoldM dflags mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], arg3]
193   | not (isLit arg1) && not (isPicReg arg1)
194   = Just (cmmMachOpFold dflags mop1 [arg1, cmmMachOpFold dflags mop2 [arg3,arg2]])
195
196-- special case: (PicBaseReg + lit) + N  ==>  PicBaseReg + (lit+N)
197--
198-- this is better because lit+N is a single link-time constant (e.g. a
199-- CmmLabelOff), so the right-hand expression needs only one
200-- instruction, whereas the left needs two.  This happens when pointer
201-- tagging gives us label+offset, and PIC turns the label into
202-- PicBaseReg + label.
203--
204cmmMachOpFoldM _ MO_Add{} [ CmmMachOp op@MO_Add{} [pic, CmmLit lit]
205                          , CmmLit (CmmInt n rep) ]
206  | isPicReg pic
207  = Just $ CmmMachOp op [pic, CmmLit $ cmmOffsetLit lit off ]
208  where off = fromIntegral (narrowS rep n)
209
210-- Make a RegOff if we can
211cmmMachOpFoldM _ (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
212  = Just $ cmmRegOff reg (fromIntegral (narrowS rep n))
213cmmMachOpFoldM _ (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
214  = Just $ cmmRegOff reg (off + fromIntegral (narrowS rep n))
215cmmMachOpFoldM _ (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
216  = Just $ cmmRegOff reg (- fromIntegral (narrowS rep n))
217cmmMachOpFoldM _ (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
218  = Just $ cmmRegOff reg (off - fromIntegral (narrowS rep n))
219
220-- Fold label(+/-)offset into a CmmLit where possible
221
222cmmMachOpFoldM _ (MO_Add _) [CmmLit lit, CmmLit (CmmInt i rep)]
223  = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i)))
224cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmInt i rep), CmmLit lit]
225  = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i)))
226cmmMachOpFoldM _ (MO_Sub _) [CmmLit lit, CmmLit (CmmInt i rep)]
227  = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (negate (narrowU rep i))))
228
229
230-- Comparison of literal with widened operand: perform the comparison
231-- at the smaller width, as long as the literal is within range.
232
233-- We can't do the reverse trick, when the operand is narrowed:
234-- narrowing throws away bits from the operand, there's no way to do
235-- the same comparison at the larger size.
236
237cmmMachOpFoldM dflags cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
238  |     -- powerPC NCG has a TODO for I8/I16 comparisons, so don't try
239    platformArch (targetPlatform dflags) `elem` [ArchX86, ArchX86_64],
240        -- if the operand is widened:
241    Just (rep, signed, narrow_fn) <- maybe_conversion conv,
242        -- and this is a comparison operation:
243    Just narrow_cmp <- maybe_comparison cmp rep signed,
244        -- and the literal fits in the smaller size:
245    i == narrow_fn rep i
246        -- then we can do the comparison at the smaller size
247  = Just (cmmMachOpFold dflags narrow_cmp [x, CmmLit (CmmInt i rep)])
248 where
249    maybe_conversion (MO_UU_Conv from to)
250        | to > from
251        = Just (from, False, narrowU)
252    maybe_conversion (MO_SS_Conv from to)
253        | to > from
254        = Just (from, True, narrowS)
255
256        -- don't attempt to apply this optimisation when the source
257        -- is a float; see #1916
258    maybe_conversion _ = Nothing
259
260        -- careful (#2080): if the original comparison was signed, but
261        -- we were doing an unsigned widen, then we must do an
262        -- unsigned comparison at the smaller size.
263    maybe_comparison (MO_U_Gt _) rep _     = Just (MO_U_Gt rep)
264    maybe_comparison (MO_U_Ge _) rep _     = Just (MO_U_Ge rep)
265    maybe_comparison (MO_U_Lt _) rep _     = Just (MO_U_Lt rep)
266    maybe_comparison (MO_U_Le _) rep _     = Just (MO_U_Le rep)
267    maybe_comparison (MO_Eq   _) rep _     = Just (MO_Eq   rep)
268    maybe_comparison (MO_S_Gt _) rep True  = Just (MO_S_Gt rep)
269    maybe_comparison (MO_S_Ge _) rep True  = Just (MO_S_Ge rep)
270    maybe_comparison (MO_S_Lt _) rep True  = Just (MO_S_Lt rep)
271    maybe_comparison (MO_S_Le _) rep True  = Just (MO_S_Le rep)
272    maybe_comparison (MO_S_Gt _) rep False = Just (MO_U_Gt rep)
273    maybe_comparison (MO_S_Ge _) rep False = Just (MO_U_Ge rep)
274    maybe_comparison (MO_S_Lt _) rep False = Just (MO_U_Lt rep)
275    maybe_comparison (MO_S_Le _) rep False = Just (MO_U_Le rep)
276    maybe_comparison _ _ _ = Nothing
277
278-- We can often do something with constants of 0 and 1 ...
279-- See Note [Comparison operators]
280
281cmmMachOpFoldM dflags mop [x, y@(CmmLit (CmmInt 0 _))]
282  = case mop of
283        -- Arithmetic
284        MO_Add   _ -> Just x   -- x + 0 = x
285        MO_Sub   _ -> Just x   -- x - 0 = x
286        MO_Mul   _ -> Just y   -- x * 0 = 0
287
288        -- Logical operations
289        MO_And   _ -> Just y   -- x &     0 = 0
290        MO_Or    _ -> Just x   -- x |     0 = x
291        MO_Xor   _ -> Just x   -- x `xor` 0 = x
292
293        -- Shifts
294        MO_Shl   _ -> Just x   -- x << 0 = x
295        MO_S_Shr _ -> Just x   -- ditto shift-right
296        MO_U_Shr _ -> Just x
297
298        -- Comparisons; these ones are trickier
299        -- See Note [Comparison operators]
300        MO_Ne    _ | isComparisonExpr x -> Just x                -- (x > y) != 0  =  x > y
301        MO_Eq    _ | Just x' <- maybeInvertCmmExpr x -> Just x'  -- (x > y) == 0  =  x <= y
302        MO_U_Gt  _ | isComparisonExpr x -> Just x                -- (x > y) > 0   =  x > y
303        MO_S_Gt  _ | isComparisonExpr x -> Just x                -- ditto
304        MO_U_Lt  _ | isComparisonExpr x -> Just zero             -- (x > y) < 0  =  0
305        MO_S_Lt  _ | isComparisonExpr x -> Just zero
306        MO_U_Ge  _ | isComparisonExpr x -> Just one              -- (x > y) >= 0  =  1
307        MO_S_Ge  _ | isComparisonExpr x -> Just one
308
309        MO_U_Le  _ | Just x' <- maybeInvertCmmExpr x -> Just x'  -- (x > y) <= 0  =  x <= y
310        MO_S_Le  _ | Just x' <- maybeInvertCmmExpr x -> Just x'
311        _ -> Nothing
312  where
313    zero = CmmLit (CmmInt 0 (wordWidth dflags))
314    one  = CmmLit (CmmInt 1 (wordWidth dflags))
315
316cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt 1 rep))]
317  = case mop of
318        -- Arithmetic: x*1 = x, etc
319        MO_Mul    _ -> Just x
320        MO_S_Quot _ -> Just x
321        MO_U_Quot _ -> Just x
322        MO_S_Rem  _ -> Just $ CmmLit (CmmInt 0 rep)
323        MO_U_Rem  _ -> Just $ CmmLit (CmmInt 0 rep)
324
325        -- Comparisons; trickier
326        -- See Note [Comparison operators]
327        MO_Ne    _ | Just x' <- maybeInvertCmmExpr x -> Just x'  -- (x>y) != 1  =  x<=y
328        MO_Eq    _ | isComparisonExpr x -> Just x                -- (x>y) == 1  =  x>y
329        MO_U_Lt  _ | Just x' <- maybeInvertCmmExpr x -> Just x'  -- (x>y) < 1   =  x<=y
330        MO_S_Lt  _ | Just x' <- maybeInvertCmmExpr x -> Just x'  -- ditto
331        MO_U_Gt  _ | isComparisonExpr x -> Just zero             -- (x>y) > 1   = 0
332        MO_S_Gt  _ | isComparisonExpr x -> Just zero
333        MO_U_Le  _ | isComparisonExpr x -> Just one              -- (x>y) <= 1  = 1
334        MO_S_Le  _ | isComparisonExpr x -> Just one
335        MO_U_Ge  _ | isComparisonExpr x -> Just x                -- (x>y) >= 1  = x>y
336        MO_S_Ge  _ | isComparisonExpr x -> Just x
337        _ -> Nothing
338  where
339    zero = CmmLit (CmmInt 0 (wordWidth dflags))
340    one  = CmmLit (CmmInt 1 (wordWidth dflags))
341
342-- Now look for multiplication/division by powers of 2 (integers).
343
344cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt n _))]
345  = case mop of
346        MO_Mul rep
347           | Just p <- exactLog2 n ->
348                 Just (cmmMachOpFold dflags (MO_Shl rep) [x, CmmLit (CmmInt p rep)])
349        MO_U_Quot rep
350           | Just p <- exactLog2 n ->
351                 Just (cmmMachOpFold dflags (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)])
352        MO_U_Rem rep
353           | Just _ <- exactLog2 n ->
354                 Just (cmmMachOpFold dflags (MO_And rep) [x, CmmLit (CmmInt (n - 1) rep)])
355        MO_S_Quot rep
356           | Just p <- exactLog2 n,
357             CmmReg _ <- x ->   -- We duplicate x in signedQuotRemHelper, hence require
358                                -- it is a reg.  FIXME: remove this restriction.
359                Just (cmmMachOpFold dflags (MO_S_Shr rep)
360                  [signedQuotRemHelper rep p, CmmLit (CmmInt p rep)])
361        MO_S_Rem rep
362           | Just p <- exactLog2 n,
363             CmmReg _ <- x ->   -- We duplicate x in signedQuotRemHelper, hence require
364                                -- it is a reg.  FIXME: remove this restriction.
365                -- We replace (x `rem` 2^p) by (x - (x `quot` 2^p) * 2^p).
366                -- Moreover, we fuse MO_S_Shr (last operation of MO_S_Quot)
367                -- and MO_S_Shl (multiplication by 2^p) into a single MO_And operation.
368                Just (cmmMachOpFold dflags (MO_Sub rep)
369                    [x, cmmMachOpFold dflags (MO_And rep)
370                      [signedQuotRemHelper rep p, CmmLit (CmmInt (- n) rep)]])
371        _ -> Nothing
372  where
373    -- In contrast with unsigned integers, for signed ones
374    -- shift right is not the same as quot, because it rounds
375    -- to minus infinity, whereas quot rounds toward zero.
376    -- To fix this up, we add one less than the divisor to the
377    -- dividend if it is a negative number.
378    --
379    -- to avoid a test/jump, we use the following sequence:
380    --      x1 = x >> word_size-1  (all 1s if -ve, all 0s if +ve)
381    --      x2 = y & (divisor-1)
382    --      result = x + x2
383    -- this could be done a bit more simply using conditional moves,
384    -- but we're processor independent here.
385    --
386    -- we optimise the divide by 2 case slightly, generating
387    --      x1 = x >> word_size-1  (unsigned)
388    --      return = x + x1
389    signedQuotRemHelper :: Width -> Integer -> CmmExpr
390    signedQuotRemHelper rep p = CmmMachOp (MO_Add rep) [x, x2]
391      where
392        bits = fromIntegral (widthInBits rep) - 1
393        shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
394        x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
395        x2 = if p == 1 then x1 else
396             CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
397
398-- ToDo (#7116): optimise floating-point multiplication, e.g. x*2.0 -> x+x
399-- Unfortunately this needs a unique supply because x might not be a
400-- register.  See #2253 (program 6) for an example.
401
402
403-- Anything else is just too hard.
404
405cmmMachOpFoldM _ _ _ = Nothing
406
407{- Note [Comparison operators]
408~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
409If we have
410   CmmCondBranch ((x>#y) == 1) t f
411we really want to convert to
412   CmmCondBranch (x>#y) t f
413
414That's what the constant-folding operations on comparison operators do above.
415-}
416
417
418-- -----------------------------------------------------------------------------
419-- Utils
420
421isPicReg :: CmmExpr -> Bool
422isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True
423isPicReg _ = False
424