1{-# LANGUAGE CPP, GADTs #-}
2
3-----------------------------------------------------------------------------
4--
5-- Generating machine code (instruction selection)
6--
7-- (c) The University of Glasgow 1996-2004
8--
9-----------------------------------------------------------------------------
10
11-- This is a big module, but, if you pay attention to
12-- (a) the sectioning, and (b) the type signatures,
13-- the structure should not be too overwhelming.
14
15module PPC.CodeGen (
16        cmmTopCodeGen,
17        generateJumpTableForInstr,
18        InstrBlock
19)
20
21where
22
23#include "HsVersions.h"
24
25-- NCG stuff:
26import GhcPrelude
27
28import GHC.Platform.Regs
29import PPC.Instr
30import PPC.Cond
31import PPC.Regs
32import CPrim
33import NCGMonad   ( NatM, getNewRegNat, getNewLabelNat
34                  , getBlockIdNat, getPicBaseNat, getNewRegPairNat
35                  , getPicBaseMaybeNat )
36import Instruction
37import PIC
38import Format
39import RegClass
40import Reg
41import TargetReg
42import GHC.Platform
43
44-- Our intermediate code:
45import BlockId
46import PprCmm           ( pprExpr )
47import Cmm
48import CmmUtils
49import CmmSwitch
50import CLabel
51import Hoopl.Block
52import Hoopl.Graph
53
54-- The rest:
55import OrdList
56import Outputable
57import DynFlags
58
59import Control.Monad    ( mapAndUnzipM, when )
60import Data.Bits
61import Data.Word
62
63import BasicTypes
64import FastString
65import Util
66
67-- -----------------------------------------------------------------------------
68-- Top-level of the instruction selector
69
70-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
71-- They are really trees of insns to facilitate fast appending, where a
72-- left-to-right traversal (pre-order?) yields the insns in the correct
73-- order.
74
75cmmTopCodeGen
76        :: RawCmmDecl
77        -> NatM [NatCmmDecl CmmStatics Instr]
78
79cmmTopCodeGen (CmmProc info lab live graph) = do
80  let blocks = toBlockListEntryFirst graph
81  (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
82  dflags <- getDynFlags
83  let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
84      tops = proc : concat statics
85      os   = platformOS $ targetPlatform dflags
86      arch = platformArch $ targetPlatform dflags
87  case arch of
88    ArchPPC | os == OSAIX -> return tops
89            | otherwise -> do
90      picBaseMb <- getPicBaseMaybeNat
91      case picBaseMb of
92           Just picBase -> initializePicBase_ppc arch os picBase tops
93           Nothing -> return tops
94    ArchPPC_64 ELF_V1 -> fixup_entry tops
95                      -- generating function descriptor is handled in
96                      -- pretty printer
97    ArchPPC_64 ELF_V2 -> fixup_entry tops
98                      -- generating function prologue is handled in
99                      -- pretty printer
100    _          -> panic "PPC.cmmTopCodeGen: unknown arch"
101    where
102      fixup_entry (CmmProc info lab live (ListGraph (entry:blocks)) : statics)
103        = do
104        let BasicBlock bID insns = entry
105        bID' <- if lab == (blockLbl bID)
106                then newBlockId
107                else return bID
108        let b' = BasicBlock bID' insns
109        return (CmmProc info lab live (ListGraph (b':blocks)) : statics)
110      fixup_entry _ = panic "cmmTopCodegen: Broken CmmProc"
111
112cmmTopCodeGen (CmmData sec dat) = do
113  return [CmmData sec dat]  -- no translation, we just use CmmStatic
114
115basicBlockCodeGen
116        :: Block CmmNode C C
117        -> NatM ( [NatBasicBlock Instr]
118                , [NatCmmDecl CmmStatics Instr])
119
120basicBlockCodeGen block = do
121  let (_, nodes, tail)  = blockSplit block
122      id = entryLabel block
123      stmts = blockToList nodes
124  mid_instrs <- stmtsToInstrs stmts
125  tail_instrs <- stmtToInstrs tail
126  let instrs = mid_instrs `appOL` tail_instrs
127  -- code generation may introduce new basic block boundaries, which
128  -- are indicated by the NEWBLOCK instruction.  We must split up the
129  -- instruction stream into basic blocks again.  Also, we extract
130  -- LDATAs here too.
131  let
132        (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
133
134        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
135          = ([], BasicBlock id instrs : blocks, statics)
136        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
137          = (instrs, blocks, CmmData sec dat:statics)
138        mkBlocks instr (instrs,blocks,statics)
139          = (instr:instrs, blocks, statics)
140  return (BasicBlock id top : other_blocks, statics)
141
142stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
143stmtsToInstrs stmts
144   = do instrss <- mapM stmtToInstrs stmts
145        return (concatOL instrss)
146
147stmtToInstrs :: CmmNode e x -> NatM InstrBlock
148stmtToInstrs stmt = do
149  dflags <- getDynFlags
150  case stmt of
151    CmmComment s   -> return (unitOL (COMMENT s))
152    CmmTick {}     -> return nilOL
153    CmmUnwind {}   -> return nilOL
154
155    CmmAssign reg src
156      | isFloatType ty -> assignReg_FltCode format reg src
157      | target32Bit (targetPlatform dflags) &&
158        isWord64 ty    -> assignReg_I64Code      reg src
159      | otherwise      -> assignReg_IntCode format reg src
160        where ty = cmmRegType dflags reg
161              format = cmmTypeFormat ty
162
163    CmmStore addr src
164      | isFloatType ty -> assignMem_FltCode format addr src
165      | target32Bit (targetPlatform dflags) &&
166        isWord64 ty    -> assignMem_I64Code      addr src
167      | otherwise      -> assignMem_IntCode format addr src
168        where ty = cmmExprType dflags src
169              format = cmmTypeFormat ty
170
171    CmmUnsafeForeignCall target result_regs args
172       -> genCCall target result_regs args
173
174    CmmBranch id          -> genBranch id
175    CmmCondBranch arg true false prediction -> do
176      b1 <- genCondJump true arg prediction
177      b2 <- genBranch false
178      return (b1 `appOL` b2)
179    CmmSwitch arg ids -> do dflags <- getDynFlags
180                            genSwitch dflags arg ids
181    CmmCall { cml_target = arg
182            , cml_args_regs = gregs } -> do
183                                dflags <- getDynFlags
184                                genJump arg (jumpRegs dflags gregs)
185    _ ->
186      panic "stmtToInstrs: statement should have been cps'd away"
187
188jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
189jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
190    where platform = targetPlatform dflags
191
192--------------------------------------------------------------------------------
193-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
194--      They are really trees of insns to facilitate fast appending, where a
195--      left-to-right traversal yields the insns in the correct order.
196--
197type InstrBlock
198        = OrdList Instr
199
200
201-- | Register's passed up the tree.  If the stix code forces the register
202--      to live in a pre-decided machine register, it comes out as @Fixed@;
203--      otherwise, it comes out as @Any@, and the parent can decide which
204--      register to put it in.
205--
206data Register
207        = Fixed Format Reg InstrBlock
208        | Any   Format (Reg -> InstrBlock)
209
210
211swizzleRegisterRep :: Register -> Format -> Register
212swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
213swizzleRegisterRep (Any _ codefn)     format = Any   format codefn
214
215
216-- | Grab the Reg for a CmmReg
217getRegisterReg :: Platform -> CmmReg -> Reg
218
219getRegisterReg _ (CmmLocal (LocalReg u pk))
220  = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
221
222getRegisterReg platform (CmmGlobal mid)
223  = case globalRegMaybe platform mid of
224        Just reg -> RegReal reg
225        Nothing  -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
226        -- By this stage, the only MagicIds remaining should be the
227        -- ones which map to a real machine register on this
228        -- platform.  Hence ...
229
230-- | Convert a BlockId to some CmmStatic data
231jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
232jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
233jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
234    where blockLabel = blockLbl blockid
235
236
237
238-- -----------------------------------------------------------------------------
239-- General things for putting together code sequences
240
241-- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
242-- CmmExprs into CmmRegOff?
243mangleIndexTree :: DynFlags -> CmmExpr -> CmmExpr
244mangleIndexTree dflags (CmmRegOff reg off)
245  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
246  where width = typeWidth (cmmRegType dflags reg)
247
248mangleIndexTree _ _
249        = panic "PPC.CodeGen.mangleIndexTree: no match"
250
251-- -----------------------------------------------------------------------------
252--  Code gen for 64-bit arithmetic on 32-bit platforms
253
254{-
255Simple support for generating 64-bit code (ie, 64 bit values and 64
256bit assignments) on 32-bit platforms.  Unlike the main code generator
257we merely shoot for generating working code as simply as possible, and
258pay little attention to code quality.  Specifically, there is no
259attempt to deal cleverly with the fixed-vs-floating register
260distinction; all values are generated into (pairs of) floating
261registers, even if this would mean some redundant reg-reg moves as a
262result.  Only one of the VRegUniques is returned, since it will be
263of the VRegUniqueLo form, and the upper-half VReg can be determined
264by applying getHiVRegFromLo to it.
265-}
266
267data ChildCode64        -- a.k.a "Register64"
268      = ChildCode64
269           InstrBlock   -- code
270           Reg          -- the lower 32-bit temporary which contains the
271                        -- result; use getHiVRegFromLo to find the other
272                        -- VRegUnique.  Rules of this simplified insn
273                        -- selection game are therefore that the returned
274                        -- Reg may be modified
275
276
277-- | Compute an expression into a register, but
278--      we don't mind which one it is.
279getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
280getSomeReg expr = do
281  r <- getRegister expr
282  case r of
283    Any rep code -> do
284        tmp <- getNewRegNat rep
285        return (tmp, code tmp)
286    Fixed _ reg code ->
287        return (reg, code)
288
289getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
290getI64Amodes addrTree = do
291    Amode hi_addr addr_code <- getAmode D addrTree
292    case addrOffset hi_addr 4 of
293        Just lo_addr -> return (hi_addr, lo_addr, addr_code)
294        Nothing      -> do (hi_ptr, code) <- getSomeReg addrTree
295                           return (AddrRegImm hi_ptr (ImmInt 0),
296                                   AddrRegImm hi_ptr (ImmInt 4),
297                                   code)
298
299
300assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
301assignMem_I64Code addrTree valueTree = do
302        (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
303        ChildCode64 vcode rlo <- iselExpr64 valueTree
304        let
305                rhi = getHiVRegFromLo rlo
306
307                -- Big-endian store
308                mov_hi = ST II32 rhi hi_addr
309                mov_lo = ST II32 rlo lo_addr
310        return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
311
312
313assignReg_I64Code :: CmmReg  -> CmmExpr -> NatM InstrBlock
314assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
315   ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
316   let
317         r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
318         r_dst_hi = getHiVRegFromLo r_dst_lo
319         r_src_hi = getHiVRegFromLo r_src_lo
320         mov_lo = MR r_dst_lo r_src_lo
321         mov_hi = MR r_dst_hi r_src_hi
322   return (
323        vcode `snocOL` mov_lo `snocOL` mov_hi
324     )
325
326assignReg_I64Code _ _
327   = panic "assignReg_I64Code(powerpc): invalid lvalue"
328
329
330iselExpr64        :: CmmExpr -> NatM ChildCode64
331iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
332    (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
333    (rlo, rhi) <- getNewRegPairNat II32
334    let mov_hi = LD II32 rhi hi_addr
335        mov_lo = LD II32 rlo lo_addr
336    return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
337                         rlo
338
339iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
340   = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
341
342iselExpr64 (CmmLit (CmmInt i _)) = do
343  (rlo,rhi) <- getNewRegPairNat II32
344  let
345        half0 = fromIntegral (fromIntegral i :: Word16)
346        half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
347        half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
348        half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
349
350        code = toOL [
351                LIS rlo (ImmInt half1),
352                OR rlo rlo (RIImm $ ImmInt half0),
353                LIS rhi (ImmInt half3),
354                OR rhi rhi (RIImm $ ImmInt half2)
355                ]
356  return (ChildCode64 code rlo)
357
358iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
359   ChildCode64 code1 r1lo <- iselExpr64 e1
360   ChildCode64 code2 r2lo <- iselExpr64 e2
361   (rlo,rhi) <- getNewRegPairNat II32
362   let
363        r1hi = getHiVRegFromLo r1lo
364        r2hi = getHiVRegFromLo r2lo
365        code =  code1 `appOL`
366                code2 `appOL`
367                toOL [ ADDC rlo r1lo r2lo,
368                       ADDE rhi r1hi r2hi ]
369   return (ChildCode64 code rlo)
370
371iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
372   ChildCode64 code1 r1lo <- iselExpr64 e1
373   ChildCode64 code2 r2lo <- iselExpr64 e2
374   (rlo,rhi) <- getNewRegPairNat II32
375   let
376        r1hi = getHiVRegFromLo r1lo
377        r2hi = getHiVRegFromLo r2lo
378        code =  code1 `appOL`
379                code2 `appOL`
380                toOL [ SUBFC rlo r2lo (RIReg r1lo),
381                       SUBFE rhi r2hi r1hi ]
382   return (ChildCode64 code rlo)
383
384iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
385    (expr_reg,expr_code) <- getSomeReg expr
386    (rlo, rhi) <- getNewRegPairNat II32
387    let mov_hi = LI rhi (ImmInt 0)
388        mov_lo = MR rlo expr_reg
389    return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
390                         rlo
391
392iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
393    (expr_reg,expr_code) <- getSomeReg expr
394    (rlo, rhi) <- getNewRegPairNat II32
395    let mov_hi = SRA II32 rhi expr_reg (RIImm (ImmInt 31))
396        mov_lo = MR rlo expr_reg
397    return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
398                         rlo
399iselExpr64 expr
400   = pprPanic "iselExpr64(powerpc)" (pprExpr expr)
401
402
403
404getRegister :: CmmExpr -> NatM Register
405getRegister e = do dflags <- getDynFlags
406                   getRegister' dflags e
407
408getRegister' :: DynFlags -> CmmExpr -> NatM Register
409
410getRegister' dflags (CmmReg (CmmGlobal PicBaseReg))
411  | OSAIX <- platformOS (targetPlatform dflags) = do
412        let code dst = toOL [ LD II32 dst tocAddr ]
413            tocAddr = AddrRegImm toc (ImmLit (text "ghc_toc_table[TC]"))
414        return (Any II32 code)
415  | target32Bit (targetPlatform dflags) = do
416      reg <- getPicBaseNat $ archWordFormat (target32Bit (targetPlatform dflags))
417      return (Fixed (archWordFormat (target32Bit (targetPlatform dflags)))
418                    reg nilOL)
419  | otherwise = return (Fixed II64 toc nilOL)
420
421getRegister' dflags (CmmReg reg)
422  = return (Fixed (cmmTypeFormat (cmmRegType dflags reg))
423                  (getRegisterReg (targetPlatform dflags) reg) nilOL)
424
425getRegister' dflags tree@(CmmRegOff _ _)
426  = getRegister' dflags (mangleIndexTree dflags tree)
427
428    -- for 32-bit architectuers, support some 64 -> 32 bit conversions:
429    -- TO_W_(x), TO_W_(x >> 32)
430
431getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32)
432                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
433 | target32Bit (targetPlatform dflags) = do
434  ChildCode64 code rlo <- iselExpr64 x
435  return $ Fixed II32 (getHiVRegFromLo rlo) code
436
437getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32)
438                     [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
439 | target32Bit (targetPlatform dflags) = do
440  ChildCode64 code rlo <- iselExpr64 x
441  return $ Fixed II32 (getHiVRegFromLo rlo) code
442
443getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) [x])
444 | target32Bit (targetPlatform dflags) = do
445  ChildCode64 code rlo <- iselExpr64 x
446  return $ Fixed II32 rlo code
447
448getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x])
449 | target32Bit (targetPlatform dflags) = do
450  ChildCode64 code rlo <- iselExpr64 x
451  return $ Fixed II32 rlo code
452
453getRegister' dflags (CmmLoad mem pk)
454 | not (isWord64 pk) = do
455        let platform = targetPlatform dflags
456        Amode addr addr_code <- getAmode D mem
457        let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk)
458                       addr_code `snocOL` LD format dst addr
459        return (Any format code)
460 | not (target32Bit (targetPlatform dflags)) = do
461        Amode addr addr_code <- getAmode DS mem
462        let code dst = addr_code `snocOL` LD II64 dst addr
463        return (Any II64 code)
464
465          where format = cmmTypeFormat pk
466
467-- catch simple cases of zero- or sign-extended load
468getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
469    Amode addr addr_code <- getAmode D mem
470    return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
471
472getRegister' _ (CmmMachOp (MO_XX_Conv W8 W32) [CmmLoad mem _]) = do
473    Amode addr addr_code <- getAmode D mem
474    return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
475
476getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
477    Amode addr addr_code <- getAmode D mem
478    return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
479
480getRegister' _ (CmmMachOp (MO_XX_Conv W8 W64) [CmmLoad mem _]) = do
481    Amode addr addr_code <- getAmode D mem
482    return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
483
484-- Note: there is no Load Byte Arithmetic instruction, so no signed case here
485
486getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
487    Amode addr addr_code <- getAmode D mem
488    return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
489
490getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
491    Amode addr addr_code <- getAmode D mem
492    return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
493
494getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
495    Amode addr addr_code <- getAmode D mem
496    return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr))
497
498getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
499    Amode addr addr_code <- getAmode D mem
500    return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr))
501
502getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
503    Amode addr addr_code <- getAmode D mem
504    return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr))
505
506getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
507    -- lwa is DS-form. See Note [Power instruction format]
508    Amode addr addr_code <- getAmode DS mem
509    return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr))
510
511getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
512  = case mop of
513      MO_Not rep   -> triv_ucode_int rep NOT
514
515      MO_F_Neg w   -> triv_ucode_float w FNEG
516      MO_S_Neg w   -> triv_ucode_int   w NEG
517
518      MO_FF_Conv W64 W32 -> trivialUCode  FF32 FRSP x
519      MO_FF_Conv W32 W64 -> conversionNop FF64 x
520
521      MO_FS_Conv from to -> coerceFP2Int from to x
522      MO_SF_Conv from to -> coerceInt2FP from to x
523
524      MO_SS_Conv from to
525        | from >= to -> conversionNop (intFormat to) x
526        | otherwise  -> triv_ucode_int to (EXTS (intFormat from))
527
528      MO_UU_Conv from to
529        | from >= to -> conversionNop (intFormat to) x
530        | otherwise  -> clearLeft from to
531
532      MO_XX_Conv _ to -> conversionNop (intFormat to) x
533
534      _ -> panic "PPC.CodeGen.getRegister: no match"
535
536    where
537        triv_ucode_int   width instr = trivialUCode (intFormat    width) instr x
538        triv_ucode_float width instr = trivialUCode (floatFormat  width) instr x
539
540        conversionNop new_format expr
541            = do e_code <- getRegister' dflags expr
542                 return (swizzleRegisterRep e_code new_format)
543
544        clearLeft from to
545            = do (src1, code1) <- getSomeReg x
546                 let arch_fmt  = intFormat (wordWidth dflags)
547                     arch_bits = widthInBits (wordWidth dflags)
548                     size      = widthInBits from
549                     code dst  = code1 `snocOL`
550                                 CLRLI arch_fmt dst src1 (arch_bits - size)
551                 return (Any (intFormat to) code)
552
553getRegister' _ (CmmMachOp mop [x, y]) -- dyadic PrimOps
554  = case mop of
555      MO_F_Eq _ -> condFltReg EQQ x y
556      MO_F_Ne _ -> condFltReg NE  x y
557      MO_F_Gt _ -> condFltReg GTT x y
558      MO_F_Ge _ -> condFltReg GE  x y
559      MO_F_Lt _ -> condFltReg LTT x y
560      MO_F_Le _ -> condFltReg LE  x y
561
562      MO_Eq rep -> condIntReg EQQ rep x y
563      MO_Ne rep -> condIntReg NE  rep x y
564
565      MO_S_Gt rep -> condIntReg GTT rep x y
566      MO_S_Ge rep -> condIntReg GE  rep x y
567      MO_S_Lt rep -> condIntReg LTT rep x y
568      MO_S_Le rep -> condIntReg LE  rep x y
569
570      MO_U_Gt rep -> condIntReg GU  rep x y
571      MO_U_Ge rep -> condIntReg GEU rep x y
572      MO_U_Lt rep -> condIntReg LU  rep x y
573      MO_U_Le rep -> condIntReg LEU rep x y
574
575      MO_F_Add w  -> triv_float w FADD
576      MO_F_Sub w  -> triv_float w FSUB
577      MO_F_Mul w  -> triv_float w FMUL
578      MO_F_Quot w -> triv_float w FDIV
579
580         -- optimize addition with 32-bit immediate
581         -- (needed for PIC)
582      MO_Add W32 ->
583        case y of
584          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True imm
585            -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
586          CmmLit lit
587            -> do
588                (src, srcCode) <- getSomeReg x
589                let imm = litToImm lit
590                    code dst = srcCode `appOL` toOL [
591                                    ADDIS dst src (HA imm),
592                                    ADD dst dst (RIImm (LO imm))
593                                ]
594                return (Any II32 code)
595          _ -> trivialCode W32 True ADD x y
596
597      MO_Add rep -> trivialCode rep True ADD x y
598      MO_Sub rep ->
599        case y of
600          CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
601            -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
602          _ -> case x of
603                 CmmLit (CmmInt imm _)
604                   | Just _ <- makeImmediate rep True imm
605                   -- subfi ('substract from' with immediate) doesn't exist
606                   -> trivialCode rep True SUBFC y x
607                 _ -> trivialCodeNoImm' (intFormat rep) SUBF y x
608
609      MO_Mul rep -> shiftMulCode rep True MULL x y
610      MO_S_MulMayOflo rep -> do
611        (src1, code1) <- getSomeReg x
612        (src2, code2) <- getSomeReg y
613        let
614          format = intFormat rep
615          code dst = code1 `appOL` code2
616                       `appOL` toOL [ MULLO format dst src1 src2
617                                    , MFOV  format dst
618                                    ]
619        return (Any format code)
620
621      MO_S_Quot rep -> divCode rep True x y
622      MO_U_Quot rep -> divCode rep False x y
623
624      MO_S_Rem rep -> remainder rep True x y
625      MO_U_Rem rep -> remainder rep False x y
626
627      MO_And rep   -> case y of
628        (CmmLit (CmmInt imm _)) | imm == -8 || imm == -4
629            -> do
630                (src, srcCode) <- getSomeReg x
631                let clear_mask = if imm == -4 then 2 else 3
632                    fmt = intFormat rep
633                    code dst = srcCode
634                               `appOL` unitOL (CLRRI fmt dst src clear_mask)
635                return (Any fmt code)
636        _ -> trivialCode rep False AND x y
637      MO_Or rep    -> trivialCode rep False OR x y
638      MO_Xor rep   -> trivialCode rep False XOR x y
639
640      MO_Shl rep   -> shiftMulCode rep False SL x y
641      MO_S_Shr rep -> srCode rep True SRA x y
642      MO_U_Shr rep -> srCode rep False SR x y
643      _         -> panic "PPC.CodeGen.getRegister: no match"
644
645  where
646    triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register
647    triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y
648
649    remainder :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register
650    remainder rep sgn x y = do
651      let fmt = intFormat rep
652      tmp <- getNewRegNat fmt
653      code <- remainderCode rep sgn tmp x y
654      return (Any fmt code)
655
656
657getRegister' _ (CmmLit (CmmInt i rep))
658  | Just imm <- makeImmediate rep True i
659  = let
660        code dst = unitOL (LI dst imm)
661    in
662        return (Any (intFormat rep) code)
663
664getRegister' _ (CmmLit (CmmFloat f frep)) = do
665    lbl <- getNewLabelNat
666    dflags <- getDynFlags
667    dynRef <- cmmMakeDynamicReference dflags DataReference lbl
668    Amode addr addr_code <- getAmode D dynRef
669    let format = floatFormat frep
670        code dst =
671            LDATA (Section ReadOnlyData lbl)
672                  (Statics lbl [CmmStaticLit (CmmFloat f frep)])
673            `consOL` (addr_code `snocOL` LD format dst addr)
674    return (Any format code)
675
676getRegister' dflags (CmmLit lit)
677  | target32Bit (targetPlatform dflags)
678  = let rep = cmmLitType dflags lit
679        imm = litToImm lit
680        code dst = toOL [
681              LIS dst (HA imm),
682              ADD dst dst (RIImm (LO imm))
683          ]
684    in return (Any (cmmTypeFormat rep) code)
685  | otherwise
686  = do lbl <- getNewLabelNat
687       dflags <- getDynFlags
688       dynRef <- cmmMakeDynamicReference dflags DataReference lbl
689       Amode addr addr_code <- getAmode D dynRef
690       let rep = cmmLitType dflags lit
691           format = cmmTypeFormat rep
692           code dst =
693            LDATA (Section ReadOnlyData lbl) (Statics lbl [CmmStaticLit lit])
694            `consOL` (addr_code `snocOL` LD format dst addr)
695       return (Any format code)
696
697getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other)
698
699    -- extend?Rep: wrap integer expression of type `from`
700    -- in a conversion to `to`
701extendSExpr :: Width -> Width -> CmmExpr -> CmmExpr
702extendSExpr from to x = CmmMachOp (MO_SS_Conv from to) [x]
703
704extendUExpr :: Width -> Width -> CmmExpr -> CmmExpr
705extendUExpr from to x = CmmMachOp (MO_UU_Conv from to) [x]
706
707-- -----------------------------------------------------------------------------
708--  The 'Amode' type: Memory addressing modes passed up the tree.
709
710data Amode
711        = Amode AddrMode InstrBlock
712
713{-
714Now, given a tree (the argument to a CmmLoad) that references memory,
715produce a suitable addressing mode.
716
717A Rule of the Game (tm) for Amodes: use of the addr bit must
718immediately follow use of the code part, since the code part puts
719values in registers which the addr then refers to.  So you can't put
720anything in between, lest it overwrite some of those registers.  If
721you need to do some other computation between the code part and use of
722the addr bit, first store the effective address from the amode in a
723temporary, then do the other computation, and then use the temporary:
724
725    code
726    LEA amode, tmp
727    ... other computation ...
728    ... (tmp) ...
729-}
730
731{- Note [Power instruction format]
732In some instructions the 16 bit offset must be a multiple of 4, i.e.
733the two least significant bits must be zero. The "Power ISA" specification
734calls these instruction formats "DS-FORM" and the instructions with
735arbitrary 16 bit offsets are "D-FORM".
736
737The Power ISA specification document can be obtained from www.power.org.
738-}
739data InstrForm = D | DS
740
741getAmode :: InstrForm -> CmmExpr -> NatM Amode
742getAmode inf tree@(CmmRegOff _ _)
743  = do dflags <- getDynFlags
744       getAmode inf (mangleIndexTree dflags tree)
745
746getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
747  | Just off <- makeImmediate W32 True (-i)
748  = do
749        (reg, code) <- getSomeReg x
750        return (Amode (AddrRegImm reg off) code)
751
752
753getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
754  | Just off <- makeImmediate W32 True i
755  = do
756        (reg, code) <- getSomeReg x
757        return (Amode (AddrRegImm reg off) code)
758
759getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
760  | Just off <- makeImmediate W64 True (-i)
761  = do
762        (reg, code) <- getSomeReg x
763        return (Amode (AddrRegImm reg off) code)
764
765
766getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
767  | Just off <- makeImmediate W64 True i
768  = do
769        (reg, code) <- getSomeReg x
770        return (Amode (AddrRegImm reg off) code)
771
772getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
773  | Just off <- makeImmediate W64 True (-i)
774  = do
775        (reg, code) <- getSomeReg x
776        (reg', off', code')  <-
777                     if i `mod` 4 == 0
778                      then do return (reg, off, code)
779                      else do
780                           tmp <- getNewRegNat II64
781                           return (tmp, ImmInt 0,
782                                  code `snocOL` ADD tmp reg (RIImm off))
783        return (Amode (AddrRegImm reg' off') code')
784
785getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
786  | Just off <- makeImmediate W64 True i
787  = do
788        (reg, code) <- getSomeReg x
789        (reg', off', code')  <-
790                     if i `mod` 4 == 0
791                      then do return (reg, off, code)
792                      else do
793                           tmp <- getNewRegNat II64
794                           return (tmp, ImmInt 0,
795                                  code `snocOL` ADD tmp reg (RIImm off))
796        return (Amode (AddrRegImm reg' off') code')
797
798   -- optimize addition with 32-bit immediate
799   -- (needed for PIC)
800getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit])
801  = do
802        dflags <- getDynFlags
803        (src, srcCode) <- getSomeReg x
804        let imm = litToImm lit
805        case () of
806            _ | OSAIX <- platformOS (targetPlatform dflags)
807              , isCmmLabelType lit ->
808                    -- HA16/LO16 relocations on labels not supported on AIX
809                    return (Amode (AddrRegImm src imm) srcCode)
810              | otherwise -> do
811                    tmp <- getNewRegNat II32
812                    let code = srcCode `snocOL` ADDIS tmp src (HA imm)
813                    return (Amode (AddrRegImm tmp (LO imm)) code)
814  where
815      isCmmLabelType (CmmLabel {})        = True
816      isCmmLabelType (CmmLabelOff {})     = True
817      isCmmLabelType (CmmLabelDiffOff {}) = True
818      isCmmLabelType _                    = False
819
820getAmode _ (CmmLit lit)
821  = do
822        dflags <- getDynFlags
823        case platformArch $ targetPlatform dflags of
824             ArchPPC -> do
825                 tmp <- getNewRegNat II32
826                 let imm = litToImm lit
827                     code = unitOL (LIS tmp (HA imm))
828                 return (Amode (AddrRegImm tmp (LO imm)) code)
829             _        -> do -- TODO: Load from TOC,
830                            -- see getRegister' _ (CmmLit lit)
831                 tmp <- getNewRegNat II64
832                 let imm = litToImm lit
833                     code =  toOL [
834                          LIS tmp (HIGHESTA imm),
835                          OR tmp tmp (RIImm (HIGHERA imm)),
836                          SL  II64 tmp tmp (RIImm (ImmInt 32)),
837                          ORIS tmp tmp (HA imm)
838                          ]
839                 return (Amode (AddrRegImm tmp (LO imm)) code)
840
841getAmode _ (CmmMachOp (MO_Add W32) [x, y])
842  = do
843        (regX, codeX) <- getSomeReg x
844        (regY, codeY) <- getSomeReg y
845        return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
846
847getAmode _ (CmmMachOp (MO_Add W64) [x, y])
848  = do
849        (regX, codeX) <- getSomeReg x
850        (regY, codeY) <- getSomeReg y
851        return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
852
853getAmode _ other
854  = do
855        (reg, code) <- getSomeReg other
856        let
857            off  = ImmInt 0
858        return (Amode (AddrRegImm reg off) code)
859
860
861--  The 'CondCode' type:  Condition codes passed up the tree.
862data CondCode
863        = CondCode Bool Cond InstrBlock
864
865-- Set up a condition code for a conditional branch.
866
867getCondCode :: CmmExpr -> NatM CondCode
868
869-- almost the same as everywhere else - but we need to
870-- extend small integers to 32 bit or 64 bit first
871
872getCondCode (CmmMachOp mop [x, y])
873  = do
874    case mop of
875      MO_F_Eq W32 -> condFltCode EQQ x y
876      MO_F_Ne W32 -> condFltCode NE  x y
877      MO_F_Gt W32 -> condFltCode GTT x y
878      MO_F_Ge W32 -> condFltCode GE  x y
879      MO_F_Lt W32 -> condFltCode LTT x y
880      MO_F_Le W32 -> condFltCode LE  x y
881
882      MO_F_Eq W64 -> condFltCode EQQ x y
883      MO_F_Ne W64 -> condFltCode NE  x y
884      MO_F_Gt W64 -> condFltCode GTT x y
885      MO_F_Ge W64 -> condFltCode GE  x y
886      MO_F_Lt W64 -> condFltCode LTT x y
887      MO_F_Le W64 -> condFltCode LE  x y
888
889      MO_Eq rep -> condIntCode EQQ rep x y
890      MO_Ne rep -> condIntCode NE  rep x y
891
892      MO_S_Gt rep -> condIntCode GTT rep x y
893      MO_S_Ge rep -> condIntCode GE  rep x y
894      MO_S_Lt rep -> condIntCode LTT rep x y
895      MO_S_Le rep -> condIntCode LE  rep x y
896
897      MO_U_Gt rep -> condIntCode GU  rep x y
898      MO_U_Ge rep -> condIntCode GEU rep x y
899      MO_U_Lt rep -> condIntCode LU  rep x y
900      MO_U_Le rep -> condIntCode LEU rep x y
901
902      _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
903
904getCondCode _ = panic "getCondCode(2)(powerpc)"
905
906
907-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
908-- passed back up the tree.
909
910condIntCode :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode
911condIntCode cond width x y = do
912  dflags <- getDynFlags
913  condIntCode' (target32Bit (targetPlatform dflags)) cond width x y
914
915condIntCode' :: Bool -> Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode
916
917-- simple code for 64-bit on 32-bit platforms
918condIntCode' True cond W64 x y
919  | condUnsigned cond
920  = do
921      ChildCode64 code_x x_lo <- iselExpr64 x
922      ChildCode64 code_y y_lo <- iselExpr64 y
923      let x_hi = getHiVRegFromLo x_lo
924          y_hi = getHiVRegFromLo y_lo
925      end_lbl <- getBlockIdNat
926      let code = code_x `appOL` code_y `appOL` toOL
927                 [ CMPL II32 x_hi (RIReg y_hi)
928                 , BCC NE end_lbl Nothing
929                 , CMPL II32 x_lo (RIReg y_lo)
930                 , BCC ALWAYS end_lbl Nothing
931
932                 , NEWBLOCK end_lbl
933                 ]
934      return (CondCode False cond code)
935  | otherwise
936  = do
937      ChildCode64 code_x x_lo <- iselExpr64 x
938      ChildCode64 code_y y_lo <- iselExpr64 y
939      let x_hi = getHiVRegFromLo x_lo
940          y_hi = getHiVRegFromLo y_lo
941      end_lbl <- getBlockIdNat
942      cmp_lo  <- getBlockIdNat
943      let code = code_x `appOL` code_y `appOL` toOL
944                 [ CMP II32 x_hi (RIReg y_hi)
945                 , BCC NE end_lbl Nothing
946                 , CMP II32 x_hi (RIImm (ImmInt 0))
947                 , BCC LE cmp_lo Nothing
948                 , CMPL II32 x_lo (RIReg y_lo)
949                 , BCC ALWAYS end_lbl Nothing
950                 , NEWBLOCK cmp_lo
951                 , CMPL II32 y_lo (RIReg x_lo)
952                 , BCC ALWAYS end_lbl Nothing
953
954                 , NEWBLOCK end_lbl
955                 ]
956      return (CondCode False cond code)
957
958-- optimize pointer tag checks. Operation andi. sets condition register
959-- so cmpi ..., 0 is redundant.
960condIntCode' _ cond _ (CmmMachOp (MO_And _) [x, CmmLit (CmmInt imm rep)])
961                 (CmmLit (CmmInt 0 _))
962  | not $ condUnsigned cond,
963    Just src2 <- makeImmediate rep False imm
964  = do
965      (src1, code) <- getSomeReg x
966      let code' = code `snocOL` AND r0 src1 (RIImm src2)
967      return (CondCode False cond code')
968
969condIntCode' _ cond width x (CmmLit (CmmInt y rep))
970  | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
971  = do
972      let op_len = max W32 width
973      let extend = extendSExpr width op_len
974      (src1, code) <- getSomeReg (extend x)
975      let format = intFormat op_len
976          code' = code `snocOL`
977            (if condUnsigned cond then CMPL else CMP) format src1 (RIImm src2)
978      return (CondCode False cond code')
979
980condIntCode' _ cond width x y = do
981  let op_len = max W32 width
982  let extend = if condUnsigned cond then extendUExpr width op_len
983               else extendSExpr width op_len
984  (src1, code1) <- getSomeReg (extend x)
985  (src2, code2) <- getSomeReg (extend y)
986  let format = intFormat op_len
987      code' = code1 `appOL` code2 `snocOL`
988        (if condUnsigned cond then CMPL else CMP) format src1 (RIReg src2)
989  return (CondCode False cond code')
990
991condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
992condFltCode cond x y = do
993    (src1, code1) <- getSomeReg x
994    (src2, code2) <- getSomeReg y
995    let
996        code'  = code1 `appOL` code2 `snocOL` FCMP src1 src2
997        code'' = case cond of -- twiddle CR to handle unordered case
998                    GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
999                    LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
1000                    _ -> code'
1001                 where
1002                    ltbit = 0 ; eqbit = 2 ; gtbit = 1
1003    return (CondCode True cond code'')
1004
1005
1006
1007-- -----------------------------------------------------------------------------
1008-- Generating assignments
1009
1010-- Assignments are really at the heart of the whole code generation
1011-- business.  Almost all top-level nodes of any real importance are
1012-- assignments, which correspond to loads, stores, or register
1013-- transfers.  If we're really lucky, some of the register transfers
1014-- will go away, because we can use the destination register to
1015-- complete the code generation for the right hand side.  This only
1016-- fails when the right hand side is forced into a fixed register
1017-- (e.g. the result of a call).
1018
1019assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
1020assignReg_IntCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock
1021
1022assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
1023assignReg_FltCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock
1024
1025assignMem_IntCode pk addr src = do
1026    (srcReg, code) <- getSomeReg src
1027    Amode dstAddr addr_code <- case pk of
1028                                II64 -> getAmode DS addr
1029                                _    -> getAmode D  addr
1030    return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
1031
1032-- dst is a reg, but src could be anything
1033assignReg_IntCode _ reg src
1034    = do
1035        dflags <- getDynFlags
1036        let dst = getRegisterReg (targetPlatform dflags) reg
1037        r <- getRegister src
1038        return $ case r of
1039            Any _ code         -> code dst
1040            Fixed _ freg fcode -> fcode `snocOL` MR dst freg
1041
1042
1043
1044-- Easy, isn't it?
1045assignMem_FltCode = assignMem_IntCode
1046assignReg_FltCode = assignReg_IntCode
1047
1048
1049
1050genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock
1051
1052genJump (CmmLit (CmmLabel lbl)) regs
1053  = return (unitOL $ JMP lbl regs)
1054
1055genJump tree gregs
1056  = do
1057        dflags <- getDynFlags
1058        genJump' tree (platformToGCP (targetPlatform dflags)) gregs
1059
1060genJump' :: CmmExpr -> GenCCallPlatform -> [Reg] -> NatM InstrBlock
1061
1062genJump' tree (GCP64ELF 1) regs
1063  = do
1064        (target,code) <- getSomeReg tree
1065        return (code
1066               `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 0))
1067               `snocOL` LD II64 toc (AddrRegImm target (ImmInt 8))
1068               `snocOL` MTCTR r11
1069               `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16))
1070               `snocOL` BCTR [] Nothing regs)
1071
1072genJump' tree (GCP64ELF 2) regs
1073  = do
1074        (target,code) <- getSomeReg tree
1075        return (code
1076               `snocOL` MR r12 target
1077               `snocOL` MTCTR r12
1078               `snocOL` BCTR [] Nothing regs)
1079
1080genJump' tree _ regs
1081  = do
1082        (target,code) <- getSomeReg tree
1083        return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing regs)
1084
1085-- -----------------------------------------------------------------------------
1086--  Unconditional branches
1087genBranch :: BlockId -> NatM InstrBlock
1088genBranch = return . toOL . mkJumpInstr
1089
1090
1091-- -----------------------------------------------------------------------------
1092--  Conditional jumps
1093
1094{-
1095Conditional jumps are always to local labels, so we can use branch
1096instructions.  We peek at the arguments to decide what kind of
1097comparison to do.
1098-}
1099
1100
1101genCondJump
1102    :: BlockId      -- the branch target
1103    -> CmmExpr      -- the condition on which to branch
1104    -> Maybe Bool
1105    -> NatM InstrBlock
1106
1107genCondJump id bool prediction = do
1108  CondCode _ cond code <- getCondCode bool
1109  return (code `snocOL` BCC cond id prediction)
1110
1111
1112
1113-- -----------------------------------------------------------------------------
1114--  Generating C calls
1115
1116-- Now the biggest nightmare---calls.  Most of the nastiness is buried in
1117-- @get_arg@, which moves the arguments to the correct registers/stack
1118-- locations.  Apart from that, the code is easy.
1119
1120genCCall :: ForeignTarget      -- function to call
1121         -> [CmmFormal]        -- where to put the result
1122         -> [CmmActual]        -- arguments (of mixed type)
1123         -> NatM InstrBlock
1124genCCall (PrimTarget MO_ReadBarrier) _ _
1125 = return $ unitOL LWSYNC
1126genCCall (PrimTarget MO_WriteBarrier) _ _
1127 = return $ unitOL LWSYNC
1128
1129genCCall (PrimTarget MO_Touch) _ _
1130 = return $ nilOL
1131
1132genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
1133 = return $ nilOL
1134
1135genCCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n]
1136 = do dflags <- getDynFlags
1137      let platform = targetPlatform dflags
1138          fmt      = intFormat width
1139          reg_dst  = getRegisterReg platform (CmmLocal dst)
1140      (instr, n_code) <- case amop of
1141            AMO_Add  -> getSomeRegOrImm ADD True reg_dst
1142            AMO_Sub  -> case n of
1143                CmmLit (CmmInt i _)
1144                  | Just imm <- makeImmediate width True (-i)
1145                   -> return (ADD reg_dst reg_dst (RIImm imm), nilOL)
1146                _
1147                   -> do
1148                         (n_reg, n_code) <- getSomeReg n
1149                         return  (SUBF reg_dst n_reg reg_dst, n_code)
1150            AMO_And  -> getSomeRegOrImm AND False reg_dst
1151            AMO_Nand -> do (n_reg, n_code) <- getSomeReg n
1152                           return (NAND reg_dst reg_dst n_reg, n_code)
1153            AMO_Or   -> getSomeRegOrImm OR False reg_dst
1154            AMO_Xor  -> getSomeRegOrImm XOR False reg_dst
1155      Amode addr_reg addr_code <- getAmodeIndex addr
1156      lbl_retry <- getBlockIdNat
1157      return $ n_code `appOL` addr_code
1158        `appOL` toOL [ HWSYNC
1159                     , BCC ALWAYS lbl_retry Nothing
1160
1161                     , NEWBLOCK lbl_retry
1162                     , LDR fmt reg_dst addr_reg
1163                     , instr
1164                     , STC fmt reg_dst addr_reg
1165                     , BCC NE lbl_retry (Just False)
1166                     , ISYNC
1167                     ]
1168         where
1169           getAmodeIndex (CmmMachOp (MO_Add _) [x, y])
1170             = do
1171                 (regX, codeX) <- getSomeReg x
1172                 (regY, codeY) <- getSomeReg y
1173                 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
1174           getAmodeIndex other
1175             = do
1176                 (reg, code) <- getSomeReg other
1177                 return (Amode (AddrRegReg r0 reg) code) -- NB: r0 is 0 here!
1178           getSomeRegOrImm op sign dst
1179             = case n of
1180                 CmmLit (CmmInt i _) | Just imm <- makeImmediate width sign i
1181                    -> return (op dst dst (RIImm imm), nilOL)
1182                 _
1183                    -> do
1184                          (n_reg, n_code) <- getSomeReg n
1185                          return  (op dst dst (RIReg n_reg), n_code)
1186
1187genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr]
1188 = do dflags <- getDynFlags
1189      let platform = targetPlatform dflags
1190          fmt      = intFormat width
1191          reg_dst  = getRegisterReg platform (CmmLocal dst)
1192          form     = if widthInBits width == 64 then DS else D
1193      Amode addr_reg addr_code <- getAmode form addr
1194      lbl_end <- getBlockIdNat
1195      return $ addr_code `appOL` toOL [ HWSYNC
1196                                      , LD fmt reg_dst addr_reg
1197                                      , CMP fmt reg_dst (RIReg reg_dst)
1198                                      , BCC NE lbl_end (Just False)
1199                                      , BCC ALWAYS lbl_end Nothing
1200                            -- See Note [Seemingly useless cmp and bne]
1201                                      , NEWBLOCK lbl_end
1202                                      , ISYNC
1203                                      ]
1204
1205-- Note [Seemingly useless cmp and bne]
1206-- In Power ISA, Book II, Section 4.4.1, Instruction Synchronize Instruction
1207-- the second paragraph says that isync may complete before storage accesses
1208-- "associated" with a preceding instruction have been performed. The cmp
1209-- operation and the following bne introduce a data and control dependency
1210-- on the load instruction (See also Power ISA, Book II, Appendix B.2.3, Safe
1211-- Fetch).
1212-- This is also what gcc does.
1213
1214
1215genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
1216    code <- assignMem_IntCode (intFormat width) addr val
1217    return $ unitOL(HWSYNC) `appOL` code
1218
1219genCCall (PrimTarget (MO_Clz width)) [dst] [src]
1220 = do dflags <- getDynFlags
1221      let platform = targetPlatform dflags
1222          reg_dst = getRegisterReg platform (CmmLocal dst)
1223      if target32Bit platform && width == W64
1224        then do
1225          ChildCode64 code vr_lo <- iselExpr64 src
1226          lbl1 <- getBlockIdNat
1227          lbl2 <- getBlockIdNat
1228          lbl3 <- getBlockIdNat
1229          let vr_hi = getHiVRegFromLo vr_lo
1230              cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0))
1231                           , BCC NE lbl2 Nothing
1232                           , BCC ALWAYS lbl1 Nothing
1233
1234                           , NEWBLOCK lbl1
1235                           , CNTLZ II32 reg_dst vr_lo
1236                           , ADD reg_dst reg_dst (RIImm (ImmInt 32))
1237                           , BCC ALWAYS lbl3 Nothing
1238
1239                           , NEWBLOCK lbl2
1240                           , CNTLZ II32 reg_dst vr_hi
1241                           , BCC ALWAYS lbl3 Nothing
1242
1243                           , NEWBLOCK lbl3
1244                           ]
1245          return $ code `appOL` cntlz
1246        else do
1247          let format = if width == W64 then II64 else II32
1248          (s_reg, s_code) <- getSomeReg src
1249          (pre, reg , post) <-
1250            case width of
1251              W64 -> return (nilOL, s_reg, nilOL)
1252              W32 -> return (nilOL, s_reg, nilOL)
1253              W16 -> do
1254                reg_tmp <- getNewRegNat format
1255                return
1256                  ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 65535))
1257                  , reg_tmp
1258                  , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-16)))
1259                  )
1260              W8  -> do
1261                reg_tmp <- getNewRegNat format
1262                return
1263                  ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 255))
1264                  , reg_tmp
1265                  , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-24)))
1266                  )
1267              _   -> panic "genCall: Clz wrong format"
1268          let cntlz = unitOL (CNTLZ format reg_dst reg)
1269          return $ s_code `appOL` pre `appOL` cntlz `appOL` post
1270
1271genCCall (PrimTarget (MO_Ctz width)) [dst] [src]
1272 = do dflags <- getDynFlags
1273      let platform = targetPlatform dflags
1274          reg_dst = getRegisterReg platform (CmmLocal dst)
1275      if target32Bit platform && width == W64
1276        then do
1277          let format = II32
1278          ChildCode64 code vr_lo <- iselExpr64 src
1279          lbl1 <- getBlockIdNat
1280          lbl2 <- getBlockIdNat
1281          lbl3 <- getBlockIdNat
1282          x' <- getNewRegNat format
1283          x'' <- getNewRegNat format
1284          r' <- getNewRegNat format
1285          cnttzlo <- cnttz format reg_dst vr_lo
1286          let vr_hi = getHiVRegFromLo vr_lo
1287              cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0))
1288                             , BCC NE lbl2 Nothing
1289                             , BCC ALWAYS lbl1 Nothing
1290
1291                             , NEWBLOCK lbl1
1292                             , ADD x' vr_hi (RIImm (ImmInt (-1)))
1293                             , ANDC x'' x' vr_hi
1294                             , CNTLZ format r' x''
1295                               -- 32 + (32 - clz(x''))
1296                             , SUBFC reg_dst r' (RIImm (ImmInt 64))
1297                             , BCC ALWAYS lbl3 Nothing
1298
1299                             , NEWBLOCK lbl2
1300                             ]
1301                        `appOL` cnttzlo `appOL`
1302                        toOL [ BCC ALWAYS lbl3 Nothing
1303
1304                             , NEWBLOCK lbl3
1305                             ]
1306          return $ code `appOL` cnttz64
1307        else do
1308          let format = if width == W64 then II64 else II32
1309          (s_reg, s_code) <- getSomeReg src
1310          (reg_ctz, pre_code) <-
1311            case width of
1312              W64 -> return (s_reg, nilOL)
1313              W32 -> return (s_reg, nilOL)
1314              W16 -> do
1315                reg_tmp <- getNewRegNat format
1316                return (reg_tmp, unitOL $ ORIS reg_tmp s_reg (ImmInt 1))
1317              W8  -> do
1318                reg_tmp <- getNewRegNat format
1319                return (reg_tmp, unitOL $ OR reg_tmp s_reg (RIImm (ImmInt 256)))
1320              _   -> panic "genCall: Ctz wrong format"
1321          ctz_code <- cnttz format reg_dst reg_ctz
1322          return $ s_code `appOL` pre_code `appOL` ctz_code
1323        where
1324          -- cnttz(x) = sizeof(x) - cntlz(~x & (x - 1))
1325          -- see Henry S. Warren, Hacker's Delight, p 107
1326          cnttz format dst src = do
1327            let format_bits = 8 * formatInBytes format
1328            x' <- getNewRegNat format
1329            x'' <- getNewRegNat format
1330            r' <- getNewRegNat format
1331            return $ toOL [ ADD x' src (RIImm (ImmInt (-1)))
1332                          , ANDC x'' x' src
1333                          , CNTLZ format r' x''
1334                          , SUBFC dst r' (RIImm (ImmInt (format_bits)))
1335                          ]
1336
1337genCCall target dest_regs argsAndHints
1338 = do dflags <- getDynFlags
1339      let platform = targetPlatform dflags
1340      case target of
1341        PrimTarget (MO_S_QuotRem  width) -> divOp1 platform True  width
1342                                                   dest_regs argsAndHints
1343        PrimTarget (MO_U_QuotRem  width) -> divOp1 platform False width
1344                                                   dest_regs argsAndHints
1345        PrimTarget (MO_U_QuotRem2 width) -> divOp2 platform width dest_regs
1346                                                   argsAndHints
1347        PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs
1348                                                argsAndHints
1349        PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints
1350        PrimTarget (MO_AddWordC _) -> addcOp platform dest_regs argsAndHints
1351        PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints
1352        PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width
1353                                                   dest_regs argsAndHints
1354        PrimTarget (MO_SubIntC width) -> addSubCOp SUBFO platform width
1355                                                   dest_regs argsAndHints
1356        PrimTarget MO_F64_Fabs -> fabs platform dest_regs argsAndHints
1357        PrimTarget MO_F32_Fabs -> fabs platform dest_regs argsAndHints
1358        _ -> genCCall' dflags (platformToGCP platform)
1359                       target dest_regs argsAndHints
1360        where divOp1 platform signed width [res_q, res_r] [arg_x, arg_y]
1361                = do let reg_q = getRegisterReg platform (CmmLocal res_q)
1362                         reg_r = getRegisterReg platform (CmmLocal res_r)
1363                     remainderCode width signed reg_q arg_x arg_y
1364                       <*> pure reg_r
1365
1366              divOp1 _ _ _ _ _
1367                = panic "genCCall: Wrong number of arguments for divOp1"
1368              divOp2 platform width [res_q, res_r]
1369                                    [arg_x_high, arg_x_low, arg_y]
1370                = do let reg_q = getRegisterReg platform (CmmLocal res_q)
1371                         reg_r = getRegisterReg platform (CmmLocal res_r)
1372                         fmt   = intFormat width
1373                         half  = 4 * (formatInBytes fmt)
1374                     (xh_reg, xh_code) <- getSomeReg arg_x_high
1375                     (xl_reg, xl_code) <- getSomeReg arg_x_low
1376                     (y_reg, y_code) <- getSomeReg arg_y
1377                     s <- getNewRegNat fmt
1378                     b <- getNewRegNat fmt
1379                     v <- getNewRegNat fmt
1380                     vn1 <- getNewRegNat fmt
1381                     vn0 <- getNewRegNat fmt
1382                     un32 <- getNewRegNat fmt
1383                     tmp  <- getNewRegNat fmt
1384                     un10 <- getNewRegNat fmt
1385                     un1 <- getNewRegNat fmt
1386                     un0 <- getNewRegNat fmt
1387                     q1 <- getNewRegNat fmt
1388                     rhat <- getNewRegNat fmt
1389                     tmp1 <- getNewRegNat fmt
1390                     q0 <- getNewRegNat fmt
1391                     un21 <- getNewRegNat fmt
1392                     again1 <- getBlockIdNat
1393                     no1 <- getBlockIdNat
1394                     then1 <- getBlockIdNat
1395                     endif1 <- getBlockIdNat
1396                     again2 <- getBlockIdNat
1397                     no2 <- getBlockIdNat
1398                     then2 <- getBlockIdNat
1399                     endif2 <- getBlockIdNat
1400                     return $ y_code `appOL` xl_code `appOL` xh_code `appOL`
1401                              -- see Hacker's Delight p 196 Figure 9-3
1402                              toOL [ -- b = 2 ^ (bits_in_word / 2)
1403                                     LI b (ImmInt 1)
1404                                   , SL fmt b b (RIImm (ImmInt half))
1405                                     -- s = clz(y)
1406                                   , CNTLZ fmt s y_reg
1407                                     -- v = y << s
1408                                   , SL fmt v y_reg (RIReg s)
1409                                     -- vn1 = upper half of v
1410                                   , SR fmt vn1 v (RIImm (ImmInt half))
1411                                     -- vn0 = lower half of v
1412                                   , CLRLI fmt vn0 v half
1413                                     -- un32 = (u1 << s)
1414                                     --      | (u0 >> (bits_in_word - s))
1415                                   , SL fmt un32 xh_reg (RIReg s)
1416                                   , SUBFC tmp s
1417                                        (RIImm (ImmInt (8 * formatInBytes fmt)))
1418                                   , SR fmt tmp xl_reg (RIReg tmp)
1419                                   , OR un32 un32 (RIReg tmp)
1420                                     -- un10 = u0 << s
1421                                   , SL fmt un10 xl_reg (RIReg s)
1422                                     -- un1 = upper half of un10
1423                                   , SR fmt un1 un10 (RIImm (ImmInt half))
1424                                     -- un0 = lower half of un10
1425                                   , CLRLI fmt un0 un10 half
1426                                     -- q1 = un32/vn1
1427                                   , DIV fmt False q1 un32 vn1
1428                                     -- rhat = un32 - q1*vn1
1429                                   , MULL fmt tmp q1 (RIReg vn1)
1430                                   , SUBF rhat tmp un32
1431                                   , BCC ALWAYS again1 Nothing
1432
1433                                   , NEWBLOCK again1
1434                                     -- if (q1 >= b || q1*vn0 > b*rhat + un1)
1435                                   , CMPL fmt q1 (RIReg b)
1436                                   , BCC GEU then1 Nothing
1437                                   , BCC ALWAYS no1 Nothing
1438
1439                                   , NEWBLOCK no1
1440                                   , MULL fmt tmp q1 (RIReg vn0)
1441                                   , SL fmt tmp1 rhat (RIImm (ImmInt half))
1442                                   , ADD tmp1 tmp1 (RIReg un1)
1443                                   , CMPL fmt tmp (RIReg tmp1)
1444                                   , BCC LEU endif1 Nothing
1445                                   , BCC ALWAYS then1 Nothing
1446
1447                                   , NEWBLOCK then1
1448                                     -- q1 = q1 - 1
1449                                   , ADD q1 q1 (RIImm (ImmInt (-1)))
1450                                     -- rhat = rhat + vn1
1451                                   , ADD rhat rhat (RIReg vn1)
1452                                     -- if (rhat < b) goto again1
1453                                   , CMPL fmt rhat (RIReg b)
1454                                   , BCC LTT again1 Nothing
1455                                   , BCC ALWAYS endif1 Nothing
1456
1457                                   , NEWBLOCK endif1
1458                                     -- un21 = un32*b + un1 - q1*v
1459                                   , SL fmt un21 un32 (RIImm (ImmInt half))
1460                                   , ADD un21 un21 (RIReg un1)
1461                                   , MULL fmt tmp q1 (RIReg v)
1462                                   , SUBF un21 tmp un21
1463                                     -- compute second quotient digit
1464                                     -- q0 = un21/vn1
1465                                   , DIV fmt False q0 un21 vn1
1466                                     -- rhat = un21- q0*vn1
1467                                   , MULL fmt tmp q0 (RIReg vn1)
1468                                   , SUBF rhat tmp un21
1469                                   , BCC ALWAYS again2 Nothing
1470
1471                                   , NEWBLOCK again2
1472                                     -- if (q0>b || q0*vn0 > b*rhat + un0)
1473                                   , CMPL fmt q0 (RIReg b)
1474                                   , BCC GEU then2 Nothing
1475                                   , BCC ALWAYS no2 Nothing
1476
1477                                   , NEWBLOCK no2
1478                                   , MULL fmt tmp q0 (RIReg vn0)
1479                                   , SL fmt tmp1 rhat (RIImm (ImmInt half))
1480                                   , ADD tmp1 tmp1 (RIReg un0)
1481                                   , CMPL fmt tmp (RIReg tmp1)
1482                                   , BCC LEU endif2 Nothing
1483                                   , BCC ALWAYS then2 Nothing
1484
1485                                   , NEWBLOCK then2
1486                                     -- q0 = q0 - 1
1487                                   , ADD q0 q0 (RIImm (ImmInt (-1)))
1488                                     -- rhat = rhat + vn1
1489                                   , ADD rhat rhat (RIReg vn1)
1490                                     -- if (rhat<b) goto again2
1491                                   , CMPL fmt rhat (RIReg b)
1492                                   , BCC LTT again2 Nothing
1493                                   , BCC ALWAYS endif2 Nothing
1494
1495                                   , NEWBLOCK endif2
1496                                     -- compute remainder
1497                                     -- r = (un21*b + un0 - q0*v) >> s
1498                                   , SL fmt reg_r un21 (RIImm (ImmInt half))
1499                                   , ADD reg_r reg_r (RIReg un0)
1500                                   , MULL fmt tmp q0 (RIReg v)
1501                                   , SUBF reg_r tmp reg_r
1502                                   , SR fmt reg_r reg_r (RIReg s)
1503                                     -- compute quotient
1504                                     -- q = q1*b + q0
1505                                   , SL fmt reg_q q1 (RIImm (ImmInt half))
1506                                   , ADD reg_q reg_q (RIReg q0)
1507                                   ]
1508              divOp2 _ _ _ _
1509                = panic "genCCall: Wrong number of arguments for divOp2"
1510              multOp2 platform width [res_h, res_l] [arg_x, arg_y]
1511                = do let reg_h = getRegisterReg platform (CmmLocal res_h)
1512                         reg_l = getRegisterReg platform (CmmLocal res_l)
1513                         fmt = intFormat width
1514                     (x_reg, x_code) <- getSomeReg arg_x
1515                     (y_reg, y_code) <- getSomeReg arg_y
1516                     return $ y_code `appOL` x_code
1517                            `appOL` toOL [ MULL fmt reg_l x_reg (RIReg y_reg)
1518                                         , MULHU fmt reg_h x_reg y_reg
1519                                         ]
1520              multOp2 _ _ _ _
1521                = panic "genCall: Wrong number of arguments for multOp2"
1522              add2Op platform [res_h, res_l] [arg_x, arg_y]
1523                = do let reg_h = getRegisterReg platform (CmmLocal res_h)
1524                         reg_l = getRegisterReg platform (CmmLocal res_l)
1525                     (x_reg, x_code) <- getSomeReg arg_x
1526                     (y_reg, y_code) <- getSomeReg arg_y
1527                     return $ y_code `appOL` x_code
1528                            `appOL` toOL [ LI reg_h (ImmInt 0)
1529                                         , ADDC reg_l x_reg y_reg
1530                                         , ADDZE reg_h reg_h
1531                                         ]
1532              add2Op _ _ _
1533                = panic "genCCall: Wrong number of arguments/results for add2"
1534
1535              addcOp platform [res_r, res_c] [arg_x, arg_y]
1536                = add2Op platform [res_c {-hi-}, res_r {-lo-}] [arg_x, arg_y]
1537              addcOp _ _ _
1538                = panic "genCCall: Wrong number of arguments/results for addc"
1539
1540              -- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1,
1541              -- which is 0 for borrow and 1 otherwise. We need 1 and 0
1542              -- so xor with 1.
1543              subcOp platform [res_r, res_c] [arg_x, arg_y]
1544                = do let reg_r = getRegisterReg platform (CmmLocal res_r)
1545                         reg_c = getRegisterReg platform (CmmLocal res_c)
1546                     (x_reg, x_code) <- getSomeReg arg_x
1547                     (y_reg, y_code) <- getSomeReg arg_y
1548                     return $ y_code `appOL` x_code
1549                            `appOL` toOL [ LI reg_c (ImmInt 0)
1550                                         , SUBFC reg_r y_reg (RIReg x_reg)
1551                                         , ADDZE reg_c reg_c
1552                                         , XOR reg_c reg_c (RIImm (ImmInt 1))
1553                                         ]
1554              subcOp _ _ _
1555                = panic "genCCall: Wrong number of arguments/results for subc"
1556              addSubCOp instr platform width [res_r, res_c] [arg_x, arg_y]
1557                = do let reg_r = getRegisterReg platform (CmmLocal res_r)
1558                         reg_c = getRegisterReg platform (CmmLocal res_c)
1559                     (x_reg, x_code) <- getSomeReg arg_x
1560                     (y_reg, y_code) <- getSomeReg arg_y
1561                     return $ y_code `appOL` x_code
1562                            `appOL` toOL [ instr reg_r y_reg x_reg,
1563                                           -- SUBFO argument order reversed!
1564                                           MFOV (intFormat width) reg_c
1565                                         ]
1566              addSubCOp _ _ _ _ _
1567                = panic "genCall: Wrong number of arguments/results for addC"
1568              fabs platform [res] [arg]
1569                = do let res_r = getRegisterReg platform (CmmLocal res)
1570                     (arg_reg, arg_code) <- getSomeReg arg
1571                     return $ arg_code `snocOL` FABS res_r arg_reg
1572              fabs _ _ _
1573                = panic "genCall: Wrong number of arguments/results for fabs"
1574
1575-- TODO: replace 'Int' by an enum such as 'PPC_64ABI'
1576data GenCCallPlatform = GCP32ELF | GCP64ELF !Int | GCPAIX
1577
1578platformToGCP :: Platform -> GenCCallPlatform
1579platformToGCP platform
1580  = case platformOS platform of
1581      OSAIX    -> GCPAIX
1582      _ -> case platformArch platform of
1583             ArchPPC           -> GCP32ELF
1584             ArchPPC_64 ELF_V1 -> GCP64ELF 1
1585             ArchPPC_64 ELF_V2 -> GCP64ELF 2
1586             _ -> panic "platformToGCP: Not PowerPC"
1587
1588
1589genCCall'
1590    :: DynFlags
1591    -> GenCCallPlatform
1592    -> ForeignTarget            -- function to call
1593    -> [CmmFormal]        -- where to put the result
1594    -> [CmmActual]        -- arguments (of mixed type)
1595    -> NatM InstrBlock
1596
1597{-
1598    PowerPC Linux uses the System V Release 4 Calling Convention
1599    for PowerPC. It is described in the
1600    "System V Application Binary Interface PowerPC Processor Supplement".
1601
1602    PowerPC 64 Linux uses the System V Release 4 Calling Convention for
1603    64-bit PowerPC. It is specified in
1604    "64-bit PowerPC ELF Application Binary Interface Supplement 1.9"
1605    (PPC64 ELF v1.9).
1606
1607    PowerPC 64 Linux in little endian mode uses the "Power Architecture 64-Bit
1608    ELF V2 ABI Specification -- OpenPOWER ABI for Linux Supplement"
1609    (PPC64 ELF v2).
1610
1611    AIX follows the "PowerOpen ABI: Application Binary Interface Big-Endian
1612    32-Bit Hardware Implementation"
1613
1614    All four conventions are similar:
1615    Parameters may be passed in general-purpose registers starting at r3, in
1616    floating point registers starting at f1, or on the stack.
1617
1618    But there are substantial differences:
1619    * The number of registers used for parameter passing and the exact set of
1620      nonvolatile registers differs (see MachRegs.hs).
1621    * On AIX and 64-bit ELF, stack space is always reserved for parameters,
1622      even if they are passed in registers. The called routine may choose to
1623      save parameters from registers to the corresponding space on the stack.
1624    * On AIX and 64-bit ELF, a corresponding amount of GPRs is skipped when
1625      a floating point parameter is passed in an FPR.
1626    * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
1627      starting with an odd-numbered GPR. It may skip a GPR to achieve this.
1628      AIX just treats an I64 likt two separate I32s (high word first).
1629    * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
1630      4-byte aligned like everything else on AIX.
1631    * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
1632      PowerPC Linux does not agree, so neither do we.
1633
1634    According to all conventions, the parameter area should be part of the
1635    caller's stack frame, allocated in the caller's prologue code (large enough
1636    to hold the parameter lists for all called routines). The NCG already
1637    uses the stack for register spilling, leaving 64 bytes free at the top.
1638    If we need a larger parameter area than that, we increase the size
1639    of the stack frame just before ccalling.
1640-}
1641
1642
1643genCCall' dflags gcp target dest_regs args
1644  = do
1645        (finalStack,passArgumentsCode,usedRegs) <- passArguments
1646                                                   (zip3 args argReps argHints)
1647                                                   allArgRegs
1648                                                   (allFPArgRegs platform)
1649                                                   initialStackOffset
1650                                                   nilOL []
1651
1652        (labelOrExpr, reduceToFF32) <- case target of
1653            ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
1654                uses_pic_base_implicitly
1655                return (Left lbl, False)
1656            ForeignTarget expr _ -> do
1657                uses_pic_base_implicitly
1658                return (Right expr, False)
1659            PrimTarget mop -> outOfLineMachOp mop
1660
1661        let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
1662            codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
1663
1664        case labelOrExpr of
1665            Left lbl -> do -- the linker does all the work for us
1666                return (         codeBefore
1667                        `snocOL` BL lbl usedRegs
1668                        `appOL`  maybeNOP -- some ABI require a NOP after BL
1669                        `appOL`  codeAfter)
1670            Right dyn -> do -- implement call through function pointer
1671                (dynReg, dynCode) <- getSomeReg dyn
1672                case gcp of
1673                     GCP64ELF 1      -> return ( dynCode
1674                       `appOL`  codeBefore
1675                       `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 40))
1676                       `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0))
1677                       `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8))
1678                       `snocOL` MTCTR r11
1679                       `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16))
1680                       `snocOL` BCTRL usedRegs
1681                       `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 40))
1682                       `appOL`  codeAfter)
1683                     GCP64ELF 2      -> return ( dynCode
1684                       `appOL`  codeBefore
1685                       `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 24))
1686                       `snocOL` MR r12 dynReg
1687                       `snocOL` MTCTR r12
1688                       `snocOL` BCTRL usedRegs
1689                       `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 24))
1690                       `appOL`  codeAfter)
1691                     GCPAIX          -> return ( dynCode
1692                       -- AIX/XCOFF follows the PowerOPEN ABI
1693                       -- which is quite similiar to LinuxPPC64/ELFv1
1694                       `appOL`  codeBefore
1695                       `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 20))
1696                       `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 0))
1697                       `snocOL` LD II32 toc (AddrRegImm dynReg (ImmInt 4))
1698                       `snocOL` MTCTR r11
1699                       `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 8))
1700                       `snocOL` BCTRL usedRegs
1701                       `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 20))
1702                       `appOL`  codeAfter)
1703                     _               -> return ( dynCode
1704                       `snocOL` MTCTR dynReg
1705                       `appOL`  codeBefore
1706                       `snocOL` BCTRL usedRegs
1707                       `appOL`  codeAfter)
1708    where
1709        platform = targetPlatform dflags
1710
1711        uses_pic_base_implicitly = do
1712            -- See Note [implicit register in PPC PIC code]
1713            -- on why we claim to use PIC register here
1714            when (positionIndependent dflags && target32Bit platform) $ do
1715                _ <- getPicBaseNat $ archWordFormat True
1716                return ()
1717
1718        initialStackOffset = case gcp of
1719                             GCPAIX     -> 24
1720                             GCP32ELF   -> 8
1721                             GCP64ELF 1 -> 48
1722                             GCP64ELF 2 -> 32
1723                             _ -> panic "genCall': unknown calling convention"
1724            -- size of linkage area + size of arguments, in bytes
1725        stackDelta finalStack = case gcp of
1726                                GCPAIX ->
1727                                    roundTo 16 $ (24 +) $ max 32 $ sum $
1728                                    map (widthInBytes . typeWidth) argReps
1729                                GCP32ELF -> roundTo 16 finalStack
1730                                GCP64ELF 1 ->
1731                                    roundTo 16 $ (48 +) $ max 64 $ sum $
1732                                    map (roundTo 8 . widthInBytes . typeWidth)
1733                                        argReps
1734                                GCP64ELF 2 ->
1735                                    roundTo 16 $ (32 +) $ max 64 $ sum $
1736                                    map (roundTo 8 . widthInBytes . typeWidth)
1737                                        argReps
1738                                _ -> panic "genCall': unknown calling conv."
1739
1740        argReps = map (cmmExprType dflags) args
1741        (argHints, _) = foreignTargetHints target
1742
1743        roundTo a x | x `mod` a == 0 = x
1744                    | otherwise = x + a - (x `mod` a)
1745
1746        spFormat = if target32Bit platform then II32 else II64
1747
1748        -- TODO: Do not create a new stack frame if delta is too large.
1749        move_sp_down finalStack
1750               | delta > stackFrameHeaderSize dflags =
1751                        toOL [STU spFormat sp (AddrRegImm sp (ImmInt (-delta))),
1752                              DELTA (-delta)]
1753               | otherwise = nilOL
1754               where delta = stackDelta finalStack
1755        move_sp_up finalStack
1756               | delta > stackFrameHeaderSize dflags =
1757                        toOL [ADD sp sp (RIImm (ImmInt delta)),
1758                              DELTA 0]
1759               | otherwise = nilOL
1760               where delta = stackDelta finalStack
1761
1762        -- A NOP instruction is required after a call (bl instruction)
1763        -- on AIX and 64-Bit Linux.
1764        -- If the call is to a function with a different TOC (r2) the
1765        -- link editor replaces the NOP instruction with a load of the TOC
1766        -- from the stack to restore the TOC.
1767        maybeNOP = case gcp of
1768           GCP32ELF        -> nilOL
1769           -- See Section 3.9.4 of OpenPower ABI
1770           GCPAIX          -> unitOL NOP
1771           -- See Section 3.5.11 of PPC64 ELF v1.9
1772           GCP64ELF 1      -> unitOL NOP
1773           -- See Section 2.3.6 of PPC64 ELF v2
1774           GCP64ELF 2      -> unitOL NOP
1775           _               -> panic "maybeNOP: Unknown PowerPC 64-bit ABI"
1776
1777        passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
1778        passArguments ((arg,arg_ty,_):args) gprs fprs stackOffset
1779               accumCode accumUsed | isWord64 arg_ty
1780                                     && target32Bit (targetPlatform dflags) =
1781            do
1782                ChildCode64 code vr_lo <- iselExpr64 arg
1783                let vr_hi = getHiVRegFromLo vr_lo
1784
1785                case gcp of
1786                    GCPAIX ->
1787                        do let storeWord vr (gpr:_) _ = MR gpr vr
1788                               storeWord vr [] offset
1789                                   = ST II32 vr (AddrRegImm sp (ImmInt offset))
1790                           passArguments args
1791                                         (drop 2 gprs)
1792                                         fprs
1793                                         (stackOffset+8)
1794                                         (accumCode `appOL` code
1795                                               `snocOL` storeWord vr_hi gprs stackOffset
1796                                               `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
1797                                         ((take 2 gprs) ++ accumUsed)
1798                    GCP32ELF ->
1799                        do let stackOffset' = roundTo 8 stackOffset
1800                               stackCode = accumCode `appOL` code
1801                                   `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
1802                                   `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
1803                               regCode hireg loreg =
1804                                   accumCode `appOL` code
1805                                       `snocOL` MR hireg vr_hi
1806                                       `snocOL` MR loreg vr_lo
1807
1808                           case gprs of
1809                               hireg : loreg : regs | even (length gprs) ->
1810                                   passArguments args regs fprs stackOffset
1811                                                 (regCode hireg loreg) (hireg : loreg : accumUsed)
1812                               _skipped : hireg : loreg : regs ->
1813                                   passArguments args regs fprs stackOffset
1814                                                 (regCode hireg loreg) (hireg : loreg : accumUsed)
1815                               _ -> -- only one or no regs left
1816                                   passArguments args [] fprs (stackOffset'+8)
1817                                                 stackCode accumUsed
1818                    GCP64ELF _ -> panic "passArguments: 32 bit code"
1819
1820        passArguments ((arg,rep,hint):args) gprs fprs stackOffset accumCode accumUsed
1821            | reg : _ <- regs = do
1822                register <- getRegister arg_pro
1823                let code = case register of
1824                            Fixed _ freg fcode -> fcode `snocOL` MR reg freg
1825                            Any _ acode -> acode reg
1826                    stackOffsetRes = case gcp of
1827                                     -- The PowerOpen ABI requires that we
1828                                     -- reserve stack slots for register
1829                                     -- parameters
1830                                     GCPAIX    -> stackOffset + stackBytes
1831                                     -- ... the SysV ABI 32-bit doesn't.
1832                                     GCP32ELF -> stackOffset
1833                                     -- ... but SysV ABI 64-bit does.
1834                                     GCP64ELF _ -> stackOffset + stackBytes
1835                passArguments args
1836                              (drop nGprs gprs)
1837                              (drop nFprs fprs)
1838                              stackOffsetRes
1839                              (accumCode `appOL` code)
1840                              (reg : accumUsed)
1841            | otherwise = do
1842                (vr, code) <- getSomeReg arg_pro
1843                passArguments args
1844                              (drop nGprs gprs)
1845                              (drop nFprs fprs)
1846                              (stackOffset' + stackBytes)
1847                              (accumCode `appOL` code
1848                                         `snocOL` ST format_pro vr stackSlot)
1849                              accumUsed
1850            where
1851                arg_pro
1852                   | isBitsType rep = CmmMachOp (conv_op (typeWidth rep) (wordWidth dflags)) [arg]
1853                   | otherwise      = arg
1854                format_pro
1855                   | isBitsType rep = intFormat (wordWidth dflags)
1856                   | otherwise      = cmmTypeFormat rep
1857                conv_op = case hint of
1858                            SignedHint -> MO_SS_Conv
1859                            _          -> MO_UU_Conv
1860
1861                stackOffset' = case gcp of
1862                               GCPAIX ->
1863                                   -- The 32bit PowerOPEN ABI is happy with
1864                                   -- 32bit-alignment ...
1865                                   stackOffset
1866                               GCP32ELF
1867                                   -- ... the SysV ABI requires 8-byte
1868                                   -- alignment for doubles.
1869                                | isFloatType rep && typeWidth rep == W64 ->
1870                                   roundTo 8 stackOffset
1871                                | otherwise ->
1872                                   stackOffset
1873                               GCP64ELF _ ->
1874                                   -- Everything on the stack is mapped to
1875                                   -- 8-byte aligned doublewords
1876                                   stackOffset
1877                stackOffset''
1878                     | isFloatType rep && typeWidth rep == W32 =
1879                         case gcp of
1880                         -- The ELF v1 ABI Section 3.2.3 requires:
1881                         -- "Single precision floating point values
1882                         -- are mapped to the second word in a single
1883                         -- doubleword"
1884                         GCP64ELF 1      -> stackOffset' + 4
1885                         _               -> stackOffset'
1886                     | otherwise = stackOffset'
1887
1888                stackSlot = AddrRegImm sp (ImmInt stackOffset'')
1889                (nGprs, nFprs, stackBytes, regs)
1890                    = case gcp of
1891                      GCPAIX ->
1892                          case cmmTypeFormat rep of
1893                          II8  -> (1, 0, 4, gprs)
1894                          II16 -> (1, 0, 4, gprs)
1895                          II32 -> (1, 0, 4, gprs)
1896                          -- The PowerOpen ABI requires that we skip a
1897                          -- corresponding number of GPRs when we use
1898                          -- the FPRs.
1899                          --
1900                          -- E.g. for a `double` two GPRs are skipped,
1901                          -- whereas for a `float` one GPR is skipped
1902                          -- when parameters are assigned to
1903                          -- registers.
1904                          --
1905                          -- The PowerOpen ABI specification can be found at
1906                          -- ftp://www.sourceware.org/pub/binutils/ppc-docs/ppc-poweropen/
1907                          FF32 -> (1, 1, 4, fprs)
1908                          FF64 -> (2, 1, 8, fprs)
1909                          II64 -> panic "genCCall' passArguments II64"
1910
1911                      GCP32ELF ->
1912                          case cmmTypeFormat rep of
1913                          II8  -> (1, 0, 4, gprs)
1914                          II16 -> (1, 0, 4, gprs)
1915                          II32 -> (1, 0, 4, gprs)
1916                          -- ... the SysV ABI doesn't.
1917                          FF32 -> (0, 1, 4, fprs)
1918                          FF64 -> (0, 1, 8, fprs)
1919                          II64 -> panic "genCCall' passArguments II64"
1920                      GCP64ELF _ ->
1921                          case cmmTypeFormat rep of
1922                          II8  -> (1, 0, 8, gprs)
1923                          II16 -> (1, 0, 8, gprs)
1924                          II32 -> (1, 0, 8, gprs)
1925                          II64 -> (1, 0, 8, gprs)
1926                          -- The ELFv1 ABI requires that we skip a
1927                          -- corresponding number of GPRs when we use
1928                          -- the FPRs.
1929                          FF32 -> (1, 1, 8, fprs)
1930                          FF64 -> (1, 1, 8, fprs)
1931
1932        moveResult reduceToFF32 =
1933            case dest_regs of
1934                [] -> nilOL
1935                [dest]
1936                    | reduceToFF32 && isFloat32 rep   -> unitOL (FRSP r_dest f1)
1937                    | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
1938                    | isWord64 rep && target32Bit (targetPlatform dflags)
1939                       -> toOL [MR (getHiVRegFromLo r_dest) r3,
1940                                MR r_dest r4]
1941                    | otherwise -> unitOL (MR r_dest r3)
1942                    where rep = cmmRegType dflags (CmmLocal dest)
1943                          r_dest = getRegisterReg platform (CmmLocal dest)
1944                _ -> panic "genCCall' moveResult: Bad dest_regs"
1945
1946        outOfLineMachOp mop =
1947            do
1948                dflags <- getDynFlags
1949                mopExpr <- cmmMakeDynamicReference dflags CallReference $
1950                              mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction
1951                let mopLabelOrExpr = case mopExpr of
1952                        CmmLit (CmmLabel lbl) -> Left lbl
1953                        _ -> Right mopExpr
1954                return (mopLabelOrExpr, reduce)
1955            where
1956                (functionName, reduce) = case mop of
1957                    MO_F32_Exp   -> (fsLit "exp", True)
1958                    MO_F32_ExpM1 -> (fsLit "expm1", True)
1959                    MO_F32_Log   -> (fsLit "log", True)
1960                    MO_F32_Log1P -> (fsLit "log1p", True)
1961                    MO_F32_Sqrt  -> (fsLit "sqrt", True)
1962                    MO_F32_Fabs  -> unsupported
1963
1964                    MO_F32_Sin   -> (fsLit "sin", True)
1965                    MO_F32_Cos   -> (fsLit "cos", True)
1966                    MO_F32_Tan   -> (fsLit "tan", True)
1967
1968                    MO_F32_Asin  -> (fsLit "asin", True)
1969                    MO_F32_Acos  -> (fsLit "acos", True)
1970                    MO_F32_Atan  -> (fsLit "atan", True)
1971
1972                    MO_F32_Sinh  -> (fsLit "sinh", True)
1973                    MO_F32_Cosh  -> (fsLit "cosh", True)
1974                    MO_F32_Tanh  -> (fsLit "tanh", True)
1975                    MO_F32_Pwr   -> (fsLit "pow", True)
1976
1977                    MO_F32_Asinh -> (fsLit "asinh", True)
1978                    MO_F32_Acosh -> (fsLit "acosh", True)
1979                    MO_F32_Atanh -> (fsLit "atanh", True)
1980
1981                    MO_F64_Exp   -> (fsLit "exp", False)
1982                    MO_F64_ExpM1 -> (fsLit "expm1", False)
1983                    MO_F64_Log   -> (fsLit "log", False)
1984                    MO_F64_Log1P -> (fsLit "log1p", False)
1985                    MO_F64_Sqrt  -> (fsLit "sqrt", False)
1986                    MO_F64_Fabs  -> unsupported
1987
1988                    MO_F64_Sin   -> (fsLit "sin", False)
1989                    MO_F64_Cos   -> (fsLit "cos", False)
1990                    MO_F64_Tan   -> (fsLit "tan", False)
1991
1992                    MO_F64_Asin  -> (fsLit "asin", False)
1993                    MO_F64_Acos  -> (fsLit "acos", False)
1994                    MO_F64_Atan  -> (fsLit "atan", False)
1995
1996                    MO_F64_Sinh  -> (fsLit "sinh", False)
1997                    MO_F64_Cosh  -> (fsLit "cosh", False)
1998                    MO_F64_Tanh  -> (fsLit "tanh", False)
1999                    MO_F64_Pwr   -> (fsLit "pow", False)
2000
2001                    MO_F64_Asinh -> (fsLit "asinh", False)
2002                    MO_F64_Acosh -> (fsLit "acosh", False)
2003                    MO_F64_Atanh -> (fsLit "atanh", False)
2004
2005                    MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False)
2006
2007                    MO_Memcpy _  -> (fsLit "memcpy", False)
2008                    MO_Memset _  -> (fsLit "memset", False)
2009                    MO_Memmove _ -> (fsLit "memmove", False)
2010                    MO_Memcmp _  -> (fsLit "memcmp", False)
2011
2012                    MO_BSwap w   -> (fsLit $ bSwapLabel w, False)
2013                    MO_BRev w    -> (fsLit $ bRevLabel w, False)
2014                    MO_PopCnt w  -> (fsLit $ popCntLabel w, False)
2015                    MO_Pdep w    -> (fsLit $ pdepLabel w, False)
2016                    MO_Pext w    -> (fsLit $ pextLabel w, False)
2017                    MO_Clz _     -> unsupported
2018                    MO_Ctz _     -> unsupported
2019                    MO_AtomicRMW {} -> unsupported
2020                    MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False)
2021                    MO_AtomicRead _  -> unsupported
2022                    MO_AtomicWrite _ -> unsupported
2023
2024                    MO_S_QuotRem {}  -> unsupported
2025                    MO_U_QuotRem {}  -> unsupported
2026                    MO_U_QuotRem2 {} -> unsupported
2027                    MO_Add2 {}       -> unsupported
2028                    MO_AddWordC {}   -> unsupported
2029                    MO_SubWordC {}   -> unsupported
2030                    MO_AddIntC {}    -> unsupported
2031                    MO_SubIntC {}    -> unsupported
2032                    MO_U_Mul2 {}     -> unsupported
2033                    MO_ReadBarrier   -> unsupported
2034                    MO_WriteBarrier  -> unsupported
2035                    MO_Touch         -> unsupported
2036                    MO_Prefetch_Data _ -> unsupported
2037                unsupported = panic ("outOfLineCmmOp: " ++ show mop
2038                                  ++ " not supported")
2039
2040-- -----------------------------------------------------------------------------
2041-- Generating a table-branch
2042
2043genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
2044genSwitch dflags expr targets
2045  | OSAIX <- platformOS (targetPlatform dflags)
2046  = do
2047        (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2048        let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
2049            sha = if target32Bit $ targetPlatform dflags then 2 else 3
2050        tmp <- getNewRegNat fmt
2051        lbl <- getNewLabelNat
2052        dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2053        (tableReg,t_code) <- getSomeReg $ dynRef
2054        let code = e_code `appOL` t_code `appOL` toOL [
2055                            SL fmt tmp reg (RIImm (ImmInt sha)),
2056                            LD fmt tmp (AddrRegReg tableReg tmp),
2057                            MTCTR tmp,
2058                            BCTR ids (Just lbl) []
2059                    ]
2060        return code
2061
2062  | (positionIndependent dflags) || (not $ target32Bit $ targetPlatform dflags)
2063  = do
2064        (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2065        let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
2066            sha = if target32Bit $ targetPlatform dflags then 2 else 3
2067        tmp <- getNewRegNat fmt
2068        lbl <- getNewLabelNat
2069        dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2070        (tableReg,t_code) <- getSomeReg $ dynRef
2071        let code = e_code `appOL` t_code `appOL` toOL [
2072                            SL fmt tmp reg (RIImm (ImmInt sha)),
2073                            LD fmt tmp (AddrRegReg tableReg tmp),
2074                            ADD tmp tmp (RIReg tableReg),
2075                            MTCTR tmp,
2076                            BCTR ids (Just lbl) []
2077                    ]
2078        return code
2079  | otherwise
2080  = do
2081        (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
2082        let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
2083            sha = if target32Bit $ targetPlatform dflags then 2 else 3
2084        tmp <- getNewRegNat fmt
2085        lbl <- getNewLabelNat
2086        let code = e_code `appOL` toOL [
2087                            SL fmt tmp reg (RIImm (ImmInt sha)),
2088                            ADDIS tmp tmp (HA (ImmCLbl lbl)),
2089                            LD fmt tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
2090                            MTCTR tmp,
2091                            BCTR ids (Just lbl) []
2092                    ]
2093        return code
2094  where (offset, ids) = switchTargetsToTable targets
2095
2096generateJumpTableForInstr :: DynFlags -> Instr
2097                          -> Maybe (NatCmmDecl CmmStatics Instr)
2098generateJumpTableForInstr dflags (BCTR ids (Just lbl) _) =
2099    let jumpTable
2100            | (positionIndependent dflags)
2101              || (not $ target32Bit $ targetPlatform dflags)
2102            = map jumpTableEntryRel ids
2103            | otherwise = map (jumpTableEntry dflags) ids
2104                where jumpTableEntryRel Nothing
2105                        = CmmStaticLit (CmmInt 0 (wordWidth dflags))
2106                      jumpTableEntryRel (Just blockid)
2107                        = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0
2108                                         (wordWidth dflags))
2109                            where blockLabel = blockLbl blockid
2110    in Just (CmmData (Section ReadOnlyData lbl) (Statics lbl jumpTable))
2111generateJumpTableForInstr _ _ = Nothing
2112
2113-- -----------------------------------------------------------------------------
2114-- 'condIntReg' and 'condFltReg': condition codes into registers
2115
2116-- Turn those condition codes into integers now (when they appear on
2117-- the right hand side of an assignment).
2118
2119
2120
2121condReg :: NatM CondCode -> NatM Register
2122condReg getCond = do
2123    CondCode _ cond cond_code <- getCond
2124    dflags <- getDynFlags
2125    let
2126        code dst = cond_code
2127            `appOL` negate_code
2128            `appOL` toOL [
2129                MFCR dst,
2130                RLWINM dst dst (bit + 1) 31 31
2131            ]
2132
2133        negate_code | do_negate = unitOL (CRNOR bit bit bit)
2134                    | otherwise = nilOL
2135
2136        (bit, do_negate) = case cond of
2137            LTT -> (0, False)
2138            LE  -> (1, True)
2139            EQQ -> (2, False)
2140            GE  -> (0, True)
2141            GTT -> (1, False)
2142
2143            NE  -> (2, True)
2144
2145            LU  -> (0, False)
2146            LEU -> (1, True)
2147            GEU -> (0, True)
2148            GU  -> (1, False)
2149            _   -> panic "PPC.CodeGen.codeReg: no match"
2150
2151        format = archWordFormat $ target32Bit $ targetPlatform dflags
2152    return (Any format code)
2153
2154condIntReg :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM Register
2155condIntReg cond width x y = condReg (condIntCode cond width x y)
2156condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
2157condFltReg cond x y = condReg (condFltCode cond x y)
2158
2159
2160
2161-- -----------------------------------------------------------------------------
2162-- 'trivial*Code': deal with trivial instructions
2163
2164-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
2165-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
2166-- Only look for constants on the right hand side, because that's
2167-- where the generic optimizer will have put them.
2168
2169-- Similarly, for unary instructions, we don't have to worry about
2170-- matching an StInt as the argument, because genericOpt will already
2171-- have handled the constant-folding.
2172
2173
2174
2175{-
2176Wolfgang's PowerPC version of The Rules:
2177
2178A slightly modified version of The Rules to take advantage of the fact
2179that PowerPC instructions work on all registers and don't implicitly
2180clobber any fixed registers.
2181
2182* The only expression for which getRegister returns Fixed is (CmmReg reg).
2183
2184* If getRegister returns Any, then the code it generates may modify only:
2185        (a) fresh temporaries
2186        (b) the destination register
2187  It may *not* modify global registers, unless the global
2188  register happens to be the destination register.
2189  It may not clobber any other registers. In fact, only ccalls clobber any
2190  fixed registers.
2191  Also, it may not modify the counter register (used by genCCall).
2192
2193  Corollary: If a getRegister for a subexpression returns Fixed, you need
2194  not move it to a fresh temporary before evaluating the next subexpression.
2195  The Fixed register won't be modified.
2196  Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
2197
2198* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
2199  the value of the destination register.
2200-}
2201
2202trivialCode
2203        :: Width
2204        -> Bool
2205        -> (Reg -> Reg -> RI -> Instr)
2206        -> CmmExpr
2207        -> CmmExpr
2208        -> NatM Register
2209
2210trivialCode rep signed instr x (CmmLit (CmmInt y _))
2211    | Just imm <- makeImmediate rep signed y
2212    = do
2213        (src1, code1) <- getSomeReg x
2214        let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
2215        return (Any (intFormat rep) code)
2216
2217trivialCode rep _ instr x y = do
2218    (src1, code1) <- getSomeReg x
2219    (src2, code2) <- getSomeReg y
2220    let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
2221    return (Any (intFormat rep) code)
2222
2223shiftMulCode
2224        :: Width
2225        -> Bool
2226        -> (Format-> Reg -> Reg -> RI -> Instr)
2227        -> CmmExpr
2228        -> CmmExpr
2229        -> NatM Register
2230shiftMulCode width sign instr x (CmmLit (CmmInt y _))
2231    | Just imm <- makeImmediate width sign y
2232    = do
2233        (src1, code1) <- getSomeReg x
2234        let format = intFormat width
2235        let ins_fmt = intFormat (max W32 width)
2236        let code dst = code1 `snocOL` instr ins_fmt dst src1 (RIImm imm)
2237        return (Any format code)
2238
2239shiftMulCode width _ instr x y = do
2240    (src1, code1) <- getSomeReg x
2241    (src2, code2) <- getSomeReg y
2242    let format = intFormat width
2243    let ins_fmt = intFormat (max W32 width)
2244    let code dst = code1 `appOL` code2
2245                   `snocOL` instr ins_fmt dst src1 (RIReg src2)
2246    return (Any format code)
2247
2248trivialCodeNoImm' :: Format -> (Reg -> Reg -> Reg -> Instr)
2249                 -> CmmExpr -> CmmExpr -> NatM Register
2250trivialCodeNoImm' format instr x y = do
2251    (src1, code1) <- getSomeReg x
2252    (src2, code2) <- getSomeReg y
2253    let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
2254    return (Any format code)
2255
2256trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr)
2257                 -> CmmExpr -> CmmExpr -> NatM Register
2258trivialCodeNoImm format instr x y
2259  = trivialCodeNoImm' format (instr format) x y
2260
2261srCode :: Width -> Bool -> (Format-> Reg -> Reg -> RI -> Instr)
2262       -> CmmExpr -> CmmExpr -> NatM Register
2263srCode width sgn instr x (CmmLit (CmmInt y _))
2264    | Just imm <- makeImmediate width sgn y
2265    = do
2266        let op_len = max W32 width
2267            extend = if sgn then extendSExpr else extendUExpr
2268        (src1, code1) <- getSomeReg (extend width op_len x)
2269        let code dst = code1 `snocOL`
2270                       instr (intFormat op_len) dst src1 (RIImm imm)
2271        return (Any (intFormat width) code)
2272
2273srCode width sgn instr x y = do
2274  let op_len = max W32 width
2275      extend = if sgn then extendSExpr else extendUExpr
2276  (src1, code1) <- getSomeReg (extend width op_len x)
2277  (src2, code2) <- getSomeReg (extendUExpr width op_len y)
2278  -- Note: Shift amount `y` is unsigned
2279  let code dst = code1 `appOL` code2 `snocOL`
2280                 instr (intFormat op_len) dst src1 (RIReg src2)
2281  return (Any (intFormat width) code)
2282
2283divCode :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register
2284divCode width sgn x y = do
2285  let op_len = max W32 width
2286      extend = if sgn then extendSExpr else extendUExpr
2287  (src1, code1) <- getSomeReg (extend width op_len x)
2288  (src2, code2) <- getSomeReg (extend width op_len y)
2289  let code dst = code1 `appOL` code2 `snocOL`
2290                 DIV (intFormat op_len) sgn dst src1 src2
2291  return (Any (intFormat width) code)
2292
2293
2294trivialUCode :: Format
2295             -> (Reg -> Reg -> Instr)
2296             -> CmmExpr
2297             -> NatM Register
2298trivialUCode rep instr x = do
2299    (src, code) <- getSomeReg x
2300    let code' dst = code `snocOL` instr dst src
2301    return (Any rep code')
2302
2303-- There is no "remainder" instruction on the PPC, so we have to do
2304-- it the hard way.
2305-- The "sgn" parameter is the signedness for the division instruction
2306
2307remainderCode :: Width -> Bool -> Reg -> CmmExpr -> CmmExpr
2308               -> NatM (Reg -> InstrBlock)
2309remainderCode rep sgn reg_q arg_x arg_y = do
2310  let op_len = max W32 rep
2311      fmt    = intFormat op_len
2312      extend = if sgn then extendSExpr else extendUExpr
2313  (x_reg, x_code) <- getSomeReg (extend rep op_len arg_x)
2314  (y_reg, y_code) <- getSomeReg (extend rep op_len arg_y)
2315  return $ \reg_r -> y_code `appOL` x_code
2316                     `appOL` toOL [ DIV fmt sgn reg_q x_reg y_reg
2317                                  , MULL fmt reg_r reg_q (RIReg y_reg)
2318                                  , SUBF reg_r reg_r x_reg
2319                                  ]
2320
2321
2322coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
2323coerceInt2FP fromRep toRep x = do
2324    dflags <- getDynFlags
2325    let arch =  platformArch $ targetPlatform dflags
2326    coerceInt2FP' arch fromRep toRep x
2327
2328coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
2329coerceInt2FP' ArchPPC fromRep toRep x = do
2330    (src, code) <- getSomeReg x
2331    lbl <- getNewLabelNat
2332    itmp <- getNewRegNat II32
2333    ftmp <- getNewRegNat FF64
2334    dflags <- getDynFlags
2335    dynRef <- cmmMakeDynamicReference dflags DataReference lbl
2336    Amode addr addr_code <- getAmode D dynRef
2337    let
2338        code' dst = code `appOL` maybe_exts `appOL` toOL [
2339                LDATA (Section ReadOnlyData lbl) $ Statics lbl
2340                                 [CmmStaticLit (CmmInt 0x43300000 W32),
2341                                  CmmStaticLit (CmmInt 0x80000000 W32)],
2342                XORIS itmp src (ImmInt 0x8000),
2343                ST II32 itmp (spRel dflags 3),
2344                LIS itmp (ImmInt 0x4330),
2345                ST II32 itmp (spRel dflags 2),
2346                LD FF64 ftmp (spRel dflags 2)
2347            ] `appOL` addr_code `appOL` toOL [
2348                LD FF64 dst addr,
2349                FSUB FF64 dst ftmp dst
2350            ] `appOL` maybe_frsp dst
2351
2352        maybe_exts = case fromRep of
2353                        W8 ->  unitOL $ EXTS II8 src src
2354                        W16 -> unitOL $ EXTS II16 src src
2355                        W32 -> nilOL
2356                        _       -> panic "PPC.CodeGen.coerceInt2FP: no match"
2357
2358        maybe_frsp dst
2359                = case toRep of
2360                        W32 -> unitOL $ FRSP dst dst
2361                        W64 -> nilOL
2362                        _       -> panic "PPC.CodeGen.coerceInt2FP: no match"
2363
2364    return (Any (floatFormat toRep) code')
2365
2366-- On an ELF v1 Linux we use the compiler doubleword in the stack frame
2367-- this is the TOC pointer doubleword on ELF v2 Linux. The latter is only
2368-- set right before a call and restored right after return from the call.
2369-- So it is fine.
2370coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do
2371    (src, code) <- getSomeReg x
2372    dflags <- getDynFlags
2373    let
2374        code' dst = code `appOL` maybe_exts `appOL` toOL [
2375                ST II64 src (spRel dflags 3),
2376                LD FF64 dst (spRel dflags 3),
2377                FCFID dst dst
2378            ] `appOL` maybe_frsp dst
2379
2380        maybe_exts = case fromRep of
2381                        W8 ->  unitOL $ EXTS II8 src src
2382                        W16 -> unitOL $ EXTS II16 src src
2383                        W32 -> unitOL $ EXTS II32 src src
2384                        W64 -> nilOL
2385                        _       -> panic "PPC.CodeGen.coerceInt2FP: no match"
2386
2387        maybe_frsp dst
2388                = case toRep of
2389                        W32 -> unitOL $ FRSP dst dst
2390                        W64 -> nilOL
2391                        _       -> panic "PPC.CodeGen.coerceInt2FP: no match"
2392
2393    return (Any (floatFormat toRep) code')
2394
2395coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch"
2396
2397
2398coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
2399coerceFP2Int fromRep toRep x = do
2400    dflags <- getDynFlags
2401    let arch =  platformArch $ targetPlatform dflags
2402    coerceFP2Int' arch fromRep toRep x
2403
2404coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
2405coerceFP2Int' ArchPPC _ toRep x = do
2406    dflags <- getDynFlags
2407    -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
2408    (src, code) <- getSomeReg x
2409    tmp <- getNewRegNat FF64
2410    let
2411        code' dst = code `appOL` toOL [
2412                -- convert to int in FP reg
2413            FCTIWZ tmp src,
2414                -- store value (64bit) from FP to stack
2415            ST FF64 tmp (spRel dflags 2),
2416                -- read low word of value (high word is undefined)
2417            LD II32 dst (spRel dflags 3)]
2418    return (Any (intFormat toRep) code')
2419
2420coerceFP2Int' (ArchPPC_64 _) _ toRep x = do
2421    dflags <- getDynFlags
2422    -- the reps don't really matter: F*->FF64 and II64->I* are no-ops
2423    (src, code) <- getSomeReg x
2424    tmp <- getNewRegNat FF64
2425    let
2426        code' dst = code `appOL` toOL [
2427                -- convert to int in FP reg
2428            FCTIDZ tmp src,
2429                -- store value (64bit) from FP to compiler word on stack
2430            ST FF64 tmp (spRel dflags 3),
2431            LD II64 dst (spRel dflags 3)]
2432    return (Any (intFormat toRep) code')
2433
2434coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch"
2435
2436-- Note [.LCTOC1 in PPC PIC code]
2437-- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
2438-- to make the most of the PPC's 16-bit displacements.
2439-- As 16-bit signed offset is used (usually via addi/lwz instructions)
2440-- first element will have '-32768' offset against .LCTOC1.
2441
2442-- Note [implicit register in PPC PIC code]
2443-- PPC generates calls by labels in assembly
2444-- in form of:
2445--     bl puts+32768@plt
2446-- in this form it's not seen directly (by GHC NCG)
2447-- that r30 (PicBaseReg) is used,
2448-- but r30 is a required part of PLT code setup:
2449--   puts+32768@plt:
2450--       lwz     r11,-30484(r30) ; offset in .LCTOC1
2451--       mtctr   r11
2452--       bctr
2453