1{-# LANGUAGE CPP, GADTs #-} 2 3----------------------------------------------------------------------------- 4-- 5-- Generating machine code (instruction selection) 6-- 7-- (c) The University of Glasgow 1996-2004 8-- 9----------------------------------------------------------------------------- 10 11-- This is a big module, but, if you pay attention to 12-- (a) the sectioning, and (b) the type signatures, 13-- the structure should not be too overwhelming. 14 15module PPC.CodeGen ( 16 cmmTopCodeGen, 17 generateJumpTableForInstr, 18 InstrBlock 19) 20 21where 22 23#include "HsVersions.h" 24 25-- NCG stuff: 26import GhcPrelude 27 28import GHC.Platform.Regs 29import PPC.Instr 30import PPC.Cond 31import PPC.Regs 32import CPrim 33import NCGMonad ( NatM, getNewRegNat, getNewLabelNat 34 , getBlockIdNat, getPicBaseNat, getNewRegPairNat 35 , getPicBaseMaybeNat ) 36import Instruction 37import PIC 38import Format 39import RegClass 40import Reg 41import TargetReg 42import GHC.Platform 43 44-- Our intermediate code: 45import BlockId 46import PprCmm ( pprExpr ) 47import Cmm 48import CmmUtils 49import CmmSwitch 50import CLabel 51import Hoopl.Block 52import Hoopl.Graph 53 54-- The rest: 55import OrdList 56import Outputable 57import DynFlags 58 59import Control.Monad ( mapAndUnzipM, when ) 60import Data.Bits 61import Data.Word 62 63import BasicTypes 64import FastString 65import Util 66 67-- ----------------------------------------------------------------------------- 68-- Top-level of the instruction selector 69 70-- | 'InstrBlock's are the insn sequences generated by the insn selectors. 71-- They are really trees of insns to facilitate fast appending, where a 72-- left-to-right traversal (pre-order?) yields the insns in the correct 73-- order. 74 75cmmTopCodeGen 76 :: RawCmmDecl 77 -> NatM [NatCmmDecl CmmStatics Instr] 78 79cmmTopCodeGen (CmmProc info lab live graph) = do 80 let blocks = toBlockListEntryFirst graph 81 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks 82 dflags <- getDynFlags 83 let proc = CmmProc info lab live (ListGraph $ concat nat_blocks) 84 tops = proc : concat statics 85 os = platformOS $ targetPlatform dflags 86 arch = platformArch $ targetPlatform dflags 87 case arch of 88 ArchPPC | os == OSAIX -> return tops 89 | otherwise -> do 90 picBaseMb <- getPicBaseMaybeNat 91 case picBaseMb of 92 Just picBase -> initializePicBase_ppc arch os picBase tops 93 Nothing -> return tops 94 ArchPPC_64 ELF_V1 -> fixup_entry tops 95 -- generating function descriptor is handled in 96 -- pretty printer 97 ArchPPC_64 ELF_V2 -> fixup_entry tops 98 -- generating function prologue is handled in 99 -- pretty printer 100 _ -> panic "PPC.cmmTopCodeGen: unknown arch" 101 where 102 fixup_entry (CmmProc info lab live (ListGraph (entry:blocks)) : statics) 103 = do 104 let BasicBlock bID insns = entry 105 bID' <- if lab == (blockLbl bID) 106 then newBlockId 107 else return bID 108 let b' = BasicBlock bID' insns 109 return (CmmProc info lab live (ListGraph (b':blocks)) : statics) 110 fixup_entry _ = panic "cmmTopCodegen: Broken CmmProc" 111 112cmmTopCodeGen (CmmData sec dat) = do 113 return [CmmData sec dat] -- no translation, we just use CmmStatic 114 115basicBlockCodeGen 116 :: Block CmmNode C C 117 -> NatM ( [NatBasicBlock Instr] 118 , [NatCmmDecl CmmStatics Instr]) 119 120basicBlockCodeGen block = do 121 let (_, nodes, tail) = blockSplit block 122 id = entryLabel block 123 stmts = blockToList nodes 124 mid_instrs <- stmtsToInstrs stmts 125 tail_instrs <- stmtToInstrs tail 126 let instrs = mid_instrs `appOL` tail_instrs 127 -- code generation may introduce new basic block boundaries, which 128 -- are indicated by the NEWBLOCK instruction. We must split up the 129 -- instruction stream into basic blocks again. Also, we extract 130 -- LDATAs here too. 131 let 132 (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs 133 134 mkBlocks (NEWBLOCK id) (instrs,blocks,statics) 135 = ([], BasicBlock id instrs : blocks, statics) 136 mkBlocks (LDATA sec dat) (instrs,blocks,statics) 137 = (instrs, blocks, CmmData sec dat:statics) 138 mkBlocks instr (instrs,blocks,statics) 139 = (instr:instrs, blocks, statics) 140 return (BasicBlock id top : other_blocks, statics) 141 142stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock 143stmtsToInstrs stmts 144 = do instrss <- mapM stmtToInstrs stmts 145 return (concatOL instrss) 146 147stmtToInstrs :: CmmNode e x -> NatM InstrBlock 148stmtToInstrs stmt = do 149 dflags <- getDynFlags 150 case stmt of 151 CmmComment s -> return (unitOL (COMMENT s)) 152 CmmTick {} -> return nilOL 153 CmmUnwind {} -> return nilOL 154 155 CmmAssign reg src 156 | isFloatType ty -> assignReg_FltCode format reg src 157 | target32Bit (targetPlatform dflags) && 158 isWord64 ty -> assignReg_I64Code reg src 159 | otherwise -> assignReg_IntCode format reg src 160 where ty = cmmRegType dflags reg 161 format = cmmTypeFormat ty 162 163 CmmStore addr src 164 | isFloatType ty -> assignMem_FltCode format addr src 165 | target32Bit (targetPlatform dflags) && 166 isWord64 ty -> assignMem_I64Code addr src 167 | otherwise -> assignMem_IntCode format addr src 168 where ty = cmmExprType dflags src 169 format = cmmTypeFormat ty 170 171 CmmUnsafeForeignCall target result_regs args 172 -> genCCall target result_regs args 173 174 CmmBranch id -> genBranch id 175 CmmCondBranch arg true false prediction -> do 176 b1 <- genCondJump true arg prediction 177 b2 <- genBranch false 178 return (b1 `appOL` b2) 179 CmmSwitch arg ids -> do dflags <- getDynFlags 180 genSwitch dflags arg ids 181 CmmCall { cml_target = arg 182 , cml_args_regs = gregs } -> do 183 dflags <- getDynFlags 184 genJump arg (jumpRegs dflags gregs) 185 _ -> 186 panic "stmtToInstrs: statement should have been cps'd away" 187 188jumpRegs :: DynFlags -> [GlobalReg] -> [Reg] 189jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ] 190 where platform = targetPlatform dflags 191 192-------------------------------------------------------------------------------- 193-- | 'InstrBlock's are the insn sequences generated by the insn selectors. 194-- They are really trees of insns to facilitate fast appending, where a 195-- left-to-right traversal yields the insns in the correct order. 196-- 197type InstrBlock 198 = OrdList Instr 199 200 201-- | Register's passed up the tree. If the stix code forces the register 202-- to live in a pre-decided machine register, it comes out as @Fixed@; 203-- otherwise, it comes out as @Any@, and the parent can decide which 204-- register to put it in. 205-- 206data Register 207 = Fixed Format Reg InstrBlock 208 | Any Format (Reg -> InstrBlock) 209 210 211swizzleRegisterRep :: Register -> Format -> Register 212swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code 213swizzleRegisterRep (Any _ codefn) format = Any format codefn 214 215 216-- | Grab the Reg for a CmmReg 217getRegisterReg :: Platform -> CmmReg -> Reg 218 219getRegisterReg _ (CmmLocal (LocalReg u pk)) 220 = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk) 221 222getRegisterReg platform (CmmGlobal mid) 223 = case globalRegMaybe platform mid of 224 Just reg -> RegReal reg 225 Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) 226 -- By this stage, the only MagicIds remaining should be the 227 -- ones which map to a real machine register on this 228 -- platform. Hence ... 229 230-- | Convert a BlockId to some CmmStatic data 231jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic 232jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags)) 233jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel) 234 where blockLabel = blockLbl blockid 235 236 237 238-- ----------------------------------------------------------------------------- 239-- General things for putting together code sequences 240 241-- Expand CmmRegOff. ToDo: should we do it this way around, or convert 242-- CmmExprs into CmmRegOff? 243mangleIndexTree :: DynFlags -> CmmExpr -> CmmExpr 244mangleIndexTree dflags (CmmRegOff reg off) 245 = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)] 246 where width = typeWidth (cmmRegType dflags reg) 247 248mangleIndexTree _ _ 249 = panic "PPC.CodeGen.mangleIndexTree: no match" 250 251-- ----------------------------------------------------------------------------- 252-- Code gen for 64-bit arithmetic on 32-bit platforms 253 254{- 255Simple support for generating 64-bit code (ie, 64 bit values and 64 256bit assignments) on 32-bit platforms. Unlike the main code generator 257we merely shoot for generating working code as simply as possible, and 258pay little attention to code quality. Specifically, there is no 259attempt to deal cleverly with the fixed-vs-floating register 260distinction; all values are generated into (pairs of) floating 261registers, even if this would mean some redundant reg-reg moves as a 262result. Only one of the VRegUniques is returned, since it will be 263of the VRegUniqueLo form, and the upper-half VReg can be determined 264by applying getHiVRegFromLo to it. 265-} 266 267data ChildCode64 -- a.k.a "Register64" 268 = ChildCode64 269 InstrBlock -- code 270 Reg -- the lower 32-bit temporary which contains the 271 -- result; use getHiVRegFromLo to find the other 272 -- VRegUnique. Rules of this simplified insn 273 -- selection game are therefore that the returned 274 -- Reg may be modified 275 276 277-- | Compute an expression into a register, but 278-- we don't mind which one it is. 279getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock) 280getSomeReg expr = do 281 r <- getRegister expr 282 case r of 283 Any rep code -> do 284 tmp <- getNewRegNat rep 285 return (tmp, code tmp) 286 Fixed _ reg code -> 287 return (reg, code) 288 289getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock) 290getI64Amodes addrTree = do 291 Amode hi_addr addr_code <- getAmode D addrTree 292 case addrOffset hi_addr 4 of 293 Just lo_addr -> return (hi_addr, lo_addr, addr_code) 294 Nothing -> do (hi_ptr, code) <- getSomeReg addrTree 295 return (AddrRegImm hi_ptr (ImmInt 0), 296 AddrRegImm hi_ptr (ImmInt 4), 297 code) 298 299 300assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock 301assignMem_I64Code addrTree valueTree = do 302 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree 303 ChildCode64 vcode rlo <- iselExpr64 valueTree 304 let 305 rhi = getHiVRegFromLo rlo 306 307 -- Big-endian store 308 mov_hi = ST II32 rhi hi_addr 309 mov_lo = ST II32 rlo lo_addr 310 return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi) 311 312 313assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock 314assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do 315 ChildCode64 vcode r_src_lo <- iselExpr64 valueTree 316 let 317 r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32 318 r_dst_hi = getHiVRegFromLo r_dst_lo 319 r_src_hi = getHiVRegFromLo r_src_lo 320 mov_lo = MR r_dst_lo r_src_lo 321 mov_hi = MR r_dst_hi r_src_hi 322 return ( 323 vcode `snocOL` mov_lo `snocOL` mov_hi 324 ) 325 326assignReg_I64Code _ _ 327 = panic "assignReg_I64Code(powerpc): invalid lvalue" 328 329 330iselExpr64 :: CmmExpr -> NatM ChildCode64 331iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do 332 (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree 333 (rlo, rhi) <- getNewRegPairNat II32 334 let mov_hi = LD II32 rhi hi_addr 335 mov_lo = LD II32 rlo lo_addr 336 return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) 337 rlo 338 339iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty 340 = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32)) 341 342iselExpr64 (CmmLit (CmmInt i _)) = do 343 (rlo,rhi) <- getNewRegPairNat II32 344 let 345 half0 = fromIntegral (fromIntegral i :: Word16) 346 half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16) 347 half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16) 348 half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16) 349 350 code = toOL [ 351 LIS rlo (ImmInt half1), 352 OR rlo rlo (RIImm $ ImmInt half0), 353 LIS rhi (ImmInt half3), 354 OR rhi rhi (RIImm $ ImmInt half2) 355 ] 356 return (ChildCode64 code rlo) 357 358iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do 359 ChildCode64 code1 r1lo <- iselExpr64 e1 360 ChildCode64 code2 r2lo <- iselExpr64 e2 361 (rlo,rhi) <- getNewRegPairNat II32 362 let 363 r1hi = getHiVRegFromLo r1lo 364 r2hi = getHiVRegFromLo r2lo 365 code = code1 `appOL` 366 code2 `appOL` 367 toOL [ ADDC rlo r1lo r2lo, 368 ADDE rhi r1hi r2hi ] 369 return (ChildCode64 code rlo) 370 371iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do 372 ChildCode64 code1 r1lo <- iselExpr64 e1 373 ChildCode64 code2 r2lo <- iselExpr64 e2 374 (rlo,rhi) <- getNewRegPairNat II32 375 let 376 r1hi = getHiVRegFromLo r1lo 377 r2hi = getHiVRegFromLo r2lo 378 code = code1 `appOL` 379 code2 `appOL` 380 toOL [ SUBFC rlo r2lo (RIReg r1lo), 381 SUBFE rhi r2hi r1hi ] 382 return (ChildCode64 code rlo) 383 384iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do 385 (expr_reg,expr_code) <- getSomeReg expr 386 (rlo, rhi) <- getNewRegPairNat II32 387 let mov_hi = LI rhi (ImmInt 0) 388 mov_lo = MR rlo expr_reg 389 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi) 390 rlo 391 392iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do 393 (expr_reg,expr_code) <- getSomeReg expr 394 (rlo, rhi) <- getNewRegPairNat II32 395 let mov_hi = SRA II32 rhi expr_reg (RIImm (ImmInt 31)) 396 mov_lo = MR rlo expr_reg 397 return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi) 398 rlo 399iselExpr64 expr 400 = pprPanic "iselExpr64(powerpc)" (pprExpr expr) 401 402 403 404getRegister :: CmmExpr -> NatM Register 405getRegister e = do dflags <- getDynFlags 406 getRegister' dflags e 407 408getRegister' :: DynFlags -> CmmExpr -> NatM Register 409 410getRegister' dflags (CmmReg (CmmGlobal PicBaseReg)) 411 | OSAIX <- platformOS (targetPlatform dflags) = do 412 let code dst = toOL [ LD II32 dst tocAddr ] 413 tocAddr = AddrRegImm toc (ImmLit (text "ghc_toc_table[TC]")) 414 return (Any II32 code) 415 | target32Bit (targetPlatform dflags) = do 416 reg <- getPicBaseNat $ archWordFormat (target32Bit (targetPlatform dflags)) 417 return (Fixed (archWordFormat (target32Bit (targetPlatform dflags))) 418 reg nilOL) 419 | otherwise = return (Fixed II64 toc nilOL) 420 421getRegister' dflags (CmmReg reg) 422 = return (Fixed (cmmTypeFormat (cmmRegType dflags reg)) 423 (getRegisterReg (targetPlatform dflags) reg) nilOL) 424 425getRegister' dflags tree@(CmmRegOff _ _) 426 = getRegister' dflags (mangleIndexTree dflags tree) 427 428 -- for 32-bit architectuers, support some 64 -> 32 bit conversions: 429 -- TO_W_(x), TO_W_(x >> 32) 430 431getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) 432 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) 433 | target32Bit (targetPlatform dflags) = do 434 ChildCode64 code rlo <- iselExpr64 x 435 return $ Fixed II32 (getHiVRegFromLo rlo) code 436 437getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) 438 [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) 439 | target32Bit (targetPlatform dflags) = do 440 ChildCode64 code rlo <- iselExpr64 x 441 return $ Fixed II32 (getHiVRegFromLo rlo) code 442 443getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) [x]) 444 | target32Bit (targetPlatform dflags) = do 445 ChildCode64 code rlo <- iselExpr64 x 446 return $ Fixed II32 rlo code 447 448getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x]) 449 | target32Bit (targetPlatform dflags) = do 450 ChildCode64 code rlo <- iselExpr64 x 451 return $ Fixed II32 rlo code 452 453getRegister' dflags (CmmLoad mem pk) 454 | not (isWord64 pk) = do 455 let platform = targetPlatform dflags 456 Amode addr addr_code <- getAmode D mem 457 let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk) 458 addr_code `snocOL` LD format dst addr 459 return (Any format code) 460 | not (target32Bit (targetPlatform dflags)) = do 461 Amode addr addr_code <- getAmode DS mem 462 let code dst = addr_code `snocOL` LD II64 dst addr 463 return (Any II64 code) 464 465 where format = cmmTypeFormat pk 466 467-- catch simple cases of zero- or sign-extended load 468getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do 469 Amode addr addr_code <- getAmode D mem 470 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr)) 471 472getRegister' _ (CmmMachOp (MO_XX_Conv W8 W32) [CmmLoad mem _]) = do 473 Amode addr addr_code <- getAmode D mem 474 return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr)) 475 476getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do 477 Amode addr addr_code <- getAmode D mem 478 return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr)) 479 480getRegister' _ (CmmMachOp (MO_XX_Conv W8 W64) [CmmLoad mem _]) = do 481 Amode addr addr_code <- getAmode D mem 482 return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr)) 483 484-- Note: there is no Load Byte Arithmetic instruction, so no signed case here 485 486getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do 487 Amode addr addr_code <- getAmode D mem 488 return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr)) 489 490getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do 491 Amode addr addr_code <- getAmode D mem 492 return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr)) 493 494getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do 495 Amode addr addr_code <- getAmode D mem 496 return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr)) 497 498getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do 499 Amode addr addr_code <- getAmode D mem 500 return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr)) 501 502getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do 503 Amode addr addr_code <- getAmode D mem 504 return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr)) 505 506getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do 507 -- lwa is DS-form. See Note [Power instruction format] 508 Amode addr addr_code <- getAmode DS mem 509 return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr)) 510 511getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps 512 = case mop of 513 MO_Not rep -> triv_ucode_int rep NOT 514 515 MO_F_Neg w -> triv_ucode_float w FNEG 516 MO_S_Neg w -> triv_ucode_int w NEG 517 518 MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x 519 MO_FF_Conv W32 W64 -> conversionNop FF64 x 520 521 MO_FS_Conv from to -> coerceFP2Int from to x 522 MO_SF_Conv from to -> coerceInt2FP from to x 523 524 MO_SS_Conv from to 525 | from >= to -> conversionNop (intFormat to) x 526 | otherwise -> triv_ucode_int to (EXTS (intFormat from)) 527 528 MO_UU_Conv from to 529 | from >= to -> conversionNop (intFormat to) x 530 | otherwise -> clearLeft from to 531 532 MO_XX_Conv _ to -> conversionNop (intFormat to) x 533 534 _ -> panic "PPC.CodeGen.getRegister: no match" 535 536 where 537 triv_ucode_int width instr = trivialUCode (intFormat width) instr x 538 triv_ucode_float width instr = trivialUCode (floatFormat width) instr x 539 540 conversionNop new_format expr 541 = do e_code <- getRegister' dflags expr 542 return (swizzleRegisterRep e_code new_format) 543 544 clearLeft from to 545 = do (src1, code1) <- getSomeReg x 546 let arch_fmt = intFormat (wordWidth dflags) 547 arch_bits = widthInBits (wordWidth dflags) 548 size = widthInBits from 549 code dst = code1 `snocOL` 550 CLRLI arch_fmt dst src1 (arch_bits - size) 551 return (Any (intFormat to) code) 552 553getRegister' _ (CmmMachOp mop [x, y]) -- dyadic PrimOps 554 = case mop of 555 MO_F_Eq _ -> condFltReg EQQ x y 556 MO_F_Ne _ -> condFltReg NE x y 557 MO_F_Gt _ -> condFltReg GTT x y 558 MO_F_Ge _ -> condFltReg GE x y 559 MO_F_Lt _ -> condFltReg LTT x y 560 MO_F_Le _ -> condFltReg LE x y 561 562 MO_Eq rep -> condIntReg EQQ rep x y 563 MO_Ne rep -> condIntReg NE rep x y 564 565 MO_S_Gt rep -> condIntReg GTT rep x y 566 MO_S_Ge rep -> condIntReg GE rep x y 567 MO_S_Lt rep -> condIntReg LTT rep x y 568 MO_S_Le rep -> condIntReg LE rep x y 569 570 MO_U_Gt rep -> condIntReg GU rep x y 571 MO_U_Ge rep -> condIntReg GEU rep x y 572 MO_U_Lt rep -> condIntReg LU rep x y 573 MO_U_Le rep -> condIntReg LEU rep x y 574 575 MO_F_Add w -> triv_float w FADD 576 MO_F_Sub w -> triv_float w FSUB 577 MO_F_Mul w -> triv_float w FMUL 578 MO_F_Quot w -> triv_float w FDIV 579 580 -- optimize addition with 32-bit immediate 581 -- (needed for PIC) 582 MO_Add W32 -> 583 case y of 584 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True imm 585 -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep) 586 CmmLit lit 587 -> do 588 (src, srcCode) <- getSomeReg x 589 let imm = litToImm lit 590 code dst = srcCode `appOL` toOL [ 591 ADDIS dst src (HA imm), 592 ADD dst dst (RIImm (LO imm)) 593 ] 594 return (Any II32 code) 595 _ -> trivialCode W32 True ADD x y 596 597 MO_Add rep -> trivialCode rep True ADD x y 598 MO_Sub rep -> 599 case y of 600 CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm) 601 -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep) 602 _ -> case x of 603 CmmLit (CmmInt imm _) 604 | Just _ <- makeImmediate rep True imm 605 -- subfi ('substract from' with immediate) doesn't exist 606 -> trivialCode rep True SUBFC y x 607 _ -> trivialCodeNoImm' (intFormat rep) SUBF y x 608 609 MO_Mul rep -> shiftMulCode rep True MULL x y 610 MO_S_MulMayOflo rep -> do 611 (src1, code1) <- getSomeReg x 612 (src2, code2) <- getSomeReg y 613 let 614 format = intFormat rep 615 code dst = code1 `appOL` code2 616 `appOL` toOL [ MULLO format dst src1 src2 617 , MFOV format dst 618 ] 619 return (Any format code) 620 621 MO_S_Quot rep -> divCode rep True x y 622 MO_U_Quot rep -> divCode rep False x y 623 624 MO_S_Rem rep -> remainder rep True x y 625 MO_U_Rem rep -> remainder rep False x y 626 627 MO_And rep -> case y of 628 (CmmLit (CmmInt imm _)) | imm == -8 || imm == -4 629 -> do 630 (src, srcCode) <- getSomeReg x 631 let clear_mask = if imm == -4 then 2 else 3 632 fmt = intFormat rep 633 code dst = srcCode 634 `appOL` unitOL (CLRRI fmt dst src clear_mask) 635 return (Any fmt code) 636 _ -> trivialCode rep False AND x y 637 MO_Or rep -> trivialCode rep False OR x y 638 MO_Xor rep -> trivialCode rep False XOR x y 639 640 MO_Shl rep -> shiftMulCode rep False SL x y 641 MO_S_Shr rep -> srCode rep True SRA x y 642 MO_U_Shr rep -> srCode rep False SR x y 643 _ -> panic "PPC.CodeGen.getRegister: no match" 644 645 where 646 triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register 647 triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y 648 649 remainder :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register 650 remainder rep sgn x y = do 651 let fmt = intFormat rep 652 tmp <- getNewRegNat fmt 653 code <- remainderCode rep sgn tmp x y 654 return (Any fmt code) 655 656 657getRegister' _ (CmmLit (CmmInt i rep)) 658 | Just imm <- makeImmediate rep True i 659 = let 660 code dst = unitOL (LI dst imm) 661 in 662 return (Any (intFormat rep) code) 663 664getRegister' _ (CmmLit (CmmFloat f frep)) = do 665 lbl <- getNewLabelNat 666 dflags <- getDynFlags 667 dynRef <- cmmMakeDynamicReference dflags DataReference lbl 668 Amode addr addr_code <- getAmode D dynRef 669 let format = floatFormat frep 670 code dst = 671 LDATA (Section ReadOnlyData lbl) 672 (Statics lbl [CmmStaticLit (CmmFloat f frep)]) 673 `consOL` (addr_code `snocOL` LD format dst addr) 674 return (Any format code) 675 676getRegister' dflags (CmmLit lit) 677 | target32Bit (targetPlatform dflags) 678 = let rep = cmmLitType dflags lit 679 imm = litToImm lit 680 code dst = toOL [ 681 LIS dst (HA imm), 682 ADD dst dst (RIImm (LO imm)) 683 ] 684 in return (Any (cmmTypeFormat rep) code) 685 | otherwise 686 = do lbl <- getNewLabelNat 687 dflags <- getDynFlags 688 dynRef <- cmmMakeDynamicReference dflags DataReference lbl 689 Amode addr addr_code <- getAmode D dynRef 690 let rep = cmmLitType dflags lit 691 format = cmmTypeFormat rep 692 code dst = 693 LDATA (Section ReadOnlyData lbl) (Statics lbl [CmmStaticLit lit]) 694 `consOL` (addr_code `snocOL` LD format dst addr) 695 return (Any format code) 696 697getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other) 698 699 -- extend?Rep: wrap integer expression of type `from` 700 -- in a conversion to `to` 701extendSExpr :: Width -> Width -> CmmExpr -> CmmExpr 702extendSExpr from to x = CmmMachOp (MO_SS_Conv from to) [x] 703 704extendUExpr :: Width -> Width -> CmmExpr -> CmmExpr 705extendUExpr from to x = CmmMachOp (MO_UU_Conv from to) [x] 706 707-- ----------------------------------------------------------------------------- 708-- The 'Amode' type: Memory addressing modes passed up the tree. 709 710data Amode 711 = Amode AddrMode InstrBlock 712 713{- 714Now, given a tree (the argument to a CmmLoad) that references memory, 715produce a suitable addressing mode. 716 717A Rule of the Game (tm) for Amodes: use of the addr bit must 718immediately follow use of the code part, since the code part puts 719values in registers which the addr then refers to. So you can't put 720anything in between, lest it overwrite some of those registers. If 721you need to do some other computation between the code part and use of 722the addr bit, first store the effective address from the amode in a 723temporary, then do the other computation, and then use the temporary: 724 725 code 726 LEA amode, tmp 727 ... other computation ... 728 ... (tmp) ... 729-} 730 731{- Note [Power instruction format] 732In some instructions the 16 bit offset must be a multiple of 4, i.e. 733the two least significant bits must be zero. The "Power ISA" specification 734calls these instruction formats "DS-FORM" and the instructions with 735arbitrary 16 bit offsets are "D-FORM". 736 737The Power ISA specification document can be obtained from www.power.org. 738-} 739data InstrForm = D | DS 740 741getAmode :: InstrForm -> CmmExpr -> NatM Amode 742getAmode inf tree@(CmmRegOff _ _) 743 = do dflags <- getDynFlags 744 getAmode inf (mangleIndexTree dflags tree) 745 746getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)]) 747 | Just off <- makeImmediate W32 True (-i) 748 = do 749 (reg, code) <- getSomeReg x 750 return (Amode (AddrRegImm reg off) code) 751 752 753getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)]) 754 | Just off <- makeImmediate W32 True i 755 = do 756 (reg, code) <- getSomeReg x 757 return (Amode (AddrRegImm reg off) code) 758 759getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)]) 760 | Just off <- makeImmediate W64 True (-i) 761 = do 762 (reg, code) <- getSomeReg x 763 return (Amode (AddrRegImm reg off) code) 764 765 766getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)]) 767 | Just off <- makeImmediate W64 True i 768 = do 769 (reg, code) <- getSomeReg x 770 return (Amode (AddrRegImm reg off) code) 771 772getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)]) 773 | Just off <- makeImmediate W64 True (-i) 774 = do 775 (reg, code) <- getSomeReg x 776 (reg', off', code') <- 777 if i `mod` 4 == 0 778 then do return (reg, off, code) 779 else do 780 tmp <- getNewRegNat II64 781 return (tmp, ImmInt 0, 782 code `snocOL` ADD tmp reg (RIImm off)) 783 return (Amode (AddrRegImm reg' off') code') 784 785getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)]) 786 | Just off <- makeImmediate W64 True i 787 = do 788 (reg, code) <- getSomeReg x 789 (reg', off', code') <- 790 if i `mod` 4 == 0 791 then do return (reg, off, code) 792 else do 793 tmp <- getNewRegNat II64 794 return (tmp, ImmInt 0, 795 code `snocOL` ADD tmp reg (RIImm off)) 796 return (Amode (AddrRegImm reg' off') code') 797 798 -- optimize addition with 32-bit immediate 799 -- (needed for PIC) 800getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit]) 801 = do 802 dflags <- getDynFlags 803 (src, srcCode) <- getSomeReg x 804 let imm = litToImm lit 805 case () of 806 _ | OSAIX <- platformOS (targetPlatform dflags) 807 , isCmmLabelType lit -> 808 -- HA16/LO16 relocations on labels not supported on AIX 809 return (Amode (AddrRegImm src imm) srcCode) 810 | otherwise -> do 811 tmp <- getNewRegNat II32 812 let code = srcCode `snocOL` ADDIS tmp src (HA imm) 813 return (Amode (AddrRegImm tmp (LO imm)) code) 814 where 815 isCmmLabelType (CmmLabel {}) = True 816 isCmmLabelType (CmmLabelOff {}) = True 817 isCmmLabelType (CmmLabelDiffOff {}) = True 818 isCmmLabelType _ = False 819 820getAmode _ (CmmLit lit) 821 = do 822 dflags <- getDynFlags 823 case platformArch $ targetPlatform dflags of 824 ArchPPC -> do 825 tmp <- getNewRegNat II32 826 let imm = litToImm lit 827 code = unitOL (LIS tmp (HA imm)) 828 return (Amode (AddrRegImm tmp (LO imm)) code) 829 _ -> do -- TODO: Load from TOC, 830 -- see getRegister' _ (CmmLit lit) 831 tmp <- getNewRegNat II64 832 let imm = litToImm lit 833 code = toOL [ 834 LIS tmp (HIGHESTA imm), 835 OR tmp tmp (RIImm (HIGHERA imm)), 836 SL II64 tmp tmp (RIImm (ImmInt 32)), 837 ORIS tmp tmp (HA imm) 838 ] 839 return (Amode (AddrRegImm tmp (LO imm)) code) 840 841getAmode _ (CmmMachOp (MO_Add W32) [x, y]) 842 = do 843 (regX, codeX) <- getSomeReg x 844 (regY, codeY) <- getSomeReg y 845 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) 846 847getAmode _ (CmmMachOp (MO_Add W64) [x, y]) 848 = do 849 (regX, codeX) <- getSomeReg x 850 (regY, codeY) <- getSomeReg y 851 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) 852 853getAmode _ other 854 = do 855 (reg, code) <- getSomeReg other 856 let 857 off = ImmInt 0 858 return (Amode (AddrRegImm reg off) code) 859 860 861-- The 'CondCode' type: Condition codes passed up the tree. 862data CondCode 863 = CondCode Bool Cond InstrBlock 864 865-- Set up a condition code for a conditional branch. 866 867getCondCode :: CmmExpr -> NatM CondCode 868 869-- almost the same as everywhere else - but we need to 870-- extend small integers to 32 bit or 64 bit first 871 872getCondCode (CmmMachOp mop [x, y]) 873 = do 874 case mop of 875 MO_F_Eq W32 -> condFltCode EQQ x y 876 MO_F_Ne W32 -> condFltCode NE x y 877 MO_F_Gt W32 -> condFltCode GTT x y 878 MO_F_Ge W32 -> condFltCode GE x y 879 MO_F_Lt W32 -> condFltCode LTT x y 880 MO_F_Le W32 -> condFltCode LE x y 881 882 MO_F_Eq W64 -> condFltCode EQQ x y 883 MO_F_Ne W64 -> condFltCode NE x y 884 MO_F_Gt W64 -> condFltCode GTT x y 885 MO_F_Ge W64 -> condFltCode GE x y 886 MO_F_Lt W64 -> condFltCode LTT x y 887 MO_F_Le W64 -> condFltCode LE x y 888 889 MO_Eq rep -> condIntCode EQQ rep x y 890 MO_Ne rep -> condIntCode NE rep x y 891 892 MO_S_Gt rep -> condIntCode GTT rep x y 893 MO_S_Ge rep -> condIntCode GE rep x y 894 MO_S_Lt rep -> condIntCode LTT rep x y 895 MO_S_Le rep -> condIntCode LE rep x y 896 897 MO_U_Gt rep -> condIntCode GU rep x y 898 MO_U_Ge rep -> condIntCode GEU rep x y 899 MO_U_Lt rep -> condIntCode LU rep x y 900 MO_U_Le rep -> condIntCode LEU rep x y 901 902 _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop) 903 904getCondCode _ = panic "getCondCode(2)(powerpc)" 905 906 907-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be 908-- passed back up the tree. 909 910condIntCode :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode 911condIntCode cond width x y = do 912 dflags <- getDynFlags 913 condIntCode' (target32Bit (targetPlatform dflags)) cond width x y 914 915condIntCode' :: Bool -> Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode 916 917-- simple code for 64-bit on 32-bit platforms 918condIntCode' True cond W64 x y 919 | condUnsigned cond 920 = do 921 ChildCode64 code_x x_lo <- iselExpr64 x 922 ChildCode64 code_y y_lo <- iselExpr64 y 923 let x_hi = getHiVRegFromLo x_lo 924 y_hi = getHiVRegFromLo y_lo 925 end_lbl <- getBlockIdNat 926 let code = code_x `appOL` code_y `appOL` toOL 927 [ CMPL II32 x_hi (RIReg y_hi) 928 , BCC NE end_lbl Nothing 929 , CMPL II32 x_lo (RIReg y_lo) 930 , BCC ALWAYS end_lbl Nothing 931 932 , NEWBLOCK end_lbl 933 ] 934 return (CondCode False cond code) 935 | otherwise 936 = do 937 ChildCode64 code_x x_lo <- iselExpr64 x 938 ChildCode64 code_y y_lo <- iselExpr64 y 939 let x_hi = getHiVRegFromLo x_lo 940 y_hi = getHiVRegFromLo y_lo 941 end_lbl <- getBlockIdNat 942 cmp_lo <- getBlockIdNat 943 let code = code_x `appOL` code_y `appOL` toOL 944 [ CMP II32 x_hi (RIReg y_hi) 945 , BCC NE end_lbl Nothing 946 , CMP II32 x_hi (RIImm (ImmInt 0)) 947 , BCC LE cmp_lo Nothing 948 , CMPL II32 x_lo (RIReg y_lo) 949 , BCC ALWAYS end_lbl Nothing 950 , NEWBLOCK cmp_lo 951 , CMPL II32 y_lo (RIReg x_lo) 952 , BCC ALWAYS end_lbl Nothing 953 954 , NEWBLOCK end_lbl 955 ] 956 return (CondCode False cond code) 957 958-- optimize pointer tag checks. Operation andi. sets condition register 959-- so cmpi ..., 0 is redundant. 960condIntCode' _ cond _ (CmmMachOp (MO_And _) [x, CmmLit (CmmInt imm rep)]) 961 (CmmLit (CmmInt 0 _)) 962 | not $ condUnsigned cond, 963 Just src2 <- makeImmediate rep False imm 964 = do 965 (src1, code) <- getSomeReg x 966 let code' = code `snocOL` AND r0 src1 (RIImm src2) 967 return (CondCode False cond code') 968 969condIntCode' _ cond width x (CmmLit (CmmInt y rep)) 970 | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y 971 = do 972 let op_len = max W32 width 973 let extend = extendSExpr width op_len 974 (src1, code) <- getSomeReg (extend x) 975 let format = intFormat op_len 976 code' = code `snocOL` 977 (if condUnsigned cond then CMPL else CMP) format src1 (RIImm src2) 978 return (CondCode False cond code') 979 980condIntCode' _ cond width x y = do 981 let op_len = max W32 width 982 let extend = if condUnsigned cond then extendUExpr width op_len 983 else extendSExpr width op_len 984 (src1, code1) <- getSomeReg (extend x) 985 (src2, code2) <- getSomeReg (extend y) 986 let format = intFormat op_len 987 code' = code1 `appOL` code2 `snocOL` 988 (if condUnsigned cond then CMPL else CMP) format src1 (RIReg src2) 989 return (CondCode False cond code') 990 991condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode 992condFltCode cond x y = do 993 (src1, code1) <- getSomeReg x 994 (src2, code2) <- getSomeReg y 995 let 996 code' = code1 `appOL` code2 `snocOL` FCMP src1 src2 997 code'' = case cond of -- twiddle CR to handle unordered case 998 GE -> code' `snocOL` CRNOR ltbit eqbit gtbit 999 LE -> code' `snocOL` CRNOR gtbit eqbit ltbit 1000 _ -> code' 1001 where 1002 ltbit = 0 ; eqbit = 2 ; gtbit = 1 1003 return (CondCode True cond code'') 1004 1005 1006 1007-- ----------------------------------------------------------------------------- 1008-- Generating assignments 1009 1010-- Assignments are really at the heart of the whole code generation 1011-- business. Almost all top-level nodes of any real importance are 1012-- assignments, which correspond to loads, stores, or register 1013-- transfers. If we're really lucky, some of the register transfers 1014-- will go away, because we can use the destination register to 1015-- complete the code generation for the right hand side. This only 1016-- fails when the right hand side is forced into a fixed register 1017-- (e.g. the result of a call). 1018 1019assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock 1020assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock 1021 1022assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock 1023assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock 1024 1025assignMem_IntCode pk addr src = do 1026 (srcReg, code) <- getSomeReg src 1027 Amode dstAddr addr_code <- case pk of 1028 II64 -> getAmode DS addr 1029 _ -> getAmode D addr 1030 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr 1031 1032-- dst is a reg, but src could be anything 1033assignReg_IntCode _ reg src 1034 = do 1035 dflags <- getDynFlags 1036 let dst = getRegisterReg (targetPlatform dflags) reg 1037 r <- getRegister src 1038 return $ case r of 1039 Any _ code -> code dst 1040 Fixed _ freg fcode -> fcode `snocOL` MR dst freg 1041 1042 1043 1044-- Easy, isn't it? 1045assignMem_FltCode = assignMem_IntCode 1046assignReg_FltCode = assignReg_IntCode 1047 1048 1049 1050genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock 1051 1052genJump (CmmLit (CmmLabel lbl)) regs 1053 = return (unitOL $ JMP lbl regs) 1054 1055genJump tree gregs 1056 = do 1057 dflags <- getDynFlags 1058 genJump' tree (platformToGCP (targetPlatform dflags)) gregs 1059 1060genJump' :: CmmExpr -> GenCCallPlatform -> [Reg] -> NatM InstrBlock 1061 1062genJump' tree (GCP64ELF 1) regs 1063 = do 1064 (target,code) <- getSomeReg tree 1065 return (code 1066 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 0)) 1067 `snocOL` LD II64 toc (AddrRegImm target (ImmInt 8)) 1068 `snocOL` MTCTR r11 1069 `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16)) 1070 `snocOL` BCTR [] Nothing regs) 1071 1072genJump' tree (GCP64ELF 2) regs 1073 = do 1074 (target,code) <- getSomeReg tree 1075 return (code 1076 `snocOL` MR r12 target 1077 `snocOL` MTCTR r12 1078 `snocOL` BCTR [] Nothing regs) 1079 1080genJump' tree _ regs 1081 = do 1082 (target,code) <- getSomeReg tree 1083 return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing regs) 1084 1085-- ----------------------------------------------------------------------------- 1086-- Unconditional branches 1087genBranch :: BlockId -> NatM InstrBlock 1088genBranch = return . toOL . mkJumpInstr 1089 1090 1091-- ----------------------------------------------------------------------------- 1092-- Conditional jumps 1093 1094{- 1095Conditional jumps are always to local labels, so we can use branch 1096instructions. We peek at the arguments to decide what kind of 1097comparison to do. 1098-} 1099 1100 1101genCondJump 1102 :: BlockId -- the branch target 1103 -> CmmExpr -- the condition on which to branch 1104 -> Maybe Bool 1105 -> NatM InstrBlock 1106 1107genCondJump id bool prediction = do 1108 CondCode _ cond code <- getCondCode bool 1109 return (code `snocOL` BCC cond id prediction) 1110 1111 1112 1113-- ----------------------------------------------------------------------------- 1114-- Generating C calls 1115 1116-- Now the biggest nightmare---calls. Most of the nastiness is buried in 1117-- @get_arg@, which moves the arguments to the correct registers/stack 1118-- locations. Apart from that, the code is easy. 1119 1120genCCall :: ForeignTarget -- function to call 1121 -> [CmmFormal] -- where to put the result 1122 -> [CmmActual] -- arguments (of mixed type) 1123 -> NatM InstrBlock 1124genCCall (PrimTarget MO_ReadBarrier) _ _ 1125 = return $ unitOL LWSYNC 1126genCCall (PrimTarget MO_WriteBarrier) _ _ 1127 = return $ unitOL LWSYNC 1128 1129genCCall (PrimTarget MO_Touch) _ _ 1130 = return $ nilOL 1131 1132genCCall (PrimTarget (MO_Prefetch_Data _)) _ _ 1133 = return $ nilOL 1134 1135genCCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] 1136 = do dflags <- getDynFlags 1137 let platform = targetPlatform dflags 1138 fmt = intFormat width 1139 reg_dst = getRegisterReg platform (CmmLocal dst) 1140 (instr, n_code) <- case amop of 1141 AMO_Add -> getSomeRegOrImm ADD True reg_dst 1142 AMO_Sub -> case n of 1143 CmmLit (CmmInt i _) 1144 | Just imm <- makeImmediate width True (-i) 1145 -> return (ADD reg_dst reg_dst (RIImm imm), nilOL) 1146 _ 1147 -> do 1148 (n_reg, n_code) <- getSomeReg n 1149 return (SUBF reg_dst n_reg reg_dst, n_code) 1150 AMO_And -> getSomeRegOrImm AND False reg_dst 1151 AMO_Nand -> do (n_reg, n_code) <- getSomeReg n 1152 return (NAND reg_dst reg_dst n_reg, n_code) 1153 AMO_Or -> getSomeRegOrImm OR False reg_dst 1154 AMO_Xor -> getSomeRegOrImm XOR False reg_dst 1155 Amode addr_reg addr_code <- getAmodeIndex addr 1156 lbl_retry <- getBlockIdNat 1157 return $ n_code `appOL` addr_code 1158 `appOL` toOL [ HWSYNC 1159 , BCC ALWAYS lbl_retry Nothing 1160 1161 , NEWBLOCK lbl_retry 1162 , LDR fmt reg_dst addr_reg 1163 , instr 1164 , STC fmt reg_dst addr_reg 1165 , BCC NE lbl_retry (Just False) 1166 , ISYNC 1167 ] 1168 where 1169 getAmodeIndex (CmmMachOp (MO_Add _) [x, y]) 1170 = do 1171 (regX, codeX) <- getSomeReg x 1172 (regY, codeY) <- getSomeReg y 1173 return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) 1174 getAmodeIndex other 1175 = do 1176 (reg, code) <- getSomeReg other 1177 return (Amode (AddrRegReg r0 reg) code) -- NB: r0 is 0 here! 1178 getSomeRegOrImm op sign dst 1179 = case n of 1180 CmmLit (CmmInt i _) | Just imm <- makeImmediate width sign i 1181 -> return (op dst dst (RIImm imm), nilOL) 1182 _ 1183 -> do 1184 (n_reg, n_code) <- getSomeReg n 1185 return (op dst dst (RIReg n_reg), n_code) 1186 1187genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr] 1188 = do dflags <- getDynFlags 1189 let platform = targetPlatform dflags 1190 fmt = intFormat width 1191 reg_dst = getRegisterReg platform (CmmLocal dst) 1192 form = if widthInBits width == 64 then DS else D 1193 Amode addr_reg addr_code <- getAmode form addr 1194 lbl_end <- getBlockIdNat 1195 return $ addr_code `appOL` toOL [ HWSYNC 1196 , LD fmt reg_dst addr_reg 1197 , CMP fmt reg_dst (RIReg reg_dst) 1198 , BCC NE lbl_end (Just False) 1199 , BCC ALWAYS lbl_end Nothing 1200 -- See Note [Seemingly useless cmp and bne] 1201 , NEWBLOCK lbl_end 1202 , ISYNC 1203 ] 1204 1205-- Note [Seemingly useless cmp and bne] 1206-- In Power ISA, Book II, Section 4.4.1, Instruction Synchronize Instruction 1207-- the second paragraph says that isync may complete before storage accesses 1208-- "associated" with a preceding instruction have been performed. The cmp 1209-- operation and the following bne introduce a data and control dependency 1210-- on the load instruction (See also Power ISA, Book II, Appendix B.2.3, Safe 1211-- Fetch). 1212-- This is also what gcc does. 1213 1214 1215genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do 1216 code <- assignMem_IntCode (intFormat width) addr val 1217 return $ unitOL(HWSYNC) `appOL` code 1218 1219genCCall (PrimTarget (MO_Clz width)) [dst] [src] 1220 = do dflags <- getDynFlags 1221 let platform = targetPlatform dflags 1222 reg_dst = getRegisterReg platform (CmmLocal dst) 1223 if target32Bit platform && width == W64 1224 then do 1225 ChildCode64 code vr_lo <- iselExpr64 src 1226 lbl1 <- getBlockIdNat 1227 lbl2 <- getBlockIdNat 1228 lbl3 <- getBlockIdNat 1229 let vr_hi = getHiVRegFromLo vr_lo 1230 cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0)) 1231 , BCC NE lbl2 Nothing 1232 , BCC ALWAYS lbl1 Nothing 1233 1234 , NEWBLOCK lbl1 1235 , CNTLZ II32 reg_dst vr_lo 1236 , ADD reg_dst reg_dst (RIImm (ImmInt 32)) 1237 , BCC ALWAYS lbl3 Nothing 1238 1239 , NEWBLOCK lbl2 1240 , CNTLZ II32 reg_dst vr_hi 1241 , BCC ALWAYS lbl3 Nothing 1242 1243 , NEWBLOCK lbl3 1244 ] 1245 return $ code `appOL` cntlz 1246 else do 1247 let format = if width == W64 then II64 else II32 1248 (s_reg, s_code) <- getSomeReg src 1249 (pre, reg , post) <- 1250 case width of 1251 W64 -> return (nilOL, s_reg, nilOL) 1252 W32 -> return (nilOL, s_reg, nilOL) 1253 W16 -> do 1254 reg_tmp <- getNewRegNat format 1255 return 1256 ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 65535)) 1257 , reg_tmp 1258 , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-16))) 1259 ) 1260 W8 -> do 1261 reg_tmp <- getNewRegNat format 1262 return 1263 ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 255)) 1264 , reg_tmp 1265 , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-24))) 1266 ) 1267 _ -> panic "genCall: Clz wrong format" 1268 let cntlz = unitOL (CNTLZ format reg_dst reg) 1269 return $ s_code `appOL` pre `appOL` cntlz `appOL` post 1270 1271genCCall (PrimTarget (MO_Ctz width)) [dst] [src] 1272 = do dflags <- getDynFlags 1273 let platform = targetPlatform dflags 1274 reg_dst = getRegisterReg platform (CmmLocal dst) 1275 if target32Bit platform && width == W64 1276 then do 1277 let format = II32 1278 ChildCode64 code vr_lo <- iselExpr64 src 1279 lbl1 <- getBlockIdNat 1280 lbl2 <- getBlockIdNat 1281 lbl3 <- getBlockIdNat 1282 x' <- getNewRegNat format 1283 x'' <- getNewRegNat format 1284 r' <- getNewRegNat format 1285 cnttzlo <- cnttz format reg_dst vr_lo 1286 let vr_hi = getHiVRegFromLo vr_lo 1287 cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0)) 1288 , BCC NE lbl2 Nothing 1289 , BCC ALWAYS lbl1 Nothing 1290 1291 , NEWBLOCK lbl1 1292 , ADD x' vr_hi (RIImm (ImmInt (-1))) 1293 , ANDC x'' x' vr_hi 1294 , CNTLZ format r' x'' 1295 -- 32 + (32 - clz(x'')) 1296 , SUBFC reg_dst r' (RIImm (ImmInt 64)) 1297 , BCC ALWAYS lbl3 Nothing 1298 1299 , NEWBLOCK lbl2 1300 ] 1301 `appOL` cnttzlo `appOL` 1302 toOL [ BCC ALWAYS lbl3 Nothing 1303 1304 , NEWBLOCK lbl3 1305 ] 1306 return $ code `appOL` cnttz64 1307 else do 1308 let format = if width == W64 then II64 else II32 1309 (s_reg, s_code) <- getSomeReg src 1310 (reg_ctz, pre_code) <- 1311 case width of 1312 W64 -> return (s_reg, nilOL) 1313 W32 -> return (s_reg, nilOL) 1314 W16 -> do 1315 reg_tmp <- getNewRegNat format 1316 return (reg_tmp, unitOL $ ORIS reg_tmp s_reg (ImmInt 1)) 1317 W8 -> do 1318 reg_tmp <- getNewRegNat format 1319 return (reg_tmp, unitOL $ OR reg_tmp s_reg (RIImm (ImmInt 256))) 1320 _ -> panic "genCall: Ctz wrong format" 1321 ctz_code <- cnttz format reg_dst reg_ctz 1322 return $ s_code `appOL` pre_code `appOL` ctz_code 1323 where 1324 -- cnttz(x) = sizeof(x) - cntlz(~x & (x - 1)) 1325 -- see Henry S. Warren, Hacker's Delight, p 107 1326 cnttz format dst src = do 1327 let format_bits = 8 * formatInBytes format 1328 x' <- getNewRegNat format 1329 x'' <- getNewRegNat format 1330 r' <- getNewRegNat format 1331 return $ toOL [ ADD x' src (RIImm (ImmInt (-1))) 1332 , ANDC x'' x' src 1333 , CNTLZ format r' x'' 1334 , SUBFC dst r' (RIImm (ImmInt (format_bits))) 1335 ] 1336 1337genCCall target dest_regs argsAndHints 1338 = do dflags <- getDynFlags 1339 let platform = targetPlatform dflags 1340 case target of 1341 PrimTarget (MO_S_QuotRem width) -> divOp1 platform True width 1342 dest_regs argsAndHints 1343 PrimTarget (MO_U_QuotRem width) -> divOp1 platform False width 1344 dest_regs argsAndHints 1345 PrimTarget (MO_U_QuotRem2 width) -> divOp2 platform width dest_regs 1346 argsAndHints 1347 PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs 1348 argsAndHints 1349 PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints 1350 PrimTarget (MO_AddWordC _) -> addcOp platform dest_regs argsAndHints 1351 PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints 1352 PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width 1353 dest_regs argsAndHints 1354 PrimTarget (MO_SubIntC width) -> addSubCOp SUBFO platform width 1355 dest_regs argsAndHints 1356 PrimTarget MO_F64_Fabs -> fabs platform dest_regs argsAndHints 1357 PrimTarget MO_F32_Fabs -> fabs platform dest_regs argsAndHints 1358 _ -> genCCall' dflags (platformToGCP platform) 1359 target dest_regs argsAndHints 1360 where divOp1 platform signed width [res_q, res_r] [arg_x, arg_y] 1361 = do let reg_q = getRegisterReg platform (CmmLocal res_q) 1362 reg_r = getRegisterReg platform (CmmLocal res_r) 1363 remainderCode width signed reg_q arg_x arg_y 1364 <*> pure reg_r 1365 1366 divOp1 _ _ _ _ _ 1367 = panic "genCCall: Wrong number of arguments for divOp1" 1368 divOp2 platform width [res_q, res_r] 1369 [arg_x_high, arg_x_low, arg_y] 1370 = do let reg_q = getRegisterReg platform (CmmLocal res_q) 1371 reg_r = getRegisterReg platform (CmmLocal res_r) 1372 fmt = intFormat width 1373 half = 4 * (formatInBytes fmt) 1374 (xh_reg, xh_code) <- getSomeReg arg_x_high 1375 (xl_reg, xl_code) <- getSomeReg arg_x_low 1376 (y_reg, y_code) <- getSomeReg arg_y 1377 s <- getNewRegNat fmt 1378 b <- getNewRegNat fmt 1379 v <- getNewRegNat fmt 1380 vn1 <- getNewRegNat fmt 1381 vn0 <- getNewRegNat fmt 1382 un32 <- getNewRegNat fmt 1383 tmp <- getNewRegNat fmt 1384 un10 <- getNewRegNat fmt 1385 un1 <- getNewRegNat fmt 1386 un0 <- getNewRegNat fmt 1387 q1 <- getNewRegNat fmt 1388 rhat <- getNewRegNat fmt 1389 tmp1 <- getNewRegNat fmt 1390 q0 <- getNewRegNat fmt 1391 un21 <- getNewRegNat fmt 1392 again1 <- getBlockIdNat 1393 no1 <- getBlockIdNat 1394 then1 <- getBlockIdNat 1395 endif1 <- getBlockIdNat 1396 again2 <- getBlockIdNat 1397 no2 <- getBlockIdNat 1398 then2 <- getBlockIdNat 1399 endif2 <- getBlockIdNat 1400 return $ y_code `appOL` xl_code `appOL` xh_code `appOL` 1401 -- see Hacker's Delight p 196 Figure 9-3 1402 toOL [ -- b = 2 ^ (bits_in_word / 2) 1403 LI b (ImmInt 1) 1404 , SL fmt b b (RIImm (ImmInt half)) 1405 -- s = clz(y) 1406 , CNTLZ fmt s y_reg 1407 -- v = y << s 1408 , SL fmt v y_reg (RIReg s) 1409 -- vn1 = upper half of v 1410 , SR fmt vn1 v (RIImm (ImmInt half)) 1411 -- vn0 = lower half of v 1412 , CLRLI fmt vn0 v half 1413 -- un32 = (u1 << s) 1414 -- | (u0 >> (bits_in_word - s)) 1415 , SL fmt un32 xh_reg (RIReg s) 1416 , SUBFC tmp s 1417 (RIImm (ImmInt (8 * formatInBytes fmt))) 1418 , SR fmt tmp xl_reg (RIReg tmp) 1419 , OR un32 un32 (RIReg tmp) 1420 -- un10 = u0 << s 1421 , SL fmt un10 xl_reg (RIReg s) 1422 -- un1 = upper half of un10 1423 , SR fmt un1 un10 (RIImm (ImmInt half)) 1424 -- un0 = lower half of un10 1425 , CLRLI fmt un0 un10 half 1426 -- q1 = un32/vn1 1427 , DIV fmt False q1 un32 vn1 1428 -- rhat = un32 - q1*vn1 1429 , MULL fmt tmp q1 (RIReg vn1) 1430 , SUBF rhat tmp un32 1431 , BCC ALWAYS again1 Nothing 1432 1433 , NEWBLOCK again1 1434 -- if (q1 >= b || q1*vn0 > b*rhat + un1) 1435 , CMPL fmt q1 (RIReg b) 1436 , BCC GEU then1 Nothing 1437 , BCC ALWAYS no1 Nothing 1438 1439 , NEWBLOCK no1 1440 , MULL fmt tmp q1 (RIReg vn0) 1441 , SL fmt tmp1 rhat (RIImm (ImmInt half)) 1442 , ADD tmp1 tmp1 (RIReg un1) 1443 , CMPL fmt tmp (RIReg tmp1) 1444 , BCC LEU endif1 Nothing 1445 , BCC ALWAYS then1 Nothing 1446 1447 , NEWBLOCK then1 1448 -- q1 = q1 - 1 1449 , ADD q1 q1 (RIImm (ImmInt (-1))) 1450 -- rhat = rhat + vn1 1451 , ADD rhat rhat (RIReg vn1) 1452 -- if (rhat < b) goto again1 1453 , CMPL fmt rhat (RIReg b) 1454 , BCC LTT again1 Nothing 1455 , BCC ALWAYS endif1 Nothing 1456 1457 , NEWBLOCK endif1 1458 -- un21 = un32*b + un1 - q1*v 1459 , SL fmt un21 un32 (RIImm (ImmInt half)) 1460 , ADD un21 un21 (RIReg un1) 1461 , MULL fmt tmp q1 (RIReg v) 1462 , SUBF un21 tmp un21 1463 -- compute second quotient digit 1464 -- q0 = un21/vn1 1465 , DIV fmt False q0 un21 vn1 1466 -- rhat = un21- q0*vn1 1467 , MULL fmt tmp q0 (RIReg vn1) 1468 , SUBF rhat tmp un21 1469 , BCC ALWAYS again2 Nothing 1470 1471 , NEWBLOCK again2 1472 -- if (q0>b || q0*vn0 > b*rhat + un0) 1473 , CMPL fmt q0 (RIReg b) 1474 , BCC GEU then2 Nothing 1475 , BCC ALWAYS no2 Nothing 1476 1477 , NEWBLOCK no2 1478 , MULL fmt tmp q0 (RIReg vn0) 1479 , SL fmt tmp1 rhat (RIImm (ImmInt half)) 1480 , ADD tmp1 tmp1 (RIReg un0) 1481 , CMPL fmt tmp (RIReg tmp1) 1482 , BCC LEU endif2 Nothing 1483 , BCC ALWAYS then2 Nothing 1484 1485 , NEWBLOCK then2 1486 -- q0 = q0 - 1 1487 , ADD q0 q0 (RIImm (ImmInt (-1))) 1488 -- rhat = rhat + vn1 1489 , ADD rhat rhat (RIReg vn1) 1490 -- if (rhat<b) goto again2 1491 , CMPL fmt rhat (RIReg b) 1492 , BCC LTT again2 Nothing 1493 , BCC ALWAYS endif2 Nothing 1494 1495 , NEWBLOCK endif2 1496 -- compute remainder 1497 -- r = (un21*b + un0 - q0*v) >> s 1498 , SL fmt reg_r un21 (RIImm (ImmInt half)) 1499 , ADD reg_r reg_r (RIReg un0) 1500 , MULL fmt tmp q0 (RIReg v) 1501 , SUBF reg_r tmp reg_r 1502 , SR fmt reg_r reg_r (RIReg s) 1503 -- compute quotient 1504 -- q = q1*b + q0 1505 , SL fmt reg_q q1 (RIImm (ImmInt half)) 1506 , ADD reg_q reg_q (RIReg q0) 1507 ] 1508 divOp2 _ _ _ _ 1509 = panic "genCCall: Wrong number of arguments for divOp2" 1510 multOp2 platform width [res_h, res_l] [arg_x, arg_y] 1511 = do let reg_h = getRegisterReg platform (CmmLocal res_h) 1512 reg_l = getRegisterReg platform (CmmLocal res_l) 1513 fmt = intFormat width 1514 (x_reg, x_code) <- getSomeReg arg_x 1515 (y_reg, y_code) <- getSomeReg arg_y 1516 return $ y_code `appOL` x_code 1517 `appOL` toOL [ MULL fmt reg_l x_reg (RIReg y_reg) 1518 , MULHU fmt reg_h x_reg y_reg 1519 ] 1520 multOp2 _ _ _ _ 1521 = panic "genCall: Wrong number of arguments for multOp2" 1522 add2Op platform [res_h, res_l] [arg_x, arg_y] 1523 = do let reg_h = getRegisterReg platform (CmmLocal res_h) 1524 reg_l = getRegisterReg platform (CmmLocal res_l) 1525 (x_reg, x_code) <- getSomeReg arg_x 1526 (y_reg, y_code) <- getSomeReg arg_y 1527 return $ y_code `appOL` x_code 1528 `appOL` toOL [ LI reg_h (ImmInt 0) 1529 , ADDC reg_l x_reg y_reg 1530 , ADDZE reg_h reg_h 1531 ] 1532 add2Op _ _ _ 1533 = panic "genCCall: Wrong number of arguments/results for add2" 1534 1535 addcOp platform [res_r, res_c] [arg_x, arg_y] 1536 = add2Op platform [res_c {-hi-}, res_r {-lo-}] [arg_x, arg_y] 1537 addcOp _ _ _ 1538 = panic "genCCall: Wrong number of arguments/results for addc" 1539 1540 -- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1, 1541 -- which is 0 for borrow and 1 otherwise. We need 1 and 0 1542 -- so xor with 1. 1543 subcOp platform [res_r, res_c] [arg_x, arg_y] 1544 = do let reg_r = getRegisterReg platform (CmmLocal res_r) 1545 reg_c = getRegisterReg platform (CmmLocal res_c) 1546 (x_reg, x_code) <- getSomeReg arg_x 1547 (y_reg, y_code) <- getSomeReg arg_y 1548 return $ y_code `appOL` x_code 1549 `appOL` toOL [ LI reg_c (ImmInt 0) 1550 , SUBFC reg_r y_reg (RIReg x_reg) 1551 , ADDZE reg_c reg_c 1552 , XOR reg_c reg_c (RIImm (ImmInt 1)) 1553 ] 1554 subcOp _ _ _ 1555 = panic "genCCall: Wrong number of arguments/results for subc" 1556 addSubCOp instr platform width [res_r, res_c] [arg_x, arg_y] 1557 = do let reg_r = getRegisterReg platform (CmmLocal res_r) 1558 reg_c = getRegisterReg platform (CmmLocal res_c) 1559 (x_reg, x_code) <- getSomeReg arg_x 1560 (y_reg, y_code) <- getSomeReg arg_y 1561 return $ y_code `appOL` x_code 1562 `appOL` toOL [ instr reg_r y_reg x_reg, 1563 -- SUBFO argument order reversed! 1564 MFOV (intFormat width) reg_c 1565 ] 1566 addSubCOp _ _ _ _ _ 1567 = panic "genCall: Wrong number of arguments/results for addC" 1568 fabs platform [res] [arg] 1569 = do let res_r = getRegisterReg platform (CmmLocal res) 1570 (arg_reg, arg_code) <- getSomeReg arg 1571 return $ arg_code `snocOL` FABS res_r arg_reg 1572 fabs _ _ _ 1573 = panic "genCall: Wrong number of arguments/results for fabs" 1574 1575-- TODO: replace 'Int' by an enum such as 'PPC_64ABI' 1576data GenCCallPlatform = GCP32ELF | GCP64ELF !Int | GCPAIX 1577 1578platformToGCP :: Platform -> GenCCallPlatform 1579platformToGCP platform 1580 = case platformOS platform of 1581 OSAIX -> GCPAIX 1582 _ -> case platformArch platform of 1583 ArchPPC -> GCP32ELF 1584 ArchPPC_64 ELF_V1 -> GCP64ELF 1 1585 ArchPPC_64 ELF_V2 -> GCP64ELF 2 1586 _ -> panic "platformToGCP: Not PowerPC" 1587 1588 1589genCCall' 1590 :: DynFlags 1591 -> GenCCallPlatform 1592 -> ForeignTarget -- function to call 1593 -> [CmmFormal] -- where to put the result 1594 -> [CmmActual] -- arguments (of mixed type) 1595 -> NatM InstrBlock 1596 1597{- 1598 PowerPC Linux uses the System V Release 4 Calling Convention 1599 for PowerPC. It is described in the 1600 "System V Application Binary Interface PowerPC Processor Supplement". 1601 1602 PowerPC 64 Linux uses the System V Release 4 Calling Convention for 1603 64-bit PowerPC. It is specified in 1604 "64-bit PowerPC ELF Application Binary Interface Supplement 1.9" 1605 (PPC64 ELF v1.9). 1606 1607 PowerPC 64 Linux in little endian mode uses the "Power Architecture 64-Bit 1608 ELF V2 ABI Specification -- OpenPOWER ABI for Linux Supplement" 1609 (PPC64 ELF v2). 1610 1611 AIX follows the "PowerOpen ABI: Application Binary Interface Big-Endian 1612 32-Bit Hardware Implementation" 1613 1614 All four conventions are similar: 1615 Parameters may be passed in general-purpose registers starting at r3, in 1616 floating point registers starting at f1, or on the stack. 1617 1618 But there are substantial differences: 1619 * The number of registers used for parameter passing and the exact set of 1620 nonvolatile registers differs (see MachRegs.hs). 1621 * On AIX and 64-bit ELF, stack space is always reserved for parameters, 1622 even if they are passed in registers. The called routine may choose to 1623 save parameters from registers to the corresponding space on the stack. 1624 * On AIX and 64-bit ELF, a corresponding amount of GPRs is skipped when 1625 a floating point parameter is passed in an FPR. 1626 * SysV insists on either passing I64 arguments on the stack, or in two GPRs, 1627 starting with an odd-numbered GPR. It may skip a GPR to achieve this. 1628 AIX just treats an I64 likt two separate I32s (high word first). 1629 * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only 1630 4-byte aligned like everything else on AIX. 1631 * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on 1632 PowerPC Linux does not agree, so neither do we. 1633 1634 According to all conventions, the parameter area should be part of the 1635 caller's stack frame, allocated in the caller's prologue code (large enough 1636 to hold the parameter lists for all called routines). The NCG already 1637 uses the stack for register spilling, leaving 64 bytes free at the top. 1638 If we need a larger parameter area than that, we increase the size 1639 of the stack frame just before ccalling. 1640-} 1641 1642 1643genCCall' dflags gcp target dest_regs args 1644 = do 1645 (finalStack,passArgumentsCode,usedRegs) <- passArguments 1646 (zip3 args argReps argHints) 1647 allArgRegs 1648 (allFPArgRegs platform) 1649 initialStackOffset 1650 nilOL [] 1651 1652 (labelOrExpr, reduceToFF32) <- case target of 1653 ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do 1654 uses_pic_base_implicitly 1655 return (Left lbl, False) 1656 ForeignTarget expr _ -> do 1657 uses_pic_base_implicitly 1658 return (Right expr, False) 1659 PrimTarget mop -> outOfLineMachOp mop 1660 1661 let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode 1662 codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32 1663 1664 case labelOrExpr of 1665 Left lbl -> do -- the linker does all the work for us 1666 return ( codeBefore 1667 `snocOL` BL lbl usedRegs 1668 `appOL` maybeNOP -- some ABI require a NOP after BL 1669 `appOL` codeAfter) 1670 Right dyn -> do -- implement call through function pointer 1671 (dynReg, dynCode) <- getSomeReg dyn 1672 case gcp of 1673 GCP64ELF 1 -> return ( dynCode 1674 `appOL` codeBefore 1675 `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 40)) 1676 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0)) 1677 `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8)) 1678 `snocOL` MTCTR r11 1679 `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16)) 1680 `snocOL` BCTRL usedRegs 1681 `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 40)) 1682 `appOL` codeAfter) 1683 GCP64ELF 2 -> return ( dynCode 1684 `appOL` codeBefore 1685 `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 24)) 1686 `snocOL` MR r12 dynReg 1687 `snocOL` MTCTR r12 1688 `snocOL` BCTRL usedRegs 1689 `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 24)) 1690 `appOL` codeAfter) 1691 GCPAIX -> return ( dynCode 1692 -- AIX/XCOFF follows the PowerOPEN ABI 1693 -- which is quite similiar to LinuxPPC64/ELFv1 1694 `appOL` codeBefore 1695 `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 20)) 1696 `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 0)) 1697 `snocOL` LD II32 toc (AddrRegImm dynReg (ImmInt 4)) 1698 `snocOL` MTCTR r11 1699 `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 8)) 1700 `snocOL` BCTRL usedRegs 1701 `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 20)) 1702 `appOL` codeAfter) 1703 _ -> return ( dynCode 1704 `snocOL` MTCTR dynReg 1705 `appOL` codeBefore 1706 `snocOL` BCTRL usedRegs 1707 `appOL` codeAfter) 1708 where 1709 platform = targetPlatform dflags 1710 1711 uses_pic_base_implicitly = do 1712 -- See Note [implicit register in PPC PIC code] 1713 -- on why we claim to use PIC register here 1714 when (positionIndependent dflags && target32Bit platform) $ do 1715 _ <- getPicBaseNat $ archWordFormat True 1716 return () 1717 1718 initialStackOffset = case gcp of 1719 GCPAIX -> 24 1720 GCP32ELF -> 8 1721 GCP64ELF 1 -> 48 1722 GCP64ELF 2 -> 32 1723 _ -> panic "genCall': unknown calling convention" 1724 -- size of linkage area + size of arguments, in bytes 1725 stackDelta finalStack = case gcp of 1726 GCPAIX -> 1727 roundTo 16 $ (24 +) $ max 32 $ sum $ 1728 map (widthInBytes . typeWidth) argReps 1729 GCP32ELF -> roundTo 16 finalStack 1730 GCP64ELF 1 -> 1731 roundTo 16 $ (48 +) $ max 64 $ sum $ 1732 map (roundTo 8 . widthInBytes . typeWidth) 1733 argReps 1734 GCP64ELF 2 -> 1735 roundTo 16 $ (32 +) $ max 64 $ sum $ 1736 map (roundTo 8 . widthInBytes . typeWidth) 1737 argReps 1738 _ -> panic "genCall': unknown calling conv." 1739 1740 argReps = map (cmmExprType dflags) args 1741 (argHints, _) = foreignTargetHints target 1742 1743 roundTo a x | x `mod` a == 0 = x 1744 | otherwise = x + a - (x `mod` a) 1745 1746 spFormat = if target32Bit platform then II32 else II64 1747 1748 -- TODO: Do not create a new stack frame if delta is too large. 1749 move_sp_down finalStack 1750 | delta > stackFrameHeaderSize dflags = 1751 toOL [STU spFormat sp (AddrRegImm sp (ImmInt (-delta))), 1752 DELTA (-delta)] 1753 | otherwise = nilOL 1754 where delta = stackDelta finalStack 1755 move_sp_up finalStack 1756 | delta > stackFrameHeaderSize dflags = 1757 toOL [ADD sp sp (RIImm (ImmInt delta)), 1758 DELTA 0] 1759 | otherwise = nilOL 1760 where delta = stackDelta finalStack 1761 1762 -- A NOP instruction is required after a call (bl instruction) 1763 -- on AIX and 64-Bit Linux. 1764 -- If the call is to a function with a different TOC (r2) the 1765 -- link editor replaces the NOP instruction with a load of the TOC 1766 -- from the stack to restore the TOC. 1767 maybeNOP = case gcp of 1768 GCP32ELF -> nilOL 1769 -- See Section 3.9.4 of OpenPower ABI 1770 GCPAIX -> unitOL NOP 1771 -- See Section 3.5.11 of PPC64 ELF v1.9 1772 GCP64ELF 1 -> unitOL NOP 1773 -- See Section 2.3.6 of PPC64 ELF v2 1774 GCP64ELF 2 -> unitOL NOP 1775 _ -> panic "maybeNOP: Unknown PowerPC 64-bit ABI" 1776 1777 passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed) 1778 passArguments ((arg,arg_ty,_):args) gprs fprs stackOffset 1779 accumCode accumUsed | isWord64 arg_ty 1780 && target32Bit (targetPlatform dflags) = 1781 do 1782 ChildCode64 code vr_lo <- iselExpr64 arg 1783 let vr_hi = getHiVRegFromLo vr_lo 1784 1785 case gcp of 1786 GCPAIX -> 1787 do let storeWord vr (gpr:_) _ = MR gpr vr 1788 storeWord vr [] offset 1789 = ST II32 vr (AddrRegImm sp (ImmInt offset)) 1790 passArguments args 1791 (drop 2 gprs) 1792 fprs 1793 (stackOffset+8) 1794 (accumCode `appOL` code 1795 `snocOL` storeWord vr_hi gprs stackOffset 1796 `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4)) 1797 ((take 2 gprs) ++ accumUsed) 1798 GCP32ELF -> 1799 do let stackOffset' = roundTo 8 stackOffset 1800 stackCode = accumCode `appOL` code 1801 `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset')) 1802 `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4))) 1803 regCode hireg loreg = 1804 accumCode `appOL` code 1805 `snocOL` MR hireg vr_hi 1806 `snocOL` MR loreg vr_lo 1807 1808 case gprs of 1809 hireg : loreg : regs | even (length gprs) -> 1810 passArguments args regs fprs stackOffset 1811 (regCode hireg loreg) (hireg : loreg : accumUsed) 1812 _skipped : hireg : loreg : regs -> 1813 passArguments args regs fprs stackOffset 1814 (regCode hireg loreg) (hireg : loreg : accumUsed) 1815 _ -> -- only one or no regs left 1816 passArguments args [] fprs (stackOffset'+8) 1817 stackCode accumUsed 1818 GCP64ELF _ -> panic "passArguments: 32 bit code" 1819 1820 passArguments ((arg,rep,hint):args) gprs fprs stackOffset accumCode accumUsed 1821 | reg : _ <- regs = do 1822 register <- getRegister arg_pro 1823 let code = case register of 1824 Fixed _ freg fcode -> fcode `snocOL` MR reg freg 1825 Any _ acode -> acode reg 1826 stackOffsetRes = case gcp of 1827 -- The PowerOpen ABI requires that we 1828 -- reserve stack slots for register 1829 -- parameters 1830 GCPAIX -> stackOffset + stackBytes 1831 -- ... the SysV ABI 32-bit doesn't. 1832 GCP32ELF -> stackOffset 1833 -- ... but SysV ABI 64-bit does. 1834 GCP64ELF _ -> stackOffset + stackBytes 1835 passArguments args 1836 (drop nGprs gprs) 1837 (drop nFprs fprs) 1838 stackOffsetRes 1839 (accumCode `appOL` code) 1840 (reg : accumUsed) 1841 | otherwise = do 1842 (vr, code) <- getSomeReg arg_pro 1843 passArguments args 1844 (drop nGprs gprs) 1845 (drop nFprs fprs) 1846 (stackOffset' + stackBytes) 1847 (accumCode `appOL` code 1848 `snocOL` ST format_pro vr stackSlot) 1849 accumUsed 1850 where 1851 arg_pro 1852 | isBitsType rep = CmmMachOp (conv_op (typeWidth rep) (wordWidth dflags)) [arg] 1853 | otherwise = arg 1854 format_pro 1855 | isBitsType rep = intFormat (wordWidth dflags) 1856 | otherwise = cmmTypeFormat rep 1857 conv_op = case hint of 1858 SignedHint -> MO_SS_Conv 1859 _ -> MO_UU_Conv 1860 1861 stackOffset' = case gcp of 1862 GCPAIX -> 1863 -- The 32bit PowerOPEN ABI is happy with 1864 -- 32bit-alignment ... 1865 stackOffset 1866 GCP32ELF 1867 -- ... the SysV ABI requires 8-byte 1868 -- alignment for doubles. 1869 | isFloatType rep && typeWidth rep == W64 -> 1870 roundTo 8 stackOffset 1871 | otherwise -> 1872 stackOffset 1873 GCP64ELF _ -> 1874 -- Everything on the stack is mapped to 1875 -- 8-byte aligned doublewords 1876 stackOffset 1877 stackOffset'' 1878 | isFloatType rep && typeWidth rep == W32 = 1879 case gcp of 1880 -- The ELF v1 ABI Section 3.2.3 requires: 1881 -- "Single precision floating point values 1882 -- are mapped to the second word in a single 1883 -- doubleword" 1884 GCP64ELF 1 -> stackOffset' + 4 1885 _ -> stackOffset' 1886 | otherwise = stackOffset' 1887 1888 stackSlot = AddrRegImm sp (ImmInt stackOffset'') 1889 (nGprs, nFprs, stackBytes, regs) 1890 = case gcp of 1891 GCPAIX -> 1892 case cmmTypeFormat rep of 1893 II8 -> (1, 0, 4, gprs) 1894 II16 -> (1, 0, 4, gprs) 1895 II32 -> (1, 0, 4, gprs) 1896 -- The PowerOpen ABI requires that we skip a 1897 -- corresponding number of GPRs when we use 1898 -- the FPRs. 1899 -- 1900 -- E.g. for a `double` two GPRs are skipped, 1901 -- whereas for a `float` one GPR is skipped 1902 -- when parameters are assigned to 1903 -- registers. 1904 -- 1905 -- The PowerOpen ABI specification can be found at 1906 -- ftp://www.sourceware.org/pub/binutils/ppc-docs/ppc-poweropen/ 1907 FF32 -> (1, 1, 4, fprs) 1908 FF64 -> (2, 1, 8, fprs) 1909 II64 -> panic "genCCall' passArguments II64" 1910 1911 GCP32ELF -> 1912 case cmmTypeFormat rep of 1913 II8 -> (1, 0, 4, gprs) 1914 II16 -> (1, 0, 4, gprs) 1915 II32 -> (1, 0, 4, gprs) 1916 -- ... the SysV ABI doesn't. 1917 FF32 -> (0, 1, 4, fprs) 1918 FF64 -> (0, 1, 8, fprs) 1919 II64 -> panic "genCCall' passArguments II64" 1920 GCP64ELF _ -> 1921 case cmmTypeFormat rep of 1922 II8 -> (1, 0, 8, gprs) 1923 II16 -> (1, 0, 8, gprs) 1924 II32 -> (1, 0, 8, gprs) 1925 II64 -> (1, 0, 8, gprs) 1926 -- The ELFv1 ABI requires that we skip a 1927 -- corresponding number of GPRs when we use 1928 -- the FPRs. 1929 FF32 -> (1, 1, 8, fprs) 1930 FF64 -> (1, 1, 8, fprs) 1931 1932 moveResult reduceToFF32 = 1933 case dest_regs of 1934 [] -> nilOL 1935 [dest] 1936 | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1) 1937 | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1) 1938 | isWord64 rep && target32Bit (targetPlatform dflags) 1939 -> toOL [MR (getHiVRegFromLo r_dest) r3, 1940 MR r_dest r4] 1941 | otherwise -> unitOL (MR r_dest r3) 1942 where rep = cmmRegType dflags (CmmLocal dest) 1943 r_dest = getRegisterReg platform (CmmLocal dest) 1944 _ -> panic "genCCall' moveResult: Bad dest_regs" 1945 1946 outOfLineMachOp mop = 1947 do 1948 dflags <- getDynFlags 1949 mopExpr <- cmmMakeDynamicReference dflags CallReference $ 1950 mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction 1951 let mopLabelOrExpr = case mopExpr of 1952 CmmLit (CmmLabel lbl) -> Left lbl 1953 _ -> Right mopExpr 1954 return (mopLabelOrExpr, reduce) 1955 where 1956 (functionName, reduce) = case mop of 1957 MO_F32_Exp -> (fsLit "exp", True) 1958 MO_F32_ExpM1 -> (fsLit "expm1", True) 1959 MO_F32_Log -> (fsLit "log", True) 1960 MO_F32_Log1P -> (fsLit "log1p", True) 1961 MO_F32_Sqrt -> (fsLit "sqrt", True) 1962 MO_F32_Fabs -> unsupported 1963 1964 MO_F32_Sin -> (fsLit "sin", True) 1965 MO_F32_Cos -> (fsLit "cos", True) 1966 MO_F32_Tan -> (fsLit "tan", True) 1967 1968 MO_F32_Asin -> (fsLit "asin", True) 1969 MO_F32_Acos -> (fsLit "acos", True) 1970 MO_F32_Atan -> (fsLit "atan", True) 1971 1972 MO_F32_Sinh -> (fsLit "sinh", True) 1973 MO_F32_Cosh -> (fsLit "cosh", True) 1974 MO_F32_Tanh -> (fsLit "tanh", True) 1975 MO_F32_Pwr -> (fsLit "pow", True) 1976 1977 MO_F32_Asinh -> (fsLit "asinh", True) 1978 MO_F32_Acosh -> (fsLit "acosh", True) 1979 MO_F32_Atanh -> (fsLit "atanh", True) 1980 1981 MO_F64_Exp -> (fsLit "exp", False) 1982 MO_F64_ExpM1 -> (fsLit "expm1", False) 1983 MO_F64_Log -> (fsLit "log", False) 1984 MO_F64_Log1P -> (fsLit "log1p", False) 1985 MO_F64_Sqrt -> (fsLit "sqrt", False) 1986 MO_F64_Fabs -> unsupported 1987 1988 MO_F64_Sin -> (fsLit "sin", False) 1989 MO_F64_Cos -> (fsLit "cos", False) 1990 MO_F64_Tan -> (fsLit "tan", False) 1991 1992 MO_F64_Asin -> (fsLit "asin", False) 1993 MO_F64_Acos -> (fsLit "acos", False) 1994 MO_F64_Atan -> (fsLit "atan", False) 1995 1996 MO_F64_Sinh -> (fsLit "sinh", False) 1997 MO_F64_Cosh -> (fsLit "cosh", False) 1998 MO_F64_Tanh -> (fsLit "tanh", False) 1999 MO_F64_Pwr -> (fsLit "pow", False) 2000 2001 MO_F64_Asinh -> (fsLit "asinh", False) 2002 MO_F64_Acosh -> (fsLit "acosh", False) 2003 MO_F64_Atanh -> (fsLit "atanh", False) 2004 2005 MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False) 2006 2007 MO_Memcpy _ -> (fsLit "memcpy", False) 2008 MO_Memset _ -> (fsLit "memset", False) 2009 MO_Memmove _ -> (fsLit "memmove", False) 2010 MO_Memcmp _ -> (fsLit "memcmp", False) 2011 2012 MO_BSwap w -> (fsLit $ bSwapLabel w, False) 2013 MO_BRev w -> (fsLit $ bRevLabel w, False) 2014 MO_PopCnt w -> (fsLit $ popCntLabel w, False) 2015 MO_Pdep w -> (fsLit $ pdepLabel w, False) 2016 MO_Pext w -> (fsLit $ pextLabel w, False) 2017 MO_Clz _ -> unsupported 2018 MO_Ctz _ -> unsupported 2019 MO_AtomicRMW {} -> unsupported 2020 MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False) 2021 MO_AtomicRead _ -> unsupported 2022 MO_AtomicWrite _ -> unsupported 2023 2024 MO_S_QuotRem {} -> unsupported 2025 MO_U_QuotRem {} -> unsupported 2026 MO_U_QuotRem2 {} -> unsupported 2027 MO_Add2 {} -> unsupported 2028 MO_AddWordC {} -> unsupported 2029 MO_SubWordC {} -> unsupported 2030 MO_AddIntC {} -> unsupported 2031 MO_SubIntC {} -> unsupported 2032 MO_U_Mul2 {} -> unsupported 2033 MO_ReadBarrier -> unsupported 2034 MO_WriteBarrier -> unsupported 2035 MO_Touch -> unsupported 2036 MO_Prefetch_Data _ -> unsupported 2037 unsupported = panic ("outOfLineCmmOp: " ++ show mop 2038 ++ " not supported") 2039 2040-- ----------------------------------------------------------------------------- 2041-- Generating a table-branch 2042 2043genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock 2044genSwitch dflags expr targets 2045 | OSAIX <- platformOS (targetPlatform dflags) 2046 = do 2047 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset) 2048 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags 2049 sha = if target32Bit $ targetPlatform dflags then 2 else 3 2050 tmp <- getNewRegNat fmt 2051 lbl <- getNewLabelNat 2052 dynRef <- cmmMakeDynamicReference dflags DataReference lbl 2053 (tableReg,t_code) <- getSomeReg $ dynRef 2054 let code = e_code `appOL` t_code `appOL` toOL [ 2055 SL fmt tmp reg (RIImm (ImmInt sha)), 2056 LD fmt tmp (AddrRegReg tableReg tmp), 2057 MTCTR tmp, 2058 BCTR ids (Just lbl) [] 2059 ] 2060 return code 2061 2062 | (positionIndependent dflags) || (not $ target32Bit $ targetPlatform dflags) 2063 = do 2064 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset) 2065 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags 2066 sha = if target32Bit $ targetPlatform dflags then 2 else 3 2067 tmp <- getNewRegNat fmt 2068 lbl <- getNewLabelNat 2069 dynRef <- cmmMakeDynamicReference dflags DataReference lbl 2070 (tableReg,t_code) <- getSomeReg $ dynRef 2071 let code = e_code `appOL` t_code `appOL` toOL [ 2072 SL fmt tmp reg (RIImm (ImmInt sha)), 2073 LD fmt tmp (AddrRegReg tableReg tmp), 2074 ADD tmp tmp (RIReg tableReg), 2075 MTCTR tmp, 2076 BCTR ids (Just lbl) [] 2077 ] 2078 return code 2079 | otherwise 2080 = do 2081 (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset) 2082 let fmt = archWordFormat $ target32Bit $ targetPlatform dflags 2083 sha = if target32Bit $ targetPlatform dflags then 2 else 3 2084 tmp <- getNewRegNat fmt 2085 lbl <- getNewLabelNat 2086 let code = e_code `appOL` toOL [ 2087 SL fmt tmp reg (RIImm (ImmInt sha)), 2088 ADDIS tmp tmp (HA (ImmCLbl lbl)), 2089 LD fmt tmp (AddrRegImm tmp (LO (ImmCLbl lbl))), 2090 MTCTR tmp, 2091 BCTR ids (Just lbl) [] 2092 ] 2093 return code 2094 where (offset, ids) = switchTargetsToTable targets 2095 2096generateJumpTableForInstr :: DynFlags -> Instr 2097 -> Maybe (NatCmmDecl CmmStatics Instr) 2098generateJumpTableForInstr dflags (BCTR ids (Just lbl) _) = 2099 let jumpTable 2100 | (positionIndependent dflags) 2101 || (not $ target32Bit $ targetPlatform dflags) 2102 = map jumpTableEntryRel ids 2103 | otherwise = map (jumpTableEntry dflags) ids 2104 where jumpTableEntryRel Nothing 2105 = CmmStaticLit (CmmInt 0 (wordWidth dflags)) 2106 jumpTableEntryRel (Just blockid) 2107 = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0 2108 (wordWidth dflags)) 2109 where blockLabel = blockLbl blockid 2110 in Just (CmmData (Section ReadOnlyData lbl) (Statics lbl jumpTable)) 2111generateJumpTableForInstr _ _ = Nothing 2112 2113-- ----------------------------------------------------------------------------- 2114-- 'condIntReg' and 'condFltReg': condition codes into registers 2115 2116-- Turn those condition codes into integers now (when they appear on 2117-- the right hand side of an assignment). 2118 2119 2120 2121condReg :: NatM CondCode -> NatM Register 2122condReg getCond = do 2123 CondCode _ cond cond_code <- getCond 2124 dflags <- getDynFlags 2125 let 2126 code dst = cond_code 2127 `appOL` negate_code 2128 `appOL` toOL [ 2129 MFCR dst, 2130 RLWINM dst dst (bit + 1) 31 31 2131 ] 2132 2133 negate_code | do_negate = unitOL (CRNOR bit bit bit) 2134 | otherwise = nilOL 2135 2136 (bit, do_negate) = case cond of 2137 LTT -> (0, False) 2138 LE -> (1, True) 2139 EQQ -> (2, False) 2140 GE -> (0, True) 2141 GTT -> (1, False) 2142 2143 NE -> (2, True) 2144 2145 LU -> (0, False) 2146 LEU -> (1, True) 2147 GEU -> (0, True) 2148 GU -> (1, False) 2149 _ -> panic "PPC.CodeGen.codeReg: no match" 2150 2151 format = archWordFormat $ target32Bit $ targetPlatform dflags 2152 return (Any format code) 2153 2154condIntReg :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM Register 2155condIntReg cond width x y = condReg (condIntCode cond width x y) 2156condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register 2157condFltReg cond x y = condReg (condFltCode cond x y) 2158 2159 2160 2161-- ----------------------------------------------------------------------------- 2162-- 'trivial*Code': deal with trivial instructions 2163 2164-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode', 2165-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions. 2166-- Only look for constants on the right hand side, because that's 2167-- where the generic optimizer will have put them. 2168 2169-- Similarly, for unary instructions, we don't have to worry about 2170-- matching an StInt as the argument, because genericOpt will already 2171-- have handled the constant-folding. 2172 2173 2174 2175{- 2176Wolfgang's PowerPC version of The Rules: 2177 2178A slightly modified version of The Rules to take advantage of the fact 2179that PowerPC instructions work on all registers and don't implicitly 2180clobber any fixed registers. 2181 2182* The only expression for which getRegister returns Fixed is (CmmReg reg). 2183 2184* If getRegister returns Any, then the code it generates may modify only: 2185 (a) fresh temporaries 2186 (b) the destination register 2187 It may *not* modify global registers, unless the global 2188 register happens to be the destination register. 2189 It may not clobber any other registers. In fact, only ccalls clobber any 2190 fixed registers. 2191 Also, it may not modify the counter register (used by genCCall). 2192 2193 Corollary: If a getRegister for a subexpression returns Fixed, you need 2194 not move it to a fresh temporary before evaluating the next subexpression. 2195 The Fixed register won't be modified. 2196 Therefore, we don't need a counterpart for the x86's getStableReg on PPC. 2197 2198* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on 2199 the value of the destination register. 2200-} 2201 2202trivialCode 2203 :: Width 2204 -> Bool 2205 -> (Reg -> Reg -> RI -> Instr) 2206 -> CmmExpr 2207 -> CmmExpr 2208 -> NatM Register 2209 2210trivialCode rep signed instr x (CmmLit (CmmInt y _)) 2211 | Just imm <- makeImmediate rep signed y 2212 = do 2213 (src1, code1) <- getSomeReg x 2214 let code dst = code1 `snocOL` instr dst src1 (RIImm imm) 2215 return (Any (intFormat rep) code) 2216 2217trivialCode rep _ instr x y = do 2218 (src1, code1) <- getSomeReg x 2219 (src2, code2) <- getSomeReg y 2220 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2) 2221 return (Any (intFormat rep) code) 2222 2223shiftMulCode 2224 :: Width 2225 -> Bool 2226 -> (Format-> Reg -> Reg -> RI -> Instr) 2227 -> CmmExpr 2228 -> CmmExpr 2229 -> NatM Register 2230shiftMulCode width sign instr x (CmmLit (CmmInt y _)) 2231 | Just imm <- makeImmediate width sign y 2232 = do 2233 (src1, code1) <- getSomeReg x 2234 let format = intFormat width 2235 let ins_fmt = intFormat (max W32 width) 2236 let code dst = code1 `snocOL` instr ins_fmt dst src1 (RIImm imm) 2237 return (Any format code) 2238 2239shiftMulCode width _ instr x y = do 2240 (src1, code1) <- getSomeReg x 2241 (src2, code2) <- getSomeReg y 2242 let format = intFormat width 2243 let ins_fmt = intFormat (max W32 width) 2244 let code dst = code1 `appOL` code2 2245 `snocOL` instr ins_fmt dst src1 (RIReg src2) 2246 return (Any format code) 2247 2248trivialCodeNoImm' :: Format -> (Reg -> Reg -> Reg -> Instr) 2249 -> CmmExpr -> CmmExpr -> NatM Register 2250trivialCodeNoImm' format instr x y = do 2251 (src1, code1) <- getSomeReg x 2252 (src2, code2) <- getSomeReg y 2253 let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2 2254 return (Any format code) 2255 2256trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr) 2257 -> CmmExpr -> CmmExpr -> NatM Register 2258trivialCodeNoImm format instr x y 2259 = trivialCodeNoImm' format (instr format) x y 2260 2261srCode :: Width -> Bool -> (Format-> Reg -> Reg -> RI -> Instr) 2262 -> CmmExpr -> CmmExpr -> NatM Register 2263srCode width sgn instr x (CmmLit (CmmInt y _)) 2264 | Just imm <- makeImmediate width sgn y 2265 = do 2266 let op_len = max W32 width 2267 extend = if sgn then extendSExpr else extendUExpr 2268 (src1, code1) <- getSomeReg (extend width op_len x) 2269 let code dst = code1 `snocOL` 2270 instr (intFormat op_len) dst src1 (RIImm imm) 2271 return (Any (intFormat width) code) 2272 2273srCode width sgn instr x y = do 2274 let op_len = max W32 width 2275 extend = if sgn then extendSExpr else extendUExpr 2276 (src1, code1) <- getSomeReg (extend width op_len x) 2277 (src2, code2) <- getSomeReg (extendUExpr width op_len y) 2278 -- Note: Shift amount `y` is unsigned 2279 let code dst = code1 `appOL` code2 `snocOL` 2280 instr (intFormat op_len) dst src1 (RIReg src2) 2281 return (Any (intFormat width) code) 2282 2283divCode :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register 2284divCode width sgn x y = do 2285 let op_len = max W32 width 2286 extend = if sgn then extendSExpr else extendUExpr 2287 (src1, code1) <- getSomeReg (extend width op_len x) 2288 (src2, code2) <- getSomeReg (extend width op_len y) 2289 let code dst = code1 `appOL` code2 `snocOL` 2290 DIV (intFormat op_len) sgn dst src1 src2 2291 return (Any (intFormat width) code) 2292 2293 2294trivialUCode :: Format 2295 -> (Reg -> Reg -> Instr) 2296 -> CmmExpr 2297 -> NatM Register 2298trivialUCode rep instr x = do 2299 (src, code) <- getSomeReg x 2300 let code' dst = code `snocOL` instr dst src 2301 return (Any rep code') 2302 2303-- There is no "remainder" instruction on the PPC, so we have to do 2304-- it the hard way. 2305-- The "sgn" parameter is the signedness for the division instruction 2306 2307remainderCode :: Width -> Bool -> Reg -> CmmExpr -> CmmExpr 2308 -> NatM (Reg -> InstrBlock) 2309remainderCode rep sgn reg_q arg_x arg_y = do 2310 let op_len = max W32 rep 2311 fmt = intFormat op_len 2312 extend = if sgn then extendSExpr else extendUExpr 2313 (x_reg, x_code) <- getSomeReg (extend rep op_len arg_x) 2314 (y_reg, y_code) <- getSomeReg (extend rep op_len arg_y) 2315 return $ \reg_r -> y_code `appOL` x_code 2316 `appOL` toOL [ DIV fmt sgn reg_q x_reg y_reg 2317 , MULL fmt reg_r reg_q (RIReg y_reg) 2318 , SUBF reg_r reg_r x_reg 2319 ] 2320 2321 2322coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register 2323coerceInt2FP fromRep toRep x = do 2324 dflags <- getDynFlags 2325 let arch = platformArch $ targetPlatform dflags 2326 coerceInt2FP' arch fromRep toRep x 2327 2328coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register 2329coerceInt2FP' ArchPPC fromRep toRep x = do 2330 (src, code) <- getSomeReg x 2331 lbl <- getNewLabelNat 2332 itmp <- getNewRegNat II32 2333 ftmp <- getNewRegNat FF64 2334 dflags <- getDynFlags 2335 dynRef <- cmmMakeDynamicReference dflags DataReference lbl 2336 Amode addr addr_code <- getAmode D dynRef 2337 let 2338 code' dst = code `appOL` maybe_exts `appOL` toOL [ 2339 LDATA (Section ReadOnlyData lbl) $ Statics lbl 2340 [CmmStaticLit (CmmInt 0x43300000 W32), 2341 CmmStaticLit (CmmInt 0x80000000 W32)], 2342 XORIS itmp src (ImmInt 0x8000), 2343 ST II32 itmp (spRel dflags 3), 2344 LIS itmp (ImmInt 0x4330), 2345 ST II32 itmp (spRel dflags 2), 2346 LD FF64 ftmp (spRel dflags 2) 2347 ] `appOL` addr_code `appOL` toOL [ 2348 LD FF64 dst addr, 2349 FSUB FF64 dst ftmp dst 2350 ] `appOL` maybe_frsp dst 2351 2352 maybe_exts = case fromRep of 2353 W8 -> unitOL $ EXTS II8 src src 2354 W16 -> unitOL $ EXTS II16 src src 2355 W32 -> nilOL 2356 _ -> panic "PPC.CodeGen.coerceInt2FP: no match" 2357 2358 maybe_frsp dst 2359 = case toRep of 2360 W32 -> unitOL $ FRSP dst dst 2361 W64 -> nilOL 2362 _ -> panic "PPC.CodeGen.coerceInt2FP: no match" 2363 2364 return (Any (floatFormat toRep) code') 2365 2366-- On an ELF v1 Linux we use the compiler doubleword in the stack frame 2367-- this is the TOC pointer doubleword on ELF v2 Linux. The latter is only 2368-- set right before a call and restored right after return from the call. 2369-- So it is fine. 2370coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do 2371 (src, code) <- getSomeReg x 2372 dflags <- getDynFlags 2373 let 2374 code' dst = code `appOL` maybe_exts `appOL` toOL [ 2375 ST II64 src (spRel dflags 3), 2376 LD FF64 dst (spRel dflags 3), 2377 FCFID dst dst 2378 ] `appOL` maybe_frsp dst 2379 2380 maybe_exts = case fromRep of 2381 W8 -> unitOL $ EXTS II8 src src 2382 W16 -> unitOL $ EXTS II16 src src 2383 W32 -> unitOL $ EXTS II32 src src 2384 W64 -> nilOL 2385 _ -> panic "PPC.CodeGen.coerceInt2FP: no match" 2386 2387 maybe_frsp dst 2388 = case toRep of 2389 W32 -> unitOL $ FRSP dst dst 2390 W64 -> nilOL 2391 _ -> panic "PPC.CodeGen.coerceInt2FP: no match" 2392 2393 return (Any (floatFormat toRep) code') 2394 2395coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch" 2396 2397 2398coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register 2399coerceFP2Int fromRep toRep x = do 2400 dflags <- getDynFlags 2401 let arch = platformArch $ targetPlatform dflags 2402 coerceFP2Int' arch fromRep toRep x 2403 2404coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register 2405coerceFP2Int' ArchPPC _ toRep x = do 2406 dflags <- getDynFlags 2407 -- the reps don't really matter: F*->FF64 and II32->I* are no-ops 2408 (src, code) <- getSomeReg x 2409 tmp <- getNewRegNat FF64 2410 let 2411 code' dst = code `appOL` toOL [ 2412 -- convert to int in FP reg 2413 FCTIWZ tmp src, 2414 -- store value (64bit) from FP to stack 2415 ST FF64 tmp (spRel dflags 2), 2416 -- read low word of value (high word is undefined) 2417 LD II32 dst (spRel dflags 3)] 2418 return (Any (intFormat toRep) code') 2419 2420coerceFP2Int' (ArchPPC_64 _) _ toRep x = do 2421 dflags <- getDynFlags 2422 -- the reps don't really matter: F*->FF64 and II64->I* are no-ops 2423 (src, code) <- getSomeReg x 2424 tmp <- getNewRegNat FF64 2425 let 2426 code' dst = code `appOL` toOL [ 2427 -- convert to int in FP reg 2428 FCTIDZ tmp src, 2429 -- store value (64bit) from FP to compiler word on stack 2430 ST FF64 tmp (spRel dflags 3), 2431 LD II64 dst (spRel dflags 3)] 2432 return (Any (intFormat toRep) code') 2433 2434coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch" 2435 2436-- Note [.LCTOC1 in PPC PIC code] 2437-- The .LCTOC1 label is defined to point 32768 bytes into the GOT table 2438-- to make the most of the PPC's 16-bit displacements. 2439-- As 16-bit signed offset is used (usually via addi/lwz instructions) 2440-- first element will have '-32768' offset against .LCTOC1. 2441 2442-- Note [implicit register in PPC PIC code] 2443-- PPC generates calls by labels in assembly 2444-- in form of: 2445-- bl puts+32768@plt 2446-- in this form it's not seen directly (by GHC NCG) 2447-- that r30 (PicBaseReg) is used, 2448-- but r30 is a required part of PLT code setup: 2449-- puts+32768@plt: 2450-- lwz r11,-30484(r30) ; offset in .LCTOC1 2451-- mtctr r11 2452-- bctr 2453