1 //===-- PPCInstrInfo.h - PowerPC Instruction Information --------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the PowerPC implementation of the TargetInstrInfo class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H 14 #define LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H 15 16 #include "PPCRegisterInfo.h" 17 #include "llvm/CodeGen/TargetInstrInfo.h" 18 19 #define GET_INSTRINFO_HEADER 20 #include "PPCGenInstrInfo.inc" 21 22 namespace llvm { 23 24 /// PPCII - This namespace holds all of the PowerPC target-specific 25 /// per-instruction flags. These must match the corresponding definitions in 26 /// PPC.td and PPCInstrFormats.td. 27 namespace PPCII { 28 enum { 29 // PPC970 Instruction Flags. These flags describe the characteristics of the 30 // PowerPC 970 (aka G5) dispatch groups and how they are formed out of 31 // raw machine instructions. 32 33 /// PPC970_First - This instruction starts a new dispatch group, so it will 34 /// always be the first one in the group. 35 PPC970_First = 0x1, 36 37 /// PPC970_Single - This instruction starts a new dispatch group and 38 /// terminates it, so it will be the sole instruction in the group. 39 PPC970_Single = 0x2, 40 41 /// PPC970_Cracked - This instruction is cracked into two pieces, requiring 42 /// two dispatch pipes to be available to issue. 43 PPC970_Cracked = 0x4, 44 45 /// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that 46 /// an instruction is issued to. 47 PPC970_Shift = 3, 48 PPC970_Mask = 0x07 << PPC970_Shift 49 }; 50 enum PPC970_Unit { 51 /// These are the various PPC970 execution unit pipelines. Each instruction 52 /// is one of these. 53 PPC970_Pseudo = 0 << PPC970_Shift, // Pseudo instruction 54 PPC970_FXU = 1 << PPC970_Shift, // Fixed Point (aka Integer/ALU) Unit 55 PPC970_LSU = 2 << PPC970_Shift, // Load Store Unit 56 PPC970_FPU = 3 << PPC970_Shift, // Floating Point Unit 57 PPC970_CRU = 4 << PPC970_Shift, // Control Register Unit 58 PPC970_VALU = 5 << PPC970_Shift, // Vector ALU 59 PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit 60 PPC970_BRU = 7 << PPC970_Shift // Branch Unit 61 }; 62 63 enum { 64 /// Shift count to bypass PPC970 flags 65 NewDef_Shift = 6, 66 67 /// This instruction is an X-Form memory operation. 68 XFormMemOp = 0x1 << NewDef_Shift, 69 /// This instruction is prefixed. 70 Prefixed = 0x1 << (NewDef_Shift + 1), 71 /// This instruction produced a sign extended result. 72 SExt32To64 = 0x1 << (NewDef_Shift + 2), 73 /// This instruction produced a zero extended result. 74 ZExt32To64 = 0x1 << (NewDef_Shift + 3) 75 }; 76 } // end namespace PPCII 77 78 // Instructions that have an immediate form might be convertible to that 79 // form if the correct input is a result of a load immediate. In order to 80 // know whether the transformation is special, we might need to know some 81 // of the details of the two forms. 82 struct ImmInstrInfo { 83 // Is the immediate field in the immediate form signed or unsigned? 84 uint64_t SignedImm : 1; 85 // Does the immediate need to be a multiple of some value? 86 uint64_t ImmMustBeMultipleOf : 5; 87 // Is R0/X0 treated specially by the original r+r instruction? 88 // If so, in which operand? 89 uint64_t ZeroIsSpecialOrig : 3; 90 // Is R0/X0 treated specially by the new r+i instruction? 91 // If so, in which operand? 92 uint64_t ZeroIsSpecialNew : 3; 93 // Is the operation commutative? 94 uint64_t IsCommutative : 1; 95 // The operand number to check for add-immediate def. 96 uint64_t OpNoForForwarding : 3; 97 // The operand number for the immediate. 98 uint64_t ImmOpNo : 3; 99 // The opcode of the new instruction. 100 uint64_t ImmOpcode : 16; 101 // The size of the immediate. 102 uint64_t ImmWidth : 5; 103 // The immediate should be truncated to N bits. 104 uint64_t TruncateImmTo : 5; 105 // Is the instruction summing the operand 106 uint64_t IsSummingOperands : 1; 107 }; 108 109 // Information required to convert an instruction to just a materialized 110 // immediate. 111 struct LoadImmediateInfo { 112 unsigned Imm : 16; 113 unsigned Is64Bit : 1; 114 unsigned SetCR : 1; 115 }; 116 117 // Index into the OpcodesForSpill array. 118 enum SpillOpcodeKey { 119 SOK_Int4Spill, 120 SOK_Int8Spill, 121 SOK_Float8Spill, 122 SOK_Float4Spill, 123 SOK_CRSpill, 124 SOK_CRBitSpill, 125 SOK_VRVectorSpill, 126 SOK_VSXVectorSpill, 127 SOK_VectorFloat8Spill, 128 SOK_VectorFloat4Spill, 129 SOK_SpillToVSR, 130 SOK_PairedVecSpill, 131 SOK_AccumulatorSpill, 132 SOK_UAccumulatorSpill, 133 SOK_WAccumulatorSpill, 134 SOK_SPESpill, 135 SOK_PairedG8Spill, 136 SOK_LastOpcodeSpill // This must be last on the enum. 137 }; 138 139 // Define list of load and store spill opcodes. 140 #define NoInstr PPC::INSTRUCTION_LIST_END 141 #define Pwr8LoadOpcodes \ 142 { \ 143 PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \ 144 PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX, \ 145 PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, NoInstr, PPC::EVLDD, \ 146 PPC::RESTORE_QUADWORD \ 147 } 148 149 #define Pwr9LoadOpcodes \ 150 { \ 151 PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \ 152 PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \ 153 PPC::DFLOADf32, PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, \ 154 NoInstr, NoInstr, PPC::RESTORE_QUADWORD \ 155 } 156 157 #define Pwr10LoadOpcodes \ 158 { \ 159 PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \ 160 PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \ 161 PPC::DFLOADf32, PPC::SPILLTOVSR_LD, PPC::LXVP, PPC::RESTORE_ACC, \ 162 PPC::RESTORE_UACC, NoInstr, NoInstr, PPC::RESTORE_QUADWORD \ 163 } 164 165 #define FutureLoadOpcodes \ 166 { \ 167 PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \ 168 PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \ 169 PPC::DFLOADf32, PPC::SPILLTOVSR_LD, PPC::LXVP, PPC::RESTORE_ACC, \ 170 PPC::RESTORE_UACC, PPC::RESTORE_WACC, NoInstr, PPC::RESTORE_QUADWORD \ 171 } 172 173 #define Pwr8StoreOpcodes \ 174 { \ 175 PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \ 176 PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, \ 177 PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, NoInstr, PPC::EVSTDD, \ 178 PPC::SPILL_QUADWORD \ 179 } 180 181 #define Pwr9StoreOpcodes \ 182 { \ 183 PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \ 184 PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \ 185 PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, NoInstr, NoInstr, \ 186 PPC::SPILL_QUADWORD \ 187 } 188 189 #define Pwr10StoreOpcodes \ 190 { \ 191 PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \ 192 PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \ 193 PPC::SPILLTOVSR_ST, PPC::STXVP, PPC::SPILL_ACC, PPC::SPILL_UACC, \ 194 NoInstr, NoInstr, PPC::SPILL_QUADWORD \ 195 } 196 197 #define FutureStoreOpcodes \ 198 { \ 199 PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \ 200 PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \ 201 PPC::SPILLTOVSR_ST, PPC::STXVP, PPC::SPILL_ACC, PPC::SPILL_UACC, \ 202 PPC::SPILL_WACC, NoInstr, PPC::SPILL_QUADWORD \ 203 } 204 205 // Initialize arrays for load and store spill opcodes on supported subtargets. 206 #define StoreOpcodesForSpill \ 207 { Pwr8StoreOpcodes, Pwr9StoreOpcodes, Pwr10StoreOpcodes, FutureStoreOpcodes } 208 #define LoadOpcodesForSpill \ 209 { Pwr8LoadOpcodes, Pwr9LoadOpcodes, Pwr10LoadOpcodes, FutureLoadOpcodes } 210 211 class PPCSubtarget; 212 class PPCInstrInfo : public PPCGenInstrInfo { 213 PPCSubtarget &Subtarget; 214 const PPCRegisterInfo RI; 215 const unsigned StoreSpillOpcodesArray[4][SOK_LastOpcodeSpill] = 216 StoreOpcodesForSpill; 217 const unsigned LoadSpillOpcodesArray[4][SOK_LastOpcodeSpill] = 218 LoadOpcodesForSpill; 219 220 void StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, 221 int FrameIdx, const TargetRegisterClass *RC, 222 SmallVectorImpl<MachineInstr *> &NewMIs) const; 223 void LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL, 224 unsigned DestReg, int FrameIdx, 225 const TargetRegisterClass *RC, 226 SmallVectorImpl<MachineInstr *> &NewMIs) const; 227 228 // Replace the instruction with single LI if possible. \p DefMI must be LI or 229 // LI8. 230 bool simplifyToLI(MachineInstr &MI, MachineInstr &DefMI, 231 unsigned OpNoForForwarding, MachineInstr **KilledDef) const; 232 // If the inst is imm-form and its register operand is produced by a ADDI, put 233 // the imm into the inst directly and remove the ADDI if possible. 234 bool transformToNewImmFormFedByAdd(MachineInstr &MI, MachineInstr &DefMI, 235 unsigned OpNoForForwarding) const; 236 // If the inst is x-form and has imm-form and one of its operand is produced 237 // by a LI, put the imm into the inst directly and remove the LI if possible. 238 bool transformToImmFormFedByLI(MachineInstr &MI, const ImmInstrInfo &III, 239 unsigned ConstantOpNo, 240 MachineInstr &DefMI) const; 241 // If the inst is x-form and has imm-form and one of its operand is produced 242 // by an add-immediate, try to transform it when possible. 243 bool transformToImmFormFedByAdd(MachineInstr &MI, const ImmInstrInfo &III, 244 unsigned ConstantOpNo, MachineInstr &DefMI, 245 bool KillDefMI) const; 246 // Try to find that, if the instruction 'MI' contains any operand that 247 // could be forwarded from some inst that feeds it. If yes, return the 248 // Def of that operand. And OpNoForForwarding is the operand index in 249 // the 'MI' for that 'Def'. If we see another use of this Def between 250 // the Def and the MI, SeenIntermediateUse becomes 'true'. 251 MachineInstr *getForwardingDefMI(MachineInstr &MI, 252 unsigned &OpNoForForwarding, 253 bool &SeenIntermediateUse) const; 254 255 // Can the user MI have it's source at index \p OpNoForForwarding 256 // forwarded from an add-immediate that feeds it? 257 bool isUseMIElgibleForForwarding(MachineInstr &MI, const ImmInstrInfo &III, 258 unsigned OpNoForForwarding) const; 259 bool isDefMIElgibleForForwarding(MachineInstr &DefMI, 260 const ImmInstrInfo &III, 261 MachineOperand *&ImmMO, 262 MachineOperand *&RegMO) const; 263 bool isImmElgibleForForwarding(const MachineOperand &ImmMO, 264 const MachineInstr &DefMI, 265 const ImmInstrInfo &III, 266 int64_t &Imm, 267 int64_t BaseImm = 0) const; 268 bool isRegElgibleForForwarding(const MachineOperand &RegMO, 269 const MachineInstr &DefMI, 270 const MachineInstr &MI, bool KillDefMI, 271 bool &IsFwdFeederRegKilled, 272 bool &SeenIntermediateUse) const; 273 unsigned getSpillTarget() const; 274 ArrayRef<unsigned> getStoreOpcodesForSpillArray() const; 275 ArrayRef<unsigned> getLoadOpcodesForSpillArray() const; 276 unsigned getSpillIndex(const TargetRegisterClass *RC) const; 277 int16_t getFMAOpIdxInfo(unsigned Opcode) const; 278 void reassociateFMA(MachineInstr &Root, MachineCombinerPattern Pattern, 279 SmallVectorImpl<MachineInstr *> &InsInstrs, 280 SmallVectorImpl<MachineInstr *> &DelInstrs, 281 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const; 282 bool isLoadFromConstantPool(MachineInstr *I) const; 283 Register 284 generateLoadForNewConst(unsigned Idx, MachineInstr *MI, Type *Ty, 285 SmallVectorImpl<MachineInstr *> &InsInstrs) const; 286 const Constant *getConstantFromConstantPool(MachineInstr *I) const; 287 virtual void anchor(); 288 289 protected: 290 /// Commutes the operands in the given instruction. 291 /// The commutable operands are specified by their indices OpIdx1 and OpIdx2. 292 /// 293 /// Do not call this method for a non-commutable instruction or for 294 /// non-commutable pair of operand indices OpIdx1 and OpIdx2. 295 /// Even though the instruction is commutable, the method may still 296 /// fail to commute the operands, null pointer is returned in such cases. 297 /// 298 /// For example, we can commute rlwimi instructions, but only if the 299 /// rotate amt is zero. We also have to munge the immediates a bit. 300 MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, 301 unsigned OpIdx1, 302 unsigned OpIdx2) const override; 303 304 public: 305 explicit PPCInstrInfo(PPCSubtarget &STI); 306 307 /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As 308 /// such, whenever a client has an instance of instruction info, it should 309 /// always be able to get register info as well (through this method). 310 /// getRegisterInfo()311 const PPCRegisterInfo &getRegisterInfo() const { return RI; } 312 isXFormMemOp(unsigned Opcode)313 bool isXFormMemOp(unsigned Opcode) const { 314 return get(Opcode).TSFlags & PPCII::XFormMemOp; 315 } isPrefixed(unsigned Opcode)316 bool isPrefixed(unsigned Opcode) const { 317 return get(Opcode).TSFlags & PPCII::Prefixed; 318 } isSExt32To64(unsigned Opcode)319 bool isSExt32To64(unsigned Opcode) const { 320 return get(Opcode).TSFlags & PPCII::SExt32To64; 321 } isZExt32To64(unsigned Opcode)322 bool isZExt32To64(unsigned Opcode) const { 323 return get(Opcode).TSFlags & PPCII::ZExt32To64; 324 } 325 326 /// Check if Opcode corresponds to a call instruction that should be marked 327 /// with the NOTOC relocation. isNoTOCCallInstr(unsigned Opcode)328 bool isNoTOCCallInstr(unsigned Opcode) const { 329 if (!get(Opcode).isCall()) 330 return false; 331 332 switch (Opcode) { 333 default: 334 #ifndef NDEBUG 335 llvm_unreachable("Unknown call opcode"); 336 #endif 337 return false; 338 case PPC::BL8_NOTOC: 339 case PPC::BL8_NOTOC_TLS: 340 case PPC::BL8_NOTOC_RM: 341 return true; 342 #ifndef NDEBUG 343 case PPC::BL8: 344 case PPC::BL: 345 case PPC::BL8_TLS: 346 case PPC::BL_TLS: 347 case PPC::BLA8: 348 case PPC::BLA: 349 case PPC::BCCL: 350 case PPC::BCCLA: 351 case PPC::BCL: 352 case PPC::BCLn: 353 case PPC::BL8_NOP: 354 case PPC::BL_NOP: 355 case PPC::BL8_NOP_TLS: 356 case PPC::BLA8_NOP: 357 case PPC::BCTRL8: 358 case PPC::BCTRL: 359 case PPC::BCCCTRL8: 360 case PPC::BCCCTRL: 361 case PPC::BCCTRL8: 362 case PPC::BCCTRL: 363 case PPC::BCCTRL8n: 364 case PPC::BCCTRLn: 365 case PPC::BL8_RM: 366 case PPC::BLA8_RM: 367 case PPC::BL8_NOP_RM: 368 case PPC::BLA8_NOP_RM: 369 case PPC::BCTRL8_RM: 370 case PPC::BCTRL8_LDinto_toc: 371 case PPC::BCTRL8_LDinto_toc_RM: 372 case PPC::BL8_TLS_: 373 case PPC::TCRETURNdi8: 374 case PPC::TCRETURNai8: 375 case PPC::TCRETURNri8: 376 case PPC::TAILBCTR8: 377 case PPC::TAILB8: 378 case PPC::TAILBA8: 379 case PPC::BCLalways: 380 case PPC::BLRL: 381 case PPC::BCCLRL: 382 case PPC::BCLRL: 383 case PPC::BCLRLn: 384 case PPC::BDZL: 385 case PPC::BDNZL: 386 case PPC::BDZLA: 387 case PPC::BDNZLA: 388 case PPC::BDZLp: 389 case PPC::BDNZLp: 390 case PPC::BDZLAp: 391 case PPC::BDNZLAp: 392 case PPC::BDZLm: 393 case PPC::BDNZLm: 394 case PPC::BDZLAm: 395 case PPC::BDNZLAm: 396 case PPC::BDZLRL: 397 case PPC::BDNZLRL: 398 case PPC::BDZLRLp: 399 case PPC::BDNZLRLp: 400 case PPC::BDZLRLm: 401 case PPC::BDNZLRLm: 402 case PPC::BL_RM: 403 case PPC::BLA_RM: 404 case PPC::BL_NOP_RM: 405 case PPC::BCTRL_RM: 406 case PPC::TCRETURNdi: 407 case PPC::TCRETURNai: 408 case PPC::TCRETURNri: 409 case PPC::BCTRL_LWZinto_toc: 410 case PPC::BCTRL_LWZinto_toc_RM: 411 case PPC::TAILBCTR: 412 case PPC::TAILB: 413 case PPC::TAILBA: 414 return false; 415 #endif 416 } 417 } 418 isSameClassPhysRegCopy(unsigned Opcode)419 static bool isSameClassPhysRegCopy(unsigned Opcode) { 420 unsigned CopyOpcodes[] = {PPC::OR, PPC::OR8, PPC::FMR, 421 PPC::VOR, PPC::XXLOR, PPC::XXLORf, 422 PPC::XSCPSGNDP, PPC::MCRF, PPC::CROR, 423 PPC::EVOR, -1U}; 424 for (int i = 0; CopyOpcodes[i] != -1U; i++) 425 if (Opcode == CopyOpcodes[i]) 426 return true; 427 return false; 428 } 429 430 ScheduleHazardRecognizer * 431 CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, 432 const ScheduleDAG *DAG) const override; 433 ScheduleHazardRecognizer * 434 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, 435 const ScheduleDAG *DAG) const override; 436 437 unsigned getInstrLatency(const InstrItineraryData *ItinData, 438 const MachineInstr &MI, 439 unsigned *PredCost = nullptr) const override; 440 441 int getOperandLatency(const InstrItineraryData *ItinData, 442 const MachineInstr &DefMI, unsigned DefIdx, 443 const MachineInstr &UseMI, 444 unsigned UseIdx) const override; getOperandLatency(const InstrItineraryData * ItinData,SDNode * DefNode,unsigned DefIdx,SDNode * UseNode,unsigned UseIdx)445 int getOperandLatency(const InstrItineraryData *ItinData, 446 SDNode *DefNode, unsigned DefIdx, 447 SDNode *UseNode, unsigned UseIdx) const override { 448 return PPCGenInstrInfo::getOperandLatency(ItinData, DefNode, DefIdx, 449 UseNode, UseIdx); 450 } 451 hasLowDefLatency(const TargetSchedModel & SchedModel,const MachineInstr & DefMI,unsigned DefIdx)452 bool hasLowDefLatency(const TargetSchedModel &SchedModel, 453 const MachineInstr &DefMI, 454 unsigned DefIdx) const override { 455 // Machine LICM should hoist all instructions in low-register-pressure 456 // situations; none are sufficiently free to justify leaving in a loop 457 // body. 458 return false; 459 } 460 useMachineCombiner()461 bool useMachineCombiner() const override { 462 return true; 463 } 464 465 /// When getMachineCombinerPatterns() finds patterns, this function generates 466 /// the instructions that could replace the original code sequence 467 void genAlternativeCodeSequence( 468 MachineInstr &Root, MachineCombinerPattern Pattern, 469 SmallVectorImpl<MachineInstr *> &InsInstrs, 470 SmallVectorImpl<MachineInstr *> &DelInstrs, 471 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override; 472 473 /// Return true when there is potentially a faster code sequence for a fma 474 /// chain ending in \p Root. All potential patterns are output in the \p 475 /// P array. 476 bool getFMAPatterns(MachineInstr &Root, 477 SmallVectorImpl<MachineCombinerPattern> &P, 478 bool DoRegPressureReduce) const; 479 480 /// Return true when there is potentially a faster code sequence 481 /// for an instruction chain ending in <Root>. All potential patterns are 482 /// output in the <Pattern> array. 483 bool getMachineCombinerPatterns(MachineInstr &Root, 484 SmallVectorImpl<MachineCombinerPattern> &P, 485 bool DoRegPressureReduce) const override; 486 487 /// On PowerPC, we leverage machine combiner pass to reduce register pressure 488 /// when the register pressure is high for one BB. 489 /// Return true if register pressure for \p MBB is high and ABI is supported 490 /// to reduce register pressure. Otherwise return false. 491 bool shouldReduceRegisterPressure( 492 const MachineBasicBlock *MBB, 493 const RegisterClassInfo *RegClassInfo) const override; 494 495 /// Fixup the placeholders we put in genAlternativeCodeSequence() for 496 /// MachineCombiner. 497 void 498 finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P, 499 SmallVectorImpl<MachineInstr *> &InsInstrs) const override; 500 501 bool isAssociativeAndCommutative(const MachineInstr &Inst, 502 bool Invert) const override; 503 504 /// On PowerPC, we try to reassociate FMA chain which will increase 505 /// instruction size. Set extension resource length limit to 1 for edge case. 506 /// Resource Length is calculated by scaled resource usage in getCycles(). 507 /// Because of the division in getCycles(), it returns different cycles due to 508 /// legacy scaled resource usage. So new resource length may be same with 509 /// legacy or 1 bigger than legacy. 510 /// We need to execlude the 1 bigger case even the resource length is not 511 /// perserved for more FMA chain reassociations on PowerPC. getExtendResourceLenLimit()512 int getExtendResourceLenLimit() const override { return 1; } 513 514 void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, 515 MachineInstr &NewMI1, 516 MachineInstr &NewMI2) const override; 517 518 // PowerPC specific version of setSpecialOperandAttr that copies Flags to MI 519 // and clears nuw, nsw, and exact flags. 520 void setSpecialOperandAttr(MachineInstr &MI, uint16_t Flags) const; 521 522 bool isCoalescableExtInstr(const MachineInstr &MI, 523 Register &SrcReg, Register &DstReg, 524 unsigned &SubIdx) const override; 525 unsigned isLoadFromStackSlot(const MachineInstr &MI, 526 int &FrameIndex) const override; 527 bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; 528 unsigned isStoreToStackSlot(const MachineInstr &MI, 529 int &FrameIndex) const override; 530 531 bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, 532 unsigned &SrcOpIdx2) const override; 533 534 void insertNoop(MachineBasicBlock &MBB, 535 MachineBasicBlock::iterator MI) const override; 536 537 538 // Branch analysis. 539 bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, 540 MachineBasicBlock *&FBB, 541 SmallVectorImpl<MachineOperand> &Cond, 542 bool AllowModify) const override; 543 unsigned removeBranch(MachineBasicBlock &MBB, 544 int *BytesRemoved = nullptr) const override; 545 unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, 546 MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, 547 const DebugLoc &DL, 548 int *BytesAdded = nullptr) const override; 549 550 // Select analysis. 551 bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond, 552 Register, Register, Register, int &, int &, 553 int &) const override; 554 void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 555 const DebugLoc &DL, Register DstReg, 556 ArrayRef<MachineOperand> Cond, Register TrueReg, 557 Register FalseReg) const override; 558 559 void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 560 const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, 561 bool KillSrc) const override; 562 563 void storeRegToStackSlot(MachineBasicBlock &MBB, 564 MachineBasicBlock::iterator MBBI, Register SrcReg, 565 bool isKill, int FrameIndex, 566 const TargetRegisterClass *RC, 567 const TargetRegisterInfo *TRI, 568 Register VReg) const override; 569 570 // Emits a register spill without updating the register class for vector 571 // registers. This ensures that when we spill a vector register the 572 // element order in the register is the same as it was in memory. 573 void storeRegToStackSlotNoUpd(MachineBasicBlock &MBB, 574 MachineBasicBlock::iterator MBBI, 575 unsigned SrcReg, bool isKill, int FrameIndex, 576 const TargetRegisterClass *RC, 577 const TargetRegisterInfo *TRI) const; 578 579 void loadRegFromStackSlot(MachineBasicBlock &MBB, 580 MachineBasicBlock::iterator MBBI, Register DestReg, 581 int FrameIndex, const TargetRegisterClass *RC, 582 const TargetRegisterInfo *TRI, 583 Register VReg) const override; 584 585 // Emits a register reload without updating the register class for vector 586 // registers. This ensures that when we reload a vector register the 587 // element order in the register is the same as it was in memory. 588 void loadRegFromStackSlotNoUpd(MachineBasicBlock &MBB, 589 MachineBasicBlock::iterator MBBI, 590 unsigned DestReg, int FrameIndex, 591 const TargetRegisterClass *RC, 592 const TargetRegisterInfo *TRI) const; 593 594 unsigned getStoreOpcodeForSpill(const TargetRegisterClass *RC) const; 595 596 unsigned getLoadOpcodeForSpill(const TargetRegisterClass *RC) const; 597 598 bool 599 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; 600 601 bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, 602 MachineRegisterInfo *MRI) const override; 603 604 bool onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, 605 Register Reg) const; 606 607 // If conversion by predication (only supported by some branch instructions). 608 // All of the profitability checks always return true; it is always 609 // profitable to use the predicated branches. isProfitableToIfCvt(MachineBasicBlock & MBB,unsigned NumCycles,unsigned ExtraPredCycles,BranchProbability Probability)610 bool isProfitableToIfCvt(MachineBasicBlock &MBB, 611 unsigned NumCycles, unsigned ExtraPredCycles, 612 BranchProbability Probability) const override { 613 return true; 614 } 615 616 bool isProfitableToIfCvt(MachineBasicBlock &TMBB, 617 unsigned NumT, unsigned ExtraT, 618 MachineBasicBlock &FMBB, 619 unsigned NumF, unsigned ExtraF, 620 BranchProbability Probability) const override; 621 isProfitableToDupForIfCvt(MachineBasicBlock & MBB,unsigned NumCycles,BranchProbability Probability)622 bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, 623 BranchProbability Probability) const override { 624 return true; 625 } 626 isProfitableToUnpredicate(MachineBasicBlock & TMBB,MachineBasicBlock & FMBB)627 bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, 628 MachineBasicBlock &FMBB) const override { 629 return false; 630 } 631 632 // Predication support. 633 bool isPredicated(const MachineInstr &MI) const override; 634 635 bool isSchedulingBoundary(const MachineInstr &MI, 636 const MachineBasicBlock *MBB, 637 const MachineFunction &MF) const override; 638 639 bool PredicateInstruction(MachineInstr &MI, 640 ArrayRef<MachineOperand> Pred) const override; 641 642 bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1, 643 ArrayRef<MachineOperand> Pred2) const override; 644 645 bool ClobbersPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred, 646 bool SkipDead) const override; 647 648 // Comparison optimization. 649 650 bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, 651 Register &SrcReg2, int64_t &Mask, 652 int64_t &Value) const override; 653 654 bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, 655 Register SrcReg2, int64_t Mask, int64_t Value, 656 const MachineRegisterInfo *MRI) const override; 657 658 659 /// Return true if get the base operand, byte offset of an instruction and 660 /// the memory width. Width is the size of memory that is being 661 /// loaded/stored (e.g. 1, 2, 4, 8). 662 bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, 663 const MachineOperand *&BaseOp, 664 int64_t &Offset, unsigned &Width, 665 const TargetRegisterInfo *TRI) const; 666 667 bool optimizeCmpPostRA(MachineInstr &MI) const; 668 669 /// Get the base operand and byte offset of an instruction that reads/writes 670 /// memory. 671 bool getMemOperandsWithOffsetWidth( 672 const MachineInstr &LdSt, 673 SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset, 674 bool &OffsetIsScalable, unsigned &Width, 675 const TargetRegisterInfo *TRI) const override; 676 677 /// Returns true if the two given memory operations should be scheduled 678 /// adjacent. 679 bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1, 680 ArrayRef<const MachineOperand *> BaseOps2, 681 unsigned NumLoads, unsigned NumBytes) const override; 682 683 /// Return true if two MIs access different memory addresses and false 684 /// otherwise 685 bool 686 areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, 687 const MachineInstr &MIb) const override; 688 689 /// GetInstSize - Return the number of bytes of code the specified 690 /// instruction may be. This returns the maximum number of bytes. 691 /// 692 unsigned getInstSizeInBytes(const MachineInstr &MI) const override; 693 694 MCInst getNop() const override; 695 696 std::pair<unsigned, unsigned> 697 decomposeMachineOperandsTargetFlags(unsigned TF) const override; 698 699 ArrayRef<std::pair<unsigned, const char *>> 700 getSerializableDirectMachineOperandTargetFlags() const override; 701 702 ArrayRef<std::pair<unsigned, const char *>> 703 getSerializableBitmaskMachineOperandTargetFlags() const override; 704 705 // Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction. 706 bool expandVSXMemPseudo(MachineInstr &MI) const; 707 708 // Lower pseudo instructions after register allocation. 709 bool expandPostRAPseudo(MachineInstr &MI) const override; 710 isVFRegister(unsigned Reg)711 static bool isVFRegister(unsigned Reg) { 712 return Reg >= PPC::VF0 && Reg <= PPC::VF31; 713 } isVRRegister(unsigned Reg)714 static bool isVRRegister(unsigned Reg) { 715 return Reg >= PPC::V0 && Reg <= PPC::V31; 716 } 717 const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const; 718 static int getRecordFormOpcode(unsigned Opcode); 719 720 bool isTOCSaveMI(const MachineInstr &MI) const; 721 722 std::pair<bool, bool> 723 isSignOrZeroExtended(const unsigned Reg, const unsigned BinOpDepth, 724 const MachineRegisterInfo *MRI) const; 725 726 // Return true if the register is sign-extended from 32 to 64 bits. isSignExtended(const unsigned Reg,const MachineRegisterInfo * MRI)727 bool isSignExtended(const unsigned Reg, 728 const MachineRegisterInfo *MRI) const { 729 return isSignOrZeroExtended(Reg, 0, MRI).first; 730 } 731 732 // Return true if the register is zero-extended from 32 to 64 bits. isZeroExtended(const unsigned Reg,const MachineRegisterInfo * MRI)733 bool isZeroExtended(const unsigned Reg, 734 const MachineRegisterInfo *MRI) const { 735 return isSignOrZeroExtended(Reg, 0, MRI).second; 736 } 737 738 bool convertToImmediateForm(MachineInstr &MI, 739 MachineInstr **KilledDef = nullptr) const; 740 bool foldFrameOffset(MachineInstr &MI) const; 741 bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase = nullptr) const; 742 bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const; 743 bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const; 744 bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg, 745 unsigned &XFormOpcode, 746 int64_t &OffsetOfImmInstr, 747 ImmInstrInfo &III) const; 748 bool isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index, 749 MachineInstr *&ADDIMI, int64_t &OffsetAddi, 750 int64_t OffsetImm) const; 751 752 /// Fixup killed/dead flag for register \p RegNo between instructions [\p 753 /// StartMI, \p EndMI]. Some pre-RA or post-RA transformations may violate 754 /// register killed/dead flags semantics, this function can be called to fix 755 /// up. Before calling this function, 756 /// 1. Ensure that \p RegNo liveness is killed after instruction \p EndMI. 757 /// 2. Ensure that there is no new definition between (\p StartMI, \p EndMI) 758 /// and possible definition for \p RegNo is \p StartMI or \p EndMI. For 759 /// pre-RA cases, definition may be \p StartMI through COPY, \p StartMI 760 /// will be adjust to true definition. 761 /// 3. We can do accurate fixup for the case when all instructions between 762 /// [\p StartMI, \p EndMI] are in same basic block. 763 /// 4. For the case when \p StartMI and \p EndMI are not in same basic block, 764 /// we conservatively clear kill flag for all uses of \p RegNo for pre-RA 765 /// and for post-RA, we give an assertion as without reaching definition 766 /// analysis post-RA, \p StartMI and \p EndMI are hard to keep right. 767 void fixupIsDeadOrKill(MachineInstr *StartMI, MachineInstr *EndMI, 768 unsigned RegNo) const; 769 void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const; 770 void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo, 771 int64_t Imm) const; 772 773 bool instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, 774 bool PostRA) const; 775 776 // In PostRA phase, try to find instruction defines \p Reg before \p MI. 777 // \p SeenIntermediate is set to true if uses between DefMI and \p MI exist. 778 MachineInstr *getDefMIPostRA(unsigned Reg, MachineInstr &MI, 779 bool &SeenIntermediateUse) const; 780 781 // Materialize immediate after RA. 782 void materializeImmPostRA(MachineBasicBlock &MBB, 783 MachineBasicBlock::iterator MBBI, 784 const DebugLoc &DL, Register Reg, 785 int64_t Imm) const; 786 787 /// getRegNumForOperand - some operands use different numbering schemes 788 /// for the same registers. For example, a VSX instruction may have any of 789 /// vs0-vs63 allocated whereas an Altivec instruction could only have 790 /// vs32-vs63 allocated (numbered as v0-v31). This function returns the actual 791 /// register number needed for the opcode/operand number combination. 792 /// The operand number argument will be useful when we need to extend this 793 /// to instructions that use both Altivec and VSX numbering (for different 794 /// operands). getRegNumForOperand(const MCInstrDesc & Desc,unsigned Reg,unsigned OpNo)795 static unsigned getRegNumForOperand(const MCInstrDesc &Desc, unsigned Reg, 796 unsigned OpNo) { 797 int16_t regClass = Desc.operands()[OpNo].RegClass; 798 switch (regClass) { 799 // We store F0-F31, VF0-VF31 in MCOperand and it should be F0-F31, 800 // VSX32-VSX63 during encoding/disassembling 801 case PPC::VSSRCRegClassID: 802 case PPC::VSFRCRegClassID: 803 if (isVFRegister(Reg)) 804 return PPC::VSX32 + (Reg - PPC::VF0); 805 break; 806 // We store VSL0-VSL31, V0-V31 in MCOperand and it should be VSL0-VSL31, 807 // VSX32-VSX63 during encoding/disassembling 808 case PPC::VSRCRegClassID: 809 if (isVRRegister(Reg)) 810 return PPC::VSX32 + (Reg - PPC::V0); 811 break; 812 // Other RegClass doesn't need mapping 813 default: 814 break; 815 } 816 return Reg; 817 } 818 819 /// Check \p Opcode is BDNZ (Decrement CTR and branch if it is still nonzero). 820 bool isBDNZ(unsigned Opcode) const; 821 822 /// Find the hardware loop instruction used to set-up the specified loop. 823 /// On PPC, we have two instructions used to set-up the hardware loop 824 /// (MTCTRloop, MTCTR8loop) with corresponding endloop (BDNZ, BDNZ8) 825 /// instructions to indicate the end of a loop. 826 MachineInstr * 827 findLoopInstr(MachineBasicBlock &PreHeader, 828 SmallPtrSet<MachineBasicBlock *, 8> &Visited) const; 829 830 /// Analyze loop L, which must be a single-basic-block loop, and if the 831 /// conditions can be understood enough produce a PipelinerLoopInfo object. 832 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> 833 analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override; 834 }; 835 836 } 837 838 #endif 839