//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// SI DAG Lowering interface definition
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIISELLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_SIISELLOWERING_H

#include "AMDGPUISelLowering.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "SIInstrInfo.h"

namespace llvm {

/// SelectionDAG lowering for the SI (GCN) subtargets. Extends the common
/// AMDGPU lowering with SI-specific custom lowering, DAG combines, calling
/// convention handling, and post-isel fixups. Declarations only; definitions
/// live in the corresponding .cpp file.
class SITargetLowering final : public AMDGPUTargetLowering {
private:
  // Subtarget this lowering was constructed for (set from the GCNSubtarget
  // passed to the constructor).
  const GCNSubtarget *Subtarget;

public:
  // Calling-convention-dependent register type/count queries.
  MVT getRegisterTypeForCallingConv(LLVMContext &Context,
                                    CallingConv::ID CC,
                                    EVT VT) const override;
  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;

  unsigned getVectorTypeBreakdownForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
    unsigned &NumIntermediates, MVT &RegisterVT) const override;

private:
  // --- Kernel-argument lowering helpers ---

  /// Build a pointer to a kernel argument at \p Offset from the kernarg
  /// segment base.
  SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
                                   SDValue Chain, uint64_t Offset) const;
  /// \returns a pointer to the implicit argument area.
  SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
  /// Lower a kernel argument loaded from kernarg segment memory.
  /// \p Arg, if non-null, carries the original IR argument flags.
  SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
                                   const SDLoc &SL, SDValue Chain,
                                   uint64_t Offset, unsigned Align, bool Signed,
                                   const ISD::InputArg *Arg = nullptr) const;

  /// Lower an argument passed on the stack, per its \p VA assignment.
  SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
                              const SDLoc &SL, SDValue Chain,
                              const ISD::InputArg &Arg) const;
  /// \returns the preloaded (register-resident) value identified by the
  /// PreloadedValue kind, as recorded in \p MFI.
  SDValue getPreloadedValue(SelectionDAG &DAG,
                            const SIMachineFunctionInfo &MFI,
                            EVT VT,
                            AMDGPUFunctionArgInfo::PreloadedValue) const;

  // --- Custom lowering for individual ISD / intrinsic nodes ---

  SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
                             SelectionDAG &DAG) const override;
  /// Lower a parameter that is implicitly zero-extended when loaded.
  SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
                                 MVT VT, unsigned Offset) const;
  /// Lower an image (MIMG) dimension intrinsic described by \p Intr.
  SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
                     SelectionDAG &DAG) const;
  /// Lower a scalar buffer load from \p Rsrc at \p Offset (GLC = cache bit).
  SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset,
                       SDValue GLC, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;

  // The raw.tbuffer and struct.tbuffer intrinsics have two offset args: offset
  // (the offset that is included in bounds checking and swizzling, to be split
  // between the instruction's voffset and immoffset fields) and soffset (the
  // offset that is excluded from bounds checking and swizzling, to go in the
  // instruction's soffset field). This function takes the first kind of
  // offset and figures out how to split it between voffset and immoffset.
  std::pair<SDValue, SDValue> splitBufferOffsets(SDValue Offset,
                                                 SelectionDAG &DAG) const;

  /// Widen a (presumably too-narrow) load \p Ld during DAG combining.
  SDValue widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const;
  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  // FDIV lowering variants: fast/unsafe path, then per-width paths, then the
  // generic dispatcher.
  SDValue lowerFastUnsafeFDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFDIV16(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
  /// \p Signed selects between signed and unsigned int-to-fp conversion.
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;

  /// Rebuild memory node \p M with opcode \p Opcode and operands \p Ops,
  /// adjusting the loaded value type as needed.
  SDValue adjustLoadValueType(unsigned Opcode, MemSDNode *M,
                              SelectionDAG &DAG, ArrayRef<SDValue> Ops,
                              bool IsIntrinsic = false) const;

  /// Prepare 16-bit (D16) store data \p VData for the instruction encoding.
  SDValue handleD16VData(SDValue VData, SelectionDAG &DAG) const;

  /// Converts \p Op, which must be of floating point type, to the
  /// floating point type \p VT, by either extending or truncating it.
  SDValue getFPExtOrFPTrunc(SelectionDAG &DAG,
                            SDValue Op,
                            const SDLoc &DL,
                            EVT VT) const;

  /// Convert an in-register argument value \p Val from its memory type
  /// \p MemVT to its value type \p VT.
  SDValue convertArgType(
    SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Val,
    bool Signed, const ISD::InputArg *Arg = nullptr) const;

  /// Custom lowering for ISD::FP_ROUND for MVT::f16.
  SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;

  /// \returns the aperture (address-space base) value for address space
  /// \p AS, used when lowering address space casts.
  SDValue getSegmentAperture(unsigned AS, const SDLoc &DL,
                             SelectionDAG &DAG) const;

  SDValue lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const;

  /// Adjust the write-mask of an image node; may replace \p N.
  SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;

  // --- Target DAG combines ---

  SDValue performUCharToFloatCombine(SDNode *N,
                                     DAGCombinerInfo &DCI) const;
  SDValue performSHLPtrCombine(SDNode *N,
                               unsigned AS,
                               EVT MemVT,
                               DAGCombinerInfo &DCI) const;

  SDValue performMemSDNodeCombine(MemSDNode *N, DAGCombinerInfo &DCI) const;

  /// Split a binary op \p Opc of \p LHS with constant \p CRHS across the
  /// halves of an illegally-wide integer type.
  SDValue splitBinaryBitConstantOp(DAGCombinerInfo &DCI, const SDLoc &SL,
                                   unsigned Opc, SDValue LHS,
                                   const ConstantSDNode *CRHS) const;

  SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performXorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performZeroExtendCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  /// \returns the FP constant \p C in canonical form for type \p VT.
  SDValue getCanonicalConstantFP(SelectionDAG &DAG, const SDLoc &SL, EVT VT,
                                 const APFloat &C) const;
  SDValue performFCanonicalizeCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  // med3 formation from min/max chains with immediate operands.
  SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
                                  SDValue Op0, SDValue Op1) const;
  SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
                                   SDValue Op0, SDValue Op1, bool Signed) const;
  SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performFMed3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performCvtPkRTZCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performExtractVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performInsertVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  /// \returns the fused (e.g. FMA/FMAD) opcode to use for combining the
  /// operation rooted at \p N0 / \p N1, or 0-equivalent if none applies —
  /// exact contract is defined in the implementation.
  unsigned getFusedOpcode(const SelectionDAG &DAG,
                          const SDNode *N0, const SDNode *N1) const;
  SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performAddCarrySubCarryCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performFMACombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  // Addressing-mode legality checks for flat and MUBUF memory accesses.
  bool isLegalFlatAddressingMode(const AddrMode &AM) const;
  bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;

  /// \returns a nonzero value identifying the control-flow intrinsic kind of
  /// \p Intr, or 0 if it is not one — NOTE(review): exact encoding is defined
  /// by the implementation; confirm there.
  unsigned isCFIntrinsic(const SDNode *Intr) const;

  void createDebuggerPrologueStackObjects(MachineFunction &MF) const;

  /// \returns True if fixup needs to be emitted for given global value \p GV,
  /// false otherwise.
  bool shouldEmitFixup(const GlobalValue *GV) const;

  /// \returns True if GOT relocation needs to be emitted for given global value
  /// \p GV, false otherwise.
  bool shouldEmitGOTReloc(const GlobalValue *GV) const;

  /// \returns True if PC-relative relocation needs to be emitted for given
  /// global value \p GV, false otherwise.
  bool shouldEmitPCReloc(const GlobalValue *GV) const;

  // Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
  // three offsets (voffset, soffset and instoffset) into the SDValue[3] array
  // pointed to by Offsets.
  void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG,
                        SDValue *Offsets, unsigned Align = 4) const;

public:
  SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI);

  /// \returns the subtarget this lowering was created for.
  const GCNSubtarget *getSubtarget() const;

  bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const override;

  bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
                          MachineFunction &MF,
                          unsigned IntrinsicID) const override;

  bool getAddrModeArguments(IntrinsicInst * /*I*/,
                            SmallVectorImpl<Value*> &/*Ops*/,
                            Type *&/*AccessTy*/) const override;

  bool isLegalGlobalAddressingMode(const AddrMode &AM) const;
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  bool canMergeStoresTo(unsigned AS, EVT MemVT,
                        const SelectionDAG &DAG) const override;

  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
                                      unsigned Align,
                                      bool *IsFast) const override;

  EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
                          unsigned SrcAlign, bool IsMemset,
                          bool ZeroMemset,
                          bool MemcpyStrSrc,
                          MachineFunction &MF) const override;

  // Memory-operand property queries used by isel / combines.
  bool isMemOpUniform(const SDNode *N) const;
  bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;
  bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
  bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;

  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  bool isTypeDesirableForOp(unsigned Op, EVT VT) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  // Split-CSR (callee-saved register) support hooks.
  bool supportSplitCSR(MachineFunction *MF) const override;
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
    MachineBasicBlock *Entry,
    const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  // --- Calling convention: arguments, returns, and calls ---

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  bool CanLowerReturn(CallingConv::ID CallConv,
                      MachineFunction &MF, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  /// Pass the special (implicit) input values recorded in \p Info to a
  /// callee, appending to \p RegsToPass / \p MemOpChains as appropriate.
  void passSpecialInputs(
    CallLoweringInfo &CLI,
    CCState &CCInfo,
    const SIMachineFunctionInfo &Info,
    SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
    SmallVectorImpl<SDValue> &MemOpChains,
    SDValue Chain) const;

  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal) const;

  bool mayBeEmittedAsTailCall(const CallInst *) const override;

  bool isEligibleForTailCallOptimization(
    SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;

  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  unsigned getRegisterByName(const char* RegName, EVT VT,
                             SelectionDAG &DAG) const override;

  /// Split the block containing the kill instruction \p MI; \returns the
  /// block to continue emitting into.
  MachineBasicBlock *splitKillBlock(MachineInstr &MI,
                                    MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *BB) const override;

  bool hasBitPreservingFPLogic(EVT VT) const override;
  bool enableAggressiveFMAFusion(EVT VT) const override;
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;
  MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
  bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
  // Split an operation on an illegally-wide vector into ops on its halves.
  SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
  SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
  void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                     SDNode *Node) const override;

  SDNode *legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const;

  // Build buffer resource (RSRC) descriptor nodes.
  MachineSDNode *wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL,
                                SDValue Ptr) const;
  MachineSDNode *buildRSRC(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr,
                           uint32_t RsrcDword1, uint64_t RsrcDword2And3) const;

  // Inline-asm constraint support.
  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;
  ConstraintType getConstraintType(StringRef Constraint) const override;

  /// Copy \p V into the M0 register; \returns the glued copy chain.
  SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL,
                   SDValue V) const;

  void finalizeLowering(MachineFunction &MF) const override;

  void computeKnownBitsForFrameIndex(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  bool isSDNodeSourceOfDivergence(const SDNode *N,
    FunctionLoweringInfo *FLI, LegacyDivergenceAnalysis *DA) const override;

  /// \returns true if \p Op is known to already be in canonical FP form
  /// (searching at most \p MaxDepth nodes deep).
  bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
                       unsigned MaxDepth = 5) const;
  bool denormalsEnabledForType(EVT VT) const;

  bool isKnownNeverNaNForTargetNode(SDValue Op,
                                    const SelectionDAG &DAG,
                                    bool SNaN = false,
                                    unsigned Depth = 0) const override;
};

} // End namespace llvm

#endif