1 //===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Interface definition of the TargetLowering class that is common 12 /// to all AMD GPUs. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_LIB_TARGET_R600_AMDGPUISELLOWERING_H 17 #define LLVM_LIB_TARGET_R600_AMDGPUISELLOWERING_H 18 19 #include "llvm/Target/TargetLowering.h" 20 21 namespace llvm { 22 23 class AMDGPUMachineFunction; 24 class AMDGPUSubtarget; 25 class MachineRegisterInfo; 26 27 class AMDGPUTargetLowering : public TargetLowering { 28 protected: 29 const AMDGPUSubtarget *Subtarget; 30 31 private: 32 SDValue LowerConstantInitializer(const Constant* Init, const GlobalValue *GV, 33 const SDValue &InitPtr, 34 SDValue Chain, 35 SelectionDAG &DAG) const; 36 SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; 37 SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; 38 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; 39 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; 40 /// \brief Lower vector stores by merging the vector elements into an integer 41 /// of the same bitwidth. 42 SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const; 43 /// \brief Split a vector store into multiple scalar stores. 44 /// \returns The resulting chain. 45 46 SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; 47 SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const; 48 SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const; 49 SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const; 50 SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const; 51 SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const; 52 SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const; 53 54 SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const; 55 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 56 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 57 58 SDValue LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG, bool Signed) const; 59 SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; 60 SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; 61 62 SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; 63 64 SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const; 65 SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const; 66 67 protected: 68 static EVT getEquivalentMemType(LLVMContext &Context, EVT VT); 69 static EVT getEquivalentLoadRegType(LLVMContext &Context, EVT VT); 70 71 virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, 72 SelectionDAG &DAG) const; 73 74 /// \brief Split a vector load into a scalar load of each component. 75 SDValue ScalarizeVectorLoad(SDValue Op, SelectionDAG &DAG) const; 76 77 /// \brief Split a vector load into 2 loads of half the vector. 78 SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const; 79 80 /// \brief Split a vector store into a scalar store of each component. 81 SDValue ScalarizeVectorStore(SDValue Op, SelectionDAG &DAG) const; 82 83 /// \brief Split a vector store into 2 stores of half the vector. 84 SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const; 85 86 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; 87 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; 88 SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const; 89 SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const; 90 void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, 91 SmallVectorImpl<SDValue> &Results) const; 92 bool isHWTrueValue(SDValue Op) const; 93 bool isHWFalseValue(SDValue Op) const; 94 95 /// The SelectionDAGBuilder will automatically promote function arguments 96 /// with illegal types. However, this does not work for the AMDGPU targets 97 /// since the function arguments are stored in memory as these illegal types. 98 /// In order to handle this properly we need to get the origianl types sizes 99 /// from the LLVM IR Function and fixup the ISD:InputArg values before 100 /// passing them to AnalyzeFormalArguments() 101 void getOriginalFunctionArgs(SelectionDAG &DAG, 102 const Function *F, 103 const SmallVectorImpl<ISD::InputArg> &Ins, 104 SmallVectorImpl<ISD::InputArg> &OrigIns) const; 105 void AnalyzeFormalArguments(CCState &State, 106 const SmallVectorImpl<ISD::InputArg> &Ins) const; 107 108 public: 109 AMDGPUTargetLowering(TargetMachine &TM); 110 111 bool isFAbsFree(EVT VT) const override; 112 bool isFNegFree(EVT VT) const override; 113 bool isTruncateFree(EVT Src, EVT Dest) const override; 114 bool isTruncateFree(Type *Src, Type *Dest) const override; 115 116 bool isZExtFree(Type *Src, Type *Dest) const override; 117 bool isZExtFree(EVT Src, EVT Dest) const override; 118 bool isZExtFree(SDValue Val, EVT VT2) const override; 119 120 bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; 121 122 MVT getVectorIdxTy() const override; 123 bool isSelectSupported(SelectSupportKind) const override; 124 125 bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; 126 bool ShouldShrinkFPConstant(EVT VT) const override; 127 bool shouldReduceLoadWidth(SDNode *Load, 128 ISD::LoadExtType ExtType, 129 EVT ExtVT) const override; 130 131 bool isLoadBitCastBeneficial(EVT, EVT) const override; 132 bool isCheapToSpeculateCttz() const override; 133 bool isCheapToSpeculateCtlz() const override; 134 135 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, 136 bool isVarArg, 137 const SmallVectorImpl<ISD::OutputArg> &Outs, 138 const SmallVectorImpl<SDValue> &OutVals, 139 SDLoc DL, SelectionDAG &DAG) const override; 140 SDValue LowerCall(CallLoweringInfo &CLI, 141 SmallVectorImpl<SDValue> &InVals) const override; 142 143 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; 144 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; 145 void ReplaceNodeResults(SDNode * N, 146 SmallVectorImpl<SDValue> &Results, 147 SelectionDAG &DAG) const override; 148 149 SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const; 150 SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const; 151 SDValue CombineFMinMaxLegacy(SDLoc DL, 152 EVT VT, 153 SDValue LHS, 154 SDValue RHS, 155 SDValue True, 156 SDValue False, 157 SDValue CC, 158 DAGCombinerInfo &DCI) const; 159 SDValue CombineIMinMax(SDLoc DL, 160 EVT VT, 161 SDValue LHS, 162 SDValue RHS, 163 SDValue True, 164 SDValue False, 165 SDValue CC, 166 SelectionDAG &DAG) const; 167 168 const char* getTargetNodeName(unsigned Opcode) const override; 169 170 SDValue getRsqrtEstimate(SDValue Operand, 171 DAGCombinerInfo &DCI, 172 unsigned &RefinementSteps, 173 bool &UseOneConstNR) const override; 174 SDValue getRecipEstimate(SDValue Operand, 175 DAGCombinerInfo &DCI, 176 unsigned &RefinementSteps) const override; 177 PostISelFolding(MachineSDNode * N,SelectionDAG & DAG)178 virtual SDNode *PostISelFolding(MachineSDNode *N, 179 SelectionDAG &DAG) const { 180 return N; 181 } 182 183 /// \brief Determine which of the bits specified in \p Mask are known to be 184 /// either zero or one and return them in the \p KnownZero and \p KnownOne 185 /// bitsets. 186 void computeKnownBitsForTargetNode(const SDValue Op, 187 APInt &KnownZero, 188 APInt &KnownOne, 189 const SelectionDAG &DAG, 190 unsigned Depth = 0) const override; 191 192 unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG, 193 unsigned Depth = 0) const override; 194 195 /// \brief Helper function that adds Reg to the LiveIn list of the DAG's 196 /// MachineFunction. 197 /// 198 /// \returns a RegisterSDNode representing Reg. 199 virtual SDValue CreateLiveInRegister(SelectionDAG &DAG, 200 const TargetRegisterClass *RC, 201 unsigned Reg, EVT VT) const; 202 }; 203 204 namespace AMDGPUISD { 205 206 enum { 207 // AMDIL ISD Opcodes 208 FIRST_NUMBER = ISD::BUILTIN_OP_END, 209 CALL, // Function call based on a single integer 210 UMUL, // 32bit unsigned multiplication 211 RET_FLAG, 212 BRANCH_COND, 213 // End AMDIL ISD Opcodes 214 DWORDADDR, 215 FRACT, 216 CLAMP, 217 MAD, // Multiply + add with same result as the separate operations. 218 219 // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi. 220 // Denormals handled on some parts. 221 COS_HW, 222 SIN_HW, 223 FMAX_LEGACY, 224 SMAX, 225 UMAX, 226 FMIN_LEGACY, 227 SMIN, 228 UMIN, 229 FMAX3, 230 SMAX3, 231 UMAX3, 232 FMIN3, 233 SMIN3, 234 UMIN3, 235 URECIP, 236 DIV_SCALE, 237 DIV_FMAS, 238 DIV_FIXUP, 239 TRIG_PREOP, // 1 ULP max error for f64 240 241 // RCP, RSQ - For f32, 1 ULP max error, no denormal handling. 242 // For f64, max error 2^29 ULP, handles denormals. 243 RCP, 244 RSQ, 245 RSQ_LEGACY, 246 RSQ_CLAMPED, 247 LDEXP, 248 FP_CLASS, 249 DOT4, 250 BFE_U32, // Extract range of bits with zero extension to 32-bits. 251 BFE_I32, // Extract range of bits with sign extension to 32-bits. 252 BFI, // (src0 & src1) | (~src0 & src2) 253 BFM, // Insert a range of bits into a 32-bit word. 254 BREV, // Reverse bits. 255 MUL_U24, 256 MUL_I24, 257 MAD_U24, 258 MAD_I24, 259 TEXTURE_FETCH, 260 EXPORT, 261 CONST_ADDRESS, 262 REGISTER_LOAD, 263 REGISTER_STORE, 264 LOAD_INPUT, 265 SAMPLE, 266 SAMPLEB, 267 SAMPLED, 268 SAMPLEL, 269 270 // These cvt_f32_ubyte* nodes need to remain consecutive and in order. 271 CVT_F32_UBYTE0, 272 CVT_F32_UBYTE1, 273 CVT_F32_UBYTE2, 274 CVT_F32_UBYTE3, 275 /// This node is for VLIW targets and it is used to represent a vector 276 /// that is stored in consecutive registers with the same channel. 277 /// For example: 278 /// |X |Y|Z|W| 279 /// T0|v.x| | | | 280 /// T1|v.y| | | | 281 /// T2|v.z| | | | 282 /// T3|v.w| | | | 283 BUILD_VERTICAL_VECTOR, 284 /// Pointer to the start of the shader's constant data. 285 CONST_DATA_PTR, 286 FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, 287 STORE_MSKOR, 288 LOAD_CONSTANT, 289 TBUFFER_STORE_FORMAT, 290 LAST_AMDGPU_ISD_NUMBER 291 }; 292 293 294 } // End namespace AMDGPUISD 295 296 } // End namespace llvm 297 298 #endif 299