1 //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the interfaces that NVPTX uses to lower LLVM code into a 11 // selection DAG. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H 16 #define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H 17 18 #include "NVPTX.h" 19 #include "llvm/CodeGen/SelectionDAG.h" 20 #include "llvm/Target/TargetLowering.h" 21 22 namespace llvm { 23 namespace NVPTXISD { 24 enum NodeType { 25 // Start the numbering from where ISD NodeType finishes. 26 FIRST_NUMBER = ISD::BUILTIN_OP_END, 27 Wrapper, 28 CALL, 29 RET_FLAG, 30 LOAD_PARAM, 31 DeclareParam, 32 DeclareScalarParam, 33 DeclareRetParam, 34 DeclareRet, 35 DeclareScalarRet, 36 PrintCall, 37 PrintCallUni, 38 CallArgBegin, 39 CallArg, 40 LastCallArg, 41 CallArgEnd, 42 CallVoid, 43 CallVal, 44 CallSymbol, 45 Prototype, 46 MoveParam, 47 PseudoUseParam, 48 RETURN, 49 CallSeqBegin, 50 CallSeqEnd, 51 CallPrototype, 52 FUN_SHFL_CLAMP, 53 FUN_SHFR_CLAMP, 54 MUL_WIDE_SIGNED, 55 MUL_WIDE_UNSIGNED, 56 IMAD, 57 Dummy, 58 59 LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE, 60 LoadV4, 61 LDGV2, // LDG.v2 62 LDGV4, // LDG.v4 63 LDUV2, // LDU.v2 64 LDUV4, // LDU.v4 65 StoreV2, 66 StoreV4, 67 LoadParam, 68 LoadParamV2, 69 LoadParamV4, 70 StoreParam, 71 StoreParamV2, 72 StoreParamV4, 73 StoreParamS32, // to sext and store a <32bit value, not used currently 74 StoreParamU32, // to zext and store a <32bit value, not used currently 75 StoreRetval, 76 StoreRetvalV2, 77 StoreRetvalV4, 78 79 // Texture intrinsics 80 Tex1DFloatS32, 81 Tex1DFloatFloat, 82 Tex1DFloatFloatLevel, 83 Tex1DFloatFloatGrad, 84 Tex1DS32S32, 85 Tex1DS32Float, 86 Tex1DS32FloatLevel, 87 Tex1DS32FloatGrad, 88 Tex1DU32S32, 89 Tex1DU32Float, 90 Tex1DU32FloatLevel, 91 Tex1DU32FloatGrad, 92 Tex1DArrayFloatS32, 93 Tex1DArrayFloatFloat, 94 Tex1DArrayFloatFloatLevel, 95 Tex1DArrayFloatFloatGrad, 96 Tex1DArrayS32S32, 97 Tex1DArrayS32Float, 98 Tex1DArrayS32FloatLevel, 99 Tex1DArrayS32FloatGrad, 100 Tex1DArrayU32S32, 101 Tex1DArrayU32Float, 102 Tex1DArrayU32FloatLevel, 103 Tex1DArrayU32FloatGrad, 104 Tex2DFloatS32, 105 Tex2DFloatFloat, 106 Tex2DFloatFloatLevel, 107 Tex2DFloatFloatGrad, 108 Tex2DS32S32, 109 Tex2DS32Float, 110 Tex2DS32FloatLevel, 111 Tex2DS32FloatGrad, 112 Tex2DU32S32, 113 Tex2DU32Float, 114 Tex2DU32FloatLevel, 115 Tex2DU32FloatGrad, 116 Tex2DArrayFloatS32, 117 Tex2DArrayFloatFloat, 118 Tex2DArrayFloatFloatLevel, 119 Tex2DArrayFloatFloatGrad, 120 Tex2DArrayS32S32, 121 Tex2DArrayS32Float, 122 Tex2DArrayS32FloatLevel, 123 Tex2DArrayS32FloatGrad, 124 Tex2DArrayU32S32, 125 Tex2DArrayU32Float, 126 Tex2DArrayU32FloatLevel, 127 Tex2DArrayU32FloatGrad, 128 Tex3DFloatS32, 129 Tex3DFloatFloat, 130 Tex3DFloatFloatLevel, 131 Tex3DFloatFloatGrad, 132 Tex3DS32S32, 133 Tex3DS32Float, 134 Tex3DS32FloatLevel, 135 Tex3DS32FloatGrad, 136 Tex3DU32S32, 137 Tex3DU32Float, 138 Tex3DU32FloatLevel, 139 Tex3DU32FloatGrad, 140 TexCubeFloatFloat, 141 TexCubeFloatFloatLevel, 142 TexCubeS32Float, 143 TexCubeS32FloatLevel, 144 TexCubeU32Float, 145 TexCubeU32FloatLevel, 146 TexCubeArrayFloatFloat, 147 TexCubeArrayFloatFloatLevel, 148 TexCubeArrayS32Float, 149 TexCubeArrayS32FloatLevel, 150 TexCubeArrayU32Float, 151 TexCubeArrayU32FloatLevel, 152 Tld4R2DFloatFloat, 153 Tld4G2DFloatFloat, 154 Tld4B2DFloatFloat, 155 Tld4A2DFloatFloat, 156 Tld4R2DS64Float, 157 Tld4G2DS64Float, 158 Tld4B2DS64Float, 159 Tld4A2DS64Float, 160 Tld4R2DU64Float, 161 Tld4G2DU64Float, 162 Tld4B2DU64Float, 163 Tld4A2DU64Float, 164 TexUnified1DFloatS32, 165 TexUnified1DFloatFloat, 166 TexUnified1DFloatFloatLevel, 167 TexUnified1DFloatFloatGrad, 168 TexUnified1DS32S32, 169 TexUnified1DS32Float, 170 TexUnified1DS32FloatLevel, 171 TexUnified1DS32FloatGrad, 172 TexUnified1DU32S32, 173 TexUnified1DU32Float, 174 TexUnified1DU32FloatLevel, 175 TexUnified1DU32FloatGrad, 176 TexUnified1DArrayFloatS32, 177 TexUnified1DArrayFloatFloat, 178 TexUnified1DArrayFloatFloatLevel, 179 TexUnified1DArrayFloatFloatGrad, 180 TexUnified1DArrayS32S32, 181 TexUnified1DArrayS32Float, 182 TexUnified1DArrayS32FloatLevel, 183 TexUnified1DArrayS32FloatGrad, 184 TexUnified1DArrayU32S32, 185 TexUnified1DArrayU32Float, 186 TexUnified1DArrayU32FloatLevel, 187 TexUnified1DArrayU32FloatGrad, 188 TexUnified2DFloatS32, 189 TexUnified2DFloatFloat, 190 TexUnified2DFloatFloatLevel, 191 TexUnified2DFloatFloatGrad, 192 TexUnified2DS32S32, 193 TexUnified2DS32Float, 194 TexUnified2DS32FloatLevel, 195 TexUnified2DS32FloatGrad, 196 TexUnified2DU32S32, 197 TexUnified2DU32Float, 198 TexUnified2DU32FloatLevel, 199 TexUnified2DU32FloatGrad, 200 TexUnified2DArrayFloatS32, 201 TexUnified2DArrayFloatFloat, 202 TexUnified2DArrayFloatFloatLevel, 203 TexUnified2DArrayFloatFloatGrad, 204 TexUnified2DArrayS32S32, 205 TexUnified2DArrayS32Float, 206 TexUnified2DArrayS32FloatLevel, 207 TexUnified2DArrayS32FloatGrad, 208 TexUnified2DArrayU32S32, 209 TexUnified2DArrayU32Float, 210 TexUnified2DArrayU32FloatLevel, 211 TexUnified2DArrayU32FloatGrad, 212 TexUnified3DFloatS32, 213 TexUnified3DFloatFloat, 214 TexUnified3DFloatFloatLevel, 215 TexUnified3DFloatFloatGrad, 216 TexUnified3DS32S32, 217 TexUnified3DS32Float, 218 TexUnified3DS32FloatLevel, 219 TexUnified3DS32FloatGrad, 220 TexUnified3DU32S32, 221 TexUnified3DU32Float, 222 TexUnified3DU32FloatLevel, 223 TexUnified3DU32FloatGrad, 224 TexUnifiedCubeFloatFloat, 225 TexUnifiedCubeFloatFloatLevel, 226 TexUnifiedCubeS32Float, 227 TexUnifiedCubeS32FloatLevel, 228 TexUnifiedCubeU32Float, 229 TexUnifiedCubeU32FloatLevel, 230 TexUnifiedCubeArrayFloatFloat, 231 TexUnifiedCubeArrayFloatFloatLevel, 232 TexUnifiedCubeArrayS32Float, 233 TexUnifiedCubeArrayS32FloatLevel, 234 TexUnifiedCubeArrayU32Float, 235 TexUnifiedCubeArrayU32FloatLevel, 236 Tld4UnifiedR2DFloatFloat, 237 Tld4UnifiedG2DFloatFloat, 238 Tld4UnifiedB2DFloatFloat, 239 Tld4UnifiedA2DFloatFloat, 240 Tld4UnifiedR2DS64Float, 241 Tld4UnifiedG2DS64Float, 242 Tld4UnifiedB2DS64Float, 243 Tld4UnifiedA2DS64Float, 244 Tld4UnifiedR2DU64Float, 245 Tld4UnifiedG2DU64Float, 246 Tld4UnifiedB2DU64Float, 247 Tld4UnifiedA2DU64Float, 248 249 // Surface intrinsics 250 Suld1DI8Clamp, 251 Suld1DI16Clamp, 252 Suld1DI32Clamp, 253 Suld1DI64Clamp, 254 Suld1DV2I8Clamp, 255 Suld1DV2I16Clamp, 256 Suld1DV2I32Clamp, 257 Suld1DV2I64Clamp, 258 Suld1DV4I8Clamp, 259 Suld1DV4I16Clamp, 260 Suld1DV4I32Clamp, 261 262 Suld1DArrayI8Clamp, 263 Suld1DArrayI16Clamp, 264 Suld1DArrayI32Clamp, 265 Suld1DArrayI64Clamp, 266 Suld1DArrayV2I8Clamp, 267 Suld1DArrayV2I16Clamp, 268 Suld1DArrayV2I32Clamp, 269 Suld1DArrayV2I64Clamp, 270 Suld1DArrayV4I8Clamp, 271 Suld1DArrayV4I16Clamp, 272 Suld1DArrayV4I32Clamp, 273 274 Suld2DI8Clamp, 275 Suld2DI16Clamp, 276 Suld2DI32Clamp, 277 Suld2DI64Clamp, 278 Suld2DV2I8Clamp, 279 Suld2DV2I16Clamp, 280 Suld2DV2I32Clamp, 281 Suld2DV2I64Clamp, 282 Suld2DV4I8Clamp, 283 Suld2DV4I16Clamp, 284 Suld2DV4I32Clamp, 285 286 Suld2DArrayI8Clamp, 287 Suld2DArrayI16Clamp, 288 Suld2DArrayI32Clamp, 289 Suld2DArrayI64Clamp, 290 Suld2DArrayV2I8Clamp, 291 Suld2DArrayV2I16Clamp, 292 Suld2DArrayV2I32Clamp, 293 Suld2DArrayV2I64Clamp, 294 Suld2DArrayV4I8Clamp, 295 Suld2DArrayV4I16Clamp, 296 Suld2DArrayV4I32Clamp, 297 298 Suld3DI8Clamp, 299 Suld3DI16Clamp, 300 Suld3DI32Clamp, 301 Suld3DI64Clamp, 302 Suld3DV2I8Clamp, 303 Suld3DV2I16Clamp, 304 Suld3DV2I32Clamp, 305 Suld3DV2I64Clamp, 306 Suld3DV4I8Clamp, 307 Suld3DV4I16Clamp, 308 Suld3DV4I32Clamp, 309 310 Suld1DI8Trap, 311 Suld1DI16Trap, 312 Suld1DI32Trap, 313 Suld1DI64Trap, 314 Suld1DV2I8Trap, 315 Suld1DV2I16Trap, 316 Suld1DV2I32Trap, 317 Suld1DV2I64Trap, 318 Suld1DV4I8Trap, 319 Suld1DV4I16Trap, 320 Suld1DV4I32Trap, 321 322 Suld1DArrayI8Trap, 323 Suld1DArrayI16Trap, 324 Suld1DArrayI32Trap, 325 Suld1DArrayI64Trap, 326 Suld1DArrayV2I8Trap, 327 Suld1DArrayV2I16Trap, 328 Suld1DArrayV2I32Trap, 329 Suld1DArrayV2I64Trap, 330 Suld1DArrayV4I8Trap, 331 Suld1DArrayV4I16Trap, 332 Suld1DArrayV4I32Trap, 333 334 Suld2DI8Trap, 335 Suld2DI16Trap, 336 Suld2DI32Trap, 337 Suld2DI64Trap, 338 Suld2DV2I8Trap, 339 Suld2DV2I16Trap, 340 Suld2DV2I32Trap, 341 Suld2DV2I64Trap, 342 Suld2DV4I8Trap, 343 Suld2DV4I16Trap, 344 Suld2DV4I32Trap, 345 346 Suld2DArrayI8Trap, 347 Suld2DArrayI16Trap, 348 Suld2DArrayI32Trap, 349 Suld2DArrayI64Trap, 350 Suld2DArrayV2I8Trap, 351 Suld2DArrayV2I16Trap, 352 Suld2DArrayV2I32Trap, 353 Suld2DArrayV2I64Trap, 354 Suld2DArrayV4I8Trap, 355 Suld2DArrayV4I16Trap, 356 Suld2DArrayV4I32Trap, 357 358 Suld3DI8Trap, 359 Suld3DI16Trap, 360 Suld3DI32Trap, 361 Suld3DI64Trap, 362 Suld3DV2I8Trap, 363 Suld3DV2I16Trap, 364 Suld3DV2I32Trap, 365 Suld3DV2I64Trap, 366 Suld3DV4I8Trap, 367 Suld3DV4I16Trap, 368 Suld3DV4I32Trap, 369 370 Suld1DI8Zero, 371 Suld1DI16Zero, 372 Suld1DI32Zero, 373 Suld1DI64Zero, 374 Suld1DV2I8Zero, 375 Suld1DV2I16Zero, 376 Suld1DV2I32Zero, 377 Suld1DV2I64Zero, 378 Suld1DV4I8Zero, 379 Suld1DV4I16Zero, 380 Suld1DV4I32Zero, 381 382 Suld1DArrayI8Zero, 383 Suld1DArrayI16Zero, 384 Suld1DArrayI32Zero, 385 Suld1DArrayI64Zero, 386 Suld1DArrayV2I8Zero, 387 Suld1DArrayV2I16Zero, 388 Suld1DArrayV2I32Zero, 389 Suld1DArrayV2I64Zero, 390 Suld1DArrayV4I8Zero, 391 Suld1DArrayV4I16Zero, 392 Suld1DArrayV4I32Zero, 393 394 Suld2DI8Zero, 395 Suld2DI16Zero, 396 Suld2DI32Zero, 397 Suld2DI64Zero, 398 Suld2DV2I8Zero, 399 Suld2DV2I16Zero, 400 Suld2DV2I32Zero, 401 Suld2DV2I64Zero, 402 Suld2DV4I8Zero, 403 Suld2DV4I16Zero, 404 Suld2DV4I32Zero, 405 406 Suld2DArrayI8Zero, 407 Suld2DArrayI16Zero, 408 Suld2DArrayI32Zero, 409 Suld2DArrayI64Zero, 410 Suld2DArrayV2I8Zero, 411 Suld2DArrayV2I16Zero, 412 Suld2DArrayV2I32Zero, 413 Suld2DArrayV2I64Zero, 414 Suld2DArrayV4I8Zero, 415 Suld2DArrayV4I16Zero, 416 Suld2DArrayV4I32Zero, 417 418 Suld3DI8Zero, 419 Suld3DI16Zero, 420 Suld3DI32Zero, 421 Suld3DI64Zero, 422 Suld3DV2I8Zero, 423 Suld3DV2I16Zero, 424 Suld3DV2I32Zero, 425 Suld3DV2I64Zero, 426 Suld3DV4I8Zero, 427 Suld3DV4I16Zero, 428 Suld3DV4I32Zero 429 }; 430 } 431 432 class NVPTXSubtarget; 433 434 //===--------------------------------------------------------------------===// 435 // TargetLowering Implementation 436 //===--------------------------------------------------------------------===// 437 class NVPTXTargetLowering : public TargetLowering { 438 public: 439 explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM); 440 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; 441 442 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; 443 SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset, 444 SelectionDAG &DAG) const; 445 446 const char *getTargetNodeName(unsigned Opcode) const override; 447 448 bool isTypeSupportedInIntrinsic(MVT VT) const; 449 450 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, 451 unsigned Intrinsic) const override; 452 453 /// isLegalAddressingMode - Return true if the addressing mode represented 454 /// by AM is legal for this target, for a load/store of the specified type 455 /// Used to guide target specific optimizations, like loop strength 456 /// reduction (LoopStrengthReduce.cpp) and memory optimization for 457 /// address mode (CodeGenPrepare.cpp) 458 bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; 459 460 /// getFunctionAlignment - Return the Log2 alignment of this function. 461 unsigned getFunctionAlignment(const Function *F) const; 462 getSetCCResultType(LLVMContext & Ctx,EVT VT)463 EVT getSetCCResultType(LLVMContext &Ctx, EVT VT) const override { 464 if (VT.isVector()) 465 return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements()); 466 return MVT::i1; 467 } 468 469 ConstraintType 470 getConstraintType(const std::string &Constraint) const override; 471 std::pair<unsigned, const TargetRegisterClass *> 472 getRegForInlineAsmConstraint(const std::string &Constraint, 473 MVT VT) const override; 474 475 SDValue LowerFormalArguments( 476 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 477 const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG, 478 SmallVectorImpl<SDValue> &InVals) const override; 479 480 SDValue LowerCall(CallLoweringInfo &CLI, 481 SmallVectorImpl<SDValue> &InVals) const override; 482 483 std::string getPrototype(Type *, const ArgListTy &, 484 const SmallVectorImpl<ISD::OutputArg> &, 485 unsigned retAlignment, 486 const ImmutableCallSite *CS) const; 487 488 SDValue 489 LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 490 const SmallVectorImpl<ISD::OutputArg> &Outs, 491 const SmallVectorImpl<SDValue> &OutVals, SDLoc dl, 492 SelectionDAG &DAG) const override; 493 494 void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, 495 std::vector<SDValue> &Ops, 496 SelectionDAG &DAG) const override; 497 498 const NVPTXTargetMachine *nvTM; 499 500 // PTX always uses 32-bit shift amounts getScalarShiftAmountTy(EVT LHSTy)501 MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } 502 503 TargetLoweringBase::LegalizeTypeAction 504 getPreferredVectorAction(EVT VT) const override; 505 506 bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const; 507 isFMAFasterThanFMulAndFAdd(EVT)508 bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; } 509 enableAggressiveFMAFusion(EVT VT)510 bool enableAggressiveFMAFusion(EVT VT) const override { return true; } 511 512 private: 513 const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here 514 515 SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, 516 EVT = MVT::i32) const; 517 SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const; 518 SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx); 519 520 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; 521 522 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; 523 SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const; 524 525 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; 526 SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const; 527 SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const; 528 529 SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; 530 SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; 531 532 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, 533 SelectionDAG &DAG) const override; 534 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; 535 536 unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS, 537 Type *Ty, unsigned Idx) const; 538 }; 539 } // namespace llvm 540 541 #endif 542