10b57cec5SDimitry Andric //===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file defines the interfaces that NVPTX uses to lower LLVM code into a
100b57cec5SDimitry Andric // selection DAG.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "NVPTXISelLowering.h"
150b57cec5SDimitry Andric #include "MCTargetDesc/NVPTXBaseInfo.h"
160b57cec5SDimitry Andric #include "NVPTX.h"
170b57cec5SDimitry Andric #include "NVPTXSubtarget.h"
180b57cec5SDimitry Andric #include "NVPTXTargetMachine.h"
190b57cec5SDimitry Andric #include "NVPTXTargetObjectFile.h"
200b57cec5SDimitry Andric #include "NVPTXUtilities.h"
210b57cec5SDimitry Andric #include "llvm/ADT/APInt.h"
22e8d8bef9SDimitry Andric #include "llvm/ADT/STLExtras.h"
230b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
240b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
250b57cec5SDimitry Andric #include "llvm/CodeGen/Analysis.h"
265f757f3fSDimitry Andric #include "llvm/CodeGen/ISDOpcodes.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
280b57cec5SDimitry Andric #include "llvm/CodeGen/MachineMemOperand.h"
2906c3fb27SDimitry Andric #include "llvm/CodeGen/MachineValueType.h"
300b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAG.h"
310b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAGNodes.h"
320b57cec5SDimitry Andric #include "llvm/CodeGen/TargetCallingConv.h"
330b57cec5SDimitry Andric #include "llvm/CodeGen/TargetLowering.h"
340b57cec5SDimitry Andric #include "llvm/CodeGen/ValueTypes.h"
350b57cec5SDimitry Andric #include "llvm/IR/Argument.h"
360b57cec5SDimitry Andric #include "llvm/IR/Attributes.h"
370b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
380b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h"
390b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
405f757f3fSDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
4181ad6265SDimitry Andric #include "llvm/IR/FPEnv.h"
420b57cec5SDimitry Andric #include "llvm/IR/Function.h"
430b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h"
440b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
450b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
46480093f4SDimitry Andric #include "llvm/IR/IntrinsicsNVPTX.h"
470b57cec5SDimitry Andric #include "llvm/IR/Module.h"
480b57cec5SDimitry Andric #include "llvm/IR/Type.h"
490b57cec5SDimitry Andric #include "llvm/IR/Value.h"
500b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
510b57cec5SDimitry Andric #include "llvm/Support/CodeGen.h"
520b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
530b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
540b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
550b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h"
560b57cec5SDimitry Andric #include "llvm/Target/TargetOptions.h"
570b57cec5SDimitry Andric #include <algorithm>
580b57cec5SDimitry Andric #include <cassert>
59bdd1243dSDimitry Andric #include <cmath>
600b57cec5SDimitry Andric #include <cstdint>
610b57cec5SDimitry Andric #include <iterator>
620b57cec5SDimitry Andric #include <sstream>
630b57cec5SDimitry Andric #include <string>
640b57cec5SDimitry Andric #include <utility>
650b57cec5SDimitry Andric #include <vector>
660b57cec5SDimitry Andric 
670b57cec5SDimitry Andric #define DEBUG_TYPE "nvptx-lower"
680b57cec5SDimitry Andric 
690b57cec5SDimitry Andric using namespace llvm;
700b57cec5SDimitry Andric 
71e8d8bef9SDimitry Andric static std::atomic<unsigned> GlobalUniqueCallSite;
720b57cec5SDimitry Andric 
730b57cec5SDimitry Andric static cl::opt<bool> sched4reg(
740b57cec5SDimitry Andric     "nvptx-sched4reg",
750b57cec5SDimitry Andric     cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
760b57cec5SDimitry Andric 
7781ad6265SDimitry Andric static cl::opt<unsigned> FMAContractLevelOpt(
7881ad6265SDimitry Andric     "nvptx-fma-level", cl::Hidden,
790b57cec5SDimitry Andric     cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
800b57cec5SDimitry Andric              " 1: do it  2: do it aggressively"),
810b57cec5SDimitry Andric     cl::init(2));
820b57cec5SDimitry Andric 
830b57cec5SDimitry Andric static cl::opt<int> UsePrecDivF32(
8481ad6265SDimitry Andric     "nvptx-prec-divf32", cl::Hidden,
850b57cec5SDimitry Andric     cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
860b57cec5SDimitry Andric              " IEEE Compliant F32 div.rnd if available."),
870b57cec5SDimitry Andric     cl::init(2));
880b57cec5SDimitry Andric 
890b57cec5SDimitry Andric static cl::opt<bool> UsePrecSqrtF32(
900b57cec5SDimitry Andric     "nvptx-prec-sqrtf32", cl::Hidden,
910b57cec5SDimitry Andric     cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
920b57cec5SDimitry Andric     cl::init(true));
930b57cec5SDimitry Andric 
9406c3fb27SDimitry Andric static cl::opt<bool> ForceMinByValParamAlign(
9506c3fb27SDimitry Andric     "nvptx-force-min-byval-param-align", cl::Hidden,
9606c3fb27SDimitry Andric     cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval"
9706c3fb27SDimitry Andric              " params of device functions."),
9806c3fb27SDimitry Andric     cl::init(false));
9906c3fb27SDimitry Andric 
getDivF32Level() const1000b57cec5SDimitry Andric int NVPTXTargetLowering::getDivF32Level() const {
1010b57cec5SDimitry Andric   if (UsePrecDivF32.getNumOccurrences() > 0) {
1020b57cec5SDimitry Andric     // If nvptx-prec-div32=N is used on the command-line, always honor it
1030b57cec5SDimitry Andric     return UsePrecDivF32;
1040b57cec5SDimitry Andric   } else {
1050b57cec5SDimitry Andric     // Otherwise, use div.approx if fast math is enabled
1060b57cec5SDimitry Andric     if (getTargetMachine().Options.UnsafeFPMath)
1070b57cec5SDimitry Andric       return 0;
1080b57cec5SDimitry Andric     else
1090b57cec5SDimitry Andric       return 2;
1100b57cec5SDimitry Andric   }
1110b57cec5SDimitry Andric }
1120b57cec5SDimitry Andric 
usePrecSqrtF32() const1130b57cec5SDimitry Andric bool NVPTXTargetLowering::usePrecSqrtF32() const {
1140b57cec5SDimitry Andric   if (UsePrecSqrtF32.getNumOccurrences() > 0) {
1150b57cec5SDimitry Andric     // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
1160b57cec5SDimitry Andric     return UsePrecSqrtF32;
1170b57cec5SDimitry Andric   } else {
1180b57cec5SDimitry Andric     // Otherwise, use sqrt.approx if fast math is enabled
1190b57cec5SDimitry Andric     return !getTargetMachine().Options.UnsafeFPMath;
1200b57cec5SDimitry Andric   }
1210b57cec5SDimitry Andric }
1220b57cec5SDimitry Andric 
useF32FTZ(const MachineFunction & MF) const1230b57cec5SDimitry Andric bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const {
1245ffd83dbSDimitry Andric   return MF.getDenormalMode(APFloat::IEEEsingle()).Output ==
1255ffd83dbSDimitry Andric          DenormalMode::PreserveSign;
1260b57cec5SDimitry Andric }
1270b57cec5SDimitry Andric 
IsPTXVectorType(MVT VT)1280b57cec5SDimitry Andric static bool IsPTXVectorType(MVT VT) {
1290b57cec5SDimitry Andric   switch (VT.SimpleTy) {
1300b57cec5SDimitry Andric   default:
1310b57cec5SDimitry Andric     return false;
1320b57cec5SDimitry Andric   case MVT::v2i1:
1330b57cec5SDimitry Andric   case MVT::v4i1:
1340b57cec5SDimitry Andric   case MVT::v2i8:
1350b57cec5SDimitry Andric   case MVT::v4i8:
1360b57cec5SDimitry Andric   case MVT::v2i16:
1370b57cec5SDimitry Andric   case MVT::v4i16:
1385f757f3fSDimitry Andric   case MVT::v8i16: // <4 x i16x2>
1390b57cec5SDimitry Andric   case MVT::v2i32:
1400b57cec5SDimitry Andric   case MVT::v4i32:
1410b57cec5SDimitry Andric   case MVT::v2i64:
1420b57cec5SDimitry Andric   case MVT::v2f16:
1430b57cec5SDimitry Andric   case MVT::v4f16:
1440b57cec5SDimitry Andric   case MVT::v8f16: // <4 x f16x2>
145bdd1243dSDimitry Andric   case MVT::v2bf16:
146bdd1243dSDimitry Andric   case MVT::v4bf16:
147bdd1243dSDimitry Andric   case MVT::v8bf16: // <4 x bf16x2>
1480b57cec5SDimitry Andric   case MVT::v2f32:
1490b57cec5SDimitry Andric   case MVT::v4f32:
1500b57cec5SDimitry Andric   case MVT::v2f64:
1510b57cec5SDimitry Andric     return true;
1520b57cec5SDimitry Andric   }
1530b57cec5SDimitry Andric }
1540b57cec5SDimitry Andric 
Is16bitsType(MVT VT)1555f757f3fSDimitry Andric static bool Is16bitsType(MVT VT) {
1565f757f3fSDimitry Andric   return (VT.SimpleTy == MVT::f16 || VT.SimpleTy == MVT::bf16 ||
1575f757f3fSDimitry Andric           VT.SimpleTy == MVT::i16);
15806c3fb27SDimitry Andric }
15906c3fb27SDimitry Andric 
1600b57cec5SDimitry Andric /// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
1610b57cec5SDimitry Andric /// EVTs that compose it.  Unlike ComputeValueVTs, this will break apart vectors
1620b57cec5SDimitry Andric /// into their primitive components.
1630b57cec5SDimitry Andric /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
1640b57cec5SDimitry Andric /// same number of types as the Ins/Outs arrays in LowerFormalArguments,
1650b57cec5SDimitry Andric /// LowerCall, and LowerReturn.
ComputePTXValueVTs(const TargetLowering & TLI,const DataLayout & DL,Type * Ty,SmallVectorImpl<EVT> & ValueVTs,SmallVectorImpl<uint64_t> * Offsets=nullptr,uint64_t StartingOffset=0)1660b57cec5SDimitry Andric static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
1670b57cec5SDimitry Andric                                Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
1680b57cec5SDimitry Andric                                SmallVectorImpl<uint64_t> *Offsets = nullptr,
1690b57cec5SDimitry Andric                                uint64_t StartingOffset = 0) {
1700b57cec5SDimitry Andric   SmallVector<EVT, 16> TempVTs;
1710b57cec5SDimitry Andric   SmallVector<uint64_t, 16> TempOffsets;
1720b57cec5SDimitry Andric 
1730b57cec5SDimitry Andric   // Special case for i128 - decompose to (i64, i64)
1740b57cec5SDimitry Andric   if (Ty->isIntegerTy(128)) {
1750b57cec5SDimitry Andric     ValueVTs.push_back(EVT(MVT::i64));
1760b57cec5SDimitry Andric     ValueVTs.push_back(EVT(MVT::i64));
1770b57cec5SDimitry Andric 
1780b57cec5SDimitry Andric     if (Offsets) {
1790b57cec5SDimitry Andric       Offsets->push_back(StartingOffset + 0);
1800b57cec5SDimitry Andric       Offsets->push_back(StartingOffset + 8);
1810b57cec5SDimitry Andric     }
1820b57cec5SDimitry Andric 
1830b57cec5SDimitry Andric     return;
1840b57cec5SDimitry Andric   }
1850b57cec5SDimitry Andric 
1860b57cec5SDimitry Andric   // Given a struct type, recursively traverse the elements with custom ComputePTXValueVTs.
1870b57cec5SDimitry Andric   if (StructType *STy = dyn_cast<StructType>(Ty)) {
1880b57cec5SDimitry Andric     auto const *SL = DL.getStructLayout(STy);
1890b57cec5SDimitry Andric     auto ElementNum = 0;
1900b57cec5SDimitry Andric     for(auto *EI : STy->elements()) {
1910b57cec5SDimitry Andric       ComputePTXValueVTs(TLI, DL, EI, ValueVTs, Offsets,
1920b57cec5SDimitry Andric                          StartingOffset + SL->getElementOffset(ElementNum));
1930b57cec5SDimitry Andric       ++ElementNum;
1940b57cec5SDimitry Andric     }
1950b57cec5SDimitry Andric     return;
1960b57cec5SDimitry Andric   }
1970b57cec5SDimitry Andric 
1980b57cec5SDimitry Andric   ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
1990b57cec5SDimitry Andric   for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
2000b57cec5SDimitry Andric     EVT VT = TempVTs[i];
2010b57cec5SDimitry Andric     uint64_t Off = TempOffsets[i];
2020b57cec5SDimitry Andric     // Split vectors into individual elements, except for v2f16, which
2030b57cec5SDimitry Andric     // we will pass as a single scalar.
2040b57cec5SDimitry Andric     if (VT.isVector()) {
2050b57cec5SDimitry Andric       unsigned NumElts = VT.getVectorNumElements();
2060b57cec5SDimitry Andric       EVT EltVT = VT.getVectorElementType();
2070b57cec5SDimitry Andric       // Vectors with an even number of f16 elements will be passed to
208bdd1243dSDimitry Andric       // us as an array of v2f16/v2bf16 elements. We must match this so we
2090b57cec5SDimitry Andric       // stay in sync with Ins/Outs.
2105f757f3fSDimitry Andric       if ((Is16bitsType(EltVT.getSimpleVT())) && NumElts % 2 == 0) {
2115f757f3fSDimitry Andric         switch (EltVT.getSimpleVT().SimpleTy) {
2125f757f3fSDimitry Andric         case MVT::f16:
2135f757f3fSDimitry Andric           EltVT = MVT::v2f16;
2145f757f3fSDimitry Andric           break;
2155f757f3fSDimitry Andric         case MVT::bf16:
2165f757f3fSDimitry Andric           EltVT = MVT::v2bf16;
2175f757f3fSDimitry Andric           break;
2185f757f3fSDimitry Andric         case MVT::i16:
2195f757f3fSDimitry Andric           EltVT = MVT::v2i16;
2205f757f3fSDimitry Andric           break;
2215f757f3fSDimitry Andric         default:
2225f757f3fSDimitry Andric           llvm_unreachable("Unexpected type");
2235f757f3fSDimitry Andric         }
2240b57cec5SDimitry Andric         NumElts /= 2;
2255f757f3fSDimitry Andric       } else if (EltVT.getSimpleVT() == MVT::i8 &&
2265f757f3fSDimitry Andric                  (NumElts % 4 == 0 || NumElts == 3)) {
2275f757f3fSDimitry Andric         // v*i8 are formally lowered as v4i8
2285f757f3fSDimitry Andric         EltVT = MVT::v4i8;
2295f757f3fSDimitry Andric         NumElts = (NumElts + 3) / 4;
2300b57cec5SDimitry Andric       }
2310b57cec5SDimitry Andric       for (unsigned j = 0; j != NumElts; ++j) {
2320b57cec5SDimitry Andric         ValueVTs.push_back(EltVT);
2330b57cec5SDimitry Andric         if (Offsets)
2340b57cec5SDimitry Andric           Offsets->push_back(Off + j * EltVT.getStoreSize());
2350b57cec5SDimitry Andric       }
2360b57cec5SDimitry Andric     } else {
2370b57cec5SDimitry Andric       ValueVTs.push_back(VT);
2380b57cec5SDimitry Andric       if (Offsets)
2390b57cec5SDimitry Andric         Offsets->push_back(Off);
2400b57cec5SDimitry Andric     }
2410b57cec5SDimitry Andric   }
2420b57cec5SDimitry Andric }
2430b57cec5SDimitry Andric 
244fcaf7f86SDimitry Andric /// PromoteScalarIntegerPTX
245fcaf7f86SDimitry Andric /// Used to make sure the arguments/returns are suitable for passing
246fcaf7f86SDimitry Andric /// and promote them to a larger size if they're not.
247fcaf7f86SDimitry Andric ///
248fcaf7f86SDimitry Andric /// The promoted type is placed in \p PromoteVT if the function returns true.
PromoteScalarIntegerPTX(const EVT & VT,MVT * PromotedVT)249fcaf7f86SDimitry Andric static bool PromoteScalarIntegerPTX(const EVT &VT, MVT *PromotedVT) {
250fcaf7f86SDimitry Andric   if (VT.isScalarInteger()) {
251fcaf7f86SDimitry Andric     switch (PowerOf2Ceil(VT.getFixedSizeInBits())) {
252fcaf7f86SDimitry Andric     default:
253fcaf7f86SDimitry Andric       llvm_unreachable(
254fcaf7f86SDimitry Andric           "Promotion is not suitable for scalars of size larger than 64-bits");
255fcaf7f86SDimitry Andric     case 1:
256fcaf7f86SDimitry Andric       *PromotedVT = MVT::i1;
257fcaf7f86SDimitry Andric       break;
258fcaf7f86SDimitry Andric     case 2:
259fcaf7f86SDimitry Andric     case 4:
260fcaf7f86SDimitry Andric     case 8:
261fcaf7f86SDimitry Andric       *PromotedVT = MVT::i8;
262fcaf7f86SDimitry Andric       break;
263fcaf7f86SDimitry Andric     case 16:
264fcaf7f86SDimitry Andric       *PromotedVT = MVT::i16;
265fcaf7f86SDimitry Andric       break;
266fcaf7f86SDimitry Andric     case 32:
267fcaf7f86SDimitry Andric       *PromotedVT = MVT::i32;
268fcaf7f86SDimitry Andric       break;
269fcaf7f86SDimitry Andric     case 64:
270fcaf7f86SDimitry Andric       *PromotedVT = MVT::i64;
271fcaf7f86SDimitry Andric       break;
272fcaf7f86SDimitry Andric     }
273fcaf7f86SDimitry Andric     return EVT(*PromotedVT) != VT;
274fcaf7f86SDimitry Andric   }
275fcaf7f86SDimitry Andric   return false;
276fcaf7f86SDimitry Andric }
277fcaf7f86SDimitry Andric 
2780b57cec5SDimitry Andric // Check whether we can merge loads/stores of some of the pieces of a
2790b57cec5SDimitry Andric // flattened function parameter or return value into a single vector
2800b57cec5SDimitry Andric // load/store.
2810b57cec5SDimitry Andric //
2820b57cec5SDimitry Andric // The flattened parameter is represented as a list of EVTs and
2830b57cec5SDimitry Andric // offsets, and the whole structure is aligned to ParamAlignment. This
2840b57cec5SDimitry Andric // function determines whether we can load/store pieces of the
2850b57cec5SDimitry Andric // parameter starting at index Idx using a single vectorized op of
2860b57cec5SDimitry Andric // size AccessSize. If so, it returns the number of param pieces
2870b57cec5SDimitry Andric // covered by the vector op. Otherwise, it returns 1.
CanMergeParamLoadStoresStartingAt(unsigned Idx,uint32_t AccessSize,const SmallVectorImpl<EVT> & ValueVTs,const SmallVectorImpl<uint64_t> & Offsets,Align ParamAlignment)2880b57cec5SDimitry Andric static unsigned CanMergeParamLoadStoresStartingAt(
2890b57cec5SDimitry Andric     unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs,
2905ffd83dbSDimitry Andric     const SmallVectorImpl<uint64_t> &Offsets, Align ParamAlignment) {
2910b57cec5SDimitry Andric 
2920b57cec5SDimitry Andric   // Can't vectorize if param alignment is not sufficient.
2935ffd83dbSDimitry Andric   if (ParamAlignment < AccessSize)
2940b57cec5SDimitry Andric     return 1;
2950b57cec5SDimitry Andric   // Can't vectorize if offset is not aligned.
2960b57cec5SDimitry Andric   if (Offsets[Idx] & (AccessSize - 1))
2970b57cec5SDimitry Andric     return 1;
2980b57cec5SDimitry Andric 
2990b57cec5SDimitry Andric   EVT EltVT = ValueVTs[Idx];
3000b57cec5SDimitry Andric   unsigned EltSize = EltVT.getStoreSize();
3010b57cec5SDimitry Andric 
3020b57cec5SDimitry Andric   // Element is too large to vectorize.
3030b57cec5SDimitry Andric   if (EltSize >= AccessSize)
3040b57cec5SDimitry Andric     return 1;
3050b57cec5SDimitry Andric 
3060b57cec5SDimitry Andric   unsigned NumElts = AccessSize / EltSize;
3070b57cec5SDimitry Andric   // Can't vectorize if AccessBytes if not a multiple of EltSize.
3080b57cec5SDimitry Andric   if (AccessSize != EltSize * NumElts)
3090b57cec5SDimitry Andric     return 1;
3100b57cec5SDimitry Andric 
3110b57cec5SDimitry Andric   // We don't have enough elements to vectorize.
3120b57cec5SDimitry Andric   if (Idx + NumElts > ValueVTs.size())
3130b57cec5SDimitry Andric     return 1;
3140b57cec5SDimitry Andric 
3150b57cec5SDimitry Andric   // PTX ISA can only deal with 2- and 4-element vector ops.
3160b57cec5SDimitry Andric   if (NumElts != 4 && NumElts != 2)
3170b57cec5SDimitry Andric     return 1;
3180b57cec5SDimitry Andric 
3190b57cec5SDimitry Andric   for (unsigned j = Idx + 1; j < Idx + NumElts; ++j) {
3200b57cec5SDimitry Andric     // Types do not match.
3210b57cec5SDimitry Andric     if (ValueVTs[j] != EltVT)
3220b57cec5SDimitry Andric       return 1;
3230b57cec5SDimitry Andric 
3240b57cec5SDimitry Andric     // Elements are not contiguous.
3250b57cec5SDimitry Andric     if (Offsets[j] - Offsets[j - 1] != EltSize)
3260b57cec5SDimitry Andric       return 1;
3270b57cec5SDimitry Andric   }
3280b57cec5SDimitry Andric   // OK. We can vectorize ValueVTs[i..i+NumElts)
3290b57cec5SDimitry Andric   return NumElts;
3300b57cec5SDimitry Andric }
3310b57cec5SDimitry Andric 
// Per-element vectorization state for the loads/stores of a flattened
// function parameter or return value. FIRST and LAST are independent bits;
// an element with neither bit set lies in the middle of a vector.
enum ParamVectorizationFlags {
  PVF_INNER = 0,      // Middle elements of a vector.
  PVF_FIRST = 1 << 0, // First element of the vector.
  PVF_LAST = 1 << 1,  // Last element of the vector.
  // Scalar is effectively a 1-element vector: both first and last.
  PVF_SCALAR = PVF_FIRST | PVF_LAST
};
3410b57cec5SDimitry Andric 
3420b57cec5SDimitry Andric // Computes whether and how we can vectorize the loads/stores of a
3430b57cec5SDimitry Andric // flattened function parameter or return value.
3440b57cec5SDimitry Andric //
3450b57cec5SDimitry Andric // The flattened parameter is represented as the list of ValueVTs and
3460b57cec5SDimitry Andric // Offsets, and is aligned to ParamAlignment bytes. We return a vector
3470b57cec5SDimitry Andric // of the same size as ValueVTs indicating how each piece should be
3480b57cec5SDimitry Andric // loaded/stored (i.e. as a scalar, or as part of a vector
3490b57cec5SDimitry Andric // load/store).
3500b57cec5SDimitry Andric static SmallVector<ParamVectorizationFlags, 16>
VectorizePTXValueVTs(const SmallVectorImpl<EVT> & ValueVTs,const SmallVectorImpl<uint64_t> & Offsets,Align ParamAlignment,bool IsVAArg=false)3510b57cec5SDimitry Andric VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
3520b57cec5SDimitry Andric                      const SmallVectorImpl<uint64_t> &Offsets,
353bdd1243dSDimitry Andric                      Align ParamAlignment, bool IsVAArg = false) {
3540b57cec5SDimitry Andric   // Set vector size to match ValueVTs and mark all elements as
3550b57cec5SDimitry Andric   // scalars by default.
3560b57cec5SDimitry Andric   SmallVector<ParamVectorizationFlags, 16> VectorInfo;
3570b57cec5SDimitry Andric   VectorInfo.assign(ValueVTs.size(), PVF_SCALAR);
3580b57cec5SDimitry Andric 
359bdd1243dSDimitry Andric   if (IsVAArg)
360bdd1243dSDimitry Andric     return VectorInfo;
361bdd1243dSDimitry Andric 
3620b57cec5SDimitry Andric   // Check what we can vectorize using 128/64/32-bit accesses.
3630b57cec5SDimitry Andric   for (int I = 0, E = ValueVTs.size(); I != E; ++I) {
3640b57cec5SDimitry Andric     // Skip elements we've already processed.
3650b57cec5SDimitry Andric     assert(VectorInfo[I] == PVF_SCALAR && "Unexpected vector info state.");
3660b57cec5SDimitry Andric     for (unsigned AccessSize : {16, 8, 4, 2}) {
3670b57cec5SDimitry Andric       unsigned NumElts = CanMergeParamLoadStoresStartingAt(
3680b57cec5SDimitry Andric           I, AccessSize, ValueVTs, Offsets, ParamAlignment);
3690b57cec5SDimitry Andric       // Mark vectorized elements.
3700b57cec5SDimitry Andric       switch (NumElts) {
3710b57cec5SDimitry Andric       default:
3720b57cec5SDimitry Andric         llvm_unreachable("Unexpected return value");
3730b57cec5SDimitry Andric       case 1:
3740b57cec5SDimitry Andric         // Can't vectorize using this size, try next smaller size.
3750b57cec5SDimitry Andric         continue;
3760b57cec5SDimitry Andric       case 2:
3770b57cec5SDimitry Andric         assert(I + 1 < E && "Not enough elements.");
3780b57cec5SDimitry Andric         VectorInfo[I] = PVF_FIRST;
3790b57cec5SDimitry Andric         VectorInfo[I + 1] = PVF_LAST;
3800b57cec5SDimitry Andric         I += 1;
3810b57cec5SDimitry Andric         break;
3820b57cec5SDimitry Andric       case 4:
3830b57cec5SDimitry Andric         assert(I + 3 < E && "Not enough elements.");
3840b57cec5SDimitry Andric         VectorInfo[I] = PVF_FIRST;
3850b57cec5SDimitry Andric         VectorInfo[I + 1] = PVF_INNER;
3860b57cec5SDimitry Andric         VectorInfo[I + 2] = PVF_INNER;
3870b57cec5SDimitry Andric         VectorInfo[I + 3] = PVF_LAST;
3880b57cec5SDimitry Andric         I += 3;
3890b57cec5SDimitry Andric         break;
3900b57cec5SDimitry Andric       }
3910b57cec5SDimitry Andric       // Break out of the inner loop because we've already succeeded
3920b57cec5SDimitry Andric       // using largest possible AccessSize.
3930b57cec5SDimitry Andric       break;
3940b57cec5SDimitry Andric     }
3950b57cec5SDimitry Andric   }
3960b57cec5SDimitry Andric   return VectorInfo;
3970b57cec5SDimitry Andric }
3980b57cec5SDimitry Andric 
3990b57cec5SDimitry Andric // NVPTXTargetLowering Constructor.
NVPTXTargetLowering(const NVPTXTargetMachine & TM,const NVPTXSubtarget & STI)4000b57cec5SDimitry Andric NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
4010b57cec5SDimitry Andric                                          const NVPTXSubtarget &STI)
4020b57cec5SDimitry Andric     : TargetLowering(TM), nvTM(&TM), STI(STI) {
4030b57cec5SDimitry Andric   // always lower memset, memcpy, and memmove intrinsics to load/store
4040b57cec5SDimitry Andric   // instructions, rather
4050b57cec5SDimitry Andric   // then generating calls to memset, mempcy or memmove.
4065f757f3fSDimitry Andric   MaxStoresPerMemset = MaxStoresPerMemsetOptSize = (unsigned)0xFFFFFFFF;
4075f757f3fSDimitry Andric   MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = (unsigned) 0xFFFFFFFF;
4085f757f3fSDimitry Andric   MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = (unsigned) 0xFFFFFFFF;
4090b57cec5SDimitry Andric 
4100b57cec5SDimitry Andric   setBooleanContents(ZeroOrNegativeOneBooleanContent);
4110b57cec5SDimitry Andric   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
4120b57cec5SDimitry Andric 
4130b57cec5SDimitry Andric   // Jump is Expensive. Don't create extra control flow for 'and', 'or'
4140b57cec5SDimitry Andric   // condition branches.
4150b57cec5SDimitry Andric   setJumpIsExpensive(true);
4160b57cec5SDimitry Andric 
4170b57cec5SDimitry Andric   // Wide divides are _very_ slow. Try to reduce the width of the divide if
4180b57cec5SDimitry Andric   // possible.
4190b57cec5SDimitry Andric   addBypassSlowDiv(64, 32);
4200b57cec5SDimitry Andric 
4210b57cec5SDimitry Andric   // By default, use the Source scheduling
4220b57cec5SDimitry Andric   if (sched4reg)
4230b57cec5SDimitry Andric     setSchedulingPreference(Sched::RegPressure);
4240b57cec5SDimitry Andric   else
4250b57cec5SDimitry Andric     setSchedulingPreference(Sched::Source);
4260b57cec5SDimitry Andric 
4270b57cec5SDimitry Andric   auto setFP16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action,
4280b57cec5SDimitry Andric                                     LegalizeAction NoF16Action) {
4290b57cec5SDimitry Andric     setOperationAction(Op, VT, STI.allowFP16Math() ? Action : NoF16Action);
4300b57cec5SDimitry Andric   };
4310b57cec5SDimitry Andric 
43206c3fb27SDimitry Andric   auto setBF16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action,
43306c3fb27SDimitry Andric                                     LegalizeAction NoBF16Action) {
43406c3fb27SDimitry Andric     bool IsOpSupported = STI.hasBF16Math();
43506c3fb27SDimitry Andric     // Few instructions are available on sm_90 only
43606c3fb27SDimitry Andric     switch(Op) {
43706c3fb27SDimitry Andric       case ISD::FADD:
43806c3fb27SDimitry Andric       case ISD::FMUL:
43906c3fb27SDimitry Andric       case ISD::FSUB:
4405f757f3fSDimitry Andric       case ISD::SELECT:
4415f757f3fSDimitry Andric       case ISD::SELECT_CC:
4425f757f3fSDimitry Andric       case ISD::SETCC:
4435f757f3fSDimitry Andric       case ISD::FEXP2:
4445f757f3fSDimitry Andric       case ISD::FCEIL:
4455f757f3fSDimitry Andric       case ISD::FFLOOR:
4465f757f3fSDimitry Andric       case ISD::FNEARBYINT:
4475f757f3fSDimitry Andric       case ISD::FRINT:
4485f757f3fSDimitry Andric       case ISD::FTRUNC:
44906c3fb27SDimitry Andric         IsOpSupported = STI.getSmVersion() >= 90 && STI.getPTXVersion() >= 78;
45006c3fb27SDimitry Andric         break;
45106c3fb27SDimitry Andric     }
45206c3fb27SDimitry Andric     setOperationAction(
45306c3fb27SDimitry Andric         Op, VT, IsOpSupported ? Action : NoBF16Action);
45406c3fb27SDimitry Andric   };
45506c3fb27SDimitry Andric 
4565f757f3fSDimitry Andric   auto setI16x2OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action,
4575f757f3fSDimitry Andric                                      LegalizeAction NoI16x2Action) {
4585f757f3fSDimitry Andric     bool IsOpSupported = false;
4595f757f3fSDimitry Andric     // instructions are available on sm_90 only
4605f757f3fSDimitry Andric     switch (Op) {
4615f757f3fSDimitry Andric     case ISD::ADD:
4625f757f3fSDimitry Andric     case ISD::SMAX:
4635f757f3fSDimitry Andric     case ISD::SMIN:
4645f757f3fSDimitry Andric     case ISD::UMIN:
4655f757f3fSDimitry Andric     case ISD::UMAX:
4665f757f3fSDimitry Andric     case ISD::SUB:
4675f757f3fSDimitry Andric       IsOpSupported = STI.getSmVersion() >= 90 && STI.getPTXVersion() >= 80;
4685f757f3fSDimitry Andric       break;
4695f757f3fSDimitry Andric     }
4705f757f3fSDimitry Andric     setOperationAction(Op, VT, IsOpSupported ? Action : NoI16x2Action);
4715f757f3fSDimitry Andric   };
4725f757f3fSDimitry Andric 
4730b57cec5SDimitry Andric   addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
4740b57cec5SDimitry Andric   addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
4755f757f3fSDimitry Andric   addRegisterClass(MVT::v2i16, &NVPTX::Int32RegsRegClass);
4765f757f3fSDimitry Andric   addRegisterClass(MVT::v4i8, &NVPTX::Int32RegsRegClass);
4770b57cec5SDimitry Andric   addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
4780b57cec5SDimitry Andric   addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
4790b57cec5SDimitry Andric   addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
4800b57cec5SDimitry Andric   addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
48106c3fb27SDimitry Andric   addRegisterClass(MVT::f16, &NVPTX::Int16RegsRegClass);
48206c3fb27SDimitry Andric   addRegisterClass(MVT::v2f16, &NVPTX::Int32RegsRegClass);
48306c3fb27SDimitry Andric   addRegisterClass(MVT::bf16, &NVPTX::Int16RegsRegClass);
48406c3fb27SDimitry Andric   addRegisterClass(MVT::v2bf16, &NVPTX::Int32RegsRegClass);
4850b57cec5SDimitry Andric 
4860b57cec5SDimitry Andric   // Conversion to/from FP16/FP16x2 is always legal.
4870b57cec5SDimitry Andric   setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom);
4880b57cec5SDimitry Andric   setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);
4890b57cec5SDimitry Andric   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f16, Expand);
4900b57cec5SDimitry Andric   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f16, Expand);
4910b57cec5SDimitry Andric 
4920b57cec5SDimitry Andric   setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote);
4930b57cec5SDimitry Andric   setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand);
4940b57cec5SDimitry Andric 
49506c3fb27SDimitry Andric   // Conversion to/from BFP16/BFP16x2 is always legal.
49606c3fb27SDimitry Andric   setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Custom);
49706c3fb27SDimitry Andric   setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2bf16, Custom);
49806c3fb27SDimitry Andric   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2bf16, Expand);
49906c3fb27SDimitry Andric   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2bf16, Expand);
50006c3fb27SDimitry Andric 
50106c3fb27SDimitry Andric   setBF16OperationAction(ISD::SETCC, MVT::v2bf16, Legal, Expand);
5025f757f3fSDimitry Andric   setBF16OperationAction(ISD::SETCC, MVT::bf16, Legal, Promote);
5035f757f3fSDimitry Andric   if (getOperationAction(ISD::SETCC, MVT::bf16) == Promote)
5045f757f3fSDimitry Andric     AddPromotedToType(ISD::SETCC, MVT::bf16, MVT::f32);
5055f757f3fSDimitry Andric 
5065f757f3fSDimitry Andric   // Conversion to/from i16/i16x2 is always legal.
5075f757f3fSDimitry Andric   setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16, Custom);
5085f757f3fSDimitry Andric   setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom);
5095f757f3fSDimitry Andric   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i16, Expand);
5105f757f3fSDimitry Andric   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i16, Expand);
5115f757f3fSDimitry Andric 
5125f757f3fSDimitry Andric   setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8, Custom);
5135f757f3fSDimitry Andric   setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i8, Custom);
5145f757f3fSDimitry Andric   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i8, Custom);
5155f757f3fSDimitry Andric   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom);
5165f757f3fSDimitry Andric   // Only logical ops can be done on v4i8 directly, others must be done
5175f757f3fSDimitry Andric   // elementwise.
5185f757f3fSDimitry Andric   setOperationAction(
5195f757f3fSDimitry Andric       {ISD::ABS,         ISD::ADD,        ISD::ADDC,        ISD::ADDE,
5205f757f3fSDimitry Andric        ISD::BITREVERSE,  ISD::CTLZ,       ISD::CTPOP,       ISD::CTTZ,
5215f757f3fSDimitry Andric        ISD::FP_TO_SINT,  ISD::FP_TO_UINT, ISD::FSHL,        ISD::FSHR,
5225f757f3fSDimitry Andric        ISD::MUL,         ISD::MULHS,      ISD::MULHU,       ISD::PARITY,
5235f757f3fSDimitry Andric        ISD::ROTL,        ISD::ROTR,       ISD::SADDO,       ISD::SADDO_CARRY,
5245f757f3fSDimitry Andric        ISD::SADDSAT,     ISD::SDIV,       ISD::SDIVREM,     ISD::SELECT_CC,
5255f757f3fSDimitry Andric        ISD::SETCC,       ISD::SHL,        ISD::SINT_TO_FP,  ISD::SMAX,
5265f757f3fSDimitry Andric        ISD::SMIN,        ISD::SMULO,      ISD::SMUL_LOHI,   ISD::SRA,
5275f757f3fSDimitry Andric        ISD::SREM,        ISD::SRL,        ISD::SSHLSAT,     ISD::SSUBO,
5285f757f3fSDimitry Andric        ISD::SSUBO_CARRY, ISD::SSUBSAT,    ISD::SUB,         ISD::SUBC,
5295f757f3fSDimitry Andric        ISD::SUBE,        ISD::UADDO,      ISD::UADDO_CARRY, ISD::UADDSAT,
5305f757f3fSDimitry Andric        ISD::UDIV,        ISD::UDIVREM,    ISD::UINT_TO_FP,  ISD::UMAX,
5315f757f3fSDimitry Andric        ISD::UMIN,        ISD::UMULO,      ISD::UMUL_LOHI,   ISD::UREM,
5325f757f3fSDimitry Andric        ISD::USHLSAT,     ISD::USUBO,      ISD::USUBO_CARRY, ISD::VSELECT,
5335f757f3fSDimitry Andric        ISD::USUBSAT},
5345f757f3fSDimitry Andric       MVT::v4i8, Expand);
5355f757f3fSDimitry Andric 
5360b57cec5SDimitry Andric   // Operations not directly supported by NVPTX.
53706c3fb27SDimitry Andric   for (MVT VT : {MVT::bf16, MVT::f16, MVT::v2bf16, MVT::v2f16, MVT::f32,
5385f757f3fSDimitry Andric                  MVT::f64, MVT::i1, MVT::i8, MVT::i16, MVT::v2i16, MVT::v4i8,
5395f757f3fSDimitry Andric                  MVT::i32, MVT::i64}) {
5400b57cec5SDimitry Andric     setOperationAction(ISD::SELECT_CC, VT, Expand);
5410b57cec5SDimitry Andric     setOperationAction(ISD::BR_CC, VT, Expand);
5420b57cec5SDimitry Andric   }
5430b57cec5SDimitry Andric 
5440b57cec5SDimitry Andric   // Some SIGN_EXTEND_INREG can be done using cvt instruction.
5450b57cec5SDimitry Andric   // For others we will expand to a SHL/SRA pair.
5460b57cec5SDimitry Andric   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal);
5470b57cec5SDimitry Andric   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
5480b57cec5SDimitry Andric   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
5490b57cec5SDimitry Andric   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
5500b57cec5SDimitry Andric   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
5515f757f3fSDimitry Andric   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
5520b57cec5SDimitry Andric 
5530b57cec5SDimitry Andric   setOperationAction(ISD::SHL_PARTS, MVT::i32  , Custom);
5540b57cec5SDimitry Andric   setOperationAction(ISD::SRA_PARTS, MVT::i32  , Custom);
5550b57cec5SDimitry Andric   setOperationAction(ISD::SRL_PARTS, MVT::i32  , Custom);
5560b57cec5SDimitry Andric   setOperationAction(ISD::SHL_PARTS, MVT::i64  , Custom);
5570b57cec5SDimitry Andric   setOperationAction(ISD::SRA_PARTS, MVT::i64  , Custom);
5580b57cec5SDimitry Andric   setOperationAction(ISD::SRL_PARTS, MVT::i64  , Custom);
5590b57cec5SDimitry Andric 
5600b57cec5SDimitry Andric   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
5610b57cec5SDimitry Andric   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
5620b57cec5SDimitry Andric 
5630b57cec5SDimitry Andric   // TODO: we may consider expanding ROTL/ROTR on older GPUs.  Currently on GPUs
5640b57cec5SDimitry Andric   // that don't have h/w rotation we lower them to multi-instruction assembly.
5650b57cec5SDimitry Andric   // See ROT*_sw in NVPTXInstrInfo.td
5660b57cec5SDimitry Andric   setOperationAction(ISD::ROTL, MVT::i64, Legal);
5670b57cec5SDimitry Andric   setOperationAction(ISD::ROTR, MVT::i64, Legal);
5680b57cec5SDimitry Andric   setOperationAction(ISD::ROTL, MVT::i32, Legal);
5690b57cec5SDimitry Andric   setOperationAction(ISD::ROTR, MVT::i32, Legal);
5700b57cec5SDimitry Andric 
5710b57cec5SDimitry Andric   setOperationAction(ISD::ROTL, MVT::i16, Expand);
5725f757f3fSDimitry Andric   setOperationAction(ISD::ROTL, MVT::v2i16, Expand);
5730b57cec5SDimitry Andric   setOperationAction(ISD::ROTR, MVT::i16, Expand);
5745f757f3fSDimitry Andric   setOperationAction(ISD::ROTR, MVT::v2i16, Expand);
5750b57cec5SDimitry Andric   setOperationAction(ISD::ROTL, MVT::i8, Expand);
5760b57cec5SDimitry Andric   setOperationAction(ISD::ROTR, MVT::i8, Expand);
5770b57cec5SDimitry Andric   setOperationAction(ISD::BSWAP, MVT::i16, Expand);
5785f757f3fSDimitry Andric   setOperationAction(ISD::BSWAP, MVT::v2i16, Expand);
5790b57cec5SDimitry Andric   setOperationAction(ISD::BSWAP, MVT::i32, Expand);
5800b57cec5SDimitry Andric   setOperationAction(ISD::BSWAP, MVT::i64, Expand);
5810b57cec5SDimitry Andric 
5820b57cec5SDimitry Andric   // Indirect branch is not supported.
5830b57cec5SDimitry Andric   // This also disables Jump Table creation.
5840b57cec5SDimitry Andric   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
5850b57cec5SDimitry Andric   setOperationAction(ISD::BRIND, MVT::Other, Expand);
5860b57cec5SDimitry Andric 
5870b57cec5SDimitry Andric   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
5880b57cec5SDimitry Andric   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
5890b57cec5SDimitry Andric 
5900b57cec5SDimitry Andric   // We want to legalize constant-related memmove and memcpy
5910b57cec5SDimitry Andric   // intrinsics.
5920b57cec5SDimitry Andric   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
5930b57cec5SDimitry Andric 
5940b57cec5SDimitry Andric   // Turn FP extload into load/fpextend
5950b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
5960b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
59706c3fb27SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
59806c3fb27SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
5990b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
6000b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
6010b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
60206c3fb27SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2bf16, Expand);
60306c3fb27SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2bf16, Expand);
6040b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
6050b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
6060b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
60706c3fb27SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4bf16, Expand);
60806c3fb27SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4bf16, Expand);
6090b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
6105f757f3fSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
6115f757f3fSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);
6125f757f3fSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8bf16, Expand);
6135f757f3fSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8bf16, Expand);
6140b57cec5SDimitry Andric   // Turn FP truncstore into trunc + store.
6150b57cec5SDimitry Andric   // FIXME: vector types should also be expanded
6160b57cec5SDimitry Andric   setTruncStoreAction(MVT::f32, MVT::f16, Expand);
6170b57cec5SDimitry Andric   setTruncStoreAction(MVT::f64, MVT::f16, Expand);
61806c3fb27SDimitry Andric   setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
61906c3fb27SDimitry Andric   setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
6200b57cec5SDimitry Andric   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
6210b57cec5SDimitry Andric 
6220b57cec5SDimitry Andric   // PTX does not support load / store predicate registers
6230b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::i1, Custom);
6240b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::i1, Custom);
6250b57cec5SDimitry Andric 
6260b57cec5SDimitry Andric   for (MVT VT : MVT::integer_valuetypes()) {
6270b57cec5SDimitry Andric     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
6280b57cec5SDimitry Andric     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
6290b57cec5SDimitry Andric     setTruncStoreAction(VT, MVT::i1, Expand);
6300b57cec5SDimitry Andric   }
6310b57cec5SDimitry Andric 
6325f757f3fSDimitry Andric   // expand extload of vector of integers.
6335f757f3fSDimitry Andric   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16,
6345f757f3fSDimitry Andric                    MVT::v2i8, Expand);
6355f757f3fSDimitry Andric   setTruncStoreAction(MVT::v2i16, MVT::v2i8, Expand);
6365f757f3fSDimitry Andric 
6370b57cec5SDimitry Andric   // This is legal in NVPTX
6380b57cec5SDimitry Andric   setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
6390b57cec5SDimitry Andric   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
6400b57cec5SDimitry Andric   setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
641bdd1243dSDimitry Andric   setOperationAction(ISD::ConstantFP, MVT::bf16, Legal);
6420b57cec5SDimitry Andric 
6435f757f3fSDimitry Andric   // Lowering of DYNAMIC_STACKALLOC is unsupported.
6445f757f3fSDimitry Andric   // Custom lower to produce an error.
6455f757f3fSDimitry Andric   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
6465f757f3fSDimitry Andric   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
6475f757f3fSDimitry Andric 
6480b57cec5SDimitry Andric   // TRAP can be lowered to PTX trap
6490b57cec5SDimitry Andric   setOperationAction(ISD::TRAP, MVT::Other, Legal);
6500b57cec5SDimitry Andric 
6510b57cec5SDimitry Andric   // Register custom handling for vector loads/stores
6528bcb0991SDimitry Andric   for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
6530b57cec5SDimitry Andric     if (IsPTXVectorType(VT)) {
6540b57cec5SDimitry Andric       setOperationAction(ISD::LOAD, VT, Custom);
6550b57cec5SDimitry Andric       setOperationAction(ISD::STORE, VT, Custom);
6560b57cec5SDimitry Andric       setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
6570b57cec5SDimitry Andric     }
6580b57cec5SDimitry Andric   }
6590b57cec5SDimitry Andric 
660bdd1243dSDimitry Andric   // Support varargs.
661bdd1243dSDimitry Andric   setOperationAction(ISD::VASTART, MVT::Other, Custom);
662bdd1243dSDimitry Andric   setOperationAction(ISD::VAARG, MVT::Other, Custom);
663bdd1243dSDimitry Andric   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
664bdd1243dSDimitry Andric   setOperationAction(ISD::VAEND, MVT::Other, Expand);
665bdd1243dSDimitry Andric 
6660b57cec5SDimitry Andric   // Custom handling for i8 intrinsics
6670b57cec5SDimitry Andric   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
6680b57cec5SDimitry Andric 
6690b57cec5SDimitry Andric   for (const auto& Ty : {MVT::i16, MVT::i32, MVT::i64}) {
6700b57cec5SDimitry Andric     setOperationAction(ISD::ABS,  Ty, Legal);
6710b57cec5SDimitry Andric     setOperationAction(ISD::SMIN, Ty, Legal);
6720b57cec5SDimitry Andric     setOperationAction(ISD::SMAX, Ty, Legal);
6730b57cec5SDimitry Andric     setOperationAction(ISD::UMIN, Ty, Legal);
6740b57cec5SDimitry Andric     setOperationAction(ISD::UMAX, Ty, Legal);
6750b57cec5SDimitry Andric 
6760b57cec5SDimitry Andric     setOperationAction(ISD::CTPOP, Ty, Legal);
6770b57cec5SDimitry Andric     setOperationAction(ISD::CTLZ, Ty, Legal);
6780b57cec5SDimitry Andric   }
6790b57cec5SDimitry Andric 
6805f757f3fSDimitry Andric   setI16x2OperationAction(ISD::ABS, MVT::v2i16, Legal, Custom);
6815f757f3fSDimitry Andric   setI16x2OperationAction(ISD::SMIN, MVT::v2i16, Legal, Custom);
6825f757f3fSDimitry Andric   setI16x2OperationAction(ISD::SMAX, MVT::v2i16, Legal, Custom);
6835f757f3fSDimitry Andric   setI16x2OperationAction(ISD::UMIN, MVT::v2i16, Legal, Custom);
6845f757f3fSDimitry Andric   setI16x2OperationAction(ISD::UMAX, MVT::v2i16, Legal, Custom);
6855f757f3fSDimitry Andric   setI16x2OperationAction(ISD::CTPOP, MVT::v2i16, Legal, Expand);
6865f757f3fSDimitry Andric   setI16x2OperationAction(ISD::CTLZ, MVT::v2i16, Legal, Expand);
6875f757f3fSDimitry Andric 
6885f757f3fSDimitry Andric   setI16x2OperationAction(ISD::ADD, MVT::v2i16, Legal, Custom);
6895f757f3fSDimitry Andric   setI16x2OperationAction(ISD::SUB, MVT::v2i16, Legal, Custom);
6905f757f3fSDimitry Andric   setI16x2OperationAction(ISD::MUL, MVT::v2i16, Legal, Custom);
6915f757f3fSDimitry Andric   setI16x2OperationAction(ISD::SHL, MVT::v2i16, Legal, Custom);
6925f757f3fSDimitry Andric   setI16x2OperationAction(ISD::SREM, MVT::v2i16, Legal, Custom);
6935f757f3fSDimitry Andric   setI16x2OperationAction(ISD::UREM, MVT::v2i16, Legal, Custom);
6945f757f3fSDimitry Andric 
6955f757f3fSDimitry Andric   // Other arithmetic and logic ops are unsupported.
6965f757f3fSDimitry Andric   setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SRA, ISD::SRL, ISD::MULHS,
6975f757f3fSDimitry Andric                       ISD::MULHU, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
6985f757f3fSDimitry Andric                       ISD::SINT_TO_FP, ISD::UINT_TO_FP},
6995f757f3fSDimitry Andric                      MVT::v2i16, Expand);
7005f757f3fSDimitry Andric 
70181ad6265SDimitry Andric   setOperationAction(ISD::ADDC, MVT::i32, Legal);
70281ad6265SDimitry Andric   setOperationAction(ISD::ADDE, MVT::i32, Legal);
70381ad6265SDimitry Andric   setOperationAction(ISD::SUBC, MVT::i32, Legal);
70481ad6265SDimitry Andric   setOperationAction(ISD::SUBE, MVT::i32, Legal);
70581ad6265SDimitry Andric   if (STI.getPTXVersion() >= 43) {
70681ad6265SDimitry Andric     setOperationAction(ISD::ADDC, MVT::i64, Legal);
70781ad6265SDimitry Andric     setOperationAction(ISD::ADDE, MVT::i64, Legal);
70881ad6265SDimitry Andric     setOperationAction(ISD::SUBC, MVT::i64, Legal);
70981ad6265SDimitry Andric     setOperationAction(ISD::SUBE, MVT::i64, Legal);
71081ad6265SDimitry Andric   }
71181ad6265SDimitry Andric 
7120b57cec5SDimitry Andric   setOperationAction(ISD::CTTZ, MVT::i16, Expand);
7135f757f3fSDimitry Andric   setOperationAction(ISD::CTTZ, MVT::v2i16, Expand);
7140b57cec5SDimitry Andric   setOperationAction(ISD::CTTZ, MVT::i32, Expand);
7150b57cec5SDimitry Andric   setOperationAction(ISD::CTTZ, MVT::i64, Expand);
7160b57cec5SDimitry Andric 
7170b57cec5SDimitry Andric   // PTX does not directly support SELP of i1, so promote to i32 first
7180b57cec5SDimitry Andric   setOperationAction(ISD::SELECT, MVT::i1, Custom);
7190b57cec5SDimitry Andric 
7200b57cec5SDimitry Andric   // PTX cannot multiply two i64s in a single instruction.
7210b57cec5SDimitry Andric   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
7220b57cec5SDimitry Andric   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
7230b57cec5SDimitry Andric 
7240b57cec5SDimitry Andric   // We have some custom DAG combine patterns for these nodes
7255f757f3fSDimitry Andric   setTargetDAGCombine({ISD::ADD, ISD::AND, ISD::EXTRACT_VECTOR_ELT, ISD::FADD,
7265f757f3fSDimitry Andric                        ISD::LOAD, ISD::MUL, ISD::SHL, ISD::SREM, ISD::UREM,
7275f757f3fSDimitry Andric                        ISD::VSELECT});
7280b57cec5SDimitry Andric 
72906c3fb27SDimitry Andric   // setcc for f16x2 and bf16x2 needs special handling to prevent
73006c3fb27SDimitry Andric   // legalizer's attempt to scalarize it due to v2i1 not being legal.
73106c3fb27SDimitry Andric   if (STI.allowFP16Math() || STI.hasBF16Math())
7320b57cec5SDimitry Andric     setTargetDAGCombine(ISD::SETCC);
7330b57cec5SDimitry Andric 
7340b57cec5SDimitry Andric   // Promote fp16 arithmetic if fp16 hardware isn't available or the
7350b57cec5SDimitry Andric   // user passed --nvptx-no-fp16-math. The flag is useful because,
7360b57cec5SDimitry Andric   // although sm_53+ GPUs have some sort of FP16 support in
7370b57cec5SDimitry Andric   // hardware, only sm_53 and sm_60 have a full implementation. Others
7380b57cec5SDimitry Andric   // only have a token amount of hardware and are likely to run faster
7390b57cec5SDimitry Andric   // by using fp32 units instead.
7400b57cec5SDimitry Andric   for (const auto &Op : {ISD::FADD, ISD::FMUL, ISD::FSUB, ISD::FMA}) {
7410b57cec5SDimitry Andric     setFP16OperationAction(Op, MVT::f16, Legal, Promote);
7420b57cec5SDimitry Andric     setFP16OperationAction(Op, MVT::v2f16, Legal, Expand);
74306c3fb27SDimitry Andric     setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand);
74406c3fb27SDimitry Andric     // bf16 must be promoted to f32.
7455f757f3fSDimitry Andric     setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
74606c3fb27SDimitry Andric     if (getOperationAction(Op, MVT::bf16) == Promote)
74706c3fb27SDimitry Andric       AddPromotedToType(Op, MVT::bf16, MVT::f32);
7480b57cec5SDimitry Andric   }
7490b57cec5SDimitry Andric 
750bdd1243dSDimitry Andric   // f16/f16x2 neg was introduced in PTX 60, SM_53.
751bdd1243dSDimitry Andric   const bool IsFP16FP16x2NegAvailable = STI.getSmVersion() >= 53 &&
752bdd1243dSDimitry Andric                                         STI.getPTXVersion() >= 60 &&
753bdd1243dSDimitry Andric                                         STI.allowFP16Math();
754bdd1243dSDimitry Andric   for (const auto &VT : {MVT::f16, MVT::v2f16})
755bdd1243dSDimitry Andric     setOperationAction(ISD::FNEG, VT,
756bdd1243dSDimitry Andric                        IsFP16FP16x2NegAvailable ? Legal : Expand);
7570b57cec5SDimitry Andric 
75806c3fb27SDimitry Andric   setBF16OperationAction(ISD::FNEG, MVT::bf16, Legal, Expand);
75906c3fb27SDimitry Andric   setBF16OperationAction(ISD::FNEG, MVT::v2bf16, Legal, Expand);
7600b57cec5SDimitry Andric   // (would be) Library functions.
7610b57cec5SDimitry Andric 
7620b57cec5SDimitry Andric   // These map to conversion instructions for scalar FP types.
7630b57cec5SDimitry Andric   for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,
764bdd1243dSDimitry Andric                          ISD::FROUNDEVEN, ISD::FTRUNC}) {
7650b57cec5SDimitry Andric     setOperationAction(Op, MVT::f16, Legal);
7660b57cec5SDimitry Andric     setOperationAction(Op, MVT::f32, Legal);
7670b57cec5SDimitry Andric     setOperationAction(Op, MVT::f64, Legal);
7680b57cec5SDimitry Andric     setOperationAction(Op, MVT::v2f16, Expand);
76906c3fb27SDimitry Andric     setOperationAction(Op, MVT::v2bf16, Expand);
7705f757f3fSDimitry Andric     setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
7715f757f3fSDimitry Andric     if (getOperationAction(Op, MVT::bf16) == Promote)
7725f757f3fSDimitry Andric       AddPromotedToType(Op, MVT::bf16, MVT::f32);
7735f757f3fSDimitry Andric   }
7745f757f3fSDimitry Andric 
7755f757f3fSDimitry Andric   // sm_80 only has conversions between f32 and bf16. Custom lower all other
7765f757f3fSDimitry Andric   // bf16 conversions.
7775f757f3fSDimitry Andric   if (STI.hasBF16Math() &&
7785f757f3fSDimitry Andric       (STI.getSmVersion() < 90 || STI.getPTXVersion() < 78)) {
7795f757f3fSDimitry Andric     for (MVT VT : {MVT::i1, MVT::i16, MVT::i32, MVT::i64}) {
7805f757f3fSDimitry Andric       setOperationAction(
7815f757f3fSDimitry Andric           {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
7825f757f3fSDimitry Andric           VT, Custom);
7835f757f3fSDimitry Andric     }
7840b57cec5SDimitry Andric   }
7850b57cec5SDimitry Andric 
7860b57cec5SDimitry Andric   setOperationAction(ISD::FROUND, MVT::f16, Promote);
7870b57cec5SDimitry Andric   setOperationAction(ISD::FROUND, MVT::v2f16, Expand);
78806c3fb27SDimitry Andric   setOperationAction(ISD::FROUND, MVT::v2bf16, Expand);
7890b57cec5SDimitry Andric   setOperationAction(ISD::FROUND, MVT::f32, Custom);
7900b57cec5SDimitry Andric   setOperationAction(ISD::FROUND, MVT::f64, Custom);
7915f757f3fSDimitry Andric   setOperationAction(ISD::FROUND, MVT::bf16, Promote);
7925f757f3fSDimitry Andric   AddPromotedToType(ISD::FROUND, MVT::bf16, MVT::f32);
7930b57cec5SDimitry Andric 
7940b57cec5SDimitry Andric   // 'Expand' implements FCOPYSIGN without calling an external library.
7950b57cec5SDimitry Andric   setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
7960b57cec5SDimitry Andric   setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand);
79706c3fb27SDimitry Andric   setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
79806c3fb27SDimitry Andric   setOperationAction(ISD::FCOPYSIGN, MVT::v2bf16, Expand);
7990b57cec5SDimitry Andric   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
8000b57cec5SDimitry Andric   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
8010b57cec5SDimitry Andric 
8020b57cec5SDimitry Andric   // These map to corresponding instructions for f32/f64. f16 must be
8030b57cec5SDimitry Andric   // promoted to f32. v2f16 is expanded to f16, which is then promoted
8040b57cec5SDimitry Andric   // to f32.
80504eeddc0SDimitry Andric   for (const auto &Op :
8065f757f3fSDimitry Andric        {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS}) {
8070b57cec5SDimitry Andric     setOperationAction(Op, MVT::f16, Promote);
8080b57cec5SDimitry Andric     setOperationAction(Op, MVT::f32, Legal);
8090b57cec5SDimitry Andric     setOperationAction(Op, MVT::f64, Legal);
8100b57cec5SDimitry Andric     setOperationAction(Op, MVT::v2f16, Expand);
81106c3fb27SDimitry Andric     setOperationAction(Op, MVT::v2bf16, Expand);
8125f757f3fSDimitry Andric     setOperationAction(Op, MVT::bf16, Promote);
8135f757f3fSDimitry Andric     AddPromotedToType(Op, MVT::bf16, MVT::f32);
8140b57cec5SDimitry Andric   }
8155f757f3fSDimitry Andric   for (const auto &Op : {ISD::FABS}) {
8165f757f3fSDimitry Andric     setOperationAction(Op, MVT::f16, Promote);
8175f757f3fSDimitry Andric     setOperationAction(Op, MVT::f32, Legal);
8185f757f3fSDimitry Andric     setOperationAction(Op, MVT::f64, Legal);
8195f757f3fSDimitry Andric     setOperationAction(Op, MVT::v2f16, Expand);
8205f757f3fSDimitry Andric     setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand);
8215f757f3fSDimitry Andric     setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
8225f757f3fSDimitry Andric     if (getOperationAction(Op, MVT::bf16) == Promote)
8235f757f3fSDimitry Andric       AddPromotedToType(Op, MVT::bf16, MVT::f32);
8245f757f3fSDimitry Andric   }
8255f757f3fSDimitry Andric 
82604eeddc0SDimitry Andric   // max.f16, max.f16x2 and max.NaN are supported on sm_80+.
82704eeddc0SDimitry Andric   auto GetMinMaxAction = [&](LegalizeAction NotSm80Action) {
82804eeddc0SDimitry Andric     bool IsAtLeastSm80 = STI.getSmVersion() >= 80 && STI.getPTXVersion() >= 70;
82904eeddc0SDimitry Andric     return IsAtLeastSm80 ? Legal : NotSm80Action;
83004eeddc0SDimitry Andric   };
83104eeddc0SDimitry Andric   for (const auto &Op : {ISD::FMINNUM, ISD::FMAXNUM}) {
83204eeddc0SDimitry Andric     setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Promote), Promote);
83304eeddc0SDimitry Andric     setOperationAction(Op, MVT::f32, Legal);
83404eeddc0SDimitry Andric     setOperationAction(Op, MVT::f64, Legal);
83504eeddc0SDimitry Andric     setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
83606c3fb27SDimitry Andric     setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand);
8375f757f3fSDimitry Andric     setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
8385f757f3fSDimitry Andric     if (getOperationAction(Op, MVT::bf16) == Promote)
8395f757f3fSDimitry Andric       AddPromotedToType(Op, MVT::bf16, MVT::f32);
84004eeddc0SDimitry Andric   }
84104eeddc0SDimitry Andric   for (const auto &Op : {ISD::FMINIMUM, ISD::FMAXIMUM}) {
84204eeddc0SDimitry Andric     setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Expand), Expand);
84306c3fb27SDimitry Andric     setFP16OperationAction(Op, MVT::bf16, Legal, Expand);
84404eeddc0SDimitry Andric     setOperationAction(Op, MVT::f32, GetMinMaxAction(Expand));
84504eeddc0SDimitry Andric     setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
84606c3fb27SDimitry Andric     setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand);
84704eeddc0SDimitry Andric   }
8480b57cec5SDimitry Andric 
8490b57cec5SDimitry Andric   // No FEXP2, FLOG2.  The PTX ex2 and log2 functions are always approximate.
8500b57cec5SDimitry Andric   // No FPOW or FREM in PTX.
8510b57cec5SDimitry Andric 
8520b57cec5SDimitry Andric   // Now deduce the information based on the above mentioned
8530b57cec5SDimitry Andric   // actions
8540b57cec5SDimitry Andric   computeRegisterProperties(STI.getRegisterInfo());
85581ad6265SDimitry Andric 
85681ad6265SDimitry Andric   setMinCmpXchgSizeInBits(32);
8571db9f3b2SDimitry Andric   setMaxAtomicSizeInBitsSupported(64);
8580b57cec5SDimitry Andric }
8590b57cec5SDimitry Andric 
getTargetNodeName(unsigned Opcode) const8600b57cec5SDimitry Andric const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
8610b57cec5SDimitry Andric   switch ((NVPTXISD::NodeType)Opcode) {
8620b57cec5SDimitry Andric   case NVPTXISD::FIRST_NUMBER:
8630b57cec5SDimitry Andric     break;
8640b57cec5SDimitry Andric   case NVPTXISD::CALL:
8650b57cec5SDimitry Andric     return "NVPTXISD::CALL";
86606c3fb27SDimitry Andric   case NVPTXISD::RET_GLUE:
86706c3fb27SDimitry Andric     return "NVPTXISD::RET_GLUE";
8680b57cec5SDimitry Andric   case NVPTXISD::LOAD_PARAM:
8690b57cec5SDimitry Andric     return "NVPTXISD::LOAD_PARAM";
8700b57cec5SDimitry Andric   case NVPTXISD::Wrapper:
8710b57cec5SDimitry Andric     return "NVPTXISD::Wrapper";
8720b57cec5SDimitry Andric   case NVPTXISD::DeclareParam:
8730b57cec5SDimitry Andric     return "NVPTXISD::DeclareParam";
8740b57cec5SDimitry Andric   case NVPTXISD::DeclareScalarParam:
8750b57cec5SDimitry Andric     return "NVPTXISD::DeclareScalarParam";
8760b57cec5SDimitry Andric   case NVPTXISD::DeclareRet:
8770b57cec5SDimitry Andric     return "NVPTXISD::DeclareRet";
8780b57cec5SDimitry Andric   case NVPTXISD::DeclareScalarRet:
8790b57cec5SDimitry Andric     return "NVPTXISD::DeclareScalarRet";
8800b57cec5SDimitry Andric   case NVPTXISD::DeclareRetParam:
8810b57cec5SDimitry Andric     return "NVPTXISD::DeclareRetParam";
8820b57cec5SDimitry Andric   case NVPTXISD::PrintCall:
8830b57cec5SDimitry Andric     return "NVPTXISD::PrintCall";
8840b57cec5SDimitry Andric   case NVPTXISD::PrintConvergentCall:
8850b57cec5SDimitry Andric     return "NVPTXISD::PrintConvergentCall";
8860b57cec5SDimitry Andric   case NVPTXISD::PrintCallUni:
8870b57cec5SDimitry Andric     return "NVPTXISD::PrintCallUni";
8880b57cec5SDimitry Andric   case NVPTXISD::PrintConvergentCallUni:
8890b57cec5SDimitry Andric     return "NVPTXISD::PrintConvergentCallUni";
8900b57cec5SDimitry Andric   case NVPTXISD::LoadParam:
8910b57cec5SDimitry Andric     return "NVPTXISD::LoadParam";
8920b57cec5SDimitry Andric   case NVPTXISD::LoadParamV2:
8930b57cec5SDimitry Andric     return "NVPTXISD::LoadParamV2";
8940b57cec5SDimitry Andric   case NVPTXISD::LoadParamV4:
8950b57cec5SDimitry Andric     return "NVPTXISD::LoadParamV4";
8960b57cec5SDimitry Andric   case NVPTXISD::StoreParam:
8970b57cec5SDimitry Andric     return "NVPTXISD::StoreParam";
8980b57cec5SDimitry Andric   case NVPTXISD::StoreParamV2:
8990b57cec5SDimitry Andric     return "NVPTXISD::StoreParamV2";
9000b57cec5SDimitry Andric   case NVPTXISD::StoreParamV4:
9010b57cec5SDimitry Andric     return "NVPTXISD::StoreParamV4";
9020b57cec5SDimitry Andric   case NVPTXISD::StoreParamS32:
9030b57cec5SDimitry Andric     return "NVPTXISD::StoreParamS32";
9040b57cec5SDimitry Andric   case NVPTXISD::StoreParamU32:
9050b57cec5SDimitry Andric     return "NVPTXISD::StoreParamU32";
9060b57cec5SDimitry Andric   case NVPTXISD::CallArgBegin:
9070b57cec5SDimitry Andric     return "NVPTXISD::CallArgBegin";
9080b57cec5SDimitry Andric   case NVPTXISD::CallArg:
9090b57cec5SDimitry Andric     return "NVPTXISD::CallArg";
9100b57cec5SDimitry Andric   case NVPTXISD::LastCallArg:
9110b57cec5SDimitry Andric     return "NVPTXISD::LastCallArg";
9120b57cec5SDimitry Andric   case NVPTXISD::CallArgEnd:
9130b57cec5SDimitry Andric     return "NVPTXISD::CallArgEnd";
9140b57cec5SDimitry Andric   case NVPTXISD::CallVoid:
9150b57cec5SDimitry Andric     return "NVPTXISD::CallVoid";
9160b57cec5SDimitry Andric   case NVPTXISD::CallVal:
9170b57cec5SDimitry Andric     return "NVPTXISD::CallVal";
9180b57cec5SDimitry Andric   case NVPTXISD::CallSymbol:
9190b57cec5SDimitry Andric     return "NVPTXISD::CallSymbol";
9200b57cec5SDimitry Andric   case NVPTXISD::Prototype:
9210b57cec5SDimitry Andric     return "NVPTXISD::Prototype";
9220b57cec5SDimitry Andric   case NVPTXISD::MoveParam:
9230b57cec5SDimitry Andric     return "NVPTXISD::MoveParam";
9240b57cec5SDimitry Andric   case NVPTXISD::StoreRetval:
9250b57cec5SDimitry Andric     return "NVPTXISD::StoreRetval";
9260b57cec5SDimitry Andric   case NVPTXISD::StoreRetvalV2:
9270b57cec5SDimitry Andric     return "NVPTXISD::StoreRetvalV2";
9280b57cec5SDimitry Andric   case NVPTXISD::StoreRetvalV4:
9290b57cec5SDimitry Andric     return "NVPTXISD::StoreRetvalV4";
9300b57cec5SDimitry Andric   case NVPTXISD::PseudoUseParam:
9310b57cec5SDimitry Andric     return "NVPTXISD::PseudoUseParam";
9320b57cec5SDimitry Andric   case NVPTXISD::RETURN:
9330b57cec5SDimitry Andric     return "NVPTXISD::RETURN";
9340b57cec5SDimitry Andric   case NVPTXISD::CallSeqBegin:
9350b57cec5SDimitry Andric     return "NVPTXISD::CallSeqBegin";
9360b57cec5SDimitry Andric   case NVPTXISD::CallSeqEnd:
9370b57cec5SDimitry Andric     return "NVPTXISD::CallSeqEnd";
9380b57cec5SDimitry Andric   case NVPTXISD::CallPrototype:
9390b57cec5SDimitry Andric     return "NVPTXISD::CallPrototype";
9400b57cec5SDimitry Andric   case NVPTXISD::ProxyReg:
9410b57cec5SDimitry Andric     return "NVPTXISD::ProxyReg";
9420b57cec5SDimitry Andric   case NVPTXISD::LoadV2:
9430b57cec5SDimitry Andric     return "NVPTXISD::LoadV2";
9440b57cec5SDimitry Andric   case NVPTXISD::LoadV4:
9450b57cec5SDimitry Andric     return "NVPTXISD::LoadV4";
9460b57cec5SDimitry Andric   case NVPTXISD::LDGV2:
9470b57cec5SDimitry Andric     return "NVPTXISD::LDGV2";
9480b57cec5SDimitry Andric   case NVPTXISD::LDGV4:
9490b57cec5SDimitry Andric     return "NVPTXISD::LDGV4";
9500b57cec5SDimitry Andric   case NVPTXISD::LDUV2:
9510b57cec5SDimitry Andric     return "NVPTXISD::LDUV2";
9520b57cec5SDimitry Andric   case NVPTXISD::LDUV4:
9530b57cec5SDimitry Andric     return "NVPTXISD::LDUV4";
9540b57cec5SDimitry Andric   case NVPTXISD::StoreV2:
9550b57cec5SDimitry Andric     return "NVPTXISD::StoreV2";
9560b57cec5SDimitry Andric   case NVPTXISD::StoreV4:
9570b57cec5SDimitry Andric     return "NVPTXISD::StoreV4";
9580b57cec5SDimitry Andric   case NVPTXISD::FUN_SHFL_CLAMP:
9590b57cec5SDimitry Andric     return "NVPTXISD::FUN_SHFL_CLAMP";
9600b57cec5SDimitry Andric   case NVPTXISD::FUN_SHFR_CLAMP:
9610b57cec5SDimitry Andric     return "NVPTXISD::FUN_SHFR_CLAMP";
9620b57cec5SDimitry Andric   case NVPTXISD::IMAD:
9630b57cec5SDimitry Andric     return "NVPTXISD::IMAD";
9645f757f3fSDimitry Andric   case NVPTXISD::BFE:
9655f757f3fSDimitry Andric     return "NVPTXISD::BFE";
9665f757f3fSDimitry Andric   case NVPTXISD::BFI:
9675f757f3fSDimitry Andric     return "NVPTXISD::BFI";
9685f757f3fSDimitry Andric   case NVPTXISD::PRMT:
9695f757f3fSDimitry Andric     return "NVPTXISD::PRMT";
9700b57cec5SDimitry Andric   case NVPTXISD::SETP_F16X2:
9710b57cec5SDimitry Andric     return "NVPTXISD::SETP_F16X2";
9725f757f3fSDimitry Andric   case NVPTXISD::SETP_BF16X2:
9735f757f3fSDimitry Andric     return "NVPTXISD::SETP_BF16X2";
9740b57cec5SDimitry Andric   case NVPTXISD::Dummy:
9750b57cec5SDimitry Andric     return "NVPTXISD::Dummy";
9760b57cec5SDimitry Andric   case NVPTXISD::MUL_WIDE_SIGNED:
9770b57cec5SDimitry Andric     return "NVPTXISD::MUL_WIDE_SIGNED";
9780b57cec5SDimitry Andric   case NVPTXISD::MUL_WIDE_UNSIGNED:
9790b57cec5SDimitry Andric     return "NVPTXISD::MUL_WIDE_UNSIGNED";
9800b57cec5SDimitry Andric   case NVPTXISD::Tex1DFloatS32:        return "NVPTXISD::Tex1DFloatS32";
9810b57cec5SDimitry Andric   case NVPTXISD::Tex1DFloatFloat:      return "NVPTXISD::Tex1DFloatFloat";
9820b57cec5SDimitry Andric   case NVPTXISD::Tex1DFloatFloatLevel:
9830b57cec5SDimitry Andric     return "NVPTXISD::Tex1DFloatFloatLevel";
9840b57cec5SDimitry Andric   case NVPTXISD::Tex1DFloatFloatGrad:
9850b57cec5SDimitry Andric     return "NVPTXISD::Tex1DFloatFloatGrad";
9860b57cec5SDimitry Andric   case NVPTXISD::Tex1DS32S32:          return "NVPTXISD::Tex1DS32S32";
9870b57cec5SDimitry Andric   case NVPTXISD::Tex1DS32Float:        return "NVPTXISD::Tex1DS32Float";
9880b57cec5SDimitry Andric   case NVPTXISD::Tex1DS32FloatLevel:
9890b57cec5SDimitry Andric     return "NVPTXISD::Tex1DS32FloatLevel";
9900b57cec5SDimitry Andric   case NVPTXISD::Tex1DS32FloatGrad:
9910b57cec5SDimitry Andric     return "NVPTXISD::Tex1DS32FloatGrad";
9920b57cec5SDimitry Andric   case NVPTXISD::Tex1DU32S32:          return "NVPTXISD::Tex1DU32S32";
9930b57cec5SDimitry Andric   case NVPTXISD::Tex1DU32Float:        return "NVPTXISD::Tex1DU32Float";
9940b57cec5SDimitry Andric   case NVPTXISD::Tex1DU32FloatLevel:
9950b57cec5SDimitry Andric     return "NVPTXISD::Tex1DU32FloatLevel";
9960b57cec5SDimitry Andric   case NVPTXISD::Tex1DU32FloatGrad:
9970b57cec5SDimitry Andric     return "NVPTXISD::Tex1DU32FloatGrad";
9980b57cec5SDimitry Andric   case NVPTXISD::Tex1DArrayFloatS32:   return "NVPTXISD::Tex1DArrayFloatS32";
9990b57cec5SDimitry Andric   case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
10000b57cec5SDimitry Andric   case NVPTXISD::Tex1DArrayFloatFloatLevel:
10010b57cec5SDimitry Andric     return "NVPTXISD::Tex1DArrayFloatFloatLevel";
10020b57cec5SDimitry Andric   case NVPTXISD::Tex1DArrayFloatFloatGrad:
10030b57cec5SDimitry Andric     return "NVPTXISD::Tex1DArrayFloatFloatGrad";
10040b57cec5SDimitry Andric   case NVPTXISD::Tex1DArrayS32S32:     return "NVPTXISD::Tex1DArrayS32S32";
10050b57cec5SDimitry Andric   case NVPTXISD::Tex1DArrayS32Float:   return "NVPTXISD::Tex1DArrayS32Float";
10060b57cec5SDimitry Andric   case NVPTXISD::Tex1DArrayS32FloatLevel:
10070b57cec5SDimitry Andric     return "NVPTXISD::Tex1DArrayS32FloatLevel";
10080b57cec5SDimitry Andric   case NVPTXISD::Tex1DArrayS32FloatGrad:
10090b57cec5SDimitry Andric     return "NVPTXISD::Tex1DArrayS32FloatGrad";
10100b57cec5SDimitry Andric   case NVPTXISD::Tex1DArrayU32S32:     return "NVPTXISD::Tex1DArrayU32S32";
10110b57cec5SDimitry Andric   case NVPTXISD::Tex1DArrayU32Float:   return "NVPTXISD::Tex1DArrayU32Float";
10120b57cec5SDimitry Andric   case NVPTXISD::Tex1DArrayU32FloatLevel:
10130b57cec5SDimitry Andric     return "NVPTXISD::Tex1DArrayU32FloatLevel";
10140b57cec5SDimitry Andric   case NVPTXISD::Tex1DArrayU32FloatGrad:
10150b57cec5SDimitry Andric     return "NVPTXISD::Tex1DArrayU32FloatGrad";
10160b57cec5SDimitry Andric   case NVPTXISD::Tex2DFloatS32:        return "NVPTXISD::Tex2DFloatS32";
10170b57cec5SDimitry Andric   case NVPTXISD::Tex2DFloatFloat:      return "NVPTXISD::Tex2DFloatFloat";
10180b57cec5SDimitry Andric   case NVPTXISD::Tex2DFloatFloatLevel:
10190b57cec5SDimitry Andric     return "NVPTXISD::Tex2DFloatFloatLevel";
10200b57cec5SDimitry Andric   case NVPTXISD::Tex2DFloatFloatGrad:
10210b57cec5SDimitry Andric     return "NVPTXISD::Tex2DFloatFloatGrad";
10220b57cec5SDimitry Andric   case NVPTXISD::Tex2DS32S32:          return "NVPTXISD::Tex2DS32S32";
10230b57cec5SDimitry Andric   case NVPTXISD::Tex2DS32Float:        return "NVPTXISD::Tex2DS32Float";
10240b57cec5SDimitry Andric   case NVPTXISD::Tex2DS32FloatLevel:
10250b57cec5SDimitry Andric     return "NVPTXISD::Tex2DS32FloatLevel";
10260b57cec5SDimitry Andric   case NVPTXISD::Tex2DS32FloatGrad:
10270b57cec5SDimitry Andric     return "NVPTXISD::Tex2DS32FloatGrad";
10280b57cec5SDimitry Andric   case NVPTXISD::Tex2DU32S32:          return "NVPTXISD::Tex2DU32S32";
10290b57cec5SDimitry Andric   case NVPTXISD::Tex2DU32Float:        return "NVPTXISD::Tex2DU32Float";
10300b57cec5SDimitry Andric   case NVPTXISD::Tex2DU32FloatLevel:
10310b57cec5SDimitry Andric     return "NVPTXISD::Tex2DU32FloatLevel";
10320b57cec5SDimitry Andric   case NVPTXISD::Tex2DU32FloatGrad:
10330b57cec5SDimitry Andric     return "NVPTXISD::Tex2DU32FloatGrad";
10340b57cec5SDimitry Andric   case NVPTXISD::Tex2DArrayFloatS32:   return "NVPTXISD::Tex2DArrayFloatS32";
10350b57cec5SDimitry Andric   case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
10360b57cec5SDimitry Andric   case NVPTXISD::Tex2DArrayFloatFloatLevel:
10370b57cec5SDimitry Andric     return "NVPTXISD::Tex2DArrayFloatFloatLevel";
10380b57cec5SDimitry Andric   case NVPTXISD::Tex2DArrayFloatFloatGrad:
10390b57cec5SDimitry Andric     return "NVPTXISD::Tex2DArrayFloatFloatGrad";
10400b57cec5SDimitry Andric   case NVPTXISD::Tex2DArrayS32S32:     return "NVPTXISD::Tex2DArrayS32S32";
10410b57cec5SDimitry Andric   case NVPTXISD::Tex2DArrayS32Float:   return "NVPTXISD::Tex2DArrayS32Float";
10420b57cec5SDimitry Andric   case NVPTXISD::Tex2DArrayS32FloatLevel:
10430b57cec5SDimitry Andric     return "NVPTXISD::Tex2DArrayS32FloatLevel";
10440b57cec5SDimitry Andric   case NVPTXISD::Tex2DArrayS32FloatGrad:
10450b57cec5SDimitry Andric     return "NVPTXISD::Tex2DArrayS32FloatGrad";
10460b57cec5SDimitry Andric   case NVPTXISD::Tex2DArrayU32S32:     return "NVPTXISD::Tex2DArrayU32S32";
10470b57cec5SDimitry Andric   case NVPTXISD::Tex2DArrayU32Float:   return "NVPTXISD::Tex2DArrayU32Float";
10480b57cec5SDimitry Andric   case NVPTXISD::Tex2DArrayU32FloatLevel:
10490b57cec5SDimitry Andric     return "NVPTXISD::Tex2DArrayU32FloatLevel";
10500b57cec5SDimitry Andric   case NVPTXISD::Tex2DArrayU32FloatGrad:
10510b57cec5SDimitry Andric     return "NVPTXISD::Tex2DArrayU32FloatGrad";
10520b57cec5SDimitry Andric   case NVPTXISD::Tex3DFloatS32:        return "NVPTXISD::Tex3DFloatS32";
10530b57cec5SDimitry Andric   case NVPTXISD::Tex3DFloatFloat:      return "NVPTXISD::Tex3DFloatFloat";
10540b57cec5SDimitry Andric   case NVPTXISD::Tex3DFloatFloatLevel:
10550b57cec5SDimitry Andric     return "NVPTXISD::Tex3DFloatFloatLevel";
10560b57cec5SDimitry Andric   case NVPTXISD::Tex3DFloatFloatGrad:
10570b57cec5SDimitry Andric     return "NVPTXISD::Tex3DFloatFloatGrad";
10580b57cec5SDimitry Andric   case NVPTXISD::Tex3DS32S32:          return "NVPTXISD::Tex3DS32S32";
10590b57cec5SDimitry Andric   case NVPTXISD::Tex3DS32Float:        return "NVPTXISD::Tex3DS32Float";
10600b57cec5SDimitry Andric   case NVPTXISD::Tex3DS32FloatLevel:
10610b57cec5SDimitry Andric     return "NVPTXISD::Tex3DS32FloatLevel";
10620b57cec5SDimitry Andric   case NVPTXISD::Tex3DS32FloatGrad:
10630b57cec5SDimitry Andric     return "NVPTXISD::Tex3DS32FloatGrad";
10640b57cec5SDimitry Andric   case NVPTXISD::Tex3DU32S32:          return "NVPTXISD::Tex3DU32S32";
10650b57cec5SDimitry Andric   case NVPTXISD::Tex3DU32Float:        return "NVPTXISD::Tex3DU32Float";
10660b57cec5SDimitry Andric   case NVPTXISD::Tex3DU32FloatLevel:
10670b57cec5SDimitry Andric     return "NVPTXISD::Tex3DU32FloatLevel";
10680b57cec5SDimitry Andric   case NVPTXISD::Tex3DU32FloatGrad:
10690b57cec5SDimitry Andric     return "NVPTXISD::Tex3DU32FloatGrad";
10700b57cec5SDimitry Andric   case NVPTXISD::TexCubeFloatFloat:      return "NVPTXISD::TexCubeFloatFloat";
10710b57cec5SDimitry Andric   case NVPTXISD::TexCubeFloatFloatLevel:
10720b57cec5SDimitry Andric     return "NVPTXISD::TexCubeFloatFloatLevel";
10730b57cec5SDimitry Andric   case NVPTXISD::TexCubeS32Float:        return "NVPTXISD::TexCubeS32Float";
10740b57cec5SDimitry Andric   case NVPTXISD::TexCubeS32FloatLevel:
10750b57cec5SDimitry Andric     return "NVPTXISD::TexCubeS32FloatLevel";
10760b57cec5SDimitry Andric   case NVPTXISD::TexCubeU32Float:        return "NVPTXISD::TexCubeU32Float";
10770b57cec5SDimitry Andric   case NVPTXISD::TexCubeU32FloatLevel:
10780b57cec5SDimitry Andric     return "NVPTXISD::TexCubeU32FloatLevel";
10790b57cec5SDimitry Andric   case NVPTXISD::TexCubeArrayFloatFloat:
10800b57cec5SDimitry Andric     return "NVPTXISD::TexCubeArrayFloatFloat";
10810b57cec5SDimitry Andric   case NVPTXISD::TexCubeArrayFloatFloatLevel:
10820b57cec5SDimitry Andric     return "NVPTXISD::TexCubeArrayFloatFloatLevel";
10830b57cec5SDimitry Andric   case NVPTXISD::TexCubeArrayS32Float:
10840b57cec5SDimitry Andric     return "NVPTXISD::TexCubeArrayS32Float";
10850b57cec5SDimitry Andric   case NVPTXISD::TexCubeArrayS32FloatLevel:
10860b57cec5SDimitry Andric     return "NVPTXISD::TexCubeArrayS32FloatLevel";
10870b57cec5SDimitry Andric   case NVPTXISD::TexCubeArrayU32Float:
10880b57cec5SDimitry Andric     return "NVPTXISD::TexCubeArrayU32Float";
10890b57cec5SDimitry Andric   case NVPTXISD::TexCubeArrayU32FloatLevel:
10900b57cec5SDimitry Andric     return "NVPTXISD::TexCubeArrayU32FloatLevel";
10910b57cec5SDimitry Andric   case NVPTXISD::Tld4R2DFloatFloat:
10920b57cec5SDimitry Andric     return "NVPTXISD::Tld4R2DFloatFloat";
10930b57cec5SDimitry Andric   case NVPTXISD::Tld4G2DFloatFloat:
10940b57cec5SDimitry Andric     return "NVPTXISD::Tld4G2DFloatFloat";
10950b57cec5SDimitry Andric   case NVPTXISD::Tld4B2DFloatFloat:
10960b57cec5SDimitry Andric     return "NVPTXISD::Tld4B2DFloatFloat";
10970b57cec5SDimitry Andric   case NVPTXISD::Tld4A2DFloatFloat:
10980b57cec5SDimitry Andric     return "NVPTXISD::Tld4A2DFloatFloat";
10990b57cec5SDimitry Andric   case NVPTXISD::Tld4R2DS64Float:
11000b57cec5SDimitry Andric     return "NVPTXISD::Tld4R2DS64Float";
11010b57cec5SDimitry Andric   case NVPTXISD::Tld4G2DS64Float:
11020b57cec5SDimitry Andric     return "NVPTXISD::Tld4G2DS64Float";
11030b57cec5SDimitry Andric   case NVPTXISD::Tld4B2DS64Float:
11040b57cec5SDimitry Andric     return "NVPTXISD::Tld4B2DS64Float";
11050b57cec5SDimitry Andric   case NVPTXISD::Tld4A2DS64Float:
11060b57cec5SDimitry Andric     return "NVPTXISD::Tld4A2DS64Float";
11070b57cec5SDimitry Andric   case NVPTXISD::Tld4R2DU64Float:
11080b57cec5SDimitry Andric     return "NVPTXISD::Tld4R2DU64Float";
11090b57cec5SDimitry Andric   case NVPTXISD::Tld4G2DU64Float:
11100b57cec5SDimitry Andric     return "NVPTXISD::Tld4G2DU64Float";
11110b57cec5SDimitry Andric   case NVPTXISD::Tld4B2DU64Float:
11120b57cec5SDimitry Andric     return "NVPTXISD::Tld4B2DU64Float";
11130b57cec5SDimitry Andric   case NVPTXISD::Tld4A2DU64Float:
11140b57cec5SDimitry Andric     return "NVPTXISD::Tld4A2DU64Float";
11150b57cec5SDimitry Andric 
11160b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DFloatS32:
11170b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DFloatS32";
11180b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DFloatFloat:
11190b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DFloatFloat";
11200b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DFloatFloatLevel:
11210b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DFloatFloatLevel";
11220b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DFloatFloatGrad:
11230b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DFloatFloatGrad";
11240b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DS32S32:
11250b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DS32S32";
11260b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DS32Float:
11270b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DS32Float";
11280b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DS32FloatLevel:
11290b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DS32FloatLevel";
11300b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DS32FloatGrad:
11310b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DS32FloatGrad";
11320b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DU32S32:
11330b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DU32S32";
11340b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DU32Float:
11350b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DU32Float";
11360b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DU32FloatLevel:
11370b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DU32FloatLevel";
11380b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DU32FloatGrad:
11390b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DU32FloatGrad";
11400b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DArrayFloatS32:
11410b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DArrayFloatS32";
11420b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DArrayFloatFloat:
11430b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DArrayFloatFloat";
11440b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
11450b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DArrayFloatFloatLevel";
11460b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
11470b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DArrayFloatFloatGrad";
11480b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DArrayS32S32:
11490b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DArrayS32S32";
11500b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DArrayS32Float:
11510b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DArrayS32Float";
11520b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DArrayS32FloatLevel:
11530b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DArrayS32FloatLevel";
11540b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DArrayS32FloatGrad:
11550b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DArrayS32FloatGrad";
11560b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DArrayU32S32:
11570b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DArrayU32S32";
11580b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DArrayU32Float:
11590b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DArrayU32Float";
11600b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DArrayU32FloatLevel:
11610b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DArrayU32FloatLevel";
11620b57cec5SDimitry Andric   case NVPTXISD::TexUnified1DArrayU32FloatGrad:
11630b57cec5SDimitry Andric     return "NVPTXISD::TexUnified1DArrayU32FloatGrad";
11640b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DFloatS32:
11650b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DFloatS32";
11660b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DFloatFloat:
11670b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DFloatFloat";
11680b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DFloatFloatLevel:
11690b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DFloatFloatLevel";
11700b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DFloatFloatGrad:
11710b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DFloatFloatGrad";
11720b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DS32S32:
11730b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DS32S32";
11740b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DS32Float:
11750b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DS32Float";
11760b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DS32FloatLevel:
11770b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DS32FloatLevel";
11780b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DS32FloatGrad:
11790b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DS32FloatGrad";
11800b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DU32S32:
11810b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DU32S32";
11820b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DU32Float:
11830b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DU32Float";
11840b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DU32FloatLevel:
11850b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DU32FloatLevel";
11860b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DU32FloatGrad:
11870b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DU32FloatGrad";
11880b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DArrayFloatS32:
11890b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DArrayFloatS32";
11900b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DArrayFloatFloat:
11910b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DArrayFloatFloat";
11920b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
11930b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DArrayFloatFloatLevel";
11940b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
11950b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DArrayFloatFloatGrad";
11960b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DArrayS32S32:
11970b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DArrayS32S32";
11980b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DArrayS32Float:
11990b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DArrayS32Float";
12000b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DArrayS32FloatLevel:
12010b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DArrayS32FloatLevel";
12020b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DArrayS32FloatGrad:
12030b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DArrayS32FloatGrad";
12040b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DArrayU32S32:
12050b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DArrayU32S32";
12060b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DArrayU32Float:
12070b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DArrayU32Float";
12080b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DArrayU32FloatLevel:
12090b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DArrayU32FloatLevel";
12100b57cec5SDimitry Andric   case NVPTXISD::TexUnified2DArrayU32FloatGrad:
12110b57cec5SDimitry Andric     return "NVPTXISD::TexUnified2DArrayU32FloatGrad";
12120b57cec5SDimitry Andric   case NVPTXISD::TexUnified3DFloatS32:
12130b57cec5SDimitry Andric     return "NVPTXISD::TexUnified3DFloatS32";
12140b57cec5SDimitry Andric   case NVPTXISD::TexUnified3DFloatFloat:
12150b57cec5SDimitry Andric     return "NVPTXISD::TexUnified3DFloatFloat";
12160b57cec5SDimitry Andric   case NVPTXISD::TexUnified3DFloatFloatLevel:
12170b57cec5SDimitry Andric     return "NVPTXISD::TexUnified3DFloatFloatLevel";
12180b57cec5SDimitry Andric   case NVPTXISD::TexUnified3DFloatFloatGrad:
12190b57cec5SDimitry Andric     return "NVPTXISD::TexUnified3DFloatFloatGrad";
12200b57cec5SDimitry Andric   case NVPTXISD::TexUnified3DS32S32:
12210b57cec5SDimitry Andric     return "NVPTXISD::TexUnified3DS32S32";
12220b57cec5SDimitry Andric   case NVPTXISD::TexUnified3DS32Float:
12230b57cec5SDimitry Andric     return "NVPTXISD::TexUnified3DS32Float";
12240b57cec5SDimitry Andric   case NVPTXISD::TexUnified3DS32FloatLevel:
12250b57cec5SDimitry Andric     return "NVPTXISD::TexUnified3DS32FloatLevel";
12260b57cec5SDimitry Andric   case NVPTXISD::TexUnified3DS32FloatGrad:
12270b57cec5SDimitry Andric     return "NVPTXISD::TexUnified3DS32FloatGrad";
12280b57cec5SDimitry Andric   case NVPTXISD::TexUnified3DU32S32:
12290b57cec5SDimitry Andric     return "NVPTXISD::TexUnified3DU32S32";
12300b57cec5SDimitry Andric   case NVPTXISD::TexUnified3DU32Float:
12310b57cec5SDimitry Andric     return "NVPTXISD::TexUnified3DU32Float";
12320b57cec5SDimitry Andric   case NVPTXISD::TexUnified3DU32FloatLevel:
12330b57cec5SDimitry Andric     return "NVPTXISD::TexUnified3DU32FloatLevel";
12340b57cec5SDimitry Andric   case NVPTXISD::TexUnified3DU32FloatGrad:
12350b57cec5SDimitry Andric     return "NVPTXISD::TexUnified3DU32FloatGrad";
12360b57cec5SDimitry Andric   case NVPTXISD::TexUnifiedCubeFloatFloat:
12370b57cec5SDimitry Andric     return "NVPTXISD::TexUnifiedCubeFloatFloat";
12380b57cec5SDimitry Andric   case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
12390b57cec5SDimitry Andric     return "NVPTXISD::TexUnifiedCubeFloatFloatLevel";
12400b57cec5SDimitry Andric   case NVPTXISD::TexUnifiedCubeS32Float:
12410b57cec5SDimitry Andric     return "NVPTXISD::TexUnifiedCubeS32Float";
12420b57cec5SDimitry Andric   case NVPTXISD::TexUnifiedCubeS32FloatLevel:
12430b57cec5SDimitry Andric     return "NVPTXISD::TexUnifiedCubeS32FloatLevel";
12440b57cec5SDimitry Andric   case NVPTXISD::TexUnifiedCubeU32Float:
12450b57cec5SDimitry Andric     return "NVPTXISD::TexUnifiedCubeU32Float";
12460b57cec5SDimitry Andric   case NVPTXISD::TexUnifiedCubeU32FloatLevel:
12470b57cec5SDimitry Andric     return "NVPTXISD::TexUnifiedCubeU32FloatLevel";
12480b57cec5SDimitry Andric   case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
12490b57cec5SDimitry Andric     return "NVPTXISD::TexUnifiedCubeArrayFloatFloat";
12500b57cec5SDimitry Andric   case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
12510b57cec5SDimitry Andric     return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel";
12520b57cec5SDimitry Andric   case NVPTXISD::TexUnifiedCubeArrayS32Float:
12530b57cec5SDimitry Andric     return "NVPTXISD::TexUnifiedCubeArrayS32Float";
12540b57cec5SDimitry Andric   case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
12550b57cec5SDimitry Andric     return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel";
12560b57cec5SDimitry Andric   case NVPTXISD::TexUnifiedCubeArrayU32Float:
12570b57cec5SDimitry Andric     return "NVPTXISD::TexUnifiedCubeArrayU32Float";
12580b57cec5SDimitry Andric   case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
12590b57cec5SDimitry Andric     return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
12607a6dacacSDimitry Andric   case NVPTXISD::TexUnifiedCubeFloatFloatGrad:
12617a6dacacSDimitry Andric     return "NVPTXISD::TexUnifiedCubeFloatFloatGrad";
12627a6dacacSDimitry Andric   case NVPTXISD::TexUnifiedCubeS32FloatGrad:
12637a6dacacSDimitry Andric     return "NVPTXISD::TexUnifiedCubeS32FloatGrad";
12647a6dacacSDimitry Andric   case NVPTXISD::TexUnifiedCubeU32FloatGrad:
12657a6dacacSDimitry Andric     return "NVPTXISD::TexUnifiedCubeU32FloatGrad";
12667a6dacacSDimitry Andric   case NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad:
12677a6dacacSDimitry Andric     return "NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad";
12687a6dacacSDimitry Andric   case NVPTXISD::TexUnifiedCubeArrayS32FloatGrad:
12697a6dacacSDimitry Andric     return "NVPTXISD::TexUnifiedCubeArrayS32FloatGrad";
12707a6dacacSDimitry Andric   case NVPTXISD::TexUnifiedCubeArrayU32FloatGrad:
12717a6dacacSDimitry Andric     return "NVPTXISD::TexUnifiedCubeArrayU32FloatGrad";
12720b57cec5SDimitry Andric   case NVPTXISD::Tld4UnifiedR2DFloatFloat:
12730b57cec5SDimitry Andric     return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
12740b57cec5SDimitry Andric   case NVPTXISD::Tld4UnifiedG2DFloatFloat:
12750b57cec5SDimitry Andric     return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
12760b57cec5SDimitry Andric   case NVPTXISD::Tld4UnifiedB2DFloatFloat:
12770b57cec5SDimitry Andric     return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
12780b57cec5SDimitry Andric   case NVPTXISD::Tld4UnifiedA2DFloatFloat:
12790b57cec5SDimitry Andric     return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
12800b57cec5SDimitry Andric   case NVPTXISD::Tld4UnifiedR2DS64Float:
12810b57cec5SDimitry Andric     return "NVPTXISD::Tld4UnifiedR2DS64Float";
12820b57cec5SDimitry Andric   case NVPTXISD::Tld4UnifiedG2DS64Float:
12830b57cec5SDimitry Andric     return "NVPTXISD::Tld4UnifiedG2DS64Float";
12840b57cec5SDimitry Andric   case NVPTXISD::Tld4UnifiedB2DS64Float:
12850b57cec5SDimitry Andric     return "NVPTXISD::Tld4UnifiedB2DS64Float";
12860b57cec5SDimitry Andric   case NVPTXISD::Tld4UnifiedA2DS64Float:
12870b57cec5SDimitry Andric     return "NVPTXISD::Tld4UnifiedA2DS64Float";
12880b57cec5SDimitry Andric   case NVPTXISD::Tld4UnifiedR2DU64Float:
12890b57cec5SDimitry Andric     return "NVPTXISD::Tld4UnifiedR2DU64Float";
12900b57cec5SDimitry Andric   case NVPTXISD::Tld4UnifiedG2DU64Float:
12910b57cec5SDimitry Andric     return "NVPTXISD::Tld4UnifiedG2DU64Float";
12920b57cec5SDimitry Andric   case NVPTXISD::Tld4UnifiedB2DU64Float:
12930b57cec5SDimitry Andric     return "NVPTXISD::Tld4UnifiedB2DU64Float";
12940b57cec5SDimitry Andric   case NVPTXISD::Tld4UnifiedA2DU64Float:
12950b57cec5SDimitry Andric     return "NVPTXISD::Tld4UnifiedA2DU64Float";
12960b57cec5SDimitry Andric 
12970b57cec5SDimitry Andric   case NVPTXISD::Suld1DI8Clamp:          return "NVPTXISD::Suld1DI8Clamp";
12980b57cec5SDimitry Andric   case NVPTXISD::Suld1DI16Clamp:         return "NVPTXISD::Suld1DI16Clamp";
12990b57cec5SDimitry Andric   case NVPTXISD::Suld1DI32Clamp:         return "NVPTXISD::Suld1DI32Clamp";
13000b57cec5SDimitry Andric   case NVPTXISD::Suld1DI64Clamp:         return "NVPTXISD::Suld1DI64Clamp";
13010b57cec5SDimitry Andric   case NVPTXISD::Suld1DV2I8Clamp:        return "NVPTXISD::Suld1DV2I8Clamp";
13020b57cec5SDimitry Andric   case NVPTXISD::Suld1DV2I16Clamp:       return "NVPTXISD::Suld1DV2I16Clamp";
13030b57cec5SDimitry Andric   case NVPTXISD::Suld1DV2I32Clamp:       return "NVPTXISD::Suld1DV2I32Clamp";
13040b57cec5SDimitry Andric   case NVPTXISD::Suld1DV2I64Clamp:       return "NVPTXISD::Suld1DV2I64Clamp";
13050b57cec5SDimitry Andric   case NVPTXISD::Suld1DV4I8Clamp:        return "NVPTXISD::Suld1DV4I8Clamp";
13060b57cec5SDimitry Andric   case NVPTXISD::Suld1DV4I16Clamp:       return "NVPTXISD::Suld1DV4I16Clamp";
13070b57cec5SDimitry Andric   case NVPTXISD::Suld1DV4I32Clamp:       return "NVPTXISD::Suld1DV4I32Clamp";
13080b57cec5SDimitry Andric 
13090b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayI8Clamp:   return "NVPTXISD::Suld1DArrayI8Clamp";
13100b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayI16Clamp:  return "NVPTXISD::Suld1DArrayI16Clamp";
13110b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayI32Clamp:  return "NVPTXISD::Suld1DArrayI32Clamp";
13120b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayI64Clamp:  return "NVPTXISD::Suld1DArrayI64Clamp";
13130b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp";
13140b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp";
13150b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp";
13160b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp";
13170b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp";
13180b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp";
13190b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp";
13200b57cec5SDimitry Andric 
13210b57cec5SDimitry Andric   case NVPTXISD::Suld2DI8Clamp:          return "NVPTXISD::Suld2DI8Clamp";
13220b57cec5SDimitry Andric   case NVPTXISD::Suld2DI16Clamp:         return "NVPTXISD::Suld2DI16Clamp";
13230b57cec5SDimitry Andric   case NVPTXISD::Suld2DI32Clamp:         return "NVPTXISD::Suld2DI32Clamp";
13240b57cec5SDimitry Andric   case NVPTXISD::Suld2DI64Clamp:         return "NVPTXISD::Suld2DI64Clamp";
13250b57cec5SDimitry Andric   case NVPTXISD::Suld2DV2I8Clamp:        return "NVPTXISD::Suld2DV2I8Clamp";
13260b57cec5SDimitry Andric   case NVPTXISD::Suld2DV2I16Clamp:       return "NVPTXISD::Suld2DV2I16Clamp";
13270b57cec5SDimitry Andric   case NVPTXISD::Suld2DV2I32Clamp:       return "NVPTXISD::Suld2DV2I32Clamp";
13280b57cec5SDimitry Andric   case NVPTXISD::Suld2DV2I64Clamp:       return "NVPTXISD::Suld2DV2I64Clamp";
13290b57cec5SDimitry Andric   case NVPTXISD::Suld2DV4I8Clamp:        return "NVPTXISD::Suld2DV4I8Clamp";
13300b57cec5SDimitry Andric   case NVPTXISD::Suld2DV4I16Clamp:       return "NVPTXISD::Suld2DV4I16Clamp";
13310b57cec5SDimitry Andric   case NVPTXISD::Suld2DV4I32Clamp:       return "NVPTXISD::Suld2DV4I32Clamp";
13320b57cec5SDimitry Andric 
13330b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayI8Clamp:   return "NVPTXISD::Suld2DArrayI8Clamp";
13340b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayI16Clamp:  return "NVPTXISD::Suld2DArrayI16Clamp";
13350b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayI32Clamp:  return "NVPTXISD::Suld2DArrayI32Clamp";
13360b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayI64Clamp:  return "NVPTXISD::Suld2DArrayI64Clamp";
13370b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp";
13380b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp";
13390b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp";
13400b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp";
13410b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp";
13420b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp";
13430b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp";
13440b57cec5SDimitry Andric 
13450b57cec5SDimitry Andric   case NVPTXISD::Suld3DI8Clamp:          return "NVPTXISD::Suld3DI8Clamp";
13460b57cec5SDimitry Andric   case NVPTXISD::Suld3DI16Clamp:         return "NVPTXISD::Suld3DI16Clamp";
13470b57cec5SDimitry Andric   case NVPTXISD::Suld3DI32Clamp:         return "NVPTXISD::Suld3DI32Clamp";
13480b57cec5SDimitry Andric   case NVPTXISD::Suld3DI64Clamp:         return "NVPTXISD::Suld3DI64Clamp";
13490b57cec5SDimitry Andric   case NVPTXISD::Suld3DV2I8Clamp:        return "NVPTXISD::Suld3DV2I8Clamp";
13500b57cec5SDimitry Andric   case NVPTXISD::Suld3DV2I16Clamp:       return "NVPTXISD::Suld3DV2I16Clamp";
13510b57cec5SDimitry Andric   case NVPTXISD::Suld3DV2I32Clamp:       return "NVPTXISD::Suld3DV2I32Clamp";
13520b57cec5SDimitry Andric   case NVPTXISD::Suld3DV2I64Clamp:       return "NVPTXISD::Suld3DV2I64Clamp";
13530b57cec5SDimitry Andric   case NVPTXISD::Suld3DV4I8Clamp:        return "NVPTXISD::Suld3DV4I8Clamp";
13540b57cec5SDimitry Andric   case NVPTXISD::Suld3DV4I16Clamp:       return "NVPTXISD::Suld3DV4I16Clamp";
13550b57cec5SDimitry Andric   case NVPTXISD::Suld3DV4I32Clamp:       return "NVPTXISD::Suld3DV4I32Clamp";
13560b57cec5SDimitry Andric 
13570b57cec5SDimitry Andric   case NVPTXISD::Suld1DI8Trap:          return "NVPTXISD::Suld1DI8Trap";
13580b57cec5SDimitry Andric   case NVPTXISD::Suld1DI16Trap:         return "NVPTXISD::Suld1DI16Trap";
13590b57cec5SDimitry Andric   case NVPTXISD::Suld1DI32Trap:         return "NVPTXISD::Suld1DI32Trap";
13600b57cec5SDimitry Andric   case NVPTXISD::Suld1DI64Trap:         return "NVPTXISD::Suld1DI64Trap";
13610b57cec5SDimitry Andric   case NVPTXISD::Suld1DV2I8Trap:        return "NVPTXISD::Suld1DV2I8Trap";
13620b57cec5SDimitry Andric   case NVPTXISD::Suld1DV2I16Trap:       return "NVPTXISD::Suld1DV2I16Trap";
13630b57cec5SDimitry Andric   case NVPTXISD::Suld1DV2I32Trap:       return "NVPTXISD::Suld1DV2I32Trap";
13640b57cec5SDimitry Andric   case NVPTXISD::Suld1DV2I64Trap:       return "NVPTXISD::Suld1DV2I64Trap";
13650b57cec5SDimitry Andric   case NVPTXISD::Suld1DV4I8Trap:        return "NVPTXISD::Suld1DV4I8Trap";
13660b57cec5SDimitry Andric   case NVPTXISD::Suld1DV4I16Trap:       return "NVPTXISD::Suld1DV4I16Trap";
13670b57cec5SDimitry Andric   case NVPTXISD::Suld1DV4I32Trap:       return "NVPTXISD::Suld1DV4I32Trap";
13680b57cec5SDimitry Andric 
13690b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayI8Trap:     return "NVPTXISD::Suld1DArrayI8Trap";
13700b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayI16Trap:    return "NVPTXISD::Suld1DArrayI16Trap";
13710b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayI32Trap:    return "NVPTXISD::Suld1DArrayI32Trap";
13720b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayI64Trap:    return "NVPTXISD::Suld1DArrayI64Trap";
13730b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV2I8Trap:   return "NVPTXISD::Suld1DArrayV2I8Trap";
13740b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV2I16Trap:  return "NVPTXISD::Suld1DArrayV2I16Trap";
13750b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV2I32Trap:  return "NVPTXISD::Suld1DArrayV2I32Trap";
13760b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV2I64Trap:  return "NVPTXISD::Suld1DArrayV2I64Trap";
13770b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV4I8Trap:   return "NVPTXISD::Suld1DArrayV4I8Trap";
13780b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV4I16Trap:  return "NVPTXISD::Suld1DArrayV4I16Trap";
13790b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV4I32Trap:  return "NVPTXISD::Suld1DArrayV4I32Trap";
13800b57cec5SDimitry Andric 
13810b57cec5SDimitry Andric   case NVPTXISD::Suld2DI8Trap:          return "NVPTXISD::Suld2DI8Trap";
13820b57cec5SDimitry Andric   case NVPTXISD::Suld2DI16Trap:         return "NVPTXISD::Suld2DI16Trap";
13830b57cec5SDimitry Andric   case NVPTXISD::Suld2DI32Trap:         return "NVPTXISD::Suld2DI32Trap";
13840b57cec5SDimitry Andric   case NVPTXISD::Suld2DI64Trap:         return "NVPTXISD::Suld2DI64Trap";
13850b57cec5SDimitry Andric   case NVPTXISD::Suld2DV2I8Trap:        return "NVPTXISD::Suld2DV2I8Trap";
13860b57cec5SDimitry Andric   case NVPTXISD::Suld2DV2I16Trap:       return "NVPTXISD::Suld2DV2I16Trap";
13870b57cec5SDimitry Andric   case NVPTXISD::Suld2DV2I32Trap:       return "NVPTXISD::Suld2DV2I32Trap";
13880b57cec5SDimitry Andric   case NVPTXISD::Suld2DV2I64Trap:       return "NVPTXISD::Suld2DV2I64Trap";
13890b57cec5SDimitry Andric   case NVPTXISD::Suld2DV4I8Trap:        return "NVPTXISD::Suld2DV4I8Trap";
13900b57cec5SDimitry Andric   case NVPTXISD::Suld2DV4I16Trap:       return "NVPTXISD::Suld2DV4I16Trap";
13910b57cec5SDimitry Andric   case NVPTXISD::Suld2DV4I32Trap:       return "NVPTXISD::Suld2DV4I32Trap";
13920b57cec5SDimitry Andric 
13930b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayI8Trap:     return "NVPTXISD::Suld2DArrayI8Trap";
13940b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayI16Trap:    return "NVPTXISD::Suld2DArrayI16Trap";
13950b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayI32Trap:    return "NVPTXISD::Suld2DArrayI32Trap";
13960b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayI64Trap:    return "NVPTXISD::Suld2DArrayI64Trap";
13970b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV2I8Trap:   return "NVPTXISD::Suld2DArrayV2I8Trap";
13980b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV2I16Trap:  return "NVPTXISD::Suld2DArrayV2I16Trap";
13990b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV2I32Trap:  return "NVPTXISD::Suld2DArrayV2I32Trap";
14000b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV2I64Trap:  return "NVPTXISD::Suld2DArrayV2I64Trap";
14010b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV4I8Trap:   return "NVPTXISD::Suld2DArrayV4I8Trap";
14020b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV4I16Trap:  return "NVPTXISD::Suld2DArrayV4I16Trap";
14030b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV4I32Trap:  return "NVPTXISD::Suld2DArrayV4I32Trap";
14040b57cec5SDimitry Andric 
14050b57cec5SDimitry Andric   case NVPTXISD::Suld3DI8Trap:          return "NVPTXISD::Suld3DI8Trap";
14060b57cec5SDimitry Andric   case NVPTXISD::Suld3DI16Trap:         return "NVPTXISD::Suld3DI16Trap";
14070b57cec5SDimitry Andric   case NVPTXISD::Suld3DI32Trap:         return "NVPTXISD::Suld3DI32Trap";
14080b57cec5SDimitry Andric   case NVPTXISD::Suld3DI64Trap:         return "NVPTXISD::Suld3DI64Trap";
14090b57cec5SDimitry Andric   case NVPTXISD::Suld3DV2I8Trap:        return "NVPTXISD::Suld3DV2I8Trap";
14100b57cec5SDimitry Andric   case NVPTXISD::Suld3DV2I16Trap:       return "NVPTXISD::Suld3DV2I16Trap";
14110b57cec5SDimitry Andric   case NVPTXISD::Suld3DV2I32Trap:       return "NVPTXISD::Suld3DV2I32Trap";
14120b57cec5SDimitry Andric   case NVPTXISD::Suld3DV2I64Trap:       return "NVPTXISD::Suld3DV2I64Trap";
14130b57cec5SDimitry Andric   case NVPTXISD::Suld3DV4I8Trap:        return "NVPTXISD::Suld3DV4I8Trap";
14140b57cec5SDimitry Andric   case NVPTXISD::Suld3DV4I16Trap:       return "NVPTXISD::Suld3DV4I16Trap";
14150b57cec5SDimitry Andric   case NVPTXISD::Suld3DV4I32Trap:       return "NVPTXISD::Suld3DV4I32Trap";
14160b57cec5SDimitry Andric 
14170b57cec5SDimitry Andric   case NVPTXISD::Suld1DI8Zero:          return "NVPTXISD::Suld1DI8Zero";
14180b57cec5SDimitry Andric   case NVPTXISD::Suld1DI16Zero:         return "NVPTXISD::Suld1DI16Zero";
14190b57cec5SDimitry Andric   case NVPTXISD::Suld1DI32Zero:         return "NVPTXISD::Suld1DI32Zero";
14200b57cec5SDimitry Andric   case NVPTXISD::Suld1DI64Zero:         return "NVPTXISD::Suld1DI64Zero";
14210b57cec5SDimitry Andric   case NVPTXISD::Suld1DV2I8Zero:        return "NVPTXISD::Suld1DV2I8Zero";
14220b57cec5SDimitry Andric   case NVPTXISD::Suld1DV2I16Zero:       return "NVPTXISD::Suld1DV2I16Zero";
14230b57cec5SDimitry Andric   case NVPTXISD::Suld1DV2I32Zero:       return "NVPTXISD::Suld1DV2I32Zero";
14240b57cec5SDimitry Andric   case NVPTXISD::Suld1DV2I64Zero:       return "NVPTXISD::Suld1DV2I64Zero";
14250b57cec5SDimitry Andric   case NVPTXISD::Suld1DV4I8Zero:        return "NVPTXISD::Suld1DV4I8Zero";
14260b57cec5SDimitry Andric   case NVPTXISD::Suld1DV4I16Zero:       return "NVPTXISD::Suld1DV4I16Zero";
14270b57cec5SDimitry Andric   case NVPTXISD::Suld1DV4I32Zero:       return "NVPTXISD::Suld1DV4I32Zero";
14280b57cec5SDimitry Andric 
14290b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayI8Zero:     return "NVPTXISD::Suld1DArrayI8Zero";
14300b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayI16Zero:    return "NVPTXISD::Suld1DArrayI16Zero";
14310b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayI32Zero:    return "NVPTXISD::Suld1DArrayI32Zero";
14320b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayI64Zero:    return "NVPTXISD::Suld1DArrayI64Zero";
14330b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV2I8Zero:   return "NVPTXISD::Suld1DArrayV2I8Zero";
14340b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV2I16Zero:  return "NVPTXISD::Suld1DArrayV2I16Zero";
14350b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV2I32Zero:  return "NVPTXISD::Suld1DArrayV2I32Zero";
14360b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV2I64Zero:  return "NVPTXISD::Suld1DArrayV2I64Zero";
14370b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV4I8Zero:   return "NVPTXISD::Suld1DArrayV4I8Zero";
14380b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV4I16Zero:  return "NVPTXISD::Suld1DArrayV4I16Zero";
14390b57cec5SDimitry Andric   case NVPTXISD::Suld1DArrayV4I32Zero:  return "NVPTXISD::Suld1DArrayV4I32Zero";
14400b57cec5SDimitry Andric 
14410b57cec5SDimitry Andric   case NVPTXISD::Suld2DI8Zero:          return "NVPTXISD::Suld2DI8Zero";
14420b57cec5SDimitry Andric   case NVPTXISD::Suld2DI16Zero:         return "NVPTXISD::Suld2DI16Zero";
14430b57cec5SDimitry Andric   case NVPTXISD::Suld2DI32Zero:         return "NVPTXISD::Suld2DI32Zero";
14440b57cec5SDimitry Andric   case NVPTXISD::Suld2DI64Zero:         return "NVPTXISD::Suld2DI64Zero";
14450b57cec5SDimitry Andric   case NVPTXISD::Suld2DV2I8Zero:        return "NVPTXISD::Suld2DV2I8Zero";
14460b57cec5SDimitry Andric   case NVPTXISD::Suld2DV2I16Zero:       return "NVPTXISD::Suld2DV2I16Zero";
14470b57cec5SDimitry Andric   case NVPTXISD::Suld2DV2I32Zero:       return "NVPTXISD::Suld2DV2I32Zero";
14480b57cec5SDimitry Andric   case NVPTXISD::Suld2DV2I64Zero:       return "NVPTXISD::Suld2DV2I64Zero";
14490b57cec5SDimitry Andric   case NVPTXISD::Suld2DV4I8Zero:        return "NVPTXISD::Suld2DV4I8Zero";
14500b57cec5SDimitry Andric   case NVPTXISD::Suld2DV4I16Zero:       return "NVPTXISD::Suld2DV4I16Zero";
14510b57cec5SDimitry Andric   case NVPTXISD::Suld2DV4I32Zero:       return "NVPTXISD::Suld2DV4I32Zero";
14520b57cec5SDimitry Andric 
14530b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayI8Zero:     return "NVPTXISD::Suld2DArrayI8Zero";
14540b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayI16Zero:    return "NVPTXISD::Suld2DArrayI16Zero";
14550b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayI32Zero:    return "NVPTXISD::Suld2DArrayI32Zero";
14560b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayI64Zero:    return "NVPTXISD::Suld2DArrayI64Zero";
14570b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV2I8Zero:   return "NVPTXISD::Suld2DArrayV2I8Zero";
14580b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV2I16Zero:  return "NVPTXISD::Suld2DArrayV2I16Zero";
14590b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV2I32Zero:  return "NVPTXISD::Suld2DArrayV2I32Zero";
14600b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV2I64Zero:  return "NVPTXISD::Suld2DArrayV2I64Zero";
14610b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV4I8Zero:   return "NVPTXISD::Suld2DArrayV4I8Zero";
14620b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV4I16Zero:  return "NVPTXISD::Suld2DArrayV4I16Zero";
14630b57cec5SDimitry Andric   case NVPTXISD::Suld2DArrayV4I32Zero:  return "NVPTXISD::Suld2DArrayV4I32Zero";
14640b57cec5SDimitry Andric 
14650b57cec5SDimitry Andric   case NVPTXISD::Suld3DI8Zero:          return "NVPTXISD::Suld3DI8Zero";
14660b57cec5SDimitry Andric   case NVPTXISD::Suld3DI16Zero:         return "NVPTXISD::Suld3DI16Zero";
14670b57cec5SDimitry Andric   case NVPTXISD::Suld3DI32Zero:         return "NVPTXISD::Suld3DI32Zero";
14680b57cec5SDimitry Andric   case NVPTXISD::Suld3DI64Zero:         return "NVPTXISD::Suld3DI64Zero";
14690b57cec5SDimitry Andric   case NVPTXISD::Suld3DV2I8Zero:        return "NVPTXISD::Suld3DV2I8Zero";
14700b57cec5SDimitry Andric   case NVPTXISD::Suld3DV2I16Zero:       return "NVPTXISD::Suld3DV2I16Zero";
14710b57cec5SDimitry Andric   case NVPTXISD::Suld3DV2I32Zero:       return "NVPTXISD::Suld3DV2I32Zero";
14720b57cec5SDimitry Andric   case NVPTXISD::Suld3DV2I64Zero:       return "NVPTXISD::Suld3DV2I64Zero";
14730b57cec5SDimitry Andric   case NVPTXISD::Suld3DV4I8Zero:        return "NVPTXISD::Suld3DV4I8Zero";
14740b57cec5SDimitry Andric   case NVPTXISD::Suld3DV4I16Zero:       return "NVPTXISD::Suld3DV4I16Zero";
14750b57cec5SDimitry Andric   case NVPTXISD::Suld3DV4I32Zero:       return "NVPTXISD::Suld3DV4I32Zero";
14760b57cec5SDimitry Andric   }
14770b57cec5SDimitry Andric   return nullptr;
14780b57cec5SDimitry Andric }
14790b57cec5SDimitry Andric 
14800b57cec5SDimitry Andric TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(MVT VT) const14810b57cec5SDimitry Andric NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
1482fe6060f1SDimitry Andric   if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
1483fe6060f1SDimitry Andric       VT.getScalarType() == MVT::i1)
14840b57cec5SDimitry Andric     return TypeSplitVector;
14855f757f3fSDimitry Andric   if (Isv2x16VT(VT))
14860b57cec5SDimitry Andric     return TypeLegal;
14870b57cec5SDimitry Andric   return TargetLoweringBase::getPreferredVectorAction(VT);
14880b57cec5SDimitry Andric }
14890b57cec5SDimitry Andric 
getSqrtEstimate(SDValue Operand,SelectionDAG & DAG,int Enabled,int & ExtraSteps,bool & UseOneConst,bool Reciprocal) const14900b57cec5SDimitry Andric SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14910b57cec5SDimitry Andric                                              int Enabled, int &ExtraSteps,
14920b57cec5SDimitry Andric                                              bool &UseOneConst,
14930b57cec5SDimitry Andric                                              bool Reciprocal) const {
14940b57cec5SDimitry Andric   if (!(Enabled == ReciprocalEstimate::Enabled ||
14950b57cec5SDimitry Andric         (Enabled == ReciprocalEstimate::Unspecified && !usePrecSqrtF32())))
14960b57cec5SDimitry Andric     return SDValue();
14970b57cec5SDimitry Andric 
14980b57cec5SDimitry Andric   if (ExtraSteps == ReciprocalEstimate::Unspecified)
14990b57cec5SDimitry Andric     ExtraSteps = 0;
15000b57cec5SDimitry Andric 
15010b57cec5SDimitry Andric   SDLoc DL(Operand);
15020b57cec5SDimitry Andric   EVT VT = Operand.getValueType();
15030b57cec5SDimitry Andric   bool Ftz = useF32FTZ(DAG.getMachineFunction());
15040b57cec5SDimitry Andric 
15050b57cec5SDimitry Andric   auto MakeIntrinsicCall = [&](Intrinsic::ID IID) {
15060b57cec5SDimitry Andric     return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
15070b57cec5SDimitry Andric                        DAG.getConstant(IID, DL, MVT::i32), Operand);
15080b57cec5SDimitry Andric   };
15090b57cec5SDimitry Andric 
15100b57cec5SDimitry Andric   // The sqrt and rsqrt refinement processes assume we always start out with an
15110b57cec5SDimitry Andric   // approximation of the rsqrt.  Therefore, if we're going to do any refinement
15120b57cec5SDimitry Andric   // (i.e. ExtraSteps > 0), we must return an rsqrt.  But if we're *not* doing
15130b57cec5SDimitry Andric   // any refinement, we must return a regular sqrt.
15140b57cec5SDimitry Andric   if (Reciprocal || ExtraSteps > 0) {
15150b57cec5SDimitry Andric     if (VT == MVT::f32)
15160b57cec5SDimitry Andric       return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_rsqrt_approx_ftz_f
15170b57cec5SDimitry Andric                                    : Intrinsic::nvvm_rsqrt_approx_f);
15180b57cec5SDimitry Andric     else if (VT == MVT::f64)
15190b57cec5SDimitry Andric       return MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d);
15200b57cec5SDimitry Andric     else
15210b57cec5SDimitry Andric       return SDValue();
15220b57cec5SDimitry Andric   } else {
15230b57cec5SDimitry Andric     if (VT == MVT::f32)
15240b57cec5SDimitry Andric       return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f
15250b57cec5SDimitry Andric                                    : Intrinsic::nvvm_sqrt_approx_f);
15260b57cec5SDimitry Andric     else {
15270b57cec5SDimitry Andric       // There's no sqrt.approx.f64 instruction, so we emit
15280b57cec5SDimitry Andric       // reciprocal(rsqrt(x)).  This is faster than
15290b57cec5SDimitry Andric       // select(x == 0, 0, x * rsqrt(x)).  (In fact, it's faster than plain
15300b57cec5SDimitry Andric       // x * rsqrt(x).)
15310b57cec5SDimitry Andric       return DAG.getNode(
15320b57cec5SDimitry Andric           ISD::INTRINSIC_WO_CHAIN, DL, VT,
15330b57cec5SDimitry Andric           DAG.getConstant(Intrinsic::nvvm_rcp_approx_ftz_d, DL, MVT::i32),
15340b57cec5SDimitry Andric           MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d));
15350b57cec5SDimitry Andric     }
15360b57cec5SDimitry Andric   }
15370b57cec5SDimitry Andric }
15380b57cec5SDimitry Andric 
15390b57cec5SDimitry Andric SDValue
LowerGlobalAddress(SDValue Op,SelectionDAG & DAG) const15400b57cec5SDimitry Andric NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
15410b57cec5SDimitry Andric   SDLoc dl(Op);
15420b57cec5SDimitry Andric   const GlobalAddressSDNode *GAN = cast<GlobalAddressSDNode>(Op);
15430b57cec5SDimitry Andric   auto PtrVT = getPointerTy(DAG.getDataLayout(), GAN->getAddressSpace());
15440b57cec5SDimitry Andric   Op = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, PtrVT);
15450b57cec5SDimitry Andric   return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
15460b57cec5SDimitry Andric }
15470b57cec5SDimitry Andric 
IsTypePassedAsArray(const Type * Ty)154806c3fb27SDimitry Andric static bool IsTypePassedAsArray(const Type *Ty) {
154906c3fb27SDimitry Andric   return Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128) ||
155006c3fb27SDimitry Andric          Ty->isHalfTy() || Ty->isBFloatTy();
155106c3fb27SDimitry Andric }
155206c3fb27SDimitry Andric 
getPrototype(const DataLayout & DL,Type * retTy,const ArgListTy & Args,const SmallVectorImpl<ISD::OutputArg> & Outs,MaybeAlign retAlignment,std::optional<std::pair<unsigned,const APInt &>> VAInfo,const CallBase & CB,unsigned UniqueCallSite) const15530b57cec5SDimitry Andric std::string NVPTXTargetLowering::getPrototype(
15540b57cec5SDimitry Andric     const DataLayout &DL, Type *retTy, const ArgListTy &Args,
15555ffd83dbSDimitry Andric     const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign retAlignment,
1556bdd1243dSDimitry Andric     std::optional<std::pair<unsigned, const APInt &>> VAInfo,
1557e8d8bef9SDimitry Andric     const CallBase &CB, unsigned UniqueCallSite) const {
15580b57cec5SDimitry Andric   auto PtrVT = getPointerTy(DL);
15590b57cec5SDimitry Andric 
15600b57cec5SDimitry Andric   bool isABI = (STI.getSmVersion() >= 20);
15610b57cec5SDimitry Andric   assert(isABI && "Non-ABI compilation is not supported");
15620b57cec5SDimitry Andric   if (!isABI)
15630b57cec5SDimitry Andric     return "";
15640b57cec5SDimitry Andric 
1565bdd1243dSDimitry Andric   std::string Prototype;
1566bdd1243dSDimitry Andric   raw_string_ostream O(Prototype);
1567e8d8bef9SDimitry Andric   O << "prototype_" << UniqueCallSite << " : .callprototype ";
15680b57cec5SDimitry Andric 
15690b57cec5SDimitry Andric   if (retTy->getTypeID() == Type::VoidTyID) {
15700b57cec5SDimitry Andric     O << "()";
15710b57cec5SDimitry Andric   } else {
15720b57cec5SDimitry Andric     O << "(";
157306c3fb27SDimitry Andric     if ((retTy->isFloatingPointTy() || retTy->isIntegerTy()) &&
157406c3fb27SDimitry Andric         !IsTypePassedAsArray(retTy)) {
15750b57cec5SDimitry Andric       unsigned size = 0;
15760b57cec5SDimitry Andric       if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
15770b57cec5SDimitry Andric         size = ITy->getBitWidth();
15780b57cec5SDimitry Andric       } else {
15790b57cec5SDimitry Andric         assert(retTy->isFloatingPointTy() &&
15800b57cec5SDimitry Andric                "Floating point type expected here");
15810b57cec5SDimitry Andric         size = retTy->getPrimitiveSizeInBits();
15820b57cec5SDimitry Andric       }
15830b57cec5SDimitry Andric       // PTX ABI requires all scalar return values to be at least 32
15840b57cec5SDimitry Andric       // bits in size.  fp16 normally uses .b16 as its storage type in
15850b57cec5SDimitry Andric       // PTX, so its size must be adjusted here, too.
1586fcaf7f86SDimitry Andric       size = promoteScalarArgumentSize(size);
15870b57cec5SDimitry Andric 
15880b57cec5SDimitry Andric       O << ".param .b" << size << " _";
15890b57cec5SDimitry Andric     } else if (isa<PointerType>(retTy)) {
15900b57cec5SDimitry Andric       O << ".param .b" << PtrVT.getSizeInBits() << " _";
159106c3fb27SDimitry Andric     } else if (IsTypePassedAsArray(retTy)) {
15925ffd83dbSDimitry Andric       O << ".param .align " << (retAlignment ? retAlignment->value() : 0)
15935ffd83dbSDimitry Andric         << " .b8 _[" << DL.getTypeAllocSize(retTy) << "]";
15940b57cec5SDimitry Andric     } else {
15950b57cec5SDimitry Andric       llvm_unreachable("Unknown return type");
15960b57cec5SDimitry Andric     }
15970b57cec5SDimitry Andric     O << ") ";
15980b57cec5SDimitry Andric   }
15990b57cec5SDimitry Andric   O << "_ (";
16000b57cec5SDimitry Andric 
16010b57cec5SDimitry Andric   bool first = true;
16020b57cec5SDimitry Andric 
160381ad6265SDimitry Andric   const Function *F = CB.getFunction();
1604bdd1243dSDimitry Andric   unsigned NumArgs = VAInfo ? VAInfo->first : Args.size();
1605bdd1243dSDimitry Andric   for (unsigned i = 0, OIdx = 0; i != NumArgs; ++i, ++OIdx) {
16060b57cec5SDimitry Andric     Type *Ty = Args[i].Ty;
16070b57cec5SDimitry Andric     if (!first) {
16080b57cec5SDimitry Andric       O << ", ";
16090b57cec5SDimitry Andric     }
16100b57cec5SDimitry Andric     first = false;
16110b57cec5SDimitry Andric 
16120b57cec5SDimitry Andric     if (!Outs[OIdx].Flags.isByVal()) {
161306c3fb27SDimitry Andric       if (IsTypePassedAsArray(Ty)) {
161481ad6265SDimitry Andric         unsigned ParamAlign = 0;
16155ffd83dbSDimitry Andric         const CallInst *CallI = cast<CallInst>(&CB);
16160b57cec5SDimitry Andric         // +1 because index 0 is reserved for return type alignment
161781ad6265SDimitry Andric         if (!getAlign(*CallI, i + 1, ParamAlign))
161881ad6265SDimitry Andric           ParamAlign = getFunctionParamOptimizedAlign(F, Ty, DL).value();
161981ad6265SDimitry Andric         O << ".param .align " << ParamAlign << " .b8 ";
16200b57cec5SDimitry Andric         O << "_";
162181ad6265SDimitry Andric         O << "[" << DL.getTypeAllocSize(Ty) << "]";
16220b57cec5SDimitry Andric         // update the index for Outs
16230b57cec5SDimitry Andric         SmallVector<EVT, 16> vtparts;
16240b57cec5SDimitry Andric         ComputeValueVTs(*this, DL, Ty, vtparts);
16250b57cec5SDimitry Andric         if (unsigned len = vtparts.size())
16260b57cec5SDimitry Andric           OIdx += len - 1;
16270b57cec5SDimitry Andric         continue;
16280b57cec5SDimitry Andric       }
16290b57cec5SDimitry Andric       // i8 types in IR will be i16 types in SDAG
16300b57cec5SDimitry Andric       assert((getValueType(DL, Ty) == Outs[OIdx].VT ||
16310b57cec5SDimitry Andric               (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
16320b57cec5SDimitry Andric              "type mismatch between callee prototype and arguments");
16330b57cec5SDimitry Andric       // scalar type
16340b57cec5SDimitry Andric       unsigned sz = 0;
16350b57cec5SDimitry Andric       if (isa<IntegerType>(Ty)) {
16360b57cec5SDimitry Andric         sz = cast<IntegerType>(Ty)->getBitWidth();
1637fcaf7f86SDimitry Andric         sz = promoteScalarArgumentSize(sz);
16380b57cec5SDimitry Andric       } else if (isa<PointerType>(Ty)) {
16390b57cec5SDimitry Andric         sz = PtrVT.getSizeInBits();
164006c3fb27SDimitry Andric       } else {
16410b57cec5SDimitry Andric         sz = Ty->getPrimitiveSizeInBits();
164206c3fb27SDimitry Andric       }
16430b57cec5SDimitry Andric       O << ".param .b" << sz << " ";
16440b57cec5SDimitry Andric       O << "_";
16450b57cec5SDimitry Andric       continue;
16460b57cec5SDimitry Andric     }
16470b57cec5SDimitry Andric 
164881ad6265SDimitry Andric     Type *ETy = Args[i].IndirectType;
1649bdd1243dSDimitry Andric     Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
1650bdd1243dSDimitry Andric     Align ParamByValAlign =
1651bdd1243dSDimitry Andric         getFunctionByValParamAlign(F, ETy, InitialAlign, DL);
165281ad6265SDimitry Andric 
165381ad6265SDimitry Andric     O << ".param .align " << ParamByValAlign.value() << " .b8 ";
16540b57cec5SDimitry Andric     O << "_";
165581ad6265SDimitry Andric     O << "[" << Outs[OIdx].Flags.getByValSize() << "]";
16560b57cec5SDimitry Andric   }
1657bdd1243dSDimitry Andric 
1658bdd1243dSDimitry Andric   if (VAInfo)
1659bdd1243dSDimitry Andric     O << (first ? "" : ",") << " .param .align " << VAInfo->second
1660bdd1243dSDimitry Andric       << " .b8 _[]\n";
1661bdd1243dSDimitry Andric   O << ")";
1662bdd1243dSDimitry Andric   if (shouldEmitPTXNoReturn(&CB, *nvTM))
1663bdd1243dSDimitry Andric     O << " .noreturn";
1664bdd1243dSDimitry Andric   O << ";";
1665bdd1243dSDimitry Andric 
1666bdd1243dSDimitry Andric   return Prototype;
16670b57cec5SDimitry Andric }
16680b57cec5SDimitry Andric 
getArgumentAlignment(const CallBase * CB,Type * Ty,unsigned Idx,const DataLayout & DL) const16697a6dacacSDimitry Andric Align NVPTXTargetLowering::getArgumentAlignment(const CallBase *CB, Type *Ty,
16705ffd83dbSDimitry Andric                                                 unsigned Idx,
16710b57cec5SDimitry Andric                                                 const DataLayout &DL) const {
16725ffd83dbSDimitry Andric   if (!CB) {
16730b57cec5SDimitry Andric     // CallSite is zero, fallback to ABI type alignment
16745ffd83dbSDimitry Andric     return DL.getABITypeAlign(Ty);
16750b57cec5SDimitry Andric   }
16760b57cec5SDimitry Andric 
16775ffd83dbSDimitry Andric   unsigned Alignment = 0;
16785ffd83dbSDimitry Andric   const Function *DirectCallee = CB->getCalledFunction();
16790b57cec5SDimitry Andric 
16800b57cec5SDimitry Andric   if (!DirectCallee) {
16810b57cec5SDimitry Andric     // We don't have a direct function symbol, but that may be because of
16820b57cec5SDimitry Andric     // constant cast instructions in the call.
16830b57cec5SDimitry Andric 
16840b57cec5SDimitry Andric     // With bitcast'd call targets, the instruction will be the call
16855ffd83dbSDimitry Andric     if (const auto *CI = dyn_cast<CallInst>(CB)) {
16860b57cec5SDimitry Andric       // Check if we have call alignment metadata
16875ffd83dbSDimitry Andric       if (getAlign(*CI, Idx, Alignment))
16885ffd83dbSDimitry Andric         return Align(Alignment);
16890b57cec5SDimitry Andric     }
1690bdd1243dSDimitry Andric     DirectCallee = getMaybeBitcastedCallee(CB);
16910b57cec5SDimitry Andric   }
16920b57cec5SDimitry Andric 
16930b57cec5SDimitry Andric   // Check for function alignment information if we found that the
16940b57cec5SDimitry Andric   // ultimate target is a Function
169581ad6265SDimitry Andric   if (DirectCallee) {
16965ffd83dbSDimitry Andric     if (getAlign(*DirectCallee, Idx, Alignment))
16975ffd83dbSDimitry Andric       return Align(Alignment);
169881ad6265SDimitry Andric     // If alignment information is not available, fall back to the
169981ad6265SDimitry Andric     // default function param optimized type alignment
170081ad6265SDimitry Andric     return getFunctionParamOptimizedAlign(DirectCallee, Ty, DL);
170181ad6265SDimitry Andric   }
17020b57cec5SDimitry Andric 
170381ad6265SDimitry Andric   // Call is indirect, fall back to the ABI type alignment
17045ffd83dbSDimitry Andric   return DL.getABITypeAlign(Ty);
17050b57cec5SDimitry Andric }
17060b57cec5SDimitry Andric 
LowerCall(TargetLowering::CallLoweringInfo & CLI,SmallVectorImpl<SDValue> & InVals) const17070b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
17080b57cec5SDimitry Andric                                        SmallVectorImpl<SDValue> &InVals) const {
1709bdd1243dSDimitry Andric 
1710bdd1243dSDimitry Andric   if (CLI.IsVarArg && (STI.getPTXVersion() < 60 || STI.getSmVersion() < 30))
1711bdd1243dSDimitry Andric     report_fatal_error(
1712bdd1243dSDimitry Andric         "Support for variadic functions (unsized array parameter) introduced "
1713bdd1243dSDimitry Andric         "in PTX ISA version 6.0 and requires target sm_30.");
1714bdd1243dSDimitry Andric 
17150b57cec5SDimitry Andric   SelectionDAG &DAG = CLI.DAG;
17160b57cec5SDimitry Andric   SDLoc dl = CLI.DL;
17170b57cec5SDimitry Andric   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
17180b57cec5SDimitry Andric   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
17190b57cec5SDimitry Andric   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
17200b57cec5SDimitry Andric   SDValue Chain = CLI.Chain;
17210b57cec5SDimitry Andric   SDValue Callee = CLI.Callee;
17220b57cec5SDimitry Andric   bool &isTailCall = CLI.IsTailCall;
17230b57cec5SDimitry Andric   ArgListTy &Args = CLI.getArgs();
17240b57cec5SDimitry Andric   Type *RetTy = CLI.RetTy;
17255ffd83dbSDimitry Andric   const CallBase *CB = CLI.CB;
17260b57cec5SDimitry Andric   const DataLayout &DL = DAG.getDataLayout();
17270b57cec5SDimitry Andric 
17280b57cec5SDimitry Andric   bool isABI = (STI.getSmVersion() >= 20);
17290b57cec5SDimitry Andric   assert(isABI && "Non-ABI compilation is not supported");
17300b57cec5SDimitry Andric   if (!isABI)
17310b57cec5SDimitry Andric     return Chain;
17320b57cec5SDimitry Andric 
1733bdd1243dSDimitry Andric   // Variadic arguments.
1734bdd1243dSDimitry Andric   //
1735bdd1243dSDimitry Andric   // Normally, for each argument, we declare a param scalar or a param
1736bdd1243dSDimitry Andric   // byte array in the .param space, and store the argument value to that
1737bdd1243dSDimitry Andric   // param scalar or array starting at offset 0.
1738bdd1243dSDimitry Andric   //
1739bdd1243dSDimitry Andric   // In the case of the first variadic argument, we declare a vararg byte array
1740bdd1243dSDimitry Andric   // with size 0. The exact size of this array isn't known at this point, so
1741bdd1243dSDimitry Andric   // it'll be patched later. All the variadic arguments will be stored to this
1742bdd1243dSDimitry Andric   // array at a certain offset (which gets tracked by 'VAOffset'). The offset is
1743bdd1243dSDimitry Andric   // initially set to 0, so it can be used for non-variadic arguments (which use
1744bdd1243dSDimitry Andric   // 0 offset) to simplify the code.
1745bdd1243dSDimitry Andric   //
1746bdd1243dSDimitry Andric   // After all vararg is processed, 'VAOffset' holds the size of the
1747bdd1243dSDimitry Andric   // vararg byte array.
1748bdd1243dSDimitry Andric 
1749bdd1243dSDimitry Andric   SDValue VADeclareParam;                 // vararg byte array
1750bdd1243dSDimitry Andric   unsigned FirstVAArg = CLI.NumFixedArgs; // position of the first variadic
1751bdd1243dSDimitry Andric   unsigned VAOffset = 0;                  // current offset in the param array
1752bdd1243dSDimitry Andric 
1753e8d8bef9SDimitry Andric   unsigned UniqueCallSite = GlobalUniqueCallSite.fetch_add(1);
175481ad6265SDimitry Andric   SDValue TempChain = Chain;
1755e8d8bef9SDimitry Andric   Chain = DAG.getCALLSEQ_START(Chain, UniqueCallSite, 0, dl);
175606c3fb27SDimitry Andric   SDValue InGlue = Chain.getValue(1);
17570b57cec5SDimitry Andric 
175881ad6265SDimitry Andric   unsigned ParamCount = 0;
17590b57cec5SDimitry Andric   // Args.size() and Outs.size() need not match.
17600b57cec5SDimitry Andric   // Outs.size() will be larger
17610b57cec5SDimitry Andric   //   * if there is an aggregate argument with multiple fields (each field
17620b57cec5SDimitry Andric   //     showing up separately in Outs)
17630b57cec5SDimitry Andric   //   * if there is a vector argument with more than typical vector-length
17640b57cec5SDimitry Andric   //     elements (generally if more than 4) where each vector element is
17650b57cec5SDimitry Andric   //     individually present in Outs.
17660b57cec5SDimitry Andric   // So a different index should be used for indexing into Outs/OutVals.
17670b57cec5SDimitry Andric   // See similar issue in LowerFormalArguments.
17680b57cec5SDimitry Andric   unsigned OIdx = 0;
17690b57cec5SDimitry Andric   // Declare the .params or .reg need to pass values
17700b57cec5SDimitry Andric   // to the function
17710b57cec5SDimitry Andric   for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
17720b57cec5SDimitry Andric     EVT VT = Outs[OIdx].VT;
17730b57cec5SDimitry Andric     Type *Ty = Args[i].Ty;
1774bdd1243dSDimitry Andric     bool IsVAArg = (i >= CLI.NumFixedArgs);
177581ad6265SDimitry Andric     bool IsByVal = Outs[OIdx].Flags.isByVal();
17760b57cec5SDimitry Andric 
17770b57cec5SDimitry Andric     SmallVector<EVT, 16> VTs;
17780b57cec5SDimitry Andric     SmallVector<uint64_t, 16> Offsets;
177981ad6265SDimitry Andric 
178081ad6265SDimitry Andric     assert((!IsByVal || Args[i].IndirectType) &&
178181ad6265SDimitry Andric            "byval arg must have indirect type");
178281ad6265SDimitry Andric     Type *ETy = (IsByVal ? Args[i].IndirectType : Ty);
1783bdd1243dSDimitry Andric     ComputePTXValueVTs(*this, DL, ETy, VTs, &Offsets, IsByVal ? 0 : VAOffset);
178481ad6265SDimitry Andric 
178581ad6265SDimitry Andric     Align ArgAlign;
178681ad6265SDimitry Andric     if (IsByVal) {
178781ad6265SDimitry Andric       // The ByValAlign in the Outs[OIdx].Flags is always set at this point,
178881ad6265SDimitry Andric       // so we don't need to worry whether it's naturally aligned or not.
178981ad6265SDimitry Andric       // See TargetLowering::LowerCallTo().
1790bdd1243dSDimitry Andric       Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
1791bdd1243dSDimitry Andric       ArgAlign = getFunctionByValParamAlign(CB->getCalledFunction(), ETy,
1792bdd1243dSDimitry Andric                                             InitialAlign, DL);
1793bdd1243dSDimitry Andric       if (IsVAArg)
1794bdd1243dSDimitry Andric         VAOffset = alignTo(VAOffset, ArgAlign);
179581ad6265SDimitry Andric     } else {
17967a6dacacSDimitry Andric       ArgAlign = getArgumentAlignment(CB, Ty, ParamCount + 1, DL);
179781ad6265SDimitry Andric     }
179881ad6265SDimitry Andric 
179981ad6265SDimitry Andric     unsigned TypeSize =
180081ad6265SDimitry Andric         (IsByVal ? Outs[OIdx].Flags.getByValSize() : DL.getTypeAllocSize(Ty));
18010b57cec5SDimitry Andric     SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
180281ad6265SDimitry Andric 
18030b57cec5SDimitry Andric     bool NeedAlign; // Does argument declaration specify alignment?
180406c3fb27SDimitry Andric     bool PassAsArray = IsByVal || IsTypePassedAsArray(Ty);
1805bdd1243dSDimitry Andric     if (IsVAArg) {
1806bdd1243dSDimitry Andric       if (ParamCount == FirstVAArg) {
1807bdd1243dSDimitry Andric         SDValue DeclareParamOps[] = {
1808bdd1243dSDimitry Andric             Chain, DAG.getConstant(STI.getMaxRequiredAlignment(), dl, MVT::i32),
1809bdd1243dSDimitry Andric             DAG.getConstant(ParamCount, dl, MVT::i32),
181006c3fb27SDimitry Andric             DAG.getConstant(1, dl, MVT::i32), InGlue};
1811bdd1243dSDimitry Andric         VADeclareParam = Chain = DAG.getNode(NVPTXISD::DeclareParam, dl,
1812bdd1243dSDimitry Andric                                              DeclareParamVTs, DeclareParamOps);
1813bdd1243dSDimitry Andric       }
181406c3fb27SDimitry Andric       NeedAlign = PassAsArray;
181506c3fb27SDimitry Andric     } else if (PassAsArray) {
18160b57cec5SDimitry Andric       // declare .param .align <align> .b8 .param<n>[<size>];
18170b57cec5SDimitry Andric       SDValue DeclareParamOps[] = {
18185ffd83dbSDimitry Andric           Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32),
181981ad6265SDimitry Andric           DAG.getConstant(ParamCount, dl, MVT::i32),
182006c3fb27SDimitry Andric           DAG.getConstant(TypeSize, dl, MVT::i32), InGlue};
18210b57cec5SDimitry Andric       Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
18220b57cec5SDimitry Andric                           DeclareParamOps);
18230b57cec5SDimitry Andric       NeedAlign = true;
18240b57cec5SDimitry Andric     } else {
18250b57cec5SDimitry Andric       // declare .param .b<size> .param<n>;
1826fcaf7f86SDimitry Andric       if (VT.isInteger() || VT.isFloatingPoint()) {
18270b57cec5SDimitry Andric         // PTX ABI requires integral types to be at least 32 bits in
18280b57cec5SDimitry Andric         // size. FP16 is loaded/stored using i16, so it's handled
18290b57cec5SDimitry Andric         // here as well.
1830fcaf7f86SDimitry Andric         TypeSize = promoteScalarArgumentSize(TypeSize * 8) / 8;
18310b57cec5SDimitry Andric       }
18320b57cec5SDimitry Andric       SDValue DeclareScalarParamOps[] = {
183381ad6265SDimitry Andric           Chain, DAG.getConstant(ParamCount, dl, MVT::i32),
183481ad6265SDimitry Andric           DAG.getConstant(TypeSize * 8, dl, MVT::i32),
183506c3fb27SDimitry Andric           DAG.getConstant(0, dl, MVT::i32), InGlue};
18360b57cec5SDimitry Andric       Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
18370b57cec5SDimitry Andric                           DeclareScalarParamOps);
18380b57cec5SDimitry Andric       NeedAlign = false;
18390b57cec5SDimitry Andric     }
184006c3fb27SDimitry Andric     InGlue = Chain.getValue(1);
18410b57cec5SDimitry Andric 
18420b57cec5SDimitry Andric     // PTX Interoperability Guide 3.3(A): [Integer] Values shorter
18430b57cec5SDimitry Andric     // than 32-bits are sign extended or zero extended, depending on
18440b57cec5SDimitry Andric     // whether they are signed or unsigned types. This case applies
18450b57cec5SDimitry Andric     // only to scalar parameters and not to aggregate values.
18460b57cec5SDimitry Andric     bool ExtendIntegerParam =
18470b57cec5SDimitry Andric         Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32;
18480b57cec5SDimitry Andric 
1849bdd1243dSDimitry Andric     auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign, IsVAArg);
18500b57cec5SDimitry Andric     SmallVector<SDValue, 6> StoreOperands;
18510b57cec5SDimitry Andric     for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
185281ad6265SDimitry Andric       EVT EltVT = VTs[j];
185381ad6265SDimitry Andric       int CurOffset = Offsets[j];
185481ad6265SDimitry Andric       MaybeAlign PartAlign;
185581ad6265SDimitry Andric       if (NeedAlign)
185681ad6265SDimitry Andric         PartAlign = commonAlignment(ArgAlign, CurOffset);
185781ad6265SDimitry Andric 
18580b57cec5SDimitry Andric       // New store.
18590b57cec5SDimitry Andric       if (VectorInfo[j] & PVF_FIRST) {
18600b57cec5SDimitry Andric         assert(StoreOperands.empty() && "Unfinished preceding store.");
18610b57cec5SDimitry Andric         StoreOperands.push_back(Chain);
1862bdd1243dSDimitry Andric         StoreOperands.push_back(
1863bdd1243dSDimitry Andric             DAG.getConstant(IsVAArg ? FirstVAArg : ParamCount, dl, MVT::i32));
1864bdd1243dSDimitry Andric         StoreOperands.push_back(DAG.getConstant(
1865bdd1243dSDimitry Andric             IsByVal ? CurOffset + VAOffset : (IsVAArg ? VAOffset : CurOffset),
1866bdd1243dSDimitry Andric             dl, MVT::i32));
18670b57cec5SDimitry Andric       }
18680b57cec5SDimitry Andric 
18690b57cec5SDimitry Andric       SDValue StVal = OutVals[OIdx];
1870fcaf7f86SDimitry Andric 
1871fcaf7f86SDimitry Andric       MVT PromotedVT;
1872fcaf7f86SDimitry Andric       if (PromoteScalarIntegerPTX(EltVT, &PromotedVT)) {
1873fcaf7f86SDimitry Andric         EltVT = EVT(PromotedVT);
1874fcaf7f86SDimitry Andric       }
1875fcaf7f86SDimitry Andric       if (PromoteScalarIntegerPTX(StVal.getValueType(), &PromotedVT)) {
1876fcaf7f86SDimitry Andric         llvm::ISD::NodeType Ext =
1877fcaf7f86SDimitry Andric             Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1878fcaf7f86SDimitry Andric         StVal = DAG.getNode(Ext, dl, PromotedVT, StVal);
1879fcaf7f86SDimitry Andric       }
1880fcaf7f86SDimitry Andric 
188181ad6265SDimitry Andric       if (IsByVal) {
188281ad6265SDimitry Andric         auto PtrVT = getPointerTy(DL);
188381ad6265SDimitry Andric         SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StVal,
188481ad6265SDimitry Andric                                       DAG.getConstant(CurOffset, dl, PtrVT));
188581ad6265SDimitry Andric         StVal = DAG.getLoad(EltVT, dl, TempChain, srcAddr, MachinePointerInfo(),
188681ad6265SDimitry Andric                             PartAlign);
188781ad6265SDimitry Andric       } else if (ExtendIntegerParam) {
18880b57cec5SDimitry Andric         assert(VTs.size() == 1 && "Scalar can't have multiple parts.");
18890b57cec5SDimitry Andric         // zext/sext to i32
18900b57cec5SDimitry Andric         StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
18910b57cec5SDimitry Andric                                                       : ISD::ZERO_EXTEND,
18920b57cec5SDimitry Andric                             dl, MVT::i32, StVal);
189381ad6265SDimitry Andric       }
189481ad6265SDimitry Andric 
189581ad6265SDimitry Andric       if (!ExtendIntegerParam && EltVT.getSizeInBits() < 16) {
18960b57cec5SDimitry Andric         // Use 16-bit registers for small stores as it's the
18970b57cec5SDimitry Andric         // smallest general purpose register size supported by NVPTX.
18980b57cec5SDimitry Andric         StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
18990b57cec5SDimitry Andric       }
19000b57cec5SDimitry Andric 
19010b57cec5SDimitry Andric       // Record the value to store.
19020b57cec5SDimitry Andric       StoreOperands.push_back(StVal);
19030b57cec5SDimitry Andric 
19040b57cec5SDimitry Andric       if (VectorInfo[j] & PVF_LAST) {
19050b57cec5SDimitry Andric         unsigned NumElts = StoreOperands.size() - 3;
19060b57cec5SDimitry Andric         NVPTXISD::NodeType Op;
19070b57cec5SDimitry Andric         switch (NumElts) {
19080b57cec5SDimitry Andric         case 1:
19090b57cec5SDimitry Andric           Op = NVPTXISD::StoreParam;
19100b57cec5SDimitry Andric           break;
19110b57cec5SDimitry Andric         case 2:
19120b57cec5SDimitry Andric           Op = NVPTXISD::StoreParamV2;
19130b57cec5SDimitry Andric           break;
19140b57cec5SDimitry Andric         case 4:
19150b57cec5SDimitry Andric           Op = NVPTXISD::StoreParamV4;
19160b57cec5SDimitry Andric           break;
19170b57cec5SDimitry Andric         default:
19180b57cec5SDimitry Andric           llvm_unreachable("Invalid vector info.");
19190b57cec5SDimitry Andric         }
19200b57cec5SDimitry Andric 
192106c3fb27SDimitry Andric         StoreOperands.push_back(InGlue);
19220b57cec5SDimitry Andric 
19230b57cec5SDimitry Andric         // Adjust type of the store op if we've extended the scalar
19240b57cec5SDimitry Andric         // return value.
192581ad6265SDimitry Andric         EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT;
19260b57cec5SDimitry Andric 
19270b57cec5SDimitry Andric         Chain = DAG.getMemIntrinsicNode(
19280b57cec5SDimitry Andric             Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
192981ad6265SDimitry Andric             TheStoreType, MachinePointerInfo(), PartAlign,
19300b57cec5SDimitry Andric             MachineMemOperand::MOStore);
193106c3fb27SDimitry Andric         InGlue = Chain.getValue(1);
19320b57cec5SDimitry Andric 
19330b57cec5SDimitry Andric         // Cleanup.
19340b57cec5SDimitry Andric         StoreOperands.clear();
1935bdd1243dSDimitry Andric 
1936bdd1243dSDimitry Andric         // TODO: We may need to support vector types that can be passed
1937bdd1243dSDimitry Andric         // as scalars in variadic arguments.
1938bdd1243dSDimitry Andric         if (!IsByVal && IsVAArg) {
1939bdd1243dSDimitry Andric           assert(NumElts == 1 &&
1940bdd1243dSDimitry Andric                  "Vectorization is expected to be disabled for variadics.");
1941bdd1243dSDimitry Andric           VAOffset += DL.getTypeAllocSize(
1942bdd1243dSDimitry Andric               TheStoreType.getTypeForEVT(*DAG.getContext()));
1943bdd1243dSDimitry Andric         }
19440b57cec5SDimitry Andric       }
194581ad6265SDimitry Andric       if (!IsByVal)
19460b57cec5SDimitry Andric         ++OIdx;
19470b57cec5SDimitry Andric     }
19480b57cec5SDimitry Andric     assert(StoreOperands.empty() && "Unfinished parameter store.");
194981ad6265SDimitry Andric     if (!IsByVal && VTs.size() > 0)
19500b57cec5SDimitry Andric       --OIdx;
195181ad6265SDimitry Andric     ++ParamCount;
1952bdd1243dSDimitry Andric     if (IsByVal && IsVAArg)
1953bdd1243dSDimitry Andric       VAOffset += TypeSize;
19540b57cec5SDimitry Andric   }
19550b57cec5SDimitry Andric 
19560b57cec5SDimitry Andric   GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
1957bdd1243dSDimitry Andric   MaybeAlign retAlignment = std::nullopt;
19580b57cec5SDimitry Andric 
19590b57cec5SDimitry Andric   // Handle Result
19600b57cec5SDimitry Andric   if (Ins.size() > 0) {
19610b57cec5SDimitry Andric     SmallVector<EVT, 16> resvtparts;
19620b57cec5SDimitry Andric     ComputeValueVTs(*this, DL, RetTy, resvtparts);
19630b57cec5SDimitry Andric 
19640b57cec5SDimitry Andric     // Declare
196506c3fb27SDimitry Andric     //  .param .align N .b8 retval0[<size-in-bytes>], or
19660b57cec5SDimitry Andric     //  .param .b<size-in-bits> retval0
19670b57cec5SDimitry Andric     unsigned resultsz = DL.getTypeAllocSizeInBits(RetTy);
196806c3fb27SDimitry Andric     if (!IsTypePassedAsArray(RetTy)) {
1969fcaf7f86SDimitry Andric       resultsz = promoteScalarArgumentSize(resultsz);
19700b57cec5SDimitry Andric       SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
19710b57cec5SDimitry Andric       SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
19720b57cec5SDimitry Andric                                   DAG.getConstant(resultsz, dl, MVT::i32),
197306c3fb27SDimitry Andric                                   DAG.getConstant(0, dl, MVT::i32), InGlue };
19740b57cec5SDimitry Andric       Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
19750b57cec5SDimitry Andric                           DeclareRetOps);
197606c3fb27SDimitry Andric       InGlue = Chain.getValue(1);
19770b57cec5SDimitry Andric     } else {
19787a6dacacSDimitry Andric       retAlignment = getArgumentAlignment(CB, RetTy, 0, DL);
19795ffd83dbSDimitry Andric       assert(retAlignment && "retAlignment is guaranteed to be set");
19800b57cec5SDimitry Andric       SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
19815ffd83dbSDimitry Andric       SDValue DeclareRetOps[] = {
19825ffd83dbSDimitry Andric           Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32),
19830b57cec5SDimitry Andric           DAG.getConstant(resultsz / 8, dl, MVT::i32),
198406c3fb27SDimitry Andric           DAG.getConstant(0, dl, MVT::i32), InGlue};
19850b57cec5SDimitry Andric       Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
19860b57cec5SDimitry Andric                           DeclareRetOps);
198706c3fb27SDimitry Andric       InGlue = Chain.getValue(1);
19880b57cec5SDimitry Andric     }
19890b57cec5SDimitry Andric   }
19900b57cec5SDimitry Andric 
1991bdd1243dSDimitry Andric   bool HasVAArgs = CLI.IsVarArg && (CLI.Args.size() > CLI.NumFixedArgs);
1992bdd1243dSDimitry Andric   // Set the size of the vararg param byte array if the callee is a variadic
1993bdd1243dSDimitry Andric   // function and the variadic part is not empty.
1994bdd1243dSDimitry Andric   if (HasVAArgs) {
1995bdd1243dSDimitry Andric     SDValue DeclareParamOps[] = {
1996bdd1243dSDimitry Andric         VADeclareParam.getOperand(0), VADeclareParam.getOperand(1),
1997bdd1243dSDimitry Andric         VADeclareParam.getOperand(2), DAG.getConstant(VAOffset, dl, MVT::i32),
1998bdd1243dSDimitry Andric         VADeclareParam.getOperand(4)};
1999bdd1243dSDimitry Andric     DAG.MorphNodeTo(VADeclareParam.getNode(), VADeclareParam.getOpcode(),
2000bdd1243dSDimitry Andric                     VADeclareParam->getVTList(), DeclareParamOps);
2001bdd1243dSDimitry Andric   }
2002bdd1243dSDimitry Andric 
20030b57cec5SDimitry Andric   // Both indirect calls and libcalls have nullptr Func. In order to distinguish
20040b57cec5SDimitry Andric   // between them we must rely on the call site value which is valid for
20050b57cec5SDimitry Andric   // indirect calls but is always null for libcalls.
20065ffd83dbSDimitry Andric   bool isIndirectCall = !Func && CB;
20070b57cec5SDimitry Andric 
20080b57cec5SDimitry Andric   if (isa<ExternalSymbolSDNode>(Callee)) {
20090b57cec5SDimitry Andric     Function* CalleeFunc = nullptr;
20100b57cec5SDimitry Andric 
20110b57cec5SDimitry Andric     // Try to find the callee in the current module.
20120b57cec5SDimitry Andric     Callee = DAG.getSymbolFunctionGlobalAddress(Callee, &CalleeFunc);
20130b57cec5SDimitry Andric     assert(CalleeFunc != nullptr && "Libcall callee must be set.");
20140b57cec5SDimitry Andric 
20150b57cec5SDimitry Andric     // Set the "libcall callee" attribute to indicate that the function
20160b57cec5SDimitry Andric     // must always have a declaration.
20170b57cec5SDimitry Andric     CalleeFunc->addFnAttr("nvptx-libcall-callee", "true");
20180b57cec5SDimitry Andric   }
20190b57cec5SDimitry Andric 
20200b57cec5SDimitry Andric   if (isIndirectCall) {
20210b57cec5SDimitry Andric     // This is indirect function call case : PTX requires a prototype of the
20220b57cec5SDimitry Andric     // form
20230b57cec5SDimitry Andric     // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
20240b57cec5SDimitry Andric     // to be emitted, and the label has to used as the last arg of call
20250b57cec5SDimitry Andric     // instruction.
20260b57cec5SDimitry Andric     // The prototype is embedded in a string and put as the operand for a
20270b57cec5SDimitry Andric     // CallPrototype SDNode which will print out to the value of the string.
20280b57cec5SDimitry Andric     SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
2029bdd1243dSDimitry Andric     std::string Proto = getPrototype(
2030bdd1243dSDimitry Andric         DL, RetTy, Args, Outs, retAlignment,
2031bdd1243dSDimitry Andric         HasVAArgs
2032bdd1243dSDimitry Andric             ? std::optional<std::pair<unsigned, const APInt &>>(std::make_pair(
2033297eecfbSDimitry Andric                   CLI.NumFixedArgs, VADeclareParam->getConstantOperandAPInt(1)))
2034bdd1243dSDimitry Andric             : std::nullopt,
2035bdd1243dSDimitry Andric         *CB, UniqueCallSite);
2036bdd1243dSDimitry Andric     const char *ProtoStr = nvTM->getStrPool().save(Proto).data();
20370b57cec5SDimitry Andric     SDValue ProtoOps[] = {
2038bdd1243dSDimitry Andric         Chain,
2039bdd1243dSDimitry Andric         DAG.getTargetExternalSymbol(ProtoStr, MVT::i32),
204006c3fb27SDimitry Andric         InGlue,
20410b57cec5SDimitry Andric     };
20420b57cec5SDimitry Andric     Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
204306c3fb27SDimitry Andric     InGlue = Chain.getValue(1);
20440b57cec5SDimitry Andric   }
20450b57cec5SDimitry Andric   // Op to just print "call"
20460b57cec5SDimitry Andric   SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
20470b57cec5SDimitry Andric   SDValue PrintCallOps[] = {
204806c3fb27SDimitry Andric     Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InGlue
20490b57cec5SDimitry Andric   };
20500b57cec5SDimitry Andric   // We model convergent calls as separate opcodes.
20510b57cec5SDimitry Andric   unsigned Opcode = isIndirectCall ? NVPTXISD::PrintCall : NVPTXISD::PrintCallUni;
20520b57cec5SDimitry Andric   if (CLI.IsConvergent)
20530b57cec5SDimitry Andric     Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni
20540b57cec5SDimitry Andric                                               : NVPTXISD::PrintConvergentCall;
20550b57cec5SDimitry Andric   Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps);
205606c3fb27SDimitry Andric   InGlue = Chain.getValue(1);
20570b57cec5SDimitry Andric 
20580b57cec5SDimitry Andric   // Ops to print out the function name
20590b57cec5SDimitry Andric   SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
206006c3fb27SDimitry Andric   SDValue CallVoidOps[] = { Chain, Callee, InGlue };
20610b57cec5SDimitry Andric   Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
206206c3fb27SDimitry Andric   InGlue = Chain.getValue(1);
20630b57cec5SDimitry Andric 
20640b57cec5SDimitry Andric   // Ops to print out the param list
20650b57cec5SDimitry Andric   SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
206606c3fb27SDimitry Andric   SDValue CallArgBeginOps[] = { Chain, InGlue };
20670b57cec5SDimitry Andric   Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
20680b57cec5SDimitry Andric                       CallArgBeginOps);
206906c3fb27SDimitry Andric   InGlue = Chain.getValue(1);
20700b57cec5SDimitry Andric 
2071bdd1243dSDimitry Andric   for (unsigned i = 0, e = std::min(CLI.NumFixedArgs + 1, ParamCount); i != e;
2072bdd1243dSDimitry Andric        ++i) {
20730b57cec5SDimitry Andric     unsigned opcode;
20740b57cec5SDimitry Andric     if (i == (e - 1))
20750b57cec5SDimitry Andric       opcode = NVPTXISD::LastCallArg;
20760b57cec5SDimitry Andric     else
20770b57cec5SDimitry Andric       opcode = NVPTXISD::CallArg;
20780b57cec5SDimitry Andric     SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
20790b57cec5SDimitry Andric     SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
208006c3fb27SDimitry Andric                              DAG.getConstant(i, dl, MVT::i32), InGlue };
20810b57cec5SDimitry Andric     Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
208206c3fb27SDimitry Andric     InGlue = Chain.getValue(1);
20830b57cec5SDimitry Andric   }
20840b57cec5SDimitry Andric   SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
20850b57cec5SDimitry Andric   SDValue CallArgEndOps[] = { Chain,
20860b57cec5SDimitry Andric                               DAG.getConstant(isIndirectCall ? 0 : 1, dl, MVT::i32),
208706c3fb27SDimitry Andric                               InGlue };
20880b57cec5SDimitry Andric   Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
208906c3fb27SDimitry Andric   InGlue = Chain.getValue(1);
20900b57cec5SDimitry Andric 
20910b57cec5SDimitry Andric   if (isIndirectCall) {
20920b57cec5SDimitry Andric     SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
2093e8d8bef9SDimitry Andric     SDValue PrototypeOps[] = {
209406c3fb27SDimitry Andric         Chain, DAG.getConstant(UniqueCallSite, dl, MVT::i32), InGlue};
20950b57cec5SDimitry Andric     Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
209606c3fb27SDimitry Andric     InGlue = Chain.getValue(1);
20970b57cec5SDimitry Andric   }
20980b57cec5SDimitry Andric 
20990b57cec5SDimitry Andric   SmallVector<SDValue, 16> ProxyRegOps;
2100bdd1243dSDimitry Andric   SmallVector<std::optional<MVT>, 16> ProxyRegTruncates;
21010b57cec5SDimitry Andric 
21020b57cec5SDimitry Andric   // Generate loads from param memory/moves from registers for result
21030b57cec5SDimitry Andric   if (Ins.size() > 0) {
21040b57cec5SDimitry Andric     SmallVector<EVT, 16> VTs;
21050b57cec5SDimitry Andric     SmallVector<uint64_t, 16> Offsets;
21060b57cec5SDimitry Andric     ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0);
21070b57cec5SDimitry Andric     assert(VTs.size() == Ins.size() && "Bad value decomposition");
21080b57cec5SDimitry Andric 
21097a6dacacSDimitry Andric     Align RetAlign = getArgumentAlignment(CB, RetTy, 0, DL);
21100b57cec5SDimitry Andric     auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
21110b57cec5SDimitry Andric 
21120b57cec5SDimitry Andric     SmallVector<EVT, 6> LoadVTs;
21130b57cec5SDimitry Andric     int VecIdx = -1; // Index of the first element of the vector.
21140b57cec5SDimitry Andric 
21150b57cec5SDimitry Andric     // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
21160b57cec5SDimitry Andric     // 32-bits are sign extended or zero extended, depending on whether
21170b57cec5SDimitry Andric     // they are signed or unsigned types.
21180b57cec5SDimitry Andric     bool ExtendIntegerRetVal =
21190b57cec5SDimitry Andric         RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
21200b57cec5SDimitry Andric 
21210b57cec5SDimitry Andric     for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
21220b57cec5SDimitry Andric       bool needTruncate = false;
21230b57cec5SDimitry Andric       EVT TheLoadType = VTs[i];
21240b57cec5SDimitry Andric       EVT EltType = Ins[i].VT;
21255ffd83dbSDimitry Andric       Align EltAlign = commonAlignment(RetAlign, Offsets[i]);
2126fcaf7f86SDimitry Andric       MVT PromotedVT;
2127fcaf7f86SDimitry Andric 
2128fcaf7f86SDimitry Andric       if (PromoteScalarIntegerPTX(TheLoadType, &PromotedVT)) {
2129fcaf7f86SDimitry Andric         TheLoadType = EVT(PromotedVT);
2130fcaf7f86SDimitry Andric         EltType = EVT(PromotedVT);
2131fcaf7f86SDimitry Andric         needTruncate = true;
2132fcaf7f86SDimitry Andric       }
2133fcaf7f86SDimitry Andric 
21340b57cec5SDimitry Andric       if (ExtendIntegerRetVal) {
21350b57cec5SDimitry Andric         TheLoadType = MVT::i32;
21360b57cec5SDimitry Andric         EltType = MVT::i32;
21370b57cec5SDimitry Andric         needTruncate = true;
21380b57cec5SDimitry Andric       } else if (TheLoadType.getSizeInBits() < 16) {
21390b57cec5SDimitry Andric         if (VTs[i].isInteger())
21400b57cec5SDimitry Andric           needTruncate = true;
21410b57cec5SDimitry Andric         EltType = MVT::i16;
21420b57cec5SDimitry Andric       }
21430b57cec5SDimitry Andric 
21440b57cec5SDimitry Andric       // Record index of the very first element of the vector.
21450b57cec5SDimitry Andric       if (VectorInfo[i] & PVF_FIRST) {
21460b57cec5SDimitry Andric         assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list.");
21470b57cec5SDimitry Andric         VecIdx = i;
21480b57cec5SDimitry Andric       }
21490b57cec5SDimitry Andric 
21500b57cec5SDimitry Andric       LoadVTs.push_back(EltType);
21510b57cec5SDimitry Andric 
21520b57cec5SDimitry Andric       if (VectorInfo[i] & PVF_LAST) {
21530b57cec5SDimitry Andric         unsigned NumElts = LoadVTs.size();
21540b57cec5SDimitry Andric         LoadVTs.push_back(MVT::Other);
21550b57cec5SDimitry Andric         LoadVTs.push_back(MVT::Glue);
21560b57cec5SDimitry Andric         NVPTXISD::NodeType Op;
21570b57cec5SDimitry Andric         switch (NumElts) {
21580b57cec5SDimitry Andric         case 1:
21590b57cec5SDimitry Andric           Op = NVPTXISD::LoadParam;
21600b57cec5SDimitry Andric           break;
21610b57cec5SDimitry Andric         case 2:
21620b57cec5SDimitry Andric           Op = NVPTXISD::LoadParamV2;
21630b57cec5SDimitry Andric           break;
21640b57cec5SDimitry Andric         case 4:
21650b57cec5SDimitry Andric           Op = NVPTXISD::LoadParamV4;
21660b57cec5SDimitry Andric           break;
21670b57cec5SDimitry Andric         default:
21680b57cec5SDimitry Andric           llvm_unreachable("Invalid vector info.");
21690b57cec5SDimitry Andric         }
21700b57cec5SDimitry Andric 
21710b57cec5SDimitry Andric         SDValue LoadOperands[] = {
21720b57cec5SDimitry Andric             Chain, DAG.getConstant(1, dl, MVT::i32),
217306c3fb27SDimitry Andric             DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InGlue};
21740b57cec5SDimitry Andric         SDValue RetVal = DAG.getMemIntrinsicNode(
21750b57cec5SDimitry Andric             Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType,
21760b57cec5SDimitry Andric             MachinePointerInfo(), EltAlign,
21770b57cec5SDimitry Andric             MachineMemOperand::MOLoad);
21780b57cec5SDimitry Andric 
21790b57cec5SDimitry Andric         for (unsigned j = 0; j < NumElts; ++j) {
21800b57cec5SDimitry Andric           ProxyRegOps.push_back(RetVal.getValue(j));
21810b57cec5SDimitry Andric 
21820b57cec5SDimitry Andric           if (needTruncate)
2183bdd1243dSDimitry Andric             ProxyRegTruncates.push_back(std::optional<MVT>(Ins[VecIdx + j].VT));
21840b57cec5SDimitry Andric           else
2185bdd1243dSDimitry Andric             ProxyRegTruncates.push_back(std::optional<MVT>());
21860b57cec5SDimitry Andric         }
21870b57cec5SDimitry Andric 
21880b57cec5SDimitry Andric         Chain = RetVal.getValue(NumElts);
218906c3fb27SDimitry Andric         InGlue = RetVal.getValue(NumElts + 1);
21900b57cec5SDimitry Andric 
21910b57cec5SDimitry Andric         // Cleanup
21920b57cec5SDimitry Andric         VecIdx = -1;
21930b57cec5SDimitry Andric         LoadVTs.clear();
21940b57cec5SDimitry Andric       }
21950b57cec5SDimitry Andric     }
21960b57cec5SDimitry Andric   }
21970b57cec5SDimitry Andric 
2198bdd1243dSDimitry Andric   Chain =
219906c3fb27SDimitry Andric       DAG.getCALLSEQ_END(Chain, UniqueCallSite, UniqueCallSite + 1, InGlue, dl);
220006c3fb27SDimitry Andric   InGlue = Chain.getValue(1);
22010b57cec5SDimitry Andric 
22020b57cec5SDimitry Andric   // Append ProxyReg instructions to the chain to make sure that `callseq_end`
22030b57cec5SDimitry Andric   // will not get lost. Otherwise, during libcalls expansion, the nodes can become
22040b57cec5SDimitry Andric   // dangling.
22050b57cec5SDimitry Andric   for (unsigned i = 0; i < ProxyRegOps.size(); ++i) {
22060b57cec5SDimitry Andric     SDValue Ret = DAG.getNode(
22070b57cec5SDimitry Andric       NVPTXISD::ProxyReg, dl,
22080b57cec5SDimitry Andric       DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other, MVT::Glue),
220906c3fb27SDimitry Andric       { Chain, ProxyRegOps[i], InGlue }
22100b57cec5SDimitry Andric     );
22110b57cec5SDimitry Andric 
22120b57cec5SDimitry Andric     Chain = Ret.getValue(1);
221306c3fb27SDimitry Andric     InGlue = Ret.getValue(2);
22140b57cec5SDimitry Andric 
221581ad6265SDimitry Andric     if (ProxyRegTruncates[i]) {
2216bdd1243dSDimitry Andric       Ret = DAG.getNode(ISD::TRUNCATE, dl, *ProxyRegTruncates[i], Ret);
22170b57cec5SDimitry Andric     }
22180b57cec5SDimitry Andric 
22190b57cec5SDimitry Andric     InVals.push_back(Ret);
22200b57cec5SDimitry Andric   }
22210b57cec5SDimitry Andric 
22220b57cec5SDimitry Andric   // set isTailCall to false for now, until we figure out how to express
22230b57cec5SDimitry Andric   // tail call optimization in PTX
22240b57cec5SDimitry Andric   isTailCall = false;
22250b57cec5SDimitry Andric   return Chain;
22260b57cec5SDimitry Andric }
22270b57cec5SDimitry Andric 
LowerDYNAMIC_STACKALLOC(SDValue Op,SelectionDAG & DAG) const22285f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
22295f757f3fSDimitry Andric                                                      SelectionDAG &DAG) const {
22305f757f3fSDimitry Andric   const Function &Fn = DAG.getMachineFunction().getFunction();
22315f757f3fSDimitry Andric 
22325f757f3fSDimitry Andric   DiagnosticInfoUnsupported NoDynamicAlloca(
22335f757f3fSDimitry Andric       Fn, "dynamic alloca unsupported by NVPTX backend",
22345f757f3fSDimitry Andric       SDLoc(Op).getDebugLoc());
22355f757f3fSDimitry Andric   DAG.getContext()->diagnose(NoDynamicAlloca);
22365f757f3fSDimitry Andric   auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
22375f757f3fSDimitry Andric   return DAG.getMergeValues(Ops, SDLoc());
22385f757f3fSDimitry Andric }
22395f757f3fSDimitry Andric 
22400b57cec5SDimitry Andric // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
22410b57cec5SDimitry Andric // (see LegalizeDAG.cpp). This is slow and uses local memory.
22420b57cec5SDimitry Andric // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
22430b57cec5SDimitry Andric SDValue
LowerCONCAT_VECTORS(SDValue Op,SelectionDAG & DAG) const22440b57cec5SDimitry Andric NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
22450b57cec5SDimitry Andric   SDNode *Node = Op.getNode();
22460b57cec5SDimitry Andric   SDLoc dl(Node);
22470b57cec5SDimitry Andric   SmallVector<SDValue, 8> Ops;
22480b57cec5SDimitry Andric   unsigned NumOperands = Node->getNumOperands();
22490b57cec5SDimitry Andric   for (unsigned i = 0; i < NumOperands; ++i) {
22500b57cec5SDimitry Andric     SDValue SubOp = Node->getOperand(i);
22510b57cec5SDimitry Andric     EVT VVT = SubOp.getNode()->getValueType(0);
22520b57cec5SDimitry Andric     EVT EltVT = VVT.getVectorElementType();
22530b57cec5SDimitry Andric     unsigned NumSubElem = VVT.getVectorNumElements();
22540b57cec5SDimitry Andric     for (unsigned j = 0; j < NumSubElem; ++j) {
22550b57cec5SDimitry Andric       Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
22560b57cec5SDimitry Andric                                 DAG.getIntPtrConstant(j, dl)));
22570b57cec5SDimitry Andric     }
22580b57cec5SDimitry Andric   }
22590b57cec5SDimitry Andric   return DAG.getBuildVector(Node->getValueType(0), dl, Ops);
22600b57cec5SDimitry Andric }
22610b57cec5SDimitry Andric 
22625f757f3fSDimitry Andric // We can init constant f16x2/v2i16/v4i8 with a single .b32 move.  Normally it
22630b57cec5SDimitry Andric // would get lowered as two constant loads and vector-packing move.
22640b57cec5SDimitry Andric // Instead we want just a constant move:
22655f757f3fSDimitry Andric //        mov.b32         %r2, 0x40003C00
LowerBUILD_VECTOR(SDValue Op,SelectionDAG & DAG) const22660b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op,
22670b57cec5SDimitry Andric                                                SelectionDAG &DAG) const {
22685f757f3fSDimitry Andric   EVT VT = Op->getValueType(0);
22695f757f3fSDimitry Andric   if (!(Isv2x16VT(VT) || VT == MVT::v4i8))
22700b57cec5SDimitry Andric     return Op;
22710b57cec5SDimitry Andric 
22725f757f3fSDimitry Andric   SDLoc DL(Op);
22735f757f3fSDimitry Andric 
22745f757f3fSDimitry Andric   if (!llvm::all_of(Op->ops(), [](SDValue Operand) {
22755f757f3fSDimitry Andric         return Operand->isUndef() || isa<ConstantSDNode>(Operand) ||
22765f757f3fSDimitry Andric                isa<ConstantFPSDNode>(Operand);
22775f757f3fSDimitry Andric       })) {
22785f757f3fSDimitry Andric     // Lower non-const v4i8 vector as byte-wise constructed i32, which allows us
22795f757f3fSDimitry Andric     // to optimize calculation of constant parts.
22805f757f3fSDimitry Andric     if (VT == MVT::v4i8) {
22815f757f3fSDimitry Andric       SDValue C8 = DAG.getConstant(8, DL, MVT::i32);
22825f757f3fSDimitry Andric       SDValue E01 = DAG.getNode(
22835f757f3fSDimitry Andric           NVPTXISD::BFI, DL, MVT::i32,
22845f757f3fSDimitry Andric           DAG.getAnyExtOrTrunc(Op->getOperand(1), DL, MVT::i32),
22855f757f3fSDimitry Andric           DAG.getAnyExtOrTrunc(Op->getOperand(0), DL, MVT::i32), C8, C8);
22865f757f3fSDimitry Andric       SDValue E012 =
22875f757f3fSDimitry Andric           DAG.getNode(NVPTXISD::BFI, DL, MVT::i32,
22885f757f3fSDimitry Andric                       DAG.getAnyExtOrTrunc(Op->getOperand(2), DL, MVT::i32),
22895f757f3fSDimitry Andric                       E01, DAG.getConstant(16, DL, MVT::i32), C8);
22905f757f3fSDimitry Andric       SDValue E0123 =
22915f757f3fSDimitry Andric           DAG.getNode(NVPTXISD::BFI, DL, MVT::i32,
22925f757f3fSDimitry Andric                       DAG.getAnyExtOrTrunc(Op->getOperand(3), DL, MVT::i32),
22935f757f3fSDimitry Andric                       E012, DAG.getConstant(24, DL, MVT::i32), C8);
22945f757f3fSDimitry Andric       return DAG.getNode(ISD::BITCAST, DL, VT, E0123);
22955f757f3fSDimitry Andric     }
22965f757f3fSDimitry Andric     return Op;
22975f757f3fSDimitry Andric   }
22985f757f3fSDimitry Andric 
22995f757f3fSDimitry Andric   // Get value or the Nth operand as an APInt(32). Undef values treated as 0.
23005f757f3fSDimitry Andric   auto GetOperand = [](SDValue Op, int N) -> APInt {
23015f757f3fSDimitry Andric     const SDValue &Operand = Op->getOperand(N);
23025f757f3fSDimitry Andric     EVT VT = Op->getValueType(0);
23035f757f3fSDimitry Andric     if (Operand->isUndef())
23045f757f3fSDimitry Andric       return APInt(32, 0);
23055f757f3fSDimitry Andric     APInt Value;
23065f757f3fSDimitry Andric     if (VT == MVT::v2f16 || VT == MVT::v2bf16)
23075f757f3fSDimitry Andric       Value = cast<ConstantFPSDNode>(Operand)->getValueAPF().bitcastToAPInt();
23085f757f3fSDimitry Andric     else if (VT == MVT::v2i16 || VT == MVT::v4i8)
2309297eecfbSDimitry Andric       Value = Operand->getAsAPIntVal();
23105f757f3fSDimitry Andric     else
23115f757f3fSDimitry Andric       llvm_unreachable("Unsupported type");
23125f757f3fSDimitry Andric     // i8 values are carried around as i16, so we need to zero out upper bits,
23135f757f3fSDimitry Andric     // so they do not get in the way of combining individual byte values
23145f757f3fSDimitry Andric     if (VT == MVT::v4i8)
23155f757f3fSDimitry Andric       Value = Value.trunc(8);
23165f757f3fSDimitry Andric     return Value.zext(32);
23175f757f3fSDimitry Andric   };
23185f757f3fSDimitry Andric   APInt Value;
23195f757f3fSDimitry Andric   if (Isv2x16VT(VT)) {
23205f757f3fSDimitry Andric     Value = GetOperand(Op, 0) | GetOperand(Op, 1).shl(16);
23215f757f3fSDimitry Andric   } else if (VT == MVT::v4i8) {
23225f757f3fSDimitry Andric     Value = GetOperand(Op, 0) | GetOperand(Op, 1).shl(8) |
23235f757f3fSDimitry Andric             GetOperand(Op, 2).shl(16) | GetOperand(Op, 3).shl(24);
23245f757f3fSDimitry Andric   } else {
23255f757f3fSDimitry Andric     llvm_unreachable("Unsupported type");
23265f757f3fSDimitry Andric   }
23275f757f3fSDimitry Andric   SDValue Const = DAG.getConstant(Value, SDLoc(Op), MVT::i32);
232806c3fb27SDimitry Andric   return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op->getValueType(0), Const);
23290b57cec5SDimitry Andric }
23300b57cec5SDimitry Andric 
LowerEXTRACT_VECTOR_ELT(SDValue Op,SelectionDAG & DAG) const23310b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
23320b57cec5SDimitry Andric                                                      SelectionDAG &DAG) const {
23330b57cec5SDimitry Andric   SDValue Index = Op->getOperand(1);
23345f757f3fSDimitry Andric   SDValue Vector = Op->getOperand(0);
23355f757f3fSDimitry Andric   SDLoc DL(Op);
23365f757f3fSDimitry Andric   EVT VectorVT = Vector.getValueType();
23375f757f3fSDimitry Andric 
23385f757f3fSDimitry Andric   if (VectorVT == MVT::v4i8) {
23395f757f3fSDimitry Andric     SDValue BFE =
23405f757f3fSDimitry Andric         DAG.getNode(NVPTXISD::BFE, DL, MVT::i32,
23415f757f3fSDimitry Andric                     {Vector,
23425f757f3fSDimitry Andric                      DAG.getNode(ISD::MUL, DL, MVT::i32,
23435f757f3fSDimitry Andric                                  DAG.getZExtOrTrunc(Index, DL, MVT::i32),
23445f757f3fSDimitry Andric                                  DAG.getConstant(8, DL, MVT::i32)),
23455f757f3fSDimitry Andric                      DAG.getConstant(8, DL, MVT::i32)});
23465f757f3fSDimitry Andric     return DAG.getAnyExtOrTrunc(BFE, DL, Op->getValueType(0));
23475f757f3fSDimitry Andric   }
23485f757f3fSDimitry Andric 
23490b57cec5SDimitry Andric   // Constant index will be matched by tablegen.
23500b57cec5SDimitry Andric   if (isa<ConstantSDNode>(Index.getNode()))
23510b57cec5SDimitry Andric     return Op;
23520b57cec5SDimitry Andric 
23530b57cec5SDimitry Andric   // Extract individual elements and select one of them.
23545f757f3fSDimitry Andric   assert(Isv2x16VT(VectorVT) && "Unexpected vector type.");
23550b57cec5SDimitry Andric   EVT EltVT = VectorVT.getVectorElementType();
23560b57cec5SDimitry Andric 
23570b57cec5SDimitry Andric   SDLoc dl(Op.getNode());
23580b57cec5SDimitry Andric   SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector,
23590b57cec5SDimitry Andric                            DAG.getIntPtrConstant(0, dl));
23600b57cec5SDimitry Andric   SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector,
23610b57cec5SDimitry Andric                            DAG.getIntPtrConstant(1, dl));
23620b57cec5SDimitry Andric   return DAG.getSelectCC(dl, Index, DAG.getIntPtrConstant(0, dl), E0, E1,
23630b57cec5SDimitry Andric                          ISD::CondCode::SETEQ);
23640b57cec5SDimitry Andric }
23650b57cec5SDimitry Andric 
LowerINSERT_VECTOR_ELT(SDValue Op,SelectionDAG & DAG) const23665f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
23675f757f3fSDimitry Andric                                                     SelectionDAG &DAG) const {
23685f757f3fSDimitry Andric   SDValue Vector = Op->getOperand(0);
23695f757f3fSDimitry Andric   EVT VectorVT = Vector.getValueType();
23705f757f3fSDimitry Andric 
23715f757f3fSDimitry Andric   if (VectorVT != MVT::v4i8)
23725f757f3fSDimitry Andric     return Op;
23735f757f3fSDimitry Andric   SDLoc DL(Op);
23745f757f3fSDimitry Andric   SDValue Value = Op->getOperand(1);
23755f757f3fSDimitry Andric   if (Value->isUndef())
23765f757f3fSDimitry Andric     return Vector;
23775f757f3fSDimitry Andric 
23785f757f3fSDimitry Andric   SDValue Index = Op->getOperand(2);
23795f757f3fSDimitry Andric 
23805f757f3fSDimitry Andric   SDValue BFI =
23815f757f3fSDimitry Andric       DAG.getNode(NVPTXISD::BFI, DL, MVT::i32,
23825f757f3fSDimitry Andric                   {DAG.getZExtOrTrunc(Value, DL, MVT::i32), Vector,
23835f757f3fSDimitry Andric                    DAG.getNode(ISD::MUL, DL, MVT::i32,
23845f757f3fSDimitry Andric                                DAG.getZExtOrTrunc(Index, DL, MVT::i32),
23855f757f3fSDimitry Andric                                DAG.getConstant(8, DL, MVT::i32)),
23865f757f3fSDimitry Andric                    DAG.getConstant(8, DL, MVT::i32)});
23875f757f3fSDimitry Andric   return DAG.getNode(ISD::BITCAST, DL, Op->getValueType(0), BFI);
23885f757f3fSDimitry Andric }
23895f757f3fSDimitry Andric 
LowerVECTOR_SHUFFLE(SDValue Op,SelectionDAG & DAG) const23905f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
23915f757f3fSDimitry Andric                                                  SelectionDAG &DAG) const {
23925f757f3fSDimitry Andric   SDValue V1 = Op.getOperand(0);
23935f757f3fSDimitry Andric   EVT VectorVT = V1.getValueType();
23945f757f3fSDimitry Andric   if (VectorVT != MVT::v4i8 || Op.getValueType() != MVT::v4i8)
23955f757f3fSDimitry Andric     return Op;
23965f757f3fSDimitry Andric 
23975f757f3fSDimitry Andric   // Lower shuffle to PRMT instruction.
23985f757f3fSDimitry Andric   const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
23995f757f3fSDimitry Andric   SDValue V2 = Op.getOperand(1);
24005f757f3fSDimitry Andric   uint32_t Selector = 0;
24017a6dacacSDimitry Andric   for (auto I : llvm::enumerate(SVN->getMask())) {
24027a6dacacSDimitry Andric     if (I.value() != -1) // -1 is a placeholder for undef.
24035f757f3fSDimitry Andric       Selector |= (I.value() << (I.index() * 4));
24047a6dacacSDimitry Andric   }
24055f757f3fSDimitry Andric 
24065f757f3fSDimitry Andric   SDLoc DL(Op);
24075f757f3fSDimitry Andric   return DAG.getNode(NVPTXISD::PRMT, DL, MVT::v4i8, V1, V2,
24085f757f3fSDimitry Andric                      DAG.getConstant(Selector, DL, MVT::i32),
24095f757f3fSDimitry Andric                      DAG.getConstant(NVPTX::PTXPrmtMode::NONE, DL, MVT::i32));
24105f757f3fSDimitry Andric }
24110b57cec5SDimitry Andric /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
24120b57cec5SDimitry Andric /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
24130b57cec5SDimitry Andric ///    amount, or
24140b57cec5SDimitry Andric /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
24150b57cec5SDimitry Andric ///    amount.
LowerShiftRightParts(SDValue Op,SelectionDAG & DAG) const24160b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
24170b57cec5SDimitry Andric                                                   SelectionDAG &DAG) const {
24180b57cec5SDimitry Andric   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
24190b57cec5SDimitry Andric   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
24200b57cec5SDimitry Andric 
24210b57cec5SDimitry Andric   EVT VT = Op.getValueType();
24220b57cec5SDimitry Andric   unsigned VTBits = VT.getSizeInBits();
24230b57cec5SDimitry Andric   SDLoc dl(Op);
24240b57cec5SDimitry Andric   SDValue ShOpLo = Op.getOperand(0);
24250b57cec5SDimitry Andric   SDValue ShOpHi = Op.getOperand(1);
24260b57cec5SDimitry Andric   SDValue ShAmt  = Op.getOperand(2);
24270b57cec5SDimitry Andric   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
24280b57cec5SDimitry Andric 
24290b57cec5SDimitry Andric   if (VTBits == 32 && STI.getSmVersion() >= 35) {
24300b57cec5SDimitry Andric     // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
24310b57cec5SDimitry Andric     // {dHi, dLo} = {aHi, aLo} >> Amt
24320b57cec5SDimitry Andric     //   dHi = aHi >> Amt
24330b57cec5SDimitry Andric     //   dLo = shf.r.clamp aLo, aHi, Amt
24340b57cec5SDimitry Andric 
24350b57cec5SDimitry Andric     SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
24360b57cec5SDimitry Andric     SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi,
24370b57cec5SDimitry Andric                              ShAmt);
24380b57cec5SDimitry Andric 
24390b57cec5SDimitry Andric     SDValue Ops[2] = { Lo, Hi };
24400b57cec5SDimitry Andric     return DAG.getMergeValues(Ops, dl);
24410b57cec5SDimitry Andric   }
24420b57cec5SDimitry Andric   else {
24430b57cec5SDimitry Andric     // {dHi, dLo} = {aHi, aLo} >> Amt
24440b57cec5SDimitry Andric     // - if (Amt>=size) then
24450b57cec5SDimitry Andric     //      dLo = aHi >> (Amt-size)
24460b57cec5SDimitry Andric     //      dHi = aHi >> Amt (this is either all 0 or all 1)
24470b57cec5SDimitry Andric     //   else
24480b57cec5SDimitry Andric     //      dLo = (aLo >>logic Amt) | (aHi << (size-Amt))
24490b57cec5SDimitry Andric     //      dHi = aHi >> Amt
24500b57cec5SDimitry Andric 
24510b57cec5SDimitry Andric     SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
24520b57cec5SDimitry Andric                                    DAG.getConstant(VTBits, dl, MVT::i32),
24530b57cec5SDimitry Andric                                    ShAmt);
24540b57cec5SDimitry Andric     SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
24550b57cec5SDimitry Andric     SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
24560b57cec5SDimitry Andric                                      DAG.getConstant(VTBits, dl, MVT::i32));
24570b57cec5SDimitry Andric     SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
24580b57cec5SDimitry Andric     SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
24590b57cec5SDimitry Andric     SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
24600b57cec5SDimitry Andric 
24610b57cec5SDimitry Andric     SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
24620b57cec5SDimitry Andric                                DAG.getConstant(VTBits, dl, MVT::i32),
24630b57cec5SDimitry Andric                                ISD::SETGE);
24640b57cec5SDimitry Andric     SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
24650b57cec5SDimitry Andric     SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
24660b57cec5SDimitry Andric 
24670b57cec5SDimitry Andric     SDValue Ops[2] = { Lo, Hi };
24680b57cec5SDimitry Andric     return DAG.getMergeValues(Ops, dl);
24690b57cec5SDimitry Andric   }
24700b57cec5SDimitry Andric }
24710b57cec5SDimitry Andric 
24720b57cec5SDimitry Andric /// LowerShiftLeftParts - Lower SHL_PARTS, which
24730b57cec5SDimitry Andric /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
24740b57cec5SDimitry Andric ///    amount, or
24750b57cec5SDimitry Andric /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
24760b57cec5SDimitry Andric ///    amount.
LowerShiftLeftParts(SDValue Op,SelectionDAG & DAG) const24770b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
24780b57cec5SDimitry Andric                                                  SelectionDAG &DAG) const {
24790b57cec5SDimitry Andric   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
24800b57cec5SDimitry Andric   assert(Op.getOpcode() == ISD::SHL_PARTS);
24810b57cec5SDimitry Andric 
24820b57cec5SDimitry Andric   EVT VT = Op.getValueType();
24830b57cec5SDimitry Andric   unsigned VTBits = VT.getSizeInBits();
24840b57cec5SDimitry Andric   SDLoc dl(Op);
24850b57cec5SDimitry Andric   SDValue ShOpLo = Op.getOperand(0);
24860b57cec5SDimitry Andric   SDValue ShOpHi = Op.getOperand(1);
24870b57cec5SDimitry Andric   SDValue ShAmt  = Op.getOperand(2);
24880b57cec5SDimitry Andric 
24890b57cec5SDimitry Andric   if (VTBits == 32 && STI.getSmVersion() >= 35) {
24900b57cec5SDimitry Andric     // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
24910b57cec5SDimitry Andric     // {dHi, dLo} = {aHi, aLo} << Amt
24920b57cec5SDimitry Andric     //   dHi = shf.l.clamp aLo, aHi, Amt
24930b57cec5SDimitry Andric     //   dLo = aLo << Amt
24940b57cec5SDimitry Andric 
24950b57cec5SDimitry Andric     SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
24960b57cec5SDimitry Andric                              ShAmt);
24970b57cec5SDimitry Andric     SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
24980b57cec5SDimitry Andric 
24990b57cec5SDimitry Andric     SDValue Ops[2] = { Lo, Hi };
25000b57cec5SDimitry Andric     return DAG.getMergeValues(Ops, dl);
25010b57cec5SDimitry Andric   }
25020b57cec5SDimitry Andric   else {
25030b57cec5SDimitry Andric     // {dHi, dLo} = {aHi, aLo} << Amt
25040b57cec5SDimitry Andric     // - if (Amt>=size) then
25050b57cec5SDimitry Andric     //      dLo = aLo << Amt (all 0)
25060b57cec5SDimitry Andric     //      dLo = aLo << (Amt-size)
25070b57cec5SDimitry Andric     //   else
25080b57cec5SDimitry Andric     //      dLo = aLo << Amt
25090b57cec5SDimitry Andric     //      dHi = (aHi << Amt) | (aLo >> (size-Amt))
25100b57cec5SDimitry Andric 
25110b57cec5SDimitry Andric     SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
25120b57cec5SDimitry Andric                                    DAG.getConstant(VTBits, dl, MVT::i32),
25130b57cec5SDimitry Andric                                    ShAmt);
25140b57cec5SDimitry Andric     SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
25150b57cec5SDimitry Andric     SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
25160b57cec5SDimitry Andric                                      DAG.getConstant(VTBits, dl, MVT::i32));
25170b57cec5SDimitry Andric     SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
25180b57cec5SDimitry Andric     SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
25190b57cec5SDimitry Andric     SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
25200b57cec5SDimitry Andric 
25210b57cec5SDimitry Andric     SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
25220b57cec5SDimitry Andric                                DAG.getConstant(VTBits, dl, MVT::i32),
25230b57cec5SDimitry Andric                                ISD::SETGE);
25240b57cec5SDimitry Andric     SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
25250b57cec5SDimitry Andric     SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
25260b57cec5SDimitry Andric 
25270b57cec5SDimitry Andric     SDValue Ops[2] = { Lo, Hi };
25280b57cec5SDimitry Andric     return DAG.getMergeValues(Ops, dl);
25290b57cec5SDimitry Andric   }
25300b57cec5SDimitry Andric }
25310b57cec5SDimitry Andric 
LowerFROUND(SDValue Op,SelectionDAG & DAG) const25320b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
25330b57cec5SDimitry Andric   EVT VT = Op.getValueType();
25340b57cec5SDimitry Andric 
25350b57cec5SDimitry Andric   if (VT == MVT::f32)
25360b57cec5SDimitry Andric     return LowerFROUND32(Op, DAG);
25370b57cec5SDimitry Andric 
25380b57cec5SDimitry Andric   if (VT == MVT::f64)
25390b57cec5SDimitry Andric     return LowerFROUND64(Op, DAG);
25400b57cec5SDimitry Andric 
25410b57cec5SDimitry Andric   llvm_unreachable("unhandled type");
25420b57cec5SDimitry Andric }
25430b57cec5SDimitry Andric 
25440b57cec5SDimitry Andric // This is the the rounding method used in CUDA libdevice in C like code:
25450b57cec5SDimitry Andric // float roundf(float A)
25460b57cec5SDimitry Andric // {
25470b57cec5SDimitry Andric //   float RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f));
25480b57cec5SDimitry Andric //   RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA;
25490b57cec5SDimitry Andric //   return abs(A) < 0.5 ? (float)(int)A : RoundedA;
25500b57cec5SDimitry Andric // }
LowerFROUND32(SDValue Op,SelectionDAG & DAG) const25510b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerFROUND32(SDValue Op,
25520b57cec5SDimitry Andric                                            SelectionDAG &DAG) const {
25530b57cec5SDimitry Andric   SDLoc SL(Op);
25540b57cec5SDimitry Andric   SDValue A = Op.getOperand(0);
25550b57cec5SDimitry Andric   EVT VT = Op.getValueType();
25560b57cec5SDimitry Andric 
25570b57cec5SDimitry Andric   SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A);
25580b57cec5SDimitry Andric 
25590b57cec5SDimitry Andric   // RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f))
25600b57cec5SDimitry Andric   SDValue Bitcast  = DAG.getNode(ISD::BITCAST, SL, MVT::i32, A);
25610b57cec5SDimitry Andric   const int SignBitMask = 0x80000000;
25620b57cec5SDimitry Andric   SDValue Sign = DAG.getNode(ISD::AND, SL, MVT::i32, Bitcast,
25630b57cec5SDimitry Andric                              DAG.getConstant(SignBitMask, SL, MVT::i32));
25640b57cec5SDimitry Andric   const int PointFiveInBits = 0x3F000000;
25650b57cec5SDimitry Andric   SDValue PointFiveWithSignRaw =
25660b57cec5SDimitry Andric       DAG.getNode(ISD::OR, SL, MVT::i32, Sign,
25670b57cec5SDimitry Andric                   DAG.getConstant(PointFiveInBits, SL, MVT::i32));
25680b57cec5SDimitry Andric   SDValue PointFiveWithSign =
25690b57cec5SDimitry Andric       DAG.getNode(ISD::BITCAST, SL, VT, PointFiveWithSignRaw);
25700b57cec5SDimitry Andric   SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, A, PointFiveWithSign);
25710b57cec5SDimitry Andric   SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA);
25720b57cec5SDimitry Andric 
25730b57cec5SDimitry Andric   // RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA;
25740b57cec5SDimitry Andric   EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
25750b57cec5SDimitry Andric   SDValue IsLarge =
25760b57cec5SDimitry Andric       DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 23.0), SL, VT),
25770b57cec5SDimitry Andric                    ISD::SETOGT);
25780b57cec5SDimitry Andric   RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA);
25790b57cec5SDimitry Andric 
25800b57cec5SDimitry Andric   // return abs(A) < 0.5 ? (float)(int)A : RoundedA;
25810b57cec5SDimitry Andric   SDValue IsSmall =DAG.getSetCC(SL, SetCCVT, AbsA,
25820b57cec5SDimitry Andric                                 DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT);
25830b57cec5SDimitry Andric   SDValue RoundedAForSmallA = DAG.getNode(ISD::FTRUNC, SL, VT, A);
25840b57cec5SDimitry Andric   return DAG.getNode(ISD::SELECT, SL, VT, IsSmall, RoundedAForSmallA, RoundedA);
25850b57cec5SDimitry Andric }
25860b57cec5SDimitry Andric 
25870b57cec5SDimitry Andric // The implementation of round(double) is similar to that of round(float) in
25880b57cec5SDimitry Andric // that they both separate the value range into three regions and use a method
25890b57cec5SDimitry Andric // specific to the region to round the values. However, round(double) first
25900b57cec5SDimitry Andric // calculates the round of the absolute value and then adds the sign back while
25910b57cec5SDimitry Andric // round(float) directly rounds the value with sign.
LowerFROUND64(SDValue Op,SelectionDAG & DAG) const25920b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerFROUND64(SDValue Op,
25930b57cec5SDimitry Andric                                            SelectionDAG &DAG) const {
25940b57cec5SDimitry Andric   SDLoc SL(Op);
25950b57cec5SDimitry Andric   SDValue A = Op.getOperand(0);
25960b57cec5SDimitry Andric   EVT VT = Op.getValueType();
25970b57cec5SDimitry Andric 
25980b57cec5SDimitry Andric   SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A);
25990b57cec5SDimitry Andric 
26000b57cec5SDimitry Andric   // double RoundedA = (double) (int) (abs(A) + 0.5f);
26010b57cec5SDimitry Andric   SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, AbsA,
26020b57cec5SDimitry Andric                                   DAG.getConstantFP(0.5, SL, VT));
26030b57cec5SDimitry Andric   SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA);
26040b57cec5SDimitry Andric 
26050b57cec5SDimitry Andric   // RoundedA = abs(A) < 0.5 ? (double)0 : RoundedA;
26060b57cec5SDimitry Andric   EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
26070b57cec5SDimitry Andric   SDValue IsSmall =DAG.getSetCC(SL, SetCCVT, AbsA,
26080b57cec5SDimitry Andric                                 DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT);
26090b57cec5SDimitry Andric   RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsSmall,
26100b57cec5SDimitry Andric                          DAG.getConstantFP(0, SL, VT),
26110b57cec5SDimitry Andric                          RoundedA);
26120b57cec5SDimitry Andric 
26130b57cec5SDimitry Andric   // Add sign to rounded_A
26140b57cec5SDimitry Andric   RoundedA = DAG.getNode(ISD::FCOPYSIGN, SL, VT, RoundedA, A);
26150b57cec5SDimitry Andric   DAG.getNode(ISD::FTRUNC, SL, VT, A);
26160b57cec5SDimitry Andric 
26170b57cec5SDimitry Andric   // RoundedA = abs(A) > 0x1.0p52 ? A : RoundedA;
26180b57cec5SDimitry Andric   SDValue IsLarge =
26190b57cec5SDimitry Andric       DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 52.0), SL, VT),
26200b57cec5SDimitry Andric                    ISD::SETOGT);
26210b57cec5SDimitry Andric   return DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA);
26220b57cec5SDimitry Andric }
26230b57cec5SDimitry Andric 
LowerINT_TO_FP(SDValue Op,SelectionDAG & DAG) const26245f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerINT_TO_FP(SDValue Op,
26255f757f3fSDimitry Andric                                             SelectionDAG &DAG) const {
26265f757f3fSDimitry Andric   assert(STI.getSmVersion() < 90 || STI.getPTXVersion() < 78);
26270b57cec5SDimitry Andric 
26285f757f3fSDimitry Andric   if (Op.getValueType() == MVT::bf16) {
26295f757f3fSDimitry Andric     SDLoc Loc(Op);
26305f757f3fSDimitry Andric     return DAG.getNode(
26315f757f3fSDimitry Andric         ISD::FP_ROUND, Loc, MVT::bf16,
26325f757f3fSDimitry Andric         DAG.getNode(Op.getOpcode(), Loc, MVT::f32, Op.getOperand(0)),
26335f757f3fSDimitry Andric         DAG.getIntPtrConstant(0, Loc));
26345f757f3fSDimitry Andric   }
26355f757f3fSDimitry Andric 
26365f757f3fSDimitry Andric   // Everything else is considered legal.
26375f757f3fSDimitry Andric   return Op;
26385f757f3fSDimitry Andric }
26395f757f3fSDimitry Andric 
LowerFP_TO_INT(SDValue Op,SelectionDAG & DAG) const26405f757f3fSDimitry Andric SDValue NVPTXTargetLowering::LowerFP_TO_INT(SDValue Op,
26415f757f3fSDimitry Andric                                             SelectionDAG &DAG) const {
26425f757f3fSDimitry Andric   assert(STI.getSmVersion() < 90 || STI.getPTXVersion() < 78);
26435f757f3fSDimitry Andric 
26445f757f3fSDimitry Andric   if (Op.getOperand(0).getValueType() == MVT::bf16) {
26455f757f3fSDimitry Andric     SDLoc Loc(Op);
26465f757f3fSDimitry Andric     return DAG.getNode(
26475f757f3fSDimitry Andric         Op.getOpcode(), Loc, Op.getValueType(),
26485f757f3fSDimitry Andric         DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f32, Op.getOperand(0)));
26495f757f3fSDimitry Andric   }
26505f757f3fSDimitry Andric 
26515f757f3fSDimitry Andric   // Everything else is considered legal.
26525f757f3fSDimitry Andric   return Op;
26535f757f3fSDimitry Andric }
26545f757f3fSDimitry Andric 
LowerVectorArith(SDValue Op,SelectionDAG & DAG)26555f757f3fSDimitry Andric static SDValue LowerVectorArith(SDValue Op, SelectionDAG &DAG) {
26565f757f3fSDimitry Andric   SDLoc DL(Op);
26575f757f3fSDimitry Andric   if (Op.getValueType() != MVT::v2i16)
26585f757f3fSDimitry Andric     return Op;
26595f757f3fSDimitry Andric   EVT EltVT = Op.getValueType().getVectorElementType();
26605f757f3fSDimitry Andric   SmallVector<SDValue> VecElements;
26615f757f3fSDimitry Andric   for (int I = 0, E = Op.getValueType().getVectorNumElements(); I < E; I++) {
26625f757f3fSDimitry Andric     SmallVector<SDValue> ScalarArgs;
26635f757f3fSDimitry Andric     llvm::transform(Op->ops(), std::back_inserter(ScalarArgs),
26645f757f3fSDimitry Andric                     [&](const SDUse &O) {
26655f757f3fSDimitry Andric                       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
26665f757f3fSDimitry Andric                                          O.get(), DAG.getIntPtrConstant(I, DL));
26675f757f3fSDimitry Andric                     });
26685f757f3fSDimitry Andric     VecElements.push_back(DAG.getNode(Op.getOpcode(), DL, EltVT, ScalarArgs));
26695f757f3fSDimitry Andric   }
26705f757f3fSDimitry Andric   SDValue V =
26715f757f3fSDimitry Andric       DAG.getNode(ISD::BUILD_VECTOR, DL, Op.getValueType(), VecElements);
26725f757f3fSDimitry Andric   return V;
26735f757f3fSDimitry Andric }
26740b57cec5SDimitry Andric 
26750b57cec5SDimitry Andric SDValue
LowerOperation(SDValue Op,SelectionDAG & DAG) const26760b57cec5SDimitry Andric NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
26770b57cec5SDimitry Andric   switch (Op.getOpcode()) {
26780b57cec5SDimitry Andric   case ISD::RETURNADDR:
26790b57cec5SDimitry Andric     return SDValue();
26800b57cec5SDimitry Andric   case ISD::FRAMEADDR:
26810b57cec5SDimitry Andric     return SDValue();
26820b57cec5SDimitry Andric   case ISD::GlobalAddress:
26830b57cec5SDimitry Andric     return LowerGlobalAddress(Op, DAG);
26840b57cec5SDimitry Andric   case ISD::INTRINSIC_W_CHAIN:
26850b57cec5SDimitry Andric     return Op;
26860b57cec5SDimitry Andric   case ISD::BUILD_VECTOR:
26870b57cec5SDimitry Andric     return LowerBUILD_VECTOR(Op, DAG);
26880b57cec5SDimitry Andric   case ISD::EXTRACT_SUBVECTOR:
26890b57cec5SDimitry Andric     return Op;
26900b57cec5SDimitry Andric   case ISD::EXTRACT_VECTOR_ELT:
26910b57cec5SDimitry Andric     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
26925f757f3fSDimitry Andric   case ISD::INSERT_VECTOR_ELT:
26935f757f3fSDimitry Andric     return LowerINSERT_VECTOR_ELT(Op, DAG);
26945f757f3fSDimitry Andric   case ISD::VECTOR_SHUFFLE:
26955f757f3fSDimitry Andric     return LowerVECTOR_SHUFFLE(Op, DAG);
26960b57cec5SDimitry Andric   case ISD::CONCAT_VECTORS:
26970b57cec5SDimitry Andric     return LowerCONCAT_VECTORS(Op, DAG);
26980b57cec5SDimitry Andric   case ISD::STORE:
26990b57cec5SDimitry Andric     return LowerSTORE(Op, DAG);
27000b57cec5SDimitry Andric   case ISD::LOAD:
27010b57cec5SDimitry Andric     return LowerLOAD(Op, DAG);
27020b57cec5SDimitry Andric   case ISD::SHL_PARTS:
27030b57cec5SDimitry Andric     return LowerShiftLeftParts(Op, DAG);
27040b57cec5SDimitry Andric   case ISD::SRA_PARTS:
27050b57cec5SDimitry Andric   case ISD::SRL_PARTS:
27060b57cec5SDimitry Andric     return LowerShiftRightParts(Op, DAG);
27070b57cec5SDimitry Andric   case ISD::SELECT:
27080b57cec5SDimitry Andric     return LowerSelect(Op, DAG);
27090b57cec5SDimitry Andric   case ISD::FROUND:
27100b57cec5SDimitry Andric     return LowerFROUND(Op, DAG);
27115f757f3fSDimitry Andric   case ISD::SINT_TO_FP:
27125f757f3fSDimitry Andric   case ISD::UINT_TO_FP:
27135f757f3fSDimitry Andric     return LowerINT_TO_FP(Op, DAG);
27145f757f3fSDimitry Andric   case ISD::FP_TO_SINT:
27155f757f3fSDimitry Andric   case ISD::FP_TO_UINT:
27165f757f3fSDimitry Andric     return LowerFP_TO_INT(Op, DAG);
2717bdd1243dSDimitry Andric   case ISD::VAARG:
2718bdd1243dSDimitry Andric     return LowerVAARG(Op, DAG);
2719bdd1243dSDimitry Andric   case ISD::VASTART:
2720bdd1243dSDimitry Andric     return LowerVASTART(Op, DAG);
27215f757f3fSDimitry Andric   case ISD::ABS:
27225f757f3fSDimitry Andric   case ISD::SMIN:
27235f757f3fSDimitry Andric   case ISD::SMAX:
27245f757f3fSDimitry Andric   case ISD::UMIN:
27255f757f3fSDimitry Andric   case ISD::UMAX:
27265f757f3fSDimitry Andric   case ISD::ADD:
27275f757f3fSDimitry Andric   case ISD::SUB:
27285f757f3fSDimitry Andric   case ISD::MUL:
27295f757f3fSDimitry Andric   case ISD::SHL:
27305f757f3fSDimitry Andric   case ISD::SREM:
27315f757f3fSDimitry Andric   case ISD::UREM:
27325f757f3fSDimitry Andric     return LowerVectorArith(Op, DAG);
27335f757f3fSDimitry Andric   case ISD::DYNAMIC_STACKALLOC:
27345f757f3fSDimitry Andric     return LowerDYNAMIC_STACKALLOC(Op, DAG);
27350b57cec5SDimitry Andric   default:
27360b57cec5SDimitry Andric     llvm_unreachable("Custom lowering not defined for operation");
27370b57cec5SDimitry Andric   }
27380b57cec5SDimitry Andric }
27390b57cec5SDimitry Andric 
2740bdd1243dSDimitry Andric // This function is almost a copy of SelectionDAG::expandVAArg().
2741bdd1243dSDimitry Andric // The only diff is that this one produces loads from local address space.
LowerVAARG(SDValue Op,SelectionDAG & DAG) const2742bdd1243dSDimitry Andric SDValue NVPTXTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
2743bdd1243dSDimitry Andric   const TargetLowering *TLI = STI.getTargetLowering();
2744bdd1243dSDimitry Andric   SDLoc DL(Op);
2745bdd1243dSDimitry Andric 
2746bdd1243dSDimitry Andric   SDNode *Node = Op.getNode();
2747bdd1243dSDimitry Andric   const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2748bdd1243dSDimitry Andric   EVT VT = Node->getValueType(0);
2749bdd1243dSDimitry Andric   auto *Ty = VT.getTypeForEVT(*DAG.getContext());
2750bdd1243dSDimitry Andric   SDValue Tmp1 = Node->getOperand(0);
2751bdd1243dSDimitry Andric   SDValue Tmp2 = Node->getOperand(1);
2752bdd1243dSDimitry Andric   const MaybeAlign MA(Node->getConstantOperandVal(3));
2753bdd1243dSDimitry Andric 
2754bdd1243dSDimitry Andric   SDValue VAListLoad = DAG.getLoad(TLI->getPointerTy(DAG.getDataLayout()), DL,
2755bdd1243dSDimitry Andric                                    Tmp1, Tmp2, MachinePointerInfo(V));
2756bdd1243dSDimitry Andric   SDValue VAList = VAListLoad;
2757bdd1243dSDimitry Andric 
2758bdd1243dSDimitry Andric   if (MA && *MA > TLI->getMinStackArgumentAlignment()) {
2759bdd1243dSDimitry Andric     VAList = DAG.getNode(
2760bdd1243dSDimitry Andric         ISD::ADD, DL, VAList.getValueType(), VAList,
2761bdd1243dSDimitry Andric         DAG.getConstant(MA->value() - 1, DL, VAList.getValueType()));
2762bdd1243dSDimitry Andric 
2763bdd1243dSDimitry Andric     VAList = DAG.getNode(
2764bdd1243dSDimitry Andric         ISD::AND, DL, VAList.getValueType(), VAList,
2765bdd1243dSDimitry Andric         DAG.getConstant(-(int64_t)MA->value(), DL, VAList.getValueType()));
2766bdd1243dSDimitry Andric   }
2767bdd1243dSDimitry Andric 
2768bdd1243dSDimitry Andric   // Increment the pointer, VAList, to the next vaarg
2769bdd1243dSDimitry Andric   Tmp1 = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList,
2770bdd1243dSDimitry Andric                      DAG.getConstant(DAG.getDataLayout().getTypeAllocSize(Ty),
2771bdd1243dSDimitry Andric                                      DL, VAList.getValueType()));
2772bdd1243dSDimitry Andric 
2773bdd1243dSDimitry Andric   // Store the incremented VAList to the legalized pointer
2774bdd1243dSDimitry Andric   Tmp1 = DAG.getStore(VAListLoad.getValue(1), DL, Tmp1, Tmp2,
2775bdd1243dSDimitry Andric                       MachinePointerInfo(V));
2776bdd1243dSDimitry Andric 
2777bdd1243dSDimitry Andric   const Value *SrcV =
2778bdd1243dSDimitry Andric       Constant::getNullValue(PointerType::get(Ty, ADDRESS_SPACE_LOCAL));
2779bdd1243dSDimitry Andric 
2780bdd1243dSDimitry Andric   // Load the actual argument out of the pointer VAList
2781bdd1243dSDimitry Andric   return DAG.getLoad(VT, DL, Tmp1, VAList, MachinePointerInfo(SrcV));
2782bdd1243dSDimitry Andric }
2783bdd1243dSDimitry Andric 
LowerVASTART(SDValue Op,SelectionDAG & DAG) const2784bdd1243dSDimitry Andric SDValue NVPTXTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
2785bdd1243dSDimitry Andric   const TargetLowering *TLI = STI.getTargetLowering();
2786bdd1243dSDimitry Andric   SDLoc DL(Op);
2787bdd1243dSDimitry Andric   EVT PtrVT = TLI->getPointerTy(DAG.getDataLayout());
2788bdd1243dSDimitry Andric 
2789bdd1243dSDimitry Andric   // Store the address of unsized array <function>_vararg[] in the ap object.
2790bdd1243dSDimitry Andric   SDValue Arg = getParamSymbol(DAG, /* vararg */ -1, PtrVT);
2791bdd1243dSDimitry Andric   SDValue VAReg = DAG.getNode(NVPTXISD::Wrapper, DL, PtrVT, Arg);
2792bdd1243dSDimitry Andric 
2793bdd1243dSDimitry Andric   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2794bdd1243dSDimitry Andric   return DAG.getStore(Op.getOperand(0), DL, VAReg, Op.getOperand(1),
2795bdd1243dSDimitry Andric                       MachinePointerInfo(SV));
2796bdd1243dSDimitry Andric }
2797bdd1243dSDimitry Andric 
LowerSelect(SDValue Op,SelectionDAG & DAG) const27980b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
27990b57cec5SDimitry Andric   SDValue Op0 = Op->getOperand(0);
28000b57cec5SDimitry Andric   SDValue Op1 = Op->getOperand(1);
28010b57cec5SDimitry Andric   SDValue Op2 = Op->getOperand(2);
28020b57cec5SDimitry Andric   SDLoc DL(Op.getNode());
28030b57cec5SDimitry Andric 
28040b57cec5SDimitry Andric   assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
28050b57cec5SDimitry Andric 
28060b57cec5SDimitry Andric   Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
28070b57cec5SDimitry Andric   Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
28080b57cec5SDimitry Andric   SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
28090b57cec5SDimitry Andric   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);
28100b57cec5SDimitry Andric 
28110b57cec5SDimitry Andric   return Trunc;
28120b57cec5SDimitry Andric }
28130b57cec5SDimitry Andric 
LowerLOAD(SDValue Op,SelectionDAG & DAG) const28140b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
28150b57cec5SDimitry Andric   if (Op.getValueType() == MVT::i1)
28160b57cec5SDimitry Andric     return LowerLOADi1(Op, DAG);
28170b57cec5SDimitry Andric 
28185f757f3fSDimitry Andric   // v2f16/v2bf16/v2i16/v4i8 are legal, so we can't rely on legalizer to handle
28195f757f3fSDimitry Andric   // unaligned loads and have to handle it here.
28205f757f3fSDimitry Andric   EVT VT = Op.getValueType();
28215f757f3fSDimitry Andric   if (Isv2x16VT(VT) || VT == MVT::v4i8) {
28220b57cec5SDimitry Andric     LoadSDNode *Load = cast<LoadSDNode>(Op);
28230b57cec5SDimitry Andric     EVT MemVT = Load->getMemoryVT();
28248bcb0991SDimitry Andric     if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
28258bcb0991SDimitry Andric                                         MemVT, *Load->getMemOperand())) {
28260b57cec5SDimitry Andric       SDValue Ops[2];
28270b57cec5SDimitry Andric       std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
28280b57cec5SDimitry Andric       return DAG.getMergeValues(Ops, SDLoc(Op));
28290b57cec5SDimitry Andric     }
28300b57cec5SDimitry Andric   }
28310b57cec5SDimitry Andric 
28320b57cec5SDimitry Andric   return SDValue();
28330b57cec5SDimitry Andric }
28340b57cec5SDimitry Andric 
28350b57cec5SDimitry Andric // v = ld i1* addr
28360b57cec5SDimitry Andric //   =>
28370b57cec5SDimitry Andric // v1 = ld i8* addr (-> i16)
28380b57cec5SDimitry Andric // v = trunc i16 to i1
LowerLOADi1(SDValue Op,SelectionDAG & DAG) const28390b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
28400b57cec5SDimitry Andric   SDNode *Node = Op.getNode();
28410b57cec5SDimitry Andric   LoadSDNode *LD = cast<LoadSDNode>(Node);
28420b57cec5SDimitry Andric   SDLoc dl(Node);
28430b57cec5SDimitry Andric   assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
28440b57cec5SDimitry Andric   assert(Node->getValueType(0) == MVT::i1 &&
28450b57cec5SDimitry Andric          "Custom lowering for i1 load only");
28460b57cec5SDimitry Andric   SDValue newLD = DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
284781ad6265SDimitry Andric                               LD->getPointerInfo(), LD->getAlign(),
28480b57cec5SDimitry Andric                               LD->getMemOperand()->getFlags());
28490b57cec5SDimitry Andric   SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
28500b57cec5SDimitry Andric   // The legalizer (the caller) is expecting two values from the legalized
28510b57cec5SDimitry Andric   // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
28520b57cec5SDimitry Andric   // in LegalizeDAG.cpp which also uses MergeValues.
28530b57cec5SDimitry Andric   SDValue Ops[] = { result, LD->getChain() };
28540b57cec5SDimitry Andric   return DAG.getMergeValues(Ops, dl);
28550b57cec5SDimitry Andric }
28560b57cec5SDimitry Andric 
LowerSTORE(SDValue Op,SelectionDAG & DAG) const28570b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
28580b57cec5SDimitry Andric   StoreSDNode *Store = cast<StoreSDNode>(Op);
28590b57cec5SDimitry Andric   EVT VT = Store->getMemoryVT();
28600b57cec5SDimitry Andric 
28610b57cec5SDimitry Andric   if (VT == MVT::i1)
28620b57cec5SDimitry Andric     return LowerSTOREi1(Op, DAG);
28630b57cec5SDimitry Andric 
28640b57cec5SDimitry Andric   // v2f16 is legal, so we can't rely on legalizer to handle unaligned
28650b57cec5SDimitry Andric   // stores and have to handle it here.
28665f757f3fSDimitry Andric   if ((Isv2x16VT(VT) || VT == MVT::v4i8) &&
28678bcb0991SDimitry Andric       !allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
28688bcb0991SDimitry Andric                                       VT, *Store->getMemOperand()))
28690b57cec5SDimitry Andric     return expandUnalignedStore(Store, DAG);
28700b57cec5SDimitry Andric 
28715f757f3fSDimitry Andric   // v2f16, v2bf16 and v2i16 don't need special handling.
28725f757f3fSDimitry Andric   if (Isv2x16VT(VT) || VT == MVT::v4i8)
287306c3fb27SDimitry Andric     return SDValue();
287406c3fb27SDimitry Andric 
28750b57cec5SDimitry Andric   if (VT.isVector())
28760b57cec5SDimitry Andric     return LowerSTOREVector(Op, DAG);
28770b57cec5SDimitry Andric 
28780b57cec5SDimitry Andric   return SDValue();
28790b57cec5SDimitry Andric }
28800b57cec5SDimitry Andric 
28810b57cec5SDimitry Andric SDValue
LowerSTOREVector(SDValue Op,SelectionDAG & DAG) const28820b57cec5SDimitry Andric NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
28830b57cec5SDimitry Andric   SDNode *N = Op.getNode();
28840b57cec5SDimitry Andric   SDValue Val = N->getOperand(1);
28850b57cec5SDimitry Andric   SDLoc DL(N);
28860b57cec5SDimitry Andric   EVT ValVT = Val.getValueType();
28870b57cec5SDimitry Andric 
28880b57cec5SDimitry Andric   if (ValVT.isVector()) {
28890b57cec5SDimitry Andric     // We only handle "native" vector sizes for now, e.g. <4 x double> is not
28900b57cec5SDimitry Andric     // legal.  We can (and should) split that into 2 stores of <2 x double> here
28910b57cec5SDimitry Andric     // but I'm leaving that as a TODO for now.
28920b57cec5SDimitry Andric     if (!ValVT.isSimple())
28930b57cec5SDimitry Andric       return SDValue();
28940b57cec5SDimitry Andric     switch (ValVT.getSimpleVT().SimpleTy) {
28950b57cec5SDimitry Andric     default:
28960b57cec5SDimitry Andric       return SDValue();
28970b57cec5SDimitry Andric     case MVT::v2i8:
28980b57cec5SDimitry Andric     case MVT::v2i16:
28990b57cec5SDimitry Andric     case MVT::v2i32:
29000b57cec5SDimitry Andric     case MVT::v2i64:
29010b57cec5SDimitry Andric     case MVT::v2f16:
2902bdd1243dSDimitry Andric     case MVT::v2bf16:
29030b57cec5SDimitry Andric     case MVT::v2f32:
29040b57cec5SDimitry Andric     case MVT::v2f64:
29050b57cec5SDimitry Andric     case MVT::v4i8:
29060b57cec5SDimitry Andric     case MVT::v4i16:
29070b57cec5SDimitry Andric     case MVT::v4i32:
29080b57cec5SDimitry Andric     case MVT::v4f16:
2909bdd1243dSDimitry Andric     case MVT::v4bf16:
29100b57cec5SDimitry Andric     case MVT::v4f32:
29110b57cec5SDimitry Andric     case MVT::v8f16: // <4 x f16x2>
2912bdd1243dSDimitry Andric     case MVT::v8bf16: // <4 x bf16x2>
29135f757f3fSDimitry Andric     case MVT::v8i16:  // <4 x i16x2>
29140b57cec5SDimitry Andric       // This is a "native" vector type
29150b57cec5SDimitry Andric       break;
29160b57cec5SDimitry Andric     }
29170b57cec5SDimitry Andric 
29180b57cec5SDimitry Andric     MemSDNode *MemSD = cast<MemSDNode>(N);
29190b57cec5SDimitry Andric     const DataLayout &TD = DAG.getDataLayout();
29200b57cec5SDimitry Andric 
29215ffd83dbSDimitry Andric     Align Alignment = MemSD->getAlign();
29225ffd83dbSDimitry Andric     Align PrefAlign =
29235ffd83dbSDimitry Andric         TD.getPrefTypeAlign(ValVT.getTypeForEVT(*DAG.getContext()));
29245ffd83dbSDimitry Andric     if (Alignment < PrefAlign) {
29250b57cec5SDimitry Andric       // This store is not sufficiently aligned, so bail out and let this vector
29260b57cec5SDimitry Andric       // store be scalarized.  Note that we may still be able to emit smaller
29270b57cec5SDimitry Andric       // vector stores.  For example, if we are storing a <4 x float> with an
29280b57cec5SDimitry Andric       // alignment of 8, this check will fail but the legalizer will try again
29290b57cec5SDimitry Andric       // with 2 x <2 x float>, which will succeed with an alignment of 8.
29300b57cec5SDimitry Andric       return SDValue();
29310b57cec5SDimitry Andric     }
29320b57cec5SDimitry Andric 
29330b57cec5SDimitry Andric     unsigned Opcode = 0;
29340b57cec5SDimitry Andric     EVT EltVT = ValVT.getVectorElementType();
29350b57cec5SDimitry Andric     unsigned NumElts = ValVT.getVectorNumElements();
29360b57cec5SDimitry Andric 
29370b57cec5SDimitry Andric     // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
29380b57cec5SDimitry Andric     // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
29390b57cec5SDimitry Andric     // stored type to i16 and propagate the "real" type as the memory type.
29400b57cec5SDimitry Andric     bool NeedExt = false;
29410b57cec5SDimitry Andric     if (EltVT.getSizeInBits() < 16)
29420b57cec5SDimitry Andric       NeedExt = true;
29430b57cec5SDimitry Andric 
29440b57cec5SDimitry Andric     bool StoreF16x2 = false;
29450b57cec5SDimitry Andric     switch (NumElts) {
29460b57cec5SDimitry Andric     default:
29470b57cec5SDimitry Andric       return SDValue();
29480b57cec5SDimitry Andric     case 2:
29490b57cec5SDimitry Andric       Opcode = NVPTXISD::StoreV2;
29500b57cec5SDimitry Andric       break;
29510b57cec5SDimitry Andric     case 4:
29520b57cec5SDimitry Andric       Opcode = NVPTXISD::StoreV4;
29530b57cec5SDimitry Andric       break;
29540b57cec5SDimitry Andric     case 8:
29550b57cec5SDimitry Andric       // v8f16 is a special case. PTX doesn't have st.v8.f16
29560b57cec5SDimitry Andric       // instruction. Instead, we split the vector into v2f16 chunks and
29570b57cec5SDimitry Andric       // store them with st.v4.b32.
29585f757f3fSDimitry Andric       assert(Is16bitsType(EltVT.getSimpleVT()) && "Wrong type for the vector.");
29590b57cec5SDimitry Andric       Opcode = NVPTXISD::StoreV4;
29600b57cec5SDimitry Andric       StoreF16x2 = true;
29610b57cec5SDimitry Andric       break;
29620b57cec5SDimitry Andric     }
29630b57cec5SDimitry Andric 
29640b57cec5SDimitry Andric     SmallVector<SDValue, 8> Ops;
29650b57cec5SDimitry Andric 
29660b57cec5SDimitry Andric     // First is the chain
29670b57cec5SDimitry Andric     Ops.push_back(N->getOperand(0));
29680b57cec5SDimitry Andric 
29690b57cec5SDimitry Andric     if (StoreF16x2) {
29700b57cec5SDimitry Andric       // Combine f16,f16 -> v2f16
29710b57cec5SDimitry Andric       NumElts /= 2;
29720b57cec5SDimitry Andric       for (unsigned i = 0; i < NumElts; ++i) {
297306c3fb27SDimitry Andric         SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
29740b57cec5SDimitry Andric                                  DAG.getIntPtrConstant(i * 2, DL));
297506c3fb27SDimitry Andric         SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
29760b57cec5SDimitry Andric                                  DAG.getIntPtrConstant(i * 2 + 1, DL));
297706c3fb27SDimitry Andric         EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, 2);
297806c3fb27SDimitry Andric         SDValue V2 = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, E0, E1);
29790b57cec5SDimitry Andric         Ops.push_back(V2);
29800b57cec5SDimitry Andric       }
29810b57cec5SDimitry Andric     } else {
29820b57cec5SDimitry Andric       // Then the split values
29830b57cec5SDimitry Andric       for (unsigned i = 0; i < NumElts; ++i) {
29840b57cec5SDimitry Andric         SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
29850b57cec5SDimitry Andric                                      DAG.getIntPtrConstant(i, DL));
29860b57cec5SDimitry Andric         if (NeedExt)
29870b57cec5SDimitry Andric           ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
29880b57cec5SDimitry Andric         Ops.push_back(ExtVal);
29890b57cec5SDimitry Andric       }
29900b57cec5SDimitry Andric     }
29910b57cec5SDimitry Andric 
29920b57cec5SDimitry Andric     // Then any remaining arguments
29930b57cec5SDimitry Andric     Ops.append(N->op_begin() + 2, N->op_end());
29940b57cec5SDimitry Andric 
29950b57cec5SDimitry Andric     SDValue NewSt =
29960b57cec5SDimitry Andric         DAG.getMemIntrinsicNode(Opcode, DL, DAG.getVTList(MVT::Other), Ops,
29970b57cec5SDimitry Andric                                 MemSD->getMemoryVT(), MemSD->getMemOperand());
29980b57cec5SDimitry Andric 
29990b57cec5SDimitry Andric     // return DCI.CombineTo(N, NewSt, true);
30000b57cec5SDimitry Andric     return NewSt;
30010b57cec5SDimitry Andric   }
30020b57cec5SDimitry Andric 
30030b57cec5SDimitry Andric   return SDValue();
30040b57cec5SDimitry Andric }
30050b57cec5SDimitry Andric 
30060b57cec5SDimitry Andric // st i1 v, addr
30070b57cec5SDimitry Andric //    =>
30080b57cec5SDimitry Andric // v1 = zxt v to i16
30090b57cec5SDimitry Andric // st.u8 i16, addr
LowerSTOREi1(SDValue Op,SelectionDAG & DAG) const30100b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
30110b57cec5SDimitry Andric   SDNode *Node = Op.getNode();
30120b57cec5SDimitry Andric   SDLoc dl(Node);
30130b57cec5SDimitry Andric   StoreSDNode *ST = cast<StoreSDNode>(Node);
30140b57cec5SDimitry Andric   SDValue Tmp1 = ST->getChain();
30150b57cec5SDimitry Andric   SDValue Tmp2 = ST->getBasePtr();
30160b57cec5SDimitry Andric   SDValue Tmp3 = ST->getValue();
30170b57cec5SDimitry Andric   assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
30180b57cec5SDimitry Andric   Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3);
30190b57cec5SDimitry Andric   SDValue Result =
30200b57cec5SDimitry Andric       DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8,
302181ad6265SDimitry Andric                         ST->getAlign(), ST->getMemOperand()->getFlags());
30220b57cec5SDimitry Andric   return Result;
30230b57cec5SDimitry Andric }
30240b57cec5SDimitry Andric 
3025bdd1243dSDimitry Andric // This creates target external symbol for a function parameter.
3026bdd1243dSDimitry Andric // Name of the symbol is composed from its index and the function name.
3027bdd1243dSDimitry Andric // Negative index corresponds to special parameter (unsized array) used for
3028bdd1243dSDimitry Andric // passing variable arguments.
getParamSymbol(SelectionDAG & DAG,int idx,EVT v) const3029bdd1243dSDimitry Andric SDValue NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx,
3030bdd1243dSDimitry Andric                                             EVT v) const {
303106c3fb27SDimitry Andric   StringRef SavedStr = nvTM->getStrPool().save(
303206c3fb27SDimitry Andric       getParamName(&DAG.getMachineFunction().getFunction(), idx));
3033bdd1243dSDimitry Andric   return DAG.getTargetExternalSymbol(SavedStr.data(), v);
30340b57cec5SDimitry Andric }
30350b57cec5SDimitry Andric 
LowerFormalArguments(SDValue Chain,CallingConv::ID CallConv,bool isVarArg,const SmallVectorImpl<ISD::InputArg> & Ins,const SDLoc & dl,SelectionDAG & DAG,SmallVectorImpl<SDValue> & InVals) const30360b57cec5SDimitry Andric SDValue NVPTXTargetLowering::LowerFormalArguments(
30370b57cec5SDimitry Andric     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
30380b57cec5SDimitry Andric     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
30390b57cec5SDimitry Andric     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
30400b57cec5SDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
30410b57cec5SDimitry Andric   const DataLayout &DL = DAG.getDataLayout();
30420b57cec5SDimitry Andric   auto PtrVT = getPointerTy(DAG.getDataLayout());
30430b57cec5SDimitry Andric 
30440b57cec5SDimitry Andric   const Function *F = &MF.getFunction();
30450b57cec5SDimitry Andric   const AttributeList &PAL = F->getAttributes();
30460b57cec5SDimitry Andric   const TargetLowering *TLI = STI.getTargetLowering();
30470b57cec5SDimitry Andric 
30480b57cec5SDimitry Andric   SDValue Root = DAG.getRoot();
30490b57cec5SDimitry Andric   std::vector<SDValue> OutChains;
30500b57cec5SDimitry Andric 
30510b57cec5SDimitry Andric   bool isABI = (STI.getSmVersion() >= 20);
30520b57cec5SDimitry Andric   assert(isABI && "Non-ABI compilation is not supported");
30530b57cec5SDimitry Andric   if (!isABI)
30540b57cec5SDimitry Andric     return Chain;
30550b57cec5SDimitry Andric 
30560b57cec5SDimitry Andric   std::vector<Type *> argTypes;
30570b57cec5SDimitry Andric   std::vector<const Argument *> theArgs;
30580b57cec5SDimitry Andric   for (const Argument &I : F->args()) {
30590b57cec5SDimitry Andric     theArgs.push_back(&I);
30600b57cec5SDimitry Andric     argTypes.push_back(I.getType());
30610b57cec5SDimitry Andric   }
30620b57cec5SDimitry Andric   // argTypes.size() (or theArgs.size()) and Ins.size() need not match.
30630b57cec5SDimitry Andric   // Ins.size() will be larger
30640b57cec5SDimitry Andric   //   * if there is an aggregate argument with multiple fields (each field
30650b57cec5SDimitry Andric   //     showing up separately in Ins)
30660b57cec5SDimitry Andric   //   * if there is a vector argument with more than typical vector-length
30670b57cec5SDimitry Andric   //     elements (generally if more than 4) where each vector element is
30680b57cec5SDimitry Andric   //     individually present in Ins.
30690b57cec5SDimitry Andric   // So a different index should be used for indexing into Ins.
30700b57cec5SDimitry Andric   // See similar issue in LowerCall.
30710b57cec5SDimitry Andric   unsigned InsIdx = 0;
30720b57cec5SDimitry Andric 
30730b57cec5SDimitry Andric   int idx = 0;
30740b57cec5SDimitry Andric   for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
30750b57cec5SDimitry Andric     Type *Ty = argTypes[i];
30760b57cec5SDimitry Andric 
30770b57cec5SDimitry Andric     if (theArgs[i]->use_empty()) {
30780b57cec5SDimitry Andric       // argument is dead
307906c3fb27SDimitry Andric       if (IsTypePassedAsArray(Ty) && !Ty->isVectorTy()) {
30800b57cec5SDimitry Andric         SmallVector<EVT, 16> vtparts;
30810b57cec5SDimitry Andric 
30820b57cec5SDimitry Andric         ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts);
308306c3fb27SDimitry Andric         if (vtparts.empty())
308406c3fb27SDimitry Andric           report_fatal_error("Empty parameter types are not supported");
308506c3fb27SDimitry Andric 
30860b57cec5SDimitry Andric         for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
30870b57cec5SDimitry Andric              ++parti) {
30880b57cec5SDimitry Andric           InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
30890b57cec5SDimitry Andric           ++InsIdx;
30900b57cec5SDimitry Andric         }
30910b57cec5SDimitry Andric         if (vtparts.size() > 0)
30920b57cec5SDimitry Andric           --InsIdx;
30930b57cec5SDimitry Andric         continue;
30940b57cec5SDimitry Andric       }
30950b57cec5SDimitry Andric       if (Ty->isVectorTy()) {
30960b57cec5SDimitry Andric         EVT ObjectVT = getValueType(DL, Ty);
30970b57cec5SDimitry Andric         unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
30980b57cec5SDimitry Andric         for (unsigned parti = 0; parti < NumRegs; ++parti) {
30990b57cec5SDimitry Andric           InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
31000b57cec5SDimitry Andric           ++InsIdx;
31010b57cec5SDimitry Andric         }
31020b57cec5SDimitry Andric         if (NumRegs > 0)
31030b57cec5SDimitry Andric           --InsIdx;
31040b57cec5SDimitry Andric         continue;
31050b57cec5SDimitry Andric       }
31060b57cec5SDimitry Andric       InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
31070b57cec5SDimitry Andric       continue;
31080b57cec5SDimitry Andric     }
31090b57cec5SDimitry Andric 
31100b57cec5SDimitry Andric     // In the following cases, assign a node order of "idx+1"
31110b57cec5SDimitry Andric     // to newly created nodes. The SDNodes for params have to
31120b57cec5SDimitry Andric     // appear in the same order as their order of appearance
31130b57cec5SDimitry Andric     // in the original function. "idx+1" holds that order.
3114349cc55cSDimitry Andric     if (!PAL.hasParamAttr(i, Attribute::ByVal)) {
31150b57cec5SDimitry Andric       bool aggregateIsPacked = false;
31160b57cec5SDimitry Andric       if (StructType *STy = dyn_cast<StructType>(Ty))
31170b57cec5SDimitry Andric         aggregateIsPacked = STy->isPacked();
31180b57cec5SDimitry Andric 
31190b57cec5SDimitry Andric       SmallVector<EVT, 16> VTs;
31200b57cec5SDimitry Andric       SmallVector<uint64_t, 16> Offsets;
31210b57cec5SDimitry Andric       ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0);
312206c3fb27SDimitry Andric       if (VTs.empty())
312306c3fb27SDimitry Andric         report_fatal_error("Empty parameter types are not supported");
312406c3fb27SDimitry Andric 
31250b57cec5SDimitry Andric       auto VectorInfo =
31265ffd83dbSDimitry Andric           VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlign(Ty));
31270b57cec5SDimitry Andric 
31280b57cec5SDimitry Andric       SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
31290b57cec5SDimitry Andric       int VecIdx = -1; // Index of the first element of the current vector.
31300b57cec5SDimitry Andric       for (unsigned parti = 0, parte = VTs.size(); parti != parte; ++parti) {
31310b57cec5SDimitry Andric         if (VectorInfo[parti] & PVF_FIRST) {
31320b57cec5SDimitry Andric           assert(VecIdx == -1 && "Orphaned vector.");
31330b57cec5SDimitry Andric           VecIdx = parti;
31340b57cec5SDimitry Andric         }
31350b57cec5SDimitry Andric 
31360b57cec5SDimitry Andric         // That's the last element of this store op.
31370b57cec5SDimitry Andric         if (VectorInfo[parti] & PVF_LAST) {
31380b57cec5SDimitry Andric           unsigned NumElts = parti - VecIdx + 1;
31390b57cec5SDimitry Andric           EVT EltVT = VTs[parti];
31400b57cec5SDimitry Andric           // i1 is loaded/stored as i8.
31410b57cec5SDimitry Andric           EVT LoadVT = EltVT;
31420b57cec5SDimitry Andric           if (EltVT == MVT::i1)
31430b57cec5SDimitry Andric             LoadVT = MVT::i8;
31445f757f3fSDimitry Andric           else if (Isv2x16VT(EltVT) || EltVT == MVT::v4i8)
31450b57cec5SDimitry Andric             // getLoad needs a vector type, but it can't handle
314606c3fb27SDimitry Andric             // vectors which contain v2f16 or v2bf16 elements. So we must load
31470b57cec5SDimitry Andric             // using i32 here and then bitcast back.
31480b57cec5SDimitry Andric             LoadVT = MVT::i32;
31490b57cec5SDimitry Andric 
31500b57cec5SDimitry Andric           EVT VecVT = EVT::getVectorVT(F->getContext(), LoadVT, NumElts);
31510b57cec5SDimitry Andric           SDValue VecAddr =
31520b57cec5SDimitry Andric               DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
31530b57cec5SDimitry Andric                           DAG.getConstant(Offsets[VecIdx], dl, PtrVT));
31540b57cec5SDimitry Andric           Value *srcValue = Constant::getNullValue(PointerType::get(
31550b57cec5SDimitry Andric               EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
3156bdd1243dSDimitry Andric           SDValue P = DAG.getLoad(VecVT, dl, Root, VecAddr,
3157bdd1243dSDimitry Andric                                   MachinePointerInfo(srcValue),
3158bdd1243dSDimitry Andric                                   MaybeAlign(aggregateIsPacked ? 1 : 0),
31590b57cec5SDimitry Andric                                   MachineMemOperand::MODereferenceable |
31600b57cec5SDimitry Andric                                       MachineMemOperand::MOInvariant);
31610b57cec5SDimitry Andric           if (P.getNode())
31620b57cec5SDimitry Andric             P.getNode()->setIROrder(idx + 1);
31630b57cec5SDimitry Andric           for (unsigned j = 0; j < NumElts; ++j) {
31640b57cec5SDimitry Andric             SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LoadVT, P,
31650b57cec5SDimitry Andric                                       DAG.getIntPtrConstant(j, dl));
31660b57cec5SDimitry Andric             // We've loaded i1 as an i8 and now must truncate it back to i1
31670b57cec5SDimitry Andric             if (EltVT == MVT::i1)
31680b57cec5SDimitry Andric               Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Elt);
31690b57cec5SDimitry Andric             // v2f16 was loaded as an i32. Now we must bitcast it back.
31705f757f3fSDimitry Andric             else if (EltVT != LoadVT)
317106c3fb27SDimitry Andric               Elt = DAG.getNode(ISD::BITCAST, dl, EltVT, Elt);
3172fcaf7f86SDimitry Andric 
3173fcaf7f86SDimitry Andric             // If a promoted integer type is used, truncate down to the original
3174fcaf7f86SDimitry Andric             MVT PromotedVT;
3175fcaf7f86SDimitry Andric             if (PromoteScalarIntegerPTX(EltVT, &PromotedVT)) {
3176fcaf7f86SDimitry Andric               Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
3177fcaf7f86SDimitry Andric             }
3178fcaf7f86SDimitry Andric 
31790b57cec5SDimitry Andric             // Extend the element if necessary (e.g. an i8 is loaded
31800b57cec5SDimitry Andric             // into an i16 register)
31810b57cec5SDimitry Andric             if (Ins[InsIdx].VT.isInteger() &&
3182e8d8bef9SDimitry Andric                 Ins[InsIdx].VT.getFixedSizeInBits() >
3183e8d8bef9SDimitry Andric                     LoadVT.getFixedSizeInBits()) {
31840b57cec5SDimitry Andric               unsigned Extend = Ins[InsIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
31850b57cec5SDimitry Andric                                                            : ISD::ZERO_EXTEND;
31860b57cec5SDimitry Andric               Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt);
31870b57cec5SDimitry Andric             }
31880b57cec5SDimitry Andric             InVals.push_back(Elt);
31890b57cec5SDimitry Andric           }
31900b57cec5SDimitry Andric 
31910b57cec5SDimitry Andric           // Reset vector tracking state.
31920b57cec5SDimitry Andric           VecIdx = -1;
31930b57cec5SDimitry Andric         }
31940b57cec5SDimitry Andric         ++InsIdx;
31950b57cec5SDimitry Andric       }
31960b57cec5SDimitry Andric       if (VTs.size() > 0)
31970b57cec5SDimitry Andric         --InsIdx;
31980b57cec5SDimitry Andric       continue;
31990b57cec5SDimitry Andric     }
32000b57cec5SDimitry Andric 
32010b57cec5SDimitry Andric     // Param has ByVal attribute
32020b57cec5SDimitry Andric     // Return MoveParam(param symbol).
32030b57cec5SDimitry Andric     // Ideally, the param symbol can be returned directly,
32040b57cec5SDimitry Andric     // but when SDNode builder decides to use it in a CopyToReg(),
32050b57cec5SDimitry Andric     // machine instruction fails because TargetExternalSymbol
32060b57cec5SDimitry Andric     // (not lowered) is target dependent, and CopyToReg assumes
32070b57cec5SDimitry Andric     // the source is lowered.
32080b57cec5SDimitry Andric     EVT ObjectVT = getValueType(DL, Ty);
32090b57cec5SDimitry Andric     assert(ObjectVT == Ins[InsIdx].VT &&
32100b57cec5SDimitry Andric            "Ins type did not match function type");
32110b57cec5SDimitry Andric     SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
32120b57cec5SDimitry Andric     SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
32130b57cec5SDimitry Andric     if (p.getNode())
32140b57cec5SDimitry Andric       p.getNode()->setIROrder(idx + 1);
32150b57cec5SDimitry Andric     InVals.push_back(p);
32160b57cec5SDimitry Andric   }
32170b57cec5SDimitry Andric 
32180b57cec5SDimitry Andric   if (!OutChains.empty())
32190b57cec5SDimitry Andric     DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));
32200b57cec5SDimitry Andric 
32210b57cec5SDimitry Andric   return Chain;
32220b57cec5SDimitry Andric }
32230b57cec5SDimitry Andric 
32240b57cec5SDimitry Andric SDValue
LowerReturn(SDValue Chain,CallingConv::ID CallConv,bool isVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,const SmallVectorImpl<SDValue> & OutVals,const SDLoc & dl,SelectionDAG & DAG) const32250b57cec5SDimitry Andric NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
32260b57cec5SDimitry Andric                                  bool isVarArg,
32270b57cec5SDimitry Andric                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
32280b57cec5SDimitry Andric                                  const SmallVectorImpl<SDValue> &OutVals,
32290b57cec5SDimitry Andric                                  const SDLoc &dl, SelectionDAG &DAG) const {
323081ad6265SDimitry Andric   const MachineFunction &MF = DAG.getMachineFunction();
323181ad6265SDimitry Andric   const Function &F = MF.getFunction();
32320b57cec5SDimitry Andric   Type *RetTy = MF.getFunction().getReturnType();
32330b57cec5SDimitry Andric 
32340b57cec5SDimitry Andric   bool isABI = (STI.getSmVersion() >= 20);
32350b57cec5SDimitry Andric   assert(isABI && "Non-ABI compilation is not supported");
32360b57cec5SDimitry Andric   if (!isABI)
32370b57cec5SDimitry Andric     return Chain;
32380b57cec5SDimitry Andric 
3239fe6060f1SDimitry Andric   const DataLayout &DL = DAG.getDataLayout();
3240fcaf7f86SDimitry Andric   SmallVector<SDValue, 16> PromotedOutVals;
32410b57cec5SDimitry Andric   SmallVector<EVT, 16> VTs;
32420b57cec5SDimitry Andric   SmallVector<uint64_t, 16> Offsets;
32430b57cec5SDimitry Andric   ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets);
32440b57cec5SDimitry Andric   assert(VTs.size() == OutVals.size() && "Bad return value decomposition");
32450b57cec5SDimitry Andric 
3246fcaf7f86SDimitry Andric   for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
3247fcaf7f86SDimitry Andric     SDValue PromotedOutVal = OutVals[i];
3248fcaf7f86SDimitry Andric     MVT PromotedVT;
3249fcaf7f86SDimitry Andric     if (PromoteScalarIntegerPTX(VTs[i], &PromotedVT)) {
3250fcaf7f86SDimitry Andric       VTs[i] = EVT(PromotedVT);
3251fcaf7f86SDimitry Andric     }
3252fcaf7f86SDimitry Andric     if (PromoteScalarIntegerPTX(PromotedOutVal.getValueType(), &PromotedVT)) {
3253fcaf7f86SDimitry Andric       llvm::ISD::NodeType Ext =
3254fcaf7f86SDimitry Andric           Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3255fcaf7f86SDimitry Andric       PromotedOutVal = DAG.getNode(Ext, dl, PromotedVT, PromotedOutVal);
3256fcaf7f86SDimitry Andric     }
3257fcaf7f86SDimitry Andric     PromotedOutVals.push_back(PromotedOutVal);
3258fcaf7f86SDimitry Andric   }
3259fcaf7f86SDimitry Andric 
32600b57cec5SDimitry Andric   auto VectorInfo = VectorizePTXValueVTs(
326181ad6265SDimitry Andric       VTs, Offsets,
326281ad6265SDimitry Andric       RetTy->isSized() ? getFunctionParamOptimizedAlign(&F, RetTy, DL)
326381ad6265SDimitry Andric                        : Align(1));
32640b57cec5SDimitry Andric 
32650b57cec5SDimitry Andric   // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
32660b57cec5SDimitry Andric   // 32-bits are sign extended or zero extended, depending on whether
32670b57cec5SDimitry Andric   // they are signed or unsigned types.
32680b57cec5SDimitry Andric   bool ExtendIntegerRetVal =
32690b57cec5SDimitry Andric       RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
32700b57cec5SDimitry Andric 
32710b57cec5SDimitry Andric   SmallVector<SDValue, 6> StoreOperands;
32720b57cec5SDimitry Andric   for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
32730b57cec5SDimitry Andric     // New load/store. Record chain and offset operands.
32740b57cec5SDimitry Andric     if (VectorInfo[i] & PVF_FIRST) {
32750b57cec5SDimitry Andric       assert(StoreOperands.empty() && "Orphaned operand list.");
32760b57cec5SDimitry Andric       StoreOperands.push_back(Chain);
32770b57cec5SDimitry Andric       StoreOperands.push_back(DAG.getConstant(Offsets[i], dl, MVT::i32));
32780b57cec5SDimitry Andric     }
32790b57cec5SDimitry Andric 
3280fcaf7f86SDimitry Andric     SDValue OutVal = OutVals[i];
3281fcaf7f86SDimitry Andric     SDValue RetVal = PromotedOutVals[i];
3282fcaf7f86SDimitry Andric 
32830b57cec5SDimitry Andric     if (ExtendIntegerRetVal) {
32840b57cec5SDimitry Andric       RetVal = DAG.getNode(Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND
32850b57cec5SDimitry Andric                                                   : ISD::ZERO_EXTEND,
32860b57cec5SDimitry Andric                            dl, MVT::i32, RetVal);
3287fcaf7f86SDimitry Andric     } else if (OutVal.getValueSizeInBits() < 16) {
32880b57cec5SDimitry Andric       // Use 16-bit registers for small load-stores as it's the
32890b57cec5SDimitry Andric       // smallest general purpose register size supported by NVPTX.
32900b57cec5SDimitry Andric       RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal);
32910b57cec5SDimitry Andric     }
32920b57cec5SDimitry Andric 
32930b57cec5SDimitry Andric     // Record the value to return.
32940b57cec5SDimitry Andric     StoreOperands.push_back(RetVal);
32950b57cec5SDimitry Andric 
32960b57cec5SDimitry Andric     // That's the last element of this store op.
32970b57cec5SDimitry Andric     if (VectorInfo[i] & PVF_LAST) {
32980b57cec5SDimitry Andric       NVPTXISD::NodeType Op;
32990b57cec5SDimitry Andric       unsigned NumElts = StoreOperands.size() - 2;
33000b57cec5SDimitry Andric       switch (NumElts) {
33010b57cec5SDimitry Andric       case 1:
33020b57cec5SDimitry Andric         Op = NVPTXISD::StoreRetval;
33030b57cec5SDimitry Andric         break;
33040b57cec5SDimitry Andric       case 2:
33050b57cec5SDimitry Andric         Op = NVPTXISD::StoreRetvalV2;
33060b57cec5SDimitry Andric         break;
33070b57cec5SDimitry Andric       case 4:
33080b57cec5SDimitry Andric         Op = NVPTXISD::StoreRetvalV4;
33090b57cec5SDimitry Andric         break;
33100b57cec5SDimitry Andric       default:
33110b57cec5SDimitry Andric         llvm_unreachable("Invalid vector info.");
33120b57cec5SDimitry Andric       }
33130b57cec5SDimitry Andric 
33140b57cec5SDimitry Andric       // Adjust type of load/store op if we've extended the scalar
33150b57cec5SDimitry Andric       // return value.
33160b57cec5SDimitry Andric       EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];
33175ffd83dbSDimitry Andric       Chain = DAG.getMemIntrinsicNode(
33185ffd83dbSDimitry Andric           Op, dl, DAG.getVTList(MVT::Other), StoreOperands, TheStoreType,
33195ffd83dbSDimitry Andric           MachinePointerInfo(), Align(1), MachineMemOperand::MOStore);
33200b57cec5SDimitry Andric       // Cleanup vector state.
33210b57cec5SDimitry Andric       StoreOperands.clear();
33220b57cec5SDimitry Andric     }
33230b57cec5SDimitry Andric   }
33240b57cec5SDimitry Andric 
332506c3fb27SDimitry Andric   return DAG.getNode(NVPTXISD::RET_GLUE, dl, MVT::Other, Chain);
33260b57cec5SDimitry Andric }
33270b57cec5SDimitry Andric 
LowerAsmOperandForConstraint(SDValue Op,StringRef Constraint,std::vector<SDValue> & Ops,SelectionDAG & DAG) const33280b57cec5SDimitry Andric void NVPTXTargetLowering::LowerAsmOperandForConstraint(
33295f757f3fSDimitry Andric     SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
33300b57cec5SDimitry Andric     SelectionDAG &DAG) const {
33315f757f3fSDimitry Andric   if (Constraint.size() > 1)
33320b57cec5SDimitry Andric     return;
33330b57cec5SDimitry Andric   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
33340b57cec5SDimitry Andric }
33350b57cec5SDimitry Andric 
getOpcForTextureInstr(unsigned Intrinsic)33360b57cec5SDimitry Andric static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
33370b57cec5SDimitry Andric   switch (Intrinsic) {
33380b57cec5SDimitry Andric   default:
33390b57cec5SDimitry Andric     return 0;
33400b57cec5SDimitry Andric 
33410b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4f32_s32:
33420b57cec5SDimitry Andric     return NVPTXISD::Tex1DFloatS32;
33430b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4f32_f32:
33440b57cec5SDimitry Andric     return NVPTXISD::Tex1DFloatFloat;
33450b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
33460b57cec5SDimitry Andric     return NVPTXISD::Tex1DFloatFloatLevel;
33470b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
33480b57cec5SDimitry Andric     return NVPTXISD::Tex1DFloatFloatGrad;
33490b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4s32_s32:
33500b57cec5SDimitry Andric     return NVPTXISD::Tex1DS32S32;
33510b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4s32_f32:
33520b57cec5SDimitry Andric     return NVPTXISD::Tex1DS32Float;
33530b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
33540b57cec5SDimitry Andric     return NVPTXISD::Tex1DS32FloatLevel;
33550b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
33560b57cec5SDimitry Andric     return NVPTXISD::Tex1DS32FloatGrad;
33570b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4u32_s32:
33580b57cec5SDimitry Andric     return NVPTXISD::Tex1DU32S32;
33590b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4u32_f32:
33600b57cec5SDimitry Andric     return NVPTXISD::Tex1DU32Float;
33610b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
33620b57cec5SDimitry Andric     return NVPTXISD::Tex1DU32FloatLevel;
33630b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
33640b57cec5SDimitry Andric     return NVPTXISD::Tex1DU32FloatGrad;
33650b57cec5SDimitry Andric 
33660b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
33670b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayFloatS32;
33680b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
33690b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayFloatFloat;
33700b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
33710b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayFloatFloatLevel;
33720b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
33730b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayFloatFloatGrad;
33740b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
33750b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayS32S32;
33760b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
33770b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayS32Float;
33780b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
33790b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayS32FloatLevel;
33800b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
33810b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayS32FloatGrad;
33820b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
33830b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayU32S32;
33840b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
33850b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayU32Float;
33860b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
33870b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayU32FloatLevel;
33880b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
33890b57cec5SDimitry Andric     return NVPTXISD::Tex1DArrayU32FloatGrad;
33900b57cec5SDimitry Andric 
33910b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4f32_s32:
33920b57cec5SDimitry Andric     return NVPTXISD::Tex2DFloatS32;
33930b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4f32_f32:
33940b57cec5SDimitry Andric     return NVPTXISD::Tex2DFloatFloat;
33950b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
33960b57cec5SDimitry Andric     return NVPTXISD::Tex2DFloatFloatLevel;
33970b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
33980b57cec5SDimitry Andric     return NVPTXISD::Tex2DFloatFloatGrad;
33990b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4s32_s32:
34000b57cec5SDimitry Andric     return NVPTXISD::Tex2DS32S32;
34010b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4s32_f32:
34020b57cec5SDimitry Andric     return NVPTXISD::Tex2DS32Float;
34030b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
34040b57cec5SDimitry Andric     return NVPTXISD::Tex2DS32FloatLevel;
34050b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
34060b57cec5SDimitry Andric     return NVPTXISD::Tex2DS32FloatGrad;
34070b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4u32_s32:
34080b57cec5SDimitry Andric     return NVPTXISD::Tex2DU32S32;
34090b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4u32_f32:
34100b57cec5SDimitry Andric     return NVPTXISD::Tex2DU32Float;
34110b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
34120b57cec5SDimitry Andric     return NVPTXISD::Tex2DU32FloatLevel;
34130b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
34140b57cec5SDimitry Andric     return NVPTXISD::Tex2DU32FloatGrad;
34150b57cec5SDimitry Andric 
34160b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
34170b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayFloatS32;
34180b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
34190b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayFloatFloat;
34200b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
34210b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayFloatFloatLevel;
34220b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
34230b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayFloatFloatGrad;
34240b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
34250b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayS32S32;
34260b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
34270b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayS32Float;
34280b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
34290b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayS32FloatLevel;
34300b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
34310b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayS32FloatGrad;
34320b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
34330b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayU32S32;
34340b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
34350b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayU32Float;
34360b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
34370b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayU32FloatLevel;
34380b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
34390b57cec5SDimitry Andric     return NVPTXISD::Tex2DArrayU32FloatGrad;
34400b57cec5SDimitry Andric 
34410b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4f32_s32:
34420b57cec5SDimitry Andric     return NVPTXISD::Tex3DFloatS32;
34430b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4f32_f32:
34440b57cec5SDimitry Andric     return NVPTXISD::Tex3DFloatFloat;
34450b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
34460b57cec5SDimitry Andric     return NVPTXISD::Tex3DFloatFloatLevel;
34470b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
34480b57cec5SDimitry Andric     return NVPTXISD::Tex3DFloatFloatGrad;
34490b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4s32_s32:
34500b57cec5SDimitry Andric     return NVPTXISD::Tex3DS32S32;
34510b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4s32_f32:
34520b57cec5SDimitry Andric     return NVPTXISD::Tex3DS32Float;
34530b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
34540b57cec5SDimitry Andric     return NVPTXISD::Tex3DS32FloatLevel;
34550b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
34560b57cec5SDimitry Andric     return NVPTXISD::Tex3DS32FloatGrad;
34570b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4u32_s32:
34580b57cec5SDimitry Andric     return NVPTXISD::Tex3DU32S32;
34590b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4u32_f32:
34600b57cec5SDimitry Andric     return NVPTXISD::Tex3DU32Float;
34610b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
34620b57cec5SDimitry Andric     return NVPTXISD::Tex3DU32FloatLevel;
34630b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
34640b57cec5SDimitry Andric     return NVPTXISD::Tex3DU32FloatGrad;
34650b57cec5SDimitry Andric 
34660b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_v4f32_f32:
34670b57cec5SDimitry Andric     return NVPTXISD::TexCubeFloatFloat;
34680b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
34690b57cec5SDimitry Andric     return NVPTXISD::TexCubeFloatFloatLevel;
34700b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_v4s32_f32:
34710b57cec5SDimitry Andric     return NVPTXISD::TexCubeS32Float;
34720b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
34730b57cec5SDimitry Andric     return NVPTXISD::TexCubeS32FloatLevel;
34740b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_v4u32_f32:
34750b57cec5SDimitry Andric     return NVPTXISD::TexCubeU32Float;
34760b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
34770b57cec5SDimitry Andric     return NVPTXISD::TexCubeU32FloatLevel;
34780b57cec5SDimitry Andric 
34790b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
34800b57cec5SDimitry Andric     return NVPTXISD::TexCubeArrayFloatFloat;
34810b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
34820b57cec5SDimitry Andric     return NVPTXISD::TexCubeArrayFloatFloatLevel;
34830b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
34840b57cec5SDimitry Andric     return NVPTXISD::TexCubeArrayS32Float;
34850b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
34860b57cec5SDimitry Andric     return NVPTXISD::TexCubeArrayS32FloatLevel;
34870b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
34880b57cec5SDimitry Andric     return NVPTXISD::TexCubeArrayU32Float;
34890b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
34900b57cec5SDimitry Andric     return NVPTXISD::TexCubeArrayU32FloatLevel;
34910b57cec5SDimitry Andric 
34920b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
34930b57cec5SDimitry Andric     return NVPTXISD::Tld4R2DFloatFloat;
34940b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
34950b57cec5SDimitry Andric     return NVPTXISD::Tld4G2DFloatFloat;
34960b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
34970b57cec5SDimitry Andric     return NVPTXISD::Tld4B2DFloatFloat;
34980b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
34990b57cec5SDimitry Andric     return NVPTXISD::Tld4A2DFloatFloat;
35000b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
35010b57cec5SDimitry Andric     return NVPTXISD::Tld4R2DS64Float;
35020b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
35030b57cec5SDimitry Andric     return NVPTXISD::Tld4G2DS64Float;
35040b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
35050b57cec5SDimitry Andric     return NVPTXISD::Tld4B2DS64Float;
35060b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
35070b57cec5SDimitry Andric     return NVPTXISD::Tld4A2DS64Float;
35080b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
35090b57cec5SDimitry Andric     return NVPTXISD::Tld4R2DU64Float;
35100b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
35110b57cec5SDimitry Andric     return NVPTXISD::Tld4G2DU64Float;
35120b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
35130b57cec5SDimitry Andric     return NVPTXISD::Tld4B2DU64Float;
35140b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
35150b57cec5SDimitry Andric     return NVPTXISD::Tld4A2DU64Float;
35160b57cec5SDimitry Andric 
35170b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
35180b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DFloatS32;
35190b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
35200b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DFloatFloat;
35210b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
35220b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DFloatFloatLevel;
35230b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
35240b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DFloatFloatGrad;
35250b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
35260b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DS32S32;
35270b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
35280b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DS32Float;
35290b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
35300b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DS32FloatLevel;
35310b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
35320b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DS32FloatGrad;
35330b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
35340b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DU32S32;
35350b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
35360b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DU32Float;
35370b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
35380b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DU32FloatLevel;
35390b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
35400b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DU32FloatGrad;
35410b57cec5SDimitry Andric 
35420b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
35430b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayFloatS32;
35440b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
35450b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayFloatFloat;
35460b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
35470b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayFloatFloatLevel;
35480b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
35490b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayFloatFloatGrad;
35500b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
35510b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayS32S32;
35520b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
35530b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayS32Float;
35540b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
35550b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayS32FloatLevel;
35560b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
35570b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayS32FloatGrad;
35580b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
35590b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayU32S32;
35600b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
35610b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayU32Float;
35620b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
35630b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayU32FloatLevel;
35640b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
35650b57cec5SDimitry Andric     return NVPTXISD::TexUnified1DArrayU32FloatGrad;
35660b57cec5SDimitry Andric 
35670b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
35680b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DFloatS32;
35690b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
35700b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DFloatFloat;
35710b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
35720b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DFloatFloatLevel;
35730b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
35740b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DFloatFloatGrad;
35750b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
35760b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DS32S32;
35770b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
35780b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DS32Float;
35790b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
35800b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DS32FloatLevel;
35810b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
35820b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DS32FloatGrad;
35830b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
35840b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DU32S32;
35850b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
35860b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DU32Float;
35870b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
35880b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DU32FloatLevel;
35890b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
35900b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DU32FloatGrad;
35910b57cec5SDimitry Andric 
35920b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
35930b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayFloatS32;
35940b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
35950b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayFloatFloat;
35960b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
35970b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayFloatFloatLevel;
35980b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
35990b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayFloatFloatGrad;
36000b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
36010b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayS32S32;
36020b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
36030b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayS32Float;
36040b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
36050b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayS32FloatLevel;
36060b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
36070b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayS32FloatGrad;
36080b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
36090b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayU32S32;
36100b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
36110b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayU32Float;
36120b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
36130b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayU32FloatLevel;
36140b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
36150b57cec5SDimitry Andric     return NVPTXISD::TexUnified2DArrayU32FloatGrad;
36160b57cec5SDimitry Andric 
36170b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
36180b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DFloatS32;
36190b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
36200b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DFloatFloat;
36210b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
36220b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DFloatFloatLevel;
36230b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
36240b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DFloatFloatGrad;
36250b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
36260b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DS32S32;
36270b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
36280b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DS32Float;
36290b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
36300b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DS32FloatLevel;
36310b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
36320b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DS32FloatGrad;
36330b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
36340b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DU32S32;
36350b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
36360b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DU32Float;
36370b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
36380b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DU32FloatLevel;
36390b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
36400b57cec5SDimitry Andric     return NVPTXISD::TexUnified3DU32FloatGrad;
36410b57cec5SDimitry Andric 
36420b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
36430b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeFloatFloat;
36440b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
36450b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeFloatFloatLevel;
36460b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
36470b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeS32Float;
36480b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
36490b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeS32FloatLevel;
36500b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
36510b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeU32Float;
36520b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
36530b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeU32FloatLevel;
36540b57cec5SDimitry Andric 
36550b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
36560b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeArrayFloatFloat;
36570b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
36580b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel;
36590b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
36600b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeArrayS32Float;
36610b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
36620b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel;
36630b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
36640b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeArrayU32Float;
36650b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
36660b57cec5SDimitry Andric     return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;
36670b57cec5SDimitry Andric 
36687a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_grad_v4f32_f32:
36697a6dacacSDimitry Andric     return NVPTXISD::TexUnifiedCubeFloatFloatGrad;
36707a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_grad_v4s32_f32:
36717a6dacacSDimitry Andric     return NVPTXISD::TexUnifiedCubeS32FloatGrad;
36727a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_grad_v4u32_f32:
36737a6dacacSDimitry Andric     return NVPTXISD::TexUnifiedCubeU32FloatGrad;
36747a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_grad_v4f32_f32:
36757a6dacacSDimitry Andric     return NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad;
36767a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_grad_v4s32_f32:
36777a6dacacSDimitry Andric     return NVPTXISD::TexUnifiedCubeArrayS32FloatGrad;
36787a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_grad_v4u32_f32:
36797a6dacacSDimitry Andric     return NVPTXISD::TexUnifiedCubeArrayU32FloatGrad;
36807a6dacacSDimitry Andric 
36810b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
36820b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedR2DFloatFloat;
36830b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
36840b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedG2DFloatFloat;
36850b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
36860b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedB2DFloatFloat;
36870b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
36880b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedA2DFloatFloat;
36890b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
36900b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedR2DS64Float;
36910b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
36920b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedG2DS64Float;
36930b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
36940b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedB2DS64Float;
36950b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
36960b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedA2DS64Float;
36970b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
36980b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedR2DU64Float;
36990b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
37000b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedG2DU64Float;
37010b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
37020b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedB2DU64Float;
37030b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
37040b57cec5SDimitry Andric     return NVPTXISD::Tld4UnifiedA2DU64Float;
37050b57cec5SDimitry Andric   }
37060b57cec5SDimitry Andric }
37070b57cec5SDimitry Andric 
// Maps a surface-load intrinsic ID to its NVPTXISD opcode.
//
// Every `Intrinsic::nvvm_suld_<geom>_<type>_<mode>` case returns the
// identically named `NVPTXISD::Suld<Geom><Type><Mode>` value, where in the
// cases below <geom> is one of 1d, 1d_array, 2d, 2d_array, 3d; <type> is one
// of i8/i16/i32/i64, v2i8/v2i16/v2i32/v2i64, v4i8/v4i16/v4i32 (there is no
// v4i64 case); and <mode> is one of clamp, trap, zero.
// NOTE(review): <mode> presumably mirrors the PTX `suld.b` out-of-bounds
// modifier (.clamp/.trap/.zero) -- confirm against the PTX ISA.
//
// Returns 0 for any intrinsic ID that has no case below.
getOpcForSurfaceInstr(unsigned Intrinsic)37080b57cec5SDimitry Andric static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
37090b57cec5SDimitry Andric   switch (Intrinsic) {
37100b57cec5SDimitry Andric   default:
  // Any ID without an explicit case below maps to 0.
37110b57cec5SDimitry Andric     return 0;
  // ---- Cases with the `_clamp` suffix ----
37120b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i8_clamp:
37130b57cec5SDimitry Andric     return NVPTXISD::Suld1DI8Clamp;
37140b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i16_clamp:
37150b57cec5SDimitry Andric     return NVPTXISD::Suld1DI16Clamp;
37160b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i32_clamp:
37170b57cec5SDimitry Andric     return NVPTXISD::Suld1DI32Clamp;
37180b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i64_clamp:
37190b57cec5SDimitry Andric     return NVPTXISD::Suld1DI64Clamp;
37200b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i8_clamp:
37210b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I8Clamp;
37220b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i16_clamp:
37230b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I16Clamp;
37240b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i32_clamp:
37250b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I32Clamp;
37260b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i64_clamp:
37270b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I64Clamp;
37280b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i8_clamp:
37290b57cec5SDimitry Andric     return NVPTXISD::Suld1DV4I8Clamp;
37300b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i16_clamp:
37310b57cec5SDimitry Andric     return NVPTXISD::Suld1DV4I16Clamp;
37320b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i32_clamp:
37330b57cec5SDimitry Andric     return NVPTXISD::Suld1DV4I32Clamp;
37340b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i8_clamp:
37350b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI8Clamp;
37360b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i16_clamp:
37370b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI16Clamp;
37380b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i32_clamp:
37390b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI32Clamp;
37400b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i64_clamp:
37410b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI64Clamp;
37420b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
37430b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I8Clamp;
37440b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
37450b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I16Clamp;
37460b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
37470b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I32Clamp;
37480b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
37490b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I64Clamp;
37500b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
37510b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV4I8Clamp;
37520b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
37530b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV4I16Clamp;
37540b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
37550b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV4I32Clamp;
37560b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i8_clamp:
37570b57cec5SDimitry Andric     return NVPTXISD::Suld2DI8Clamp;
37580b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i16_clamp:
37590b57cec5SDimitry Andric     return NVPTXISD::Suld2DI16Clamp;
37600b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i32_clamp:
37610b57cec5SDimitry Andric     return NVPTXISD::Suld2DI32Clamp;
37620b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i64_clamp:
37630b57cec5SDimitry Andric     return NVPTXISD::Suld2DI64Clamp;
37640b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i8_clamp:
37650b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I8Clamp;
37660b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i16_clamp:
37670b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I16Clamp;
37680b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i32_clamp:
37690b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I32Clamp;
37700b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i64_clamp:
37710b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I64Clamp;
37720b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i8_clamp:
37730b57cec5SDimitry Andric     return NVPTXISD::Suld2DV4I8Clamp;
37740b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i16_clamp:
37750b57cec5SDimitry Andric     return NVPTXISD::Suld2DV4I16Clamp;
37760b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i32_clamp:
37770b57cec5SDimitry Andric     return NVPTXISD::Suld2DV4I32Clamp;
37780b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i8_clamp:
37790b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI8Clamp;
37800b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i16_clamp:
37810b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI16Clamp;
37820b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i32_clamp:
37830b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI32Clamp;
37840b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i64_clamp:
37850b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI64Clamp;
37860b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
37870b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I8Clamp;
37880b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
37890b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I16Clamp;
37900b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
37910b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I32Clamp;
37920b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
37930b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I64Clamp;
37940b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
37950b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV4I8Clamp;
37960b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
37970b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV4I16Clamp;
37980b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
37990b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV4I32Clamp;
38000b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i8_clamp:
38010b57cec5SDimitry Andric     return NVPTXISD::Suld3DI8Clamp;
38020b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i16_clamp:
38030b57cec5SDimitry Andric     return NVPTXISD::Suld3DI16Clamp;
38040b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i32_clamp:
38050b57cec5SDimitry Andric     return NVPTXISD::Suld3DI32Clamp;
38060b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i64_clamp:
38070b57cec5SDimitry Andric     return NVPTXISD::Suld3DI64Clamp;
38080b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i8_clamp:
38090b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I8Clamp;
38100b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i16_clamp:
38110b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I16Clamp;
38120b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i32_clamp:
38130b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I32Clamp;
38140b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i64_clamp:
38150b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I64Clamp;
38160b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i8_clamp:
38170b57cec5SDimitry Andric     return NVPTXISD::Suld3DV4I8Clamp;
38180b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i16_clamp:
38190b57cec5SDimitry Andric     return NVPTXISD::Suld3DV4I16Clamp;
38200b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i32_clamp:
38210b57cec5SDimitry Andric     return NVPTXISD::Suld3DV4I32Clamp;
  // ---- Cases with the `_trap` suffix ----
38220b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i8_trap:
38230b57cec5SDimitry Andric     return NVPTXISD::Suld1DI8Trap;
38240b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i16_trap:
38250b57cec5SDimitry Andric     return NVPTXISD::Suld1DI16Trap;
38260b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i32_trap:
38270b57cec5SDimitry Andric     return NVPTXISD::Suld1DI32Trap;
38280b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i64_trap:
38290b57cec5SDimitry Andric     return NVPTXISD::Suld1DI64Trap;
38300b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i8_trap:
38310b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I8Trap;
38320b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i16_trap:
38330b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I16Trap;
38340b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i32_trap:
38350b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I32Trap;
38360b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i64_trap:
38370b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I64Trap;
38380b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i8_trap:
38390b57cec5SDimitry Andric     return NVPTXISD::Suld1DV4I8Trap;
38400b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i16_trap:
38410b57cec5SDimitry Andric     return NVPTXISD::Suld1DV4I16Trap;
38420b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i32_trap:
38430b57cec5SDimitry Andric     return NVPTXISD::Suld1DV4I32Trap;
38440b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i8_trap:
38450b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI8Trap;
38460b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i16_trap:
38470b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI16Trap;
38480b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i32_trap:
38490b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI32Trap;
38500b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i64_trap:
38510b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI64Trap;
38520b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
38530b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I8Trap;
38540b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
38550b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I16Trap;
38560b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
38570b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I32Trap;
38580b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
38590b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I64Trap;
38600b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
38610b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV4I8Trap;
38620b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
38630b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV4I16Trap;
38640b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
38650b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV4I32Trap;
38660b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i8_trap:
38670b57cec5SDimitry Andric     return NVPTXISD::Suld2DI8Trap;
38680b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i16_trap:
38690b57cec5SDimitry Andric     return NVPTXISD::Suld2DI16Trap;
38700b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i32_trap:
38710b57cec5SDimitry Andric     return NVPTXISD::Suld2DI32Trap;
38720b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i64_trap:
38730b57cec5SDimitry Andric     return NVPTXISD::Suld2DI64Trap;
38740b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i8_trap:
38750b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I8Trap;
38760b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i16_trap:
38770b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I16Trap;
38780b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i32_trap:
38790b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I32Trap;
38800b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i64_trap:
38810b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I64Trap;
38820b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i8_trap:
38830b57cec5SDimitry Andric     return NVPTXISD::Suld2DV4I8Trap;
38840b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i16_trap:
38850b57cec5SDimitry Andric     return NVPTXISD::Suld2DV4I16Trap;
38860b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i32_trap:
38870b57cec5SDimitry Andric     return NVPTXISD::Suld2DV4I32Trap;
38880b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i8_trap:
38890b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI8Trap;
38900b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i16_trap:
38910b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI16Trap;
38920b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i32_trap:
38930b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI32Trap;
38940b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i64_trap:
38950b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI64Trap;
38960b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
38970b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I8Trap;
38980b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
38990b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I16Trap;
39000b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
39010b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I32Trap;
39020b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
39030b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I64Trap;
39040b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
39050b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV4I8Trap;
39060b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
39070b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV4I16Trap;
39080b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
39090b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV4I32Trap;
39100b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i8_trap:
39110b57cec5SDimitry Andric     return NVPTXISD::Suld3DI8Trap;
39120b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i16_trap:
39130b57cec5SDimitry Andric     return NVPTXISD::Suld3DI16Trap;
39140b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i32_trap:
39150b57cec5SDimitry Andric     return NVPTXISD::Suld3DI32Trap;
39160b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i64_trap:
39170b57cec5SDimitry Andric     return NVPTXISD::Suld3DI64Trap;
39180b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i8_trap:
39190b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I8Trap;
39200b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i16_trap:
39210b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I16Trap;
39220b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i32_trap:
39230b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I32Trap;
39240b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i64_trap:
39250b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I64Trap;
39260b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i8_trap:
39270b57cec5SDimitry Andric     return NVPTXISD::Suld3DV4I8Trap;
39280b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i16_trap:
39290b57cec5SDimitry Andric     return NVPTXISD::Suld3DV4I16Trap;
39300b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i32_trap:
39310b57cec5SDimitry Andric     return NVPTXISD::Suld3DV4I32Trap;
  // ---- Cases with the `_zero` suffix ----
39320b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i8_zero:
39330b57cec5SDimitry Andric     return NVPTXISD::Suld1DI8Zero;
39340b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i16_zero:
39350b57cec5SDimitry Andric     return NVPTXISD::Suld1DI16Zero;
39360b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i32_zero:
39370b57cec5SDimitry Andric     return NVPTXISD::Suld1DI32Zero;
39380b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i64_zero:
39390b57cec5SDimitry Andric     return NVPTXISD::Suld1DI64Zero;
39400b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i8_zero:
39410b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I8Zero;
39420b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i16_zero:
39430b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I16Zero;
39440b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i32_zero:
39450b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I32Zero;
39460b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i64_zero:
39470b57cec5SDimitry Andric     return NVPTXISD::Suld1DV2I64Zero;
39480b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i8_zero:
39490b57cec5SDimitry Andric     return NVPTXISD::Suld1DV4I8Zero;
39500b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i16_zero:
39510b57cec5SDimitry Andric     return NVPTXISD::Suld1DV4I16Zero;
39520b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i32_zero:
39530b57cec5SDimitry Andric     return NVPTXISD::Suld1DV4I32Zero;
39540b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i8_zero:
39550b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI8Zero;
39560b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i16_zero:
39570b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI16Zero;
39580b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i32_zero:
39590b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI32Zero;
39600b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i64_zero:
39610b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayI64Zero;
39620b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
39630b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I8Zero;
39640b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
39650b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I16Zero;
39660b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
39670b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I32Zero;
39680b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
39690b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV2I64Zero;
39700b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
39710b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV4I8Zero;
39720b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
39730b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV4I16Zero;
39740b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
39750b57cec5SDimitry Andric     return NVPTXISD::Suld1DArrayV4I32Zero;
39760b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i8_zero:
39770b57cec5SDimitry Andric     return NVPTXISD::Suld2DI8Zero;
39780b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i16_zero:
39790b57cec5SDimitry Andric     return NVPTXISD::Suld2DI16Zero;
39800b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i32_zero:
39810b57cec5SDimitry Andric     return NVPTXISD::Suld2DI32Zero;
39820b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i64_zero:
39830b57cec5SDimitry Andric     return NVPTXISD::Suld2DI64Zero;
39840b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i8_zero:
39850b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I8Zero;
39860b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i16_zero:
39870b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I16Zero;
39880b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i32_zero:
39890b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I32Zero;
39900b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i64_zero:
39910b57cec5SDimitry Andric     return NVPTXISD::Suld2DV2I64Zero;
39920b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i8_zero:
39930b57cec5SDimitry Andric     return NVPTXISD::Suld2DV4I8Zero;
39940b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i16_zero:
39950b57cec5SDimitry Andric     return NVPTXISD::Suld2DV4I16Zero;
39960b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i32_zero:
39970b57cec5SDimitry Andric     return NVPTXISD::Suld2DV4I32Zero;
39980b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i8_zero:
39990b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI8Zero;
40000b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i16_zero:
40010b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI16Zero;
40020b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i32_zero:
40030b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI32Zero;
40040b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i64_zero:
40050b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayI64Zero;
40060b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
40070b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I8Zero;
40080b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
40090b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I16Zero;
40100b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
40110b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I32Zero;
40120b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
40130b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV2I64Zero;
40140b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
40150b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV4I8Zero;
40160b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
40170b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV4I16Zero;
40180b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
40190b57cec5SDimitry Andric     return NVPTXISD::Suld2DArrayV4I32Zero;
40200b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i8_zero:
40210b57cec5SDimitry Andric     return NVPTXISD::Suld3DI8Zero;
40220b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i16_zero:
40230b57cec5SDimitry Andric     return NVPTXISD::Suld3DI16Zero;
40240b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i32_zero:
40250b57cec5SDimitry Andric     return NVPTXISD::Suld3DI32Zero;
40260b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i64_zero:
40270b57cec5SDimitry Andric     return NVPTXISD::Suld3DI64Zero;
40280b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i8_zero:
40290b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I8Zero;
40300b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i16_zero:
40310b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I16Zero;
40320b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i32_zero:
40330b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I32Zero;
40340b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i64_zero:
40350b57cec5SDimitry Andric     return NVPTXISD::Suld3DV2I64Zero;
40360b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i8_zero:
40370b57cec5SDimitry Andric     return NVPTXISD::Suld3DV4I8Zero;
40380b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i16_zero:
40390b57cec5SDimitry Andric     return NVPTXISD::Suld3DV4I16Zero;
40400b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i32_zero:
40410b57cec5SDimitry Andric     return NVPTXISD::Suld3DV4I32Zero;
40420b57cec5SDimitry Andric   }
40430b57cec5SDimitry Andric }
40440b57cec5SDimitry Andric 
40450b57cec5SDimitry Andric // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
40460b57cec5SDimitry Andric // TgtMemIntrinsic
40470b57cec5SDimitry Andric // because we need the information that is only available in the "Value" type
40480b57cec5SDimitry Andric // of the destination
40490b57cec5SDimitry Andric // pointer, in particular the address space information.
getTgtMemIntrinsic(IntrinsicInfo & Info,const CallInst & I,MachineFunction & MF,unsigned Intrinsic) const40500b57cec5SDimitry Andric bool NVPTXTargetLowering::getTgtMemIntrinsic(
40510b57cec5SDimitry Andric     IntrinsicInfo &Info, const CallInst &I,
40520b57cec5SDimitry Andric     MachineFunction &MF, unsigned Intrinsic) const {
40530b57cec5SDimitry Andric   switch (Intrinsic) {
40540b57cec5SDimitry Andric   default:
40550b57cec5SDimitry Andric     return false;
40560b57cec5SDimitry Andric   case Intrinsic::nvvm_match_all_sync_i32p:
40570b57cec5SDimitry Andric   case Intrinsic::nvvm_match_all_sync_i64p:
40580b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
40590b57cec5SDimitry Andric     // memVT is bogus. These intrinsics have IntrInaccessibleMemOnly attribute
40600b57cec5SDimitry Andric     // in order to model data exchange with other threads, but perform no real
40610b57cec5SDimitry Andric     // memory accesses.
40620b57cec5SDimitry Andric     Info.memVT = MVT::i1;
40630b57cec5SDimitry Andric 
40640b57cec5SDimitry Andric     // Our result depends on both our and other thread's arguments.
40650b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
40660b57cec5SDimitry Andric     return true;
40670b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col:
40680b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row:
40690b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride:
40700b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride:
40710b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col:
40720b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row:
40730b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride:
40740b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride:
40750b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col:
40760b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row:
40770b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride:
40780b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride:
40790b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col:
40800b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row:
40810b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride:
40820b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride:
40830b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col:
40840b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row:
40850b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride:
40860b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride:
40870b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col:
40880b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row:
40890b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride:
40900b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride: {
40910b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
40920b57cec5SDimitry Andric     Info.memVT = MVT::v8f16;
40930b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
40940b57cec5SDimitry Andric     Info.offset = 0;
40950b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
40968bcb0991SDimitry Andric     Info.align = Align(16);
40970b57cec5SDimitry Andric     return true;
40980b57cec5SDimitry Andric   }
40990b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col:
41000b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col_stride:
41010b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col_stride:
41020b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col:
41030b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row:
41040b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row_stride:
41050b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row_stride:
41060b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row:
4107fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col:
4108fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col_stride:
4109fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row:
4110fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row_stride:
41110b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col:
41120b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col_stride:
41130b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col_stride:
41140b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col:
41150b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row:
41160b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row_stride:
41170b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row_stride:
4118fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row:
4119fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col:
4120fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col_stride:
4121fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row:
4122fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row_stride: {
41230b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
41240b57cec5SDimitry Andric     Info.memVT = MVT::v2i32;
41250b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
41260b57cec5SDimitry Andric     Info.offset = 0;
41270b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
41288bcb0991SDimitry Andric     Info.align = Align(8);
41290b57cec5SDimitry Andric     return true;
41300b57cec5SDimitry Andric   }
41310b57cec5SDimitry Andric 
41320b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col:
41330b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col_stride:
41340b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col_stride:
41350b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col:
41360b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row:
41370b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row_stride:
41380b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row_stride:
41390b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row:
4140fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col:
4141fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col_stride:
4142fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row:
4143fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row_stride:
4144fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col:
4145fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col_stride:
4146fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row:
4147fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row_stride:
41480b57cec5SDimitry Andric 
41490b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col:
41500b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col_stride:
41510b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col_stride:
41520b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col:
41530b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row:
41540b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row_stride:
41550b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row_stride:
4156fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row:
4157fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col:
4158fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col_stride:
4159fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row:
4160fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row_stride:
4161fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col:
4162fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col_stride:
4163fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row:
4164349cc55cSDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row_stride:
4165349cc55cSDimitry Andric   case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_b16:
4166349cc55cSDimitry Andric   case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_trans_b16: {
41670b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
41680b57cec5SDimitry Andric     Info.memVT = MVT::v4i32;
41690b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
41700b57cec5SDimitry Andric     Info.offset = 0;
41710b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
41728bcb0991SDimitry Andric     Info.align = Align(16);
41730b57cec5SDimitry Andric     return true;
41740b57cec5SDimitry Andric   }
41750b57cec5SDimitry Andric 
41760b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col:
41770b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col_stride:
41780b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col_stride:
41790b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col:
41800b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row:
41810b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row_stride:
41820b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row_stride:
41830b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row:
41840b57cec5SDimitry Andric 
41850b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col:
41860b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col_stride:
41870b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col_stride:
41880b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col:
41890b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row:
41900b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row_stride:
41910b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row_stride:
41920b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row:
41930b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row:
41940b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row_stride:
41950b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col:
41960b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col_stride:
41970b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row:
41980b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row_stride:
41990b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row_stride:
42000b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row:
42010b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col:
42020b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col_stride:
42030b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col_stride:
4204349cc55cSDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col:
4205349cc55cSDimitry Andric   case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_b16:
4206349cc55cSDimitry Andric   case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_trans_b16: {
42070b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
42080b57cec5SDimitry Andric     Info.memVT = MVT::i32;
42090b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
42100b57cec5SDimitry Andric     Info.offset = 0;
42110b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
42128bcb0991SDimitry Andric     Info.align = Align(4);
42130b57cec5SDimitry Andric     return true;
42140b57cec5SDimitry Andric   }
42150b57cec5SDimitry Andric 
42160b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col:
42170b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row:
42180b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride:
42190b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride:
42200b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col:
42210b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row:
42220b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride:
42230b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride:
42240b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col:
42250b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row:
42260b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride:
42270b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride: {
42280b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
42290b57cec5SDimitry Andric     Info.memVT = MVT::v4f16;
42300b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
42310b57cec5SDimitry Andric     Info.offset = 0;
42320b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
42338bcb0991SDimitry Andric     Info.align = Align(16);
42340b57cec5SDimitry Andric     return true;
42350b57cec5SDimitry Andric   }
42360b57cec5SDimitry Andric 
42370b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col:
42380b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row:
42390b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride:
42400b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride:
42410b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col:
42420b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row:
42430b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride:
42440b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride:
42450b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col:
42460b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row:
42470b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride:
4248fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride:
4249fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col:
4250fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row:
4251fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col_stride:
4252fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row_stride: {
42530b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
42540b57cec5SDimitry Andric     Info.memVT = MVT::v8f32;
42550b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
42560b57cec5SDimitry Andric     Info.offset = 0;
42570b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
42588bcb0991SDimitry Andric     Info.align = Align(16);
42590b57cec5SDimitry Andric     return true;
42600b57cec5SDimitry Andric   }
42610b57cec5SDimitry Andric 
4262fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col:
4263fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col_stride:
4264fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row:
4265fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row_stride:
4266fe6060f1SDimitry Andric 
4267fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col:
4268fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col_stride:
4269fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row:
4270fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row_stride:
4271fe6060f1SDimitry Andric 
42720b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col:
42730b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col_stride:
42740b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row:
42750b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row_stride:
42760b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col:
42770b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col_stride:
42780b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row:
42790b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row_stride:
42800b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col:
42810b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col_stride:
42820b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row:
42830b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row_stride: {
42840b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
42850b57cec5SDimitry Andric     Info.memVT = MVT::v8i32;
42860b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
42870b57cec5SDimitry Andric     Info.offset = 0;
42880b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
42898bcb0991SDimitry Andric     Info.align = Align(16);
42900b57cec5SDimitry Andric     return true;
42910b57cec5SDimitry Andric   }
42920b57cec5SDimitry Andric 
42930b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col:
42940b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col_stride:
42950b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row:
42960b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row_stride:
42970b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col:
42980b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col_stride:
42990b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row:
4300349cc55cSDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row_stride:
4301349cc55cSDimitry Andric   case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_b16:
4302349cc55cSDimitry Andric   case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_trans_b16: {
43030b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
43040b57cec5SDimitry Andric     Info.memVT = MVT::v2i32;
43050b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
43060b57cec5SDimitry Andric     Info.offset = 0;
43070b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
43088bcb0991SDimitry Andric     Info.align = Align(8);
43090b57cec5SDimitry Andric     return true;
43100b57cec5SDimitry Andric   }
43110b57cec5SDimitry Andric 
4312fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col:
4313fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col_stride:
4314fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row:
4315fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row_stride:
4316fe6060f1SDimitry Andric 
4317fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col:
4318fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col_stride:
4319fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row:
4320fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row_stride: {
4321fe6060f1SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
4322fe6060f1SDimitry Andric     Info.memVT = MVT::f64;
4323fe6060f1SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
4324fe6060f1SDimitry Andric     Info.offset = 0;
4325fe6060f1SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
4326fe6060f1SDimitry Andric     Info.align = Align(8);
4327fe6060f1SDimitry Andric     return true;
4328fe6060f1SDimitry Andric   }
4329fe6060f1SDimitry Andric 
4330fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col:
4331fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col_stride:
4332fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row:
4333fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row_stride: {
4334fe6060f1SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
4335fe6060f1SDimitry Andric     Info.memVT = MVT::v2f64;
4336fe6060f1SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
4337fe6060f1SDimitry Andric     Info.offset = 0;
4338fe6060f1SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
4339fe6060f1SDimitry Andric     Info.align = Align(16);
4340fe6060f1SDimitry Andric     return true;
4341fe6060f1SDimitry Andric   }
4342fe6060f1SDimitry Andric 
43430b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col:
43440b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row:
43450b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride:
43460b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride:
43470b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col:
43480b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row:
43490b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride:
43500b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride:
43510b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col:
43520b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row:
43530b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride:
43540b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride: {
43550b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_VOID;
43560b57cec5SDimitry Andric     Info.memVT = MVT::v4f16;
43570b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
43580b57cec5SDimitry Andric     Info.offset = 0;
43590b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOStore;
43608bcb0991SDimitry Andric     Info.align = Align(16);
43610b57cec5SDimitry Andric     return true;
43620b57cec5SDimitry Andric   }
43630b57cec5SDimitry Andric 
43640b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col:
43650b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row:
43660b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride:
43670b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride:
43680b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col:
43690b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row:
43700b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride:
43710b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride:
43720b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col:
43730b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row:
43740b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride:
4375fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride:
4376fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col:
4377fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row:
4378fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col_stride:
4379fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row_stride: {
43800b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_VOID;
43810b57cec5SDimitry Andric     Info.memVT = MVT::v8f32;
43820b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
43830b57cec5SDimitry Andric     Info.offset = 0;
43840b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOStore;
43858bcb0991SDimitry Andric     Info.align = Align(16);
43860b57cec5SDimitry Andric     return true;
43870b57cec5SDimitry Andric   }
43880b57cec5SDimitry Andric 
43890b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col:
43900b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col_stride:
43910b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row:
43920b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row_stride:
43930b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col:
43940b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col_stride:
43950b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row:
43960b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row_stride:
43970b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col:
43980b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col_stride:
43990b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row:
44000b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row_stride: {
44010b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_VOID;
44020b57cec5SDimitry Andric     Info.memVT = MVT::v8i32;
44030b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
44040b57cec5SDimitry Andric     Info.offset = 0;
44050b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOStore;
44068bcb0991SDimitry Andric     Info.align = Align(16);
44070b57cec5SDimitry Andric     return true;
44080b57cec5SDimitry Andric   }
44090b57cec5SDimitry Andric 
44100b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col:
44110b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col_stride:
44120b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row:
44130b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row_stride:
44140b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col:
44150b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col_stride:
44160b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row:
44170b57cec5SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row_stride: {
44180b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_VOID;
44190b57cec5SDimitry Andric     Info.memVT = MVT::v2i32;
44200b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
44210b57cec5SDimitry Andric     Info.offset = 0;
44220b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOStore;
44238bcb0991SDimitry Andric     Info.align = Align(8);
44240b57cec5SDimitry Andric     return true;
44250b57cec5SDimitry Andric   }
44260b57cec5SDimitry Andric 
4427fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col:
4428fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col_stride:
4429fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row:
4430fe6060f1SDimitry Andric   case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row_stride: {
4431fe6060f1SDimitry Andric     Info.opc = ISD::INTRINSIC_VOID;
4432fe6060f1SDimitry Andric     Info.memVT = MVT::v2f64;
4433fe6060f1SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
4434fe6060f1SDimitry Andric     Info.offset = 0;
4435fe6060f1SDimitry Andric     Info.flags = MachineMemOperand::MOStore;
4436fe6060f1SDimitry Andric     Info.align = Align(16);
4437fe6060f1SDimitry Andric     return true;
4438fe6060f1SDimitry Andric   }
4439fe6060f1SDimitry Andric 
44400b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_load_inc_32:
44410b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_load_dec_32:
44420b57cec5SDimitry Andric 
44430b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_add_gen_f_cta:
44440b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_add_gen_f_sys:
44450b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_add_gen_i_cta:
44460b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_add_gen_i_sys:
44470b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_and_gen_i_cta:
44480b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_and_gen_i_sys:
44490b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_cas_gen_i_cta:
44500b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_cas_gen_i_sys:
44510b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_dec_gen_i_cta:
44520b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_dec_gen_i_sys:
44530b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_inc_gen_i_cta:
44540b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_inc_gen_i_sys:
44550b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_max_gen_i_cta:
44560b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_max_gen_i_sys:
44570b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_min_gen_i_cta:
44580b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_min_gen_i_sys:
44590b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_or_gen_i_cta:
44600b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_or_gen_i_sys:
44610b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_exch_gen_i_cta:
44620b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_exch_gen_i_sys:
44630b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_xor_gen_i_cta:
44640b57cec5SDimitry Andric   case Intrinsic::nvvm_atomic_xor_gen_i_sys: {
44650b57cec5SDimitry Andric     auto &DL = I.getModule()->getDataLayout();
44660b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
44670b57cec5SDimitry Andric     Info.memVT = getValueType(DL, I.getType());
44680b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
44690b57cec5SDimitry Andric     Info.offset = 0;
44700b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
44718bcb0991SDimitry Andric     Info.align.reset();
44720b57cec5SDimitry Andric     return true;
44730b57cec5SDimitry Andric   }
44740b57cec5SDimitry Andric 
44750b57cec5SDimitry Andric   case Intrinsic::nvvm_ldu_global_i:
44760b57cec5SDimitry Andric   case Intrinsic::nvvm_ldu_global_f:
44770b57cec5SDimitry Andric   case Intrinsic::nvvm_ldu_global_p: {
44780b57cec5SDimitry Andric     auto &DL = I.getModule()->getDataLayout();
44790b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
44800b57cec5SDimitry Andric     if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
44810b57cec5SDimitry Andric       Info.memVT = getValueType(DL, I.getType());
44820b57cec5SDimitry Andric     else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
44830b57cec5SDimitry Andric       Info.memVT = getPointerTy(DL);
44840b57cec5SDimitry Andric     else
44850b57cec5SDimitry Andric       Info.memVT = getValueType(DL, I.getType());
44860b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
44870b57cec5SDimitry Andric     Info.offset = 0;
44880b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
44895ffd83dbSDimitry Andric     Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
44900b57cec5SDimitry Andric 
44910b57cec5SDimitry Andric     return true;
44920b57cec5SDimitry Andric   }
44930b57cec5SDimitry Andric   case Intrinsic::nvvm_ldg_global_i:
44940b57cec5SDimitry Andric   case Intrinsic::nvvm_ldg_global_f:
44950b57cec5SDimitry Andric   case Intrinsic::nvvm_ldg_global_p: {
44960b57cec5SDimitry Andric     auto &DL = I.getModule()->getDataLayout();
44970b57cec5SDimitry Andric 
44980b57cec5SDimitry Andric     Info.opc = ISD::INTRINSIC_W_CHAIN;
44990b57cec5SDimitry Andric     if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
45000b57cec5SDimitry Andric       Info.memVT = getValueType(DL, I.getType());
45010b57cec5SDimitry Andric     else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
45020b57cec5SDimitry Andric       Info.memVT = getPointerTy(DL);
45030b57cec5SDimitry Andric     else
45040b57cec5SDimitry Andric       Info.memVT = getValueType(DL, I.getType());
45050b57cec5SDimitry Andric     Info.ptrVal = I.getArgOperand(0);
45060b57cec5SDimitry Andric     Info.offset = 0;
45070b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
45085ffd83dbSDimitry Andric     Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
45090b57cec5SDimitry Andric 
45100b57cec5SDimitry Andric     return true;
45110b57cec5SDimitry Andric   }
45120b57cec5SDimitry Andric 
45130b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4f32_s32:
45140b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4f32_f32:
45150b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
45160b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
45170b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
45180b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
45190b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
45200b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
45210b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4f32_s32:
45220b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4f32_f32:
45230b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
45240b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
45250b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
45260b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
45270b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
45280b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
45290b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4f32_s32:
45300b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4f32_f32:
45310b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
45320b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
45330b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_v4f32_f32:
45340b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
45350b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
45360b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
45370b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
45380b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
45390b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
45400b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
45410b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
45420b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
45430b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
45440b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
45450b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
45460b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
45470b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
45480b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
45490b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
45500b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
45510b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
45520b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
45530b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
45540b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
45550b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
45560b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
45570b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
45580b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
45590b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
45600b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
45610b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
45620b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
45630b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
45640b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
45657a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_grad_v4f32_f32:
45667a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_grad_v4f32_f32:
45670b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
45680b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
45690b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
45700b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
45710b57cec5SDimitry Andric     Info.opc = getOpcForTextureInstr(Intrinsic);
45720b57cec5SDimitry Andric     Info.memVT = MVT::v4f32;
45730b57cec5SDimitry Andric     Info.ptrVal = nullptr;
45740b57cec5SDimitry Andric     Info.offset = 0;
45750b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
45768bcb0991SDimitry Andric     Info.align = Align(16);
45770b57cec5SDimitry Andric     return true;
45780b57cec5SDimitry Andric 
45790b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4s32_s32:
45800b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4s32_f32:
45810b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
45820b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
45830b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
45840b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
45850b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
45860b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
45870b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4s32_s32:
45880b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4s32_f32:
45890b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
45900b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
45910b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
45920b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
45930b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
45940b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
45950b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4s32_s32:
45960b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4s32_f32:
45970b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
45980b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
45990b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_v4s32_f32:
46000b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
46010b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
46020b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
46030b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_v4u32_f32:
46040b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
46050b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
46060b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
46070b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4u32_s32:
46080b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_v4u32_f32:
46090b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
46100b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
46110b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
46120b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
46130b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
46140b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
46150b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4u32_s32:
46160b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_v4u32_f32:
46170b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
46180b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
46190b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
46200b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
46210b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
46220b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
46230b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4u32_s32:
46240b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_v4u32_f32:
46250b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
46260b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
46270b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
46280b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
46290b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
46300b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
46310b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
46320b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
46330b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
46340b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
46350b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
46360b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
46370b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
46380b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
46390b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
46400b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
46410b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
46420b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
46430b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
46440b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
46450b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
46460b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
46470b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
46480b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
46490b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
46500b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
46510b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
46520b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
46530b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
46540b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
46550b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
46560b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
46570b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
46580b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
46590b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
46600b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
46610b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
46620b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
46630b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
46640b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
46650b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
46660b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
46670b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
46680b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
46690b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
46700b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
46710b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
46720b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
46730b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
46740b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
46750b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
46760b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
46770b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
46780b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
46790b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
46800b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
46810b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
46820b57cec5SDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
46837a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_grad_v4s32_f32:
46847a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_grad_v4u32_f32:
46857a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_grad_v4s32_f32:
46867a6dacacSDimitry Andric   case Intrinsic::nvvm_tex_unified_cube_array_grad_v4u32_f32:
46870b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
46880b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
46890b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
46900b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
46910b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
46920b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
46930b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
46940b57cec5SDimitry Andric   case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
46950b57cec5SDimitry Andric     Info.opc = getOpcForTextureInstr(Intrinsic);
46960b57cec5SDimitry Andric     Info.memVT = MVT::v4i32;
46970b57cec5SDimitry Andric     Info.ptrVal = nullptr;
46980b57cec5SDimitry Andric     Info.offset = 0;
46990b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
47008bcb0991SDimitry Andric     Info.align = Align(16);
47010b57cec5SDimitry Andric     return true;
47020b57cec5SDimitry Andric 
47030b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i8_clamp:
47040b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i8_clamp:
47050b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i8_clamp:
47060b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i8_clamp:
47070b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
47080b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
47090b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i8_clamp:
47100b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i8_clamp:
47110b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i8_clamp:
47120b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i8_clamp:
47130b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
47140b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
47150b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i8_clamp:
47160b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i8_clamp:
47170b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i8_clamp:
47180b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i8_trap:
47190b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i8_trap:
47200b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i8_trap:
47210b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i8_trap:
47220b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
47230b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
47240b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i8_trap:
47250b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i8_trap:
47260b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i8_trap:
47270b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i8_trap:
47280b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
47290b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
47300b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i8_trap:
47310b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i8_trap:
47320b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i8_trap:
47330b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i8_zero:
47340b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i8_zero:
47350b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i8_zero:
47360b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i8_zero:
47370b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
47380b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
47390b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i8_zero:
47400b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i8_zero:
47410b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i8_zero:
47420b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i8_zero:
47430b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
47440b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
47450b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i8_zero:
47460b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i8_zero:
47470b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i8_zero:
47480b57cec5SDimitry Andric     Info.opc = getOpcForSurfaceInstr(Intrinsic);
47490b57cec5SDimitry Andric     Info.memVT = MVT::i8;
47500b57cec5SDimitry Andric     Info.ptrVal = nullptr;
47510b57cec5SDimitry Andric     Info.offset = 0;
47520b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
47538bcb0991SDimitry Andric     Info.align = Align(16);
47540b57cec5SDimitry Andric     return true;
47550b57cec5SDimitry Andric 
47560b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i16_clamp:
47570b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i16_clamp:
47580b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i16_clamp:
47590b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i16_clamp:
47600b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
47610b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
47620b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i16_clamp:
47630b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i16_clamp:
47640b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i16_clamp:
47650b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i16_clamp:
47660b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
47670b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
47680b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i16_clamp:
47690b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i16_clamp:
47700b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i16_clamp:
47710b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i16_trap:
47720b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i16_trap:
47730b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i16_trap:
47740b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i16_trap:
47750b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
47760b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
47770b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i16_trap:
47780b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i16_trap:
47790b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i16_trap:
47800b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i16_trap:
47810b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
47820b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
47830b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i16_trap:
47840b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i16_trap:
47850b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i16_trap:
47860b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i16_zero:
47870b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i16_zero:
47880b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i16_zero:
47890b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i16_zero:
47900b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
47910b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
47920b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i16_zero:
47930b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i16_zero:
47940b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i16_zero:
47950b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i16_zero:
47960b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
47970b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
47980b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i16_zero:
47990b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i16_zero:
48000b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i16_zero:
48010b57cec5SDimitry Andric     Info.opc = getOpcForSurfaceInstr(Intrinsic);
48020b57cec5SDimitry Andric     Info.memVT = MVT::i16;
48030b57cec5SDimitry Andric     Info.ptrVal = nullptr;
48040b57cec5SDimitry Andric     Info.offset = 0;
48050b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
48068bcb0991SDimitry Andric     Info.align = Align(16);
48070b57cec5SDimitry Andric     return true;
48080b57cec5SDimitry Andric 
48090b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i32_clamp:
48100b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i32_clamp:
48110b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i32_clamp:
48120b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i32_clamp:
48130b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
48140b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
48150b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i32_clamp:
48160b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i32_clamp:
48170b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i32_clamp:
48180b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i32_clamp:
48190b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
48200b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
48210b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i32_clamp:
48220b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i32_clamp:
48230b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i32_clamp:
48240b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i32_trap:
48250b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i32_trap:
48260b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i32_trap:
48270b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i32_trap:
48280b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
48290b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
48300b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i32_trap:
48310b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i32_trap:
48320b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i32_trap:
48330b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i32_trap:
48340b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
48350b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
48360b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i32_trap:
48370b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i32_trap:
48380b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i32_trap:
48390b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i32_zero:
48400b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i32_zero:
48410b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v4i32_zero:
48420b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i32_zero:
48430b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
48440b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
48450b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i32_zero:
48460b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i32_zero:
48470b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v4i32_zero:
48480b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i32_zero:
48490b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
48500b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
48510b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i32_zero:
48520b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i32_zero:
48530b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v4i32_zero:
48540b57cec5SDimitry Andric     Info.opc = getOpcForSurfaceInstr(Intrinsic);
48550b57cec5SDimitry Andric     Info.memVT = MVT::i32;
48560b57cec5SDimitry Andric     Info.ptrVal = nullptr;
48570b57cec5SDimitry Andric     Info.offset = 0;
48580b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
48598bcb0991SDimitry Andric     Info.align = Align(16);
48600b57cec5SDimitry Andric     return true;
48610b57cec5SDimitry Andric 
48620b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i64_clamp:
48630b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i64_clamp:
48640b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i64_clamp:
48650b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
48660b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i64_clamp:
48670b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i64_clamp:
48680b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i64_clamp:
48690b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
48700b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i64_clamp:
48710b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i64_clamp:
48720b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i64_trap:
48730b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i64_trap:
48740b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i64_trap:
48750b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
48760b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i64_trap:
48770b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i64_trap:
48780b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i64_trap:
48790b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
48800b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i64_trap:
48810b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i64_trap:
48820b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_i64_zero:
48830b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_v2i64_zero:
48840b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_i64_zero:
48850b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
48860b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_i64_zero:
48870b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_v2i64_zero:
48880b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_i64_zero:
48890b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
48900b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_i64_zero:
48910b57cec5SDimitry Andric   case Intrinsic::nvvm_suld_3d_v2i64_zero:
48920b57cec5SDimitry Andric     Info.opc = getOpcForSurfaceInstr(Intrinsic);
48930b57cec5SDimitry Andric     Info.memVT = MVT::i64;
48940b57cec5SDimitry Andric     Info.ptrVal = nullptr;
48950b57cec5SDimitry Andric     Info.offset = 0;
48960b57cec5SDimitry Andric     Info.flags = MachineMemOperand::MOLoad;
48978bcb0991SDimitry Andric     Info.align = Align(16);
48980b57cec5SDimitry Andric     return true;
48990b57cec5SDimitry Andric   }
49000b57cec5SDimitry Andric   return false;
49010b57cec5SDimitry Andric }
49020b57cec5SDimitry Andric 
490381ad6265SDimitry Andric /// getFunctionParamOptimizedAlign - since function arguments are passed via
490481ad6265SDimitry Andric /// .param space, we may want to increase their alignment in a way that
490581ad6265SDimitry Andric /// ensures that we can effectively vectorize their loads & stores. We can
490681ad6265SDimitry Andric /// increase alignment only if the function has internal or has private
490781ad6265SDimitry Andric /// linkage as for other linkage types callers may already rely on default
490881ad6265SDimitry Andric /// alignment. To allow using 128-bit vectorized loads/stores, this function
490981ad6265SDimitry Andric /// ensures that alignment is 16 or greater.
getFunctionParamOptimizedAlign(const Function * F,Type * ArgTy,const DataLayout & DL) const491081ad6265SDimitry Andric Align NVPTXTargetLowering::getFunctionParamOptimizedAlign(
491181ad6265SDimitry Andric     const Function *F, Type *ArgTy, const DataLayout &DL) const {
491281ad6265SDimitry Andric   const uint64_t ABITypeAlign = DL.getABITypeAlign(ArgTy).value();
491381ad6265SDimitry Andric 
491481ad6265SDimitry Andric   // If a function has linkage different from internal or private, we
4915bdd1243dSDimitry Andric   // must use default ABI alignment as external users rely on it. Same
4916bdd1243dSDimitry Andric   // for a function that may be called from a function pointer.
4917bdd1243dSDimitry Andric   if (!F || !F->hasLocalLinkage() ||
4918bdd1243dSDimitry Andric       F->hasAddressTaken(/*Users=*/nullptr,
4919bdd1243dSDimitry Andric                          /*IgnoreCallbackUses=*/false,
4920bdd1243dSDimitry Andric                          /*IgnoreAssumeLikeCalls=*/true,
4921bdd1243dSDimitry Andric                          /*IgnoreLLVMUsed=*/true))
492281ad6265SDimitry Andric     return Align(ABITypeAlign);
492381ad6265SDimitry Andric 
492481ad6265SDimitry Andric   assert(!isKernelFunction(*F) && "Expect kernels to have non-local linkage");
492581ad6265SDimitry Andric   return Align(std::max(uint64_t(16), ABITypeAlign));
492681ad6265SDimitry Andric }
492781ad6265SDimitry Andric 
4928bdd1243dSDimitry Andric /// Helper for computing alignment of a device function byval parameter.
getFunctionByValParamAlign(const Function * F,Type * ArgTy,Align InitialAlign,const DataLayout & DL) const4929bdd1243dSDimitry Andric Align NVPTXTargetLowering::getFunctionByValParamAlign(
4930bdd1243dSDimitry Andric     const Function *F, Type *ArgTy, Align InitialAlign,
4931bdd1243dSDimitry Andric     const DataLayout &DL) const {
4932bdd1243dSDimitry Andric   Align ArgAlign = InitialAlign;
4933bdd1243dSDimitry Andric   // Try to increase alignment to enhance vectorization options.
4934bdd1243dSDimitry Andric   if (F)
4935bdd1243dSDimitry Andric     ArgAlign = std::max(ArgAlign, getFunctionParamOptimizedAlign(F, ArgTy, DL));
4936bdd1243dSDimitry Andric 
493706c3fb27SDimitry Andric   // Old ptx versions have a bug. When PTX code takes address of
4938bdd1243dSDimitry Andric   // byval parameter with alignment < 4, ptxas generates code to
4939bdd1243dSDimitry Andric   // spill argument into memory. Alas on sm_50+ ptxas generates
4940bdd1243dSDimitry Andric   // SASS code that fails with misaligned access. To work around
4941bdd1243dSDimitry Andric   // the problem, make sure that we align byval parameters by at
494206c3fb27SDimitry Andric   // least 4. This bug seems to be fixed at least starting from
494306c3fb27SDimitry Andric   // ptxas > 9.0.
494406c3fb27SDimitry Andric   // TODO: remove this after verifying the bug is not reproduced
494506c3fb27SDimitry Andric   // on non-deprecated ptxas versions.
494606c3fb27SDimitry Andric   if (ForceMinByValParamAlign)
4947bdd1243dSDimitry Andric     ArgAlign = std::max(ArgAlign, Align(4));
4948bdd1243dSDimitry Andric 
4949bdd1243dSDimitry Andric   return ArgAlign;
4950bdd1243dSDimitry Andric }
4951bdd1243dSDimitry Andric 
495206c3fb27SDimitry Andric // Helper for getting a function parameter name. Name is composed from
495306c3fb27SDimitry Andric // its index and the function name. Negative index corresponds to special
495406c3fb27SDimitry Andric // parameter (unsized array) used for passing variable arguments.
getParamName(const Function * F,int Idx) const495506c3fb27SDimitry Andric std::string NVPTXTargetLowering::getParamName(const Function *F,
495606c3fb27SDimitry Andric                                               int Idx) const {
495706c3fb27SDimitry Andric   std::string ParamName;
495806c3fb27SDimitry Andric   raw_string_ostream ParamStr(ParamName);
495906c3fb27SDimitry Andric 
496006c3fb27SDimitry Andric   ParamStr << getTargetMachine().getSymbol(F)->getName();
496106c3fb27SDimitry Andric   if (Idx < 0)
496206c3fb27SDimitry Andric     ParamStr << "_vararg";
496306c3fb27SDimitry Andric   else
496406c3fb27SDimitry Andric     ParamStr << "_param_" << Idx;
496506c3fb27SDimitry Andric 
496606c3fb27SDimitry Andric   return ParamName;
496706c3fb27SDimitry Andric }
496806c3fb27SDimitry Andric 
49690b57cec5SDimitry Andric /// isLegalAddressingMode - Return true if the addressing mode represented
49700b57cec5SDimitry Andric /// by AM is legal for this target, for a load/store of the specified type.
49710b57cec5SDimitry Andric /// Used to guide target specific optimizations, like loop strength reduction
49720b57cec5SDimitry Andric /// (LoopStrengthReduce.cpp) and memory optimization for address mode
49730b57cec5SDimitry Andric /// (CodeGenPrepare.cpp)
isLegalAddressingMode(const DataLayout & DL,const AddrMode & AM,Type * Ty,unsigned AS,Instruction * I) const49740b57cec5SDimitry Andric bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
49750b57cec5SDimitry Andric                                                 const AddrMode &AM, Type *Ty,
49760b57cec5SDimitry Andric                                                 unsigned AS, Instruction *I) const {
49770b57cec5SDimitry Andric   // AddrMode - This represents an addressing mode of:
49780b57cec5SDimitry Andric   //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
49790b57cec5SDimitry Andric   //
49800b57cec5SDimitry Andric   // The legal address modes are
49810b57cec5SDimitry Andric   // - [avar]
49820b57cec5SDimitry Andric   // - [areg]
49830b57cec5SDimitry Andric   // - [areg+immoff]
49840b57cec5SDimitry Andric   // - [immAddr]
49850b57cec5SDimitry Andric 
49860b57cec5SDimitry Andric   if (AM.BaseGV) {
49870b57cec5SDimitry Andric     return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale;
49880b57cec5SDimitry Andric   }
49890b57cec5SDimitry Andric 
49900b57cec5SDimitry Andric   switch (AM.Scale) {
49910b57cec5SDimitry Andric   case 0: // "r", "r+i" or "i" is allowed
49920b57cec5SDimitry Andric     break;
49930b57cec5SDimitry Andric   case 1:
49940b57cec5SDimitry Andric     if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
49950b57cec5SDimitry Andric       return false;
49960b57cec5SDimitry Andric     // Otherwise we have r+i.
49970b57cec5SDimitry Andric     break;
49980b57cec5SDimitry Andric   default:
49990b57cec5SDimitry Andric     // No scale > 1 is allowed
50000b57cec5SDimitry Andric     return false;
50010b57cec5SDimitry Andric   }
50020b57cec5SDimitry Andric   return true;
50030b57cec5SDimitry Andric }
50040b57cec5SDimitry Andric 
50050b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
50060b57cec5SDimitry Andric //                         NVPTX Inline Assembly Support
50070b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
50080b57cec5SDimitry Andric 
50090b57cec5SDimitry Andric /// getConstraintType - Given a constraint letter, return the type of
50100b57cec5SDimitry Andric /// constraint it is for this target.
50110b57cec5SDimitry Andric NVPTXTargetLowering::ConstraintType
getConstraintType(StringRef Constraint) const50120b57cec5SDimitry Andric NVPTXTargetLowering::getConstraintType(StringRef Constraint) const {
50130b57cec5SDimitry Andric   if (Constraint.size() == 1) {
50140b57cec5SDimitry Andric     switch (Constraint[0]) {
50150b57cec5SDimitry Andric     default:
50160b57cec5SDimitry Andric       break;
50170b57cec5SDimitry Andric     case 'b':
50180b57cec5SDimitry Andric     case 'r':
50190b57cec5SDimitry Andric     case 'h':
50200b57cec5SDimitry Andric     case 'c':
50210b57cec5SDimitry Andric     case 'l':
50220b57cec5SDimitry Andric     case 'f':
50230b57cec5SDimitry Andric     case 'd':
50240b57cec5SDimitry Andric     case '0':
50250b57cec5SDimitry Andric     case 'N':
50260b57cec5SDimitry Andric       return C_RegisterClass;
50270b57cec5SDimitry Andric     }
50280b57cec5SDimitry Andric   }
50290b57cec5SDimitry Andric   return TargetLowering::getConstraintType(Constraint);
50300b57cec5SDimitry Andric }
50310b57cec5SDimitry Andric 
50320b57cec5SDimitry Andric std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo * TRI,StringRef Constraint,MVT VT) const50330b57cec5SDimitry Andric NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
50340b57cec5SDimitry Andric                                                   StringRef Constraint,
50350b57cec5SDimitry Andric                                                   MVT VT) const {
50360b57cec5SDimitry Andric   if (Constraint.size() == 1) {
50370b57cec5SDimitry Andric     switch (Constraint[0]) {
50380b57cec5SDimitry Andric     case 'b':
50390b57cec5SDimitry Andric       return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
50400b57cec5SDimitry Andric     case 'c':
50410b57cec5SDimitry Andric       return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
50420b57cec5SDimitry Andric     case 'h':
50430b57cec5SDimitry Andric       return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
50440b57cec5SDimitry Andric     case 'r':
50450b57cec5SDimitry Andric       return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
50460b57cec5SDimitry Andric     case 'l':
50470b57cec5SDimitry Andric     case 'N':
50480b57cec5SDimitry Andric       return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
50490b57cec5SDimitry Andric     case 'f':
50500b57cec5SDimitry Andric       return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
50510b57cec5SDimitry Andric     case 'd':
50520b57cec5SDimitry Andric       return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
50530b57cec5SDimitry Andric     }
50540b57cec5SDimitry Andric   }
50550b57cec5SDimitry Andric   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
50560b57cec5SDimitry Andric }
50570b57cec5SDimitry Andric 
50580b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
50590b57cec5SDimitry Andric //                         NVPTX DAG Combining
50600b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
50610b57cec5SDimitry Andric 
allowFMA(MachineFunction & MF,CodeGenOptLevel OptLevel) const50620b57cec5SDimitry Andric bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
50635f757f3fSDimitry Andric                                    CodeGenOptLevel OptLevel) const {
50640b57cec5SDimitry Andric   // Always honor command-line argument
50650b57cec5SDimitry Andric   if (FMAContractLevelOpt.getNumOccurrences() > 0)
50660b57cec5SDimitry Andric     return FMAContractLevelOpt > 0;
50670b57cec5SDimitry Andric 
50680b57cec5SDimitry Andric   // Do not contract if we're not optimizing the code.
50695f757f3fSDimitry Andric   if (OptLevel == CodeGenOptLevel::None)
50700b57cec5SDimitry Andric     return false;
50710b57cec5SDimitry Andric 
50720b57cec5SDimitry Andric   // Honor TargetOptions flags that explicitly say fusion is okay.
50730b57cec5SDimitry Andric   if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast)
50740b57cec5SDimitry Andric     return true;
50750b57cec5SDimitry Andric 
50760b57cec5SDimitry Andric   return allowUnsafeFPMath(MF);
50770b57cec5SDimitry Andric }
50780b57cec5SDimitry Andric 
allowUnsafeFPMath(MachineFunction & MF) const50790b57cec5SDimitry Andric bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const {
50800b57cec5SDimitry Andric   // Honor TargetOptions flags that explicitly say unsafe math is okay.
50810b57cec5SDimitry Andric   if (MF.getTarget().Options.UnsafeFPMath)
50820b57cec5SDimitry Andric     return true;
50830b57cec5SDimitry Andric 
50840b57cec5SDimitry Andric   // Allow unsafe math if unsafe-fp-math attribute explicitly says so.
50850b57cec5SDimitry Andric   const Function &F = MF.getFunction();
5086fe6060f1SDimitry Andric   return F.getFnAttribute("unsafe-fp-math").getValueAsBool();
50870b57cec5SDimitry Andric }
50880b57cec5SDimitry Andric 
50890b57cec5SDimitry Andric /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
50900b57cec5SDimitry Andric /// operands N0 and N1.  This is a helper for PerformADDCombine that is
50910b57cec5SDimitry Andric /// called with the default operands, and if that fails, with commuted
50920b57cec5SDimitry Andric /// operands.
PerformADDCombineWithOperands(SDNode * N,SDValue N0,SDValue N1,TargetLowering::DAGCombinerInfo & DCI,const NVPTXSubtarget & Subtarget,CodeGenOptLevel OptLevel)50935f757f3fSDimitry Andric static SDValue PerformADDCombineWithOperands(
50945f757f3fSDimitry Andric     SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI,
50955f757f3fSDimitry Andric     const NVPTXSubtarget &Subtarget, CodeGenOptLevel OptLevel) {
50960b57cec5SDimitry Andric   SelectionDAG  &DAG = DCI.DAG;
50970b57cec5SDimitry Andric   // Skip non-integer, non-scalar case
50980b57cec5SDimitry Andric   EVT VT=N0.getValueType();
50990b57cec5SDimitry Andric   if (VT.isVector())
51000b57cec5SDimitry Andric     return SDValue();
51010b57cec5SDimitry Andric 
51020b57cec5SDimitry Andric   // fold (add (mul a, b), c) -> (mad a, b, c)
51030b57cec5SDimitry Andric   //
51040b57cec5SDimitry Andric   if (N0.getOpcode() == ISD::MUL) {
51050b57cec5SDimitry Andric     assert (VT.isInteger());
51060b57cec5SDimitry Andric     // For integer:
51070b57cec5SDimitry Andric     // Since integer multiply-add costs the same as integer multiply
51080b57cec5SDimitry Andric     // but is more costly than integer add, do the fusion only when
51090b57cec5SDimitry Andric     // the mul is only used in the add.
51105f757f3fSDimitry Andric     if (OptLevel == CodeGenOptLevel::None || VT != MVT::i32 ||
51110b57cec5SDimitry Andric         !N0.getNode()->hasOneUse())
51120b57cec5SDimitry Andric       return SDValue();
51130b57cec5SDimitry Andric 
51140b57cec5SDimitry Andric     // Do the folding
51150b57cec5SDimitry Andric     return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
51160b57cec5SDimitry Andric                        N0.getOperand(0), N0.getOperand(1), N1);
51170b57cec5SDimitry Andric   }
51180b57cec5SDimitry Andric   else if (N0.getOpcode() == ISD::FMUL) {
51190b57cec5SDimitry Andric     if (VT == MVT::f32 || VT == MVT::f64) {
51200b57cec5SDimitry Andric       const auto *TLI = static_cast<const NVPTXTargetLowering *>(
51210b57cec5SDimitry Andric           &DAG.getTargetLoweringInfo());
51220b57cec5SDimitry Andric       if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
51230b57cec5SDimitry Andric         return SDValue();
51240b57cec5SDimitry Andric 
51250b57cec5SDimitry Andric       // For floating point:
51260b57cec5SDimitry Andric       // Do the fusion only when the mul has less than 5 uses and all
51270b57cec5SDimitry Andric       // are add.
51280b57cec5SDimitry Andric       // The heuristic is that if a use is not an add, then that use
51290b57cec5SDimitry Andric       // cannot be fused into fma, therefore mul is still needed anyway.
51300b57cec5SDimitry Andric       // If there are more than 4 uses, even if they are all add, fusing
51310b57cec5SDimitry Andric       // them will increase register pressue.
51320b57cec5SDimitry Andric       //
51330b57cec5SDimitry Andric       int numUses = 0;
51340b57cec5SDimitry Andric       int nonAddCount = 0;
5135349cc55cSDimitry Andric       for (const SDNode *User : N0.getNode()->uses()) {
51360b57cec5SDimitry Andric         numUses++;
51370b57cec5SDimitry Andric         if (User->getOpcode() != ISD::FADD)
51380b57cec5SDimitry Andric           ++nonAddCount;
51390b57cec5SDimitry Andric       }
51400b57cec5SDimitry Andric       if (numUses >= 5)
51410b57cec5SDimitry Andric         return SDValue();
51420b57cec5SDimitry Andric       if (nonAddCount) {
51430b57cec5SDimitry Andric         int orderNo = N->getIROrder();
51440b57cec5SDimitry Andric         int orderNo2 = N0.getNode()->getIROrder();
51450b57cec5SDimitry Andric         // simple heuristics here for considering potential register
51460b57cec5SDimitry Andric         // pressure, the logics here is that the differnce are used
51470b57cec5SDimitry Andric         // to measure the distance between def and use, the longer distance
51480b57cec5SDimitry Andric         // more likely cause register pressure.
51490b57cec5SDimitry Andric         if (orderNo - orderNo2 < 500)
51500b57cec5SDimitry Andric           return SDValue();
51510b57cec5SDimitry Andric 
51520b57cec5SDimitry Andric         // Now, check if at least one of the FMUL's operands is live beyond the node N,
51530b57cec5SDimitry Andric         // which guarantees that the FMA will not increase register pressure at node N.
51540b57cec5SDimitry Andric         bool opIsLive = false;
51550b57cec5SDimitry Andric         const SDNode *left = N0.getOperand(0).getNode();
51560b57cec5SDimitry Andric         const SDNode *right = N0.getOperand(1).getNode();
51570b57cec5SDimitry Andric 
51580b57cec5SDimitry Andric         if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
51590b57cec5SDimitry Andric           opIsLive = true;
51600b57cec5SDimitry Andric 
51610b57cec5SDimitry Andric         if (!opIsLive)
5162349cc55cSDimitry Andric           for (const SDNode *User : left->uses()) {
51630b57cec5SDimitry Andric             int orderNo3 = User->getIROrder();
51640b57cec5SDimitry Andric             if (orderNo3 > orderNo) {
51650b57cec5SDimitry Andric               opIsLive = true;
51660b57cec5SDimitry Andric               break;
51670b57cec5SDimitry Andric             }
51680b57cec5SDimitry Andric           }
51690b57cec5SDimitry Andric 
51700b57cec5SDimitry Andric         if (!opIsLive)
5171349cc55cSDimitry Andric           for (const SDNode *User : right->uses()) {
51720b57cec5SDimitry Andric             int orderNo3 = User->getIROrder();
51730b57cec5SDimitry Andric             if (orderNo3 > orderNo) {
51740b57cec5SDimitry Andric               opIsLive = true;
51750b57cec5SDimitry Andric               break;
51760b57cec5SDimitry Andric             }
51770b57cec5SDimitry Andric           }
51780b57cec5SDimitry Andric 
51790b57cec5SDimitry Andric         if (!opIsLive)
51800b57cec5SDimitry Andric           return SDValue();
51810b57cec5SDimitry Andric       }
51820b57cec5SDimitry Andric 
51830b57cec5SDimitry Andric       return DAG.getNode(ISD::FMA, SDLoc(N), VT,
51840b57cec5SDimitry Andric                          N0.getOperand(0), N0.getOperand(1), N1);
51850b57cec5SDimitry Andric     }
51860b57cec5SDimitry Andric   }
51870b57cec5SDimitry Andric 
51880b57cec5SDimitry Andric   return SDValue();
51890b57cec5SDimitry Andric }
51900b57cec5SDimitry Andric 
PerformStoreRetvalCombine(SDNode * N)519181ad6265SDimitry Andric static SDValue PerformStoreRetvalCombine(SDNode *N) {
519281ad6265SDimitry Andric   // Operands from the 2nd to the last one are the values to be stored
519381ad6265SDimitry Andric   for (std::size_t I = 2, OpsCount = N->ops().size(); I != OpsCount; ++I)
519481ad6265SDimitry Andric     if (!N->getOperand(I).isUndef())
519581ad6265SDimitry Andric       return SDValue();
519681ad6265SDimitry Andric 
519781ad6265SDimitry Andric   // Operand 0 is the previous value in the chain. Cannot return EntryToken
519881ad6265SDimitry Andric   // as the previous value will become unused and eliminated later.
519981ad6265SDimitry Andric   return N->getOperand(0);
520081ad6265SDimitry Andric }
520181ad6265SDimitry Andric 
52020b57cec5SDimitry Andric /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
52030b57cec5SDimitry Andric ///
PerformADDCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,const NVPTXSubtarget & Subtarget,CodeGenOptLevel OptLevel)52040b57cec5SDimitry Andric static SDValue PerformADDCombine(SDNode *N,
52050b57cec5SDimitry Andric                                  TargetLowering::DAGCombinerInfo &DCI,
52060b57cec5SDimitry Andric                                  const NVPTXSubtarget &Subtarget,
52075f757f3fSDimitry Andric                                  CodeGenOptLevel OptLevel) {
52080b57cec5SDimitry Andric   SDValue N0 = N->getOperand(0);
52090b57cec5SDimitry Andric   SDValue N1 = N->getOperand(1);
52100b57cec5SDimitry Andric 
52110b57cec5SDimitry Andric   // First try with the default operand order.
52120b57cec5SDimitry Andric   if (SDValue Result =
52130b57cec5SDimitry Andric           PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget, OptLevel))
52140b57cec5SDimitry Andric     return Result;
52150b57cec5SDimitry Andric 
52160b57cec5SDimitry Andric   // If that didn't work, try again with the operands commuted.
52170b57cec5SDimitry Andric   return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel);
52180b57cec5SDimitry Andric }
52190b57cec5SDimitry Andric 
PerformANDCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI)52200b57cec5SDimitry Andric static SDValue PerformANDCombine(SDNode *N,
52210b57cec5SDimitry Andric                                  TargetLowering::DAGCombinerInfo &DCI) {
52220b57cec5SDimitry Andric   // The type legalizer turns a vector load of i8 values into a zextload to i16
52230b57cec5SDimitry Andric   // registers, optionally ANY_EXTENDs it (if target type is integer),
52240b57cec5SDimitry Andric   // and ANDs off the high 8 bits. Since we turn this load into a
52250b57cec5SDimitry Andric   // target-specific DAG node, the DAG combiner fails to eliminate these AND
52260b57cec5SDimitry Andric   // nodes. Do that here.
52270b57cec5SDimitry Andric   SDValue Val = N->getOperand(0);
52280b57cec5SDimitry Andric   SDValue Mask = N->getOperand(1);
52290b57cec5SDimitry Andric 
52300b57cec5SDimitry Andric   if (isa<ConstantSDNode>(Val)) {
52310b57cec5SDimitry Andric     std::swap(Val, Mask);
52320b57cec5SDimitry Andric   }
52330b57cec5SDimitry Andric 
52340b57cec5SDimitry Andric   SDValue AExt;
52355f757f3fSDimitry Andric 
52365f757f3fSDimitry Andric   // Convert BFE-> truncate i16 -> and 255
52375f757f3fSDimitry Andric   // To just BFE-> truncate i16, as the value already has all the bits in the
52385f757f3fSDimitry Andric   // right places.
52395f757f3fSDimitry Andric   if (Val.getOpcode() == ISD::TRUNCATE) {
52405f757f3fSDimitry Andric     SDValue BFE = Val.getOperand(0);
52415f757f3fSDimitry Andric     if (BFE.getOpcode() != NVPTXISD::BFE)
52425f757f3fSDimitry Andric       return SDValue();
52435f757f3fSDimitry Andric 
52445f757f3fSDimitry Andric     ConstantSDNode *BFEBits = dyn_cast<ConstantSDNode>(BFE.getOperand(0));
52455f757f3fSDimitry Andric     if (!BFEBits)
52465f757f3fSDimitry Andric       return SDValue();
52475f757f3fSDimitry Andric     uint64_t BFEBitsVal = BFEBits->getZExtValue();
52485f757f3fSDimitry Andric 
52495f757f3fSDimitry Andric     ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
52505f757f3fSDimitry Andric     if (!MaskCnst) {
52515f757f3fSDimitry Andric       // Not an AND with a constant
52525f757f3fSDimitry Andric       return SDValue();
52535f757f3fSDimitry Andric     }
52545f757f3fSDimitry Andric     uint64_t MaskVal = MaskCnst->getZExtValue();
52555f757f3fSDimitry Andric 
52565f757f3fSDimitry Andric     if (MaskVal != (uint64_t(1) << BFEBitsVal) - 1)
52575f757f3fSDimitry Andric       return SDValue();
52585f757f3fSDimitry Andric     // If we get here, the AND is unnecessary.  Just replace it with the trunc
52595f757f3fSDimitry Andric     DCI.CombineTo(N, Val, false);
52605f757f3fSDimitry Andric   }
52610b57cec5SDimitry Andric   // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
52620b57cec5SDimitry Andric   if (Val.getOpcode() == ISD::ANY_EXTEND) {
52630b57cec5SDimitry Andric     AExt = Val;
52640b57cec5SDimitry Andric     Val = Val->getOperand(0);
52650b57cec5SDimitry Andric   }
52660b57cec5SDimitry Andric 
52670b57cec5SDimitry Andric   if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) {
52680b57cec5SDimitry Andric     Val = Val->getOperand(0);
52690b57cec5SDimitry Andric   }
52700b57cec5SDimitry Andric 
52710b57cec5SDimitry Andric   if (Val->getOpcode() == NVPTXISD::LoadV2 ||
52720b57cec5SDimitry Andric       Val->getOpcode() == NVPTXISD::LoadV4) {
52730b57cec5SDimitry Andric     ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
52740b57cec5SDimitry Andric     if (!MaskCnst) {
52750b57cec5SDimitry Andric       // Not an AND with a constant
52760b57cec5SDimitry Andric       return SDValue();
52770b57cec5SDimitry Andric     }
52780b57cec5SDimitry Andric 
52790b57cec5SDimitry Andric     uint64_t MaskVal = MaskCnst->getZExtValue();
52800b57cec5SDimitry Andric     if (MaskVal != 0xff) {
52810b57cec5SDimitry Andric       // Not an AND that chops off top 8 bits
52820b57cec5SDimitry Andric       return SDValue();
52830b57cec5SDimitry Andric     }
52840b57cec5SDimitry Andric 
52850b57cec5SDimitry Andric     MemSDNode *Mem = dyn_cast<MemSDNode>(Val);
52860b57cec5SDimitry Andric     if (!Mem) {
52870b57cec5SDimitry Andric       // Not a MemSDNode?!?
52880b57cec5SDimitry Andric       return SDValue();
52890b57cec5SDimitry Andric     }
52900b57cec5SDimitry Andric 
52910b57cec5SDimitry Andric     EVT MemVT = Mem->getMemoryVT();
52920b57cec5SDimitry Andric     if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) {
52930b57cec5SDimitry Andric       // We only handle the i8 case
52940b57cec5SDimitry Andric       return SDValue();
52950b57cec5SDimitry Andric     }
52960b57cec5SDimitry Andric 
52977a6dacacSDimitry Andric     unsigned ExtType = Val->getConstantOperandVal(Val->getNumOperands() - 1);
52980b57cec5SDimitry Andric     if (ExtType == ISD::SEXTLOAD) {
52990b57cec5SDimitry Andric       // If for some reason the load is a sextload, the and is needed to zero
53000b57cec5SDimitry Andric       // out the high 8 bits
53010b57cec5SDimitry Andric       return SDValue();
53020b57cec5SDimitry Andric     }
53030b57cec5SDimitry Andric 
53040b57cec5SDimitry Andric     bool AddTo = false;
53050b57cec5SDimitry Andric     if (AExt.getNode() != nullptr) {
53060b57cec5SDimitry Andric       // Re-insert the ext as a zext.
53070b57cec5SDimitry Andric       Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
53080b57cec5SDimitry Andric                             AExt.getValueType(), Val);
53090b57cec5SDimitry Andric       AddTo = true;
53100b57cec5SDimitry Andric     }
53110b57cec5SDimitry Andric 
53120b57cec5SDimitry Andric     // If we get here, the AND is unnecessary.  Just replace it with the load
53130b57cec5SDimitry Andric     DCI.CombineTo(N, Val, AddTo);
53140b57cec5SDimitry Andric   }
53150b57cec5SDimitry Andric 
53160b57cec5SDimitry Andric   return SDValue();
53170b57cec5SDimitry Andric }
53180b57cec5SDimitry Andric 
PerformREMCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,CodeGenOptLevel OptLevel)53190b57cec5SDimitry Andric static SDValue PerformREMCombine(SDNode *N,
53200b57cec5SDimitry Andric                                  TargetLowering::DAGCombinerInfo &DCI,
53215f757f3fSDimitry Andric                                  CodeGenOptLevel OptLevel) {
53220b57cec5SDimitry Andric   assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM);
53230b57cec5SDimitry Andric 
53240b57cec5SDimitry Andric   // Don't do anything at less than -O2.
53255f757f3fSDimitry Andric   if (OptLevel < CodeGenOptLevel::Default)
53260b57cec5SDimitry Andric     return SDValue();
53270b57cec5SDimitry Andric 
53280b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
53290b57cec5SDimitry Andric   SDLoc DL(N);
53300b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
53310b57cec5SDimitry Andric   bool IsSigned = N->getOpcode() == ISD::SREM;
53320b57cec5SDimitry Andric   unsigned DivOpc = IsSigned ? ISD::SDIV : ISD::UDIV;
53330b57cec5SDimitry Andric 
53340b57cec5SDimitry Andric   const SDValue &Num = N->getOperand(0);
53350b57cec5SDimitry Andric   const SDValue &Den = N->getOperand(1);
53360b57cec5SDimitry Andric 
53370b57cec5SDimitry Andric   for (const SDNode *U : Num->uses()) {
53380b57cec5SDimitry Andric     if (U->getOpcode() == DivOpc && U->getOperand(0) == Num &&
53390b57cec5SDimitry Andric         U->getOperand(1) == Den) {
53400b57cec5SDimitry Andric       // Num % Den -> Num - (Num / Den) * Den
53410b57cec5SDimitry Andric       return DAG.getNode(ISD::SUB, DL, VT, Num,
53420b57cec5SDimitry Andric                          DAG.getNode(ISD::MUL, DL, VT,
53430b57cec5SDimitry Andric                                      DAG.getNode(DivOpc, DL, VT, Num, Den),
53440b57cec5SDimitry Andric                                      Den));
53450b57cec5SDimitry Andric     }
53460b57cec5SDimitry Andric   }
53470b57cec5SDimitry Andric   return SDValue();
53480b57cec5SDimitry Andric }
53490b57cec5SDimitry Andric 
/// Signedness of a multiply operand as determined by the mul.wide demotion
/// helpers below.
enum OperandSignedness {
  Signed = 0,   ///< Operand comes from a sign extension.
  Unsigned = 1, ///< Operand comes from a zero extension.
  Unknown = 2   ///< Signedness could not be determined.
};
53550b57cec5SDimitry Andric 
53560b57cec5SDimitry Andric /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
53570b57cec5SDimitry Andric /// that can be demoted to \p OptSize bits without loss of information. The
53580b57cec5SDimitry Andric /// signedness of the operand, if determinable, is placed in \p S.
IsMulWideOperandDemotable(SDValue Op,unsigned OptSize,OperandSignedness & S)53590b57cec5SDimitry Andric static bool IsMulWideOperandDemotable(SDValue Op,
53600b57cec5SDimitry Andric                                       unsigned OptSize,
53610b57cec5SDimitry Andric                                       OperandSignedness &S) {
53620b57cec5SDimitry Andric   S = Unknown;
53630b57cec5SDimitry Andric 
53640b57cec5SDimitry Andric   if (Op.getOpcode() == ISD::SIGN_EXTEND ||
53650b57cec5SDimitry Andric       Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
53660b57cec5SDimitry Andric     EVT OrigVT = Op.getOperand(0).getValueType();
5367e8d8bef9SDimitry Andric     if (OrigVT.getFixedSizeInBits() <= OptSize) {
53680b57cec5SDimitry Andric       S = Signed;
53690b57cec5SDimitry Andric       return true;
53700b57cec5SDimitry Andric     }
53710b57cec5SDimitry Andric   } else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
53720b57cec5SDimitry Andric     EVT OrigVT = Op.getOperand(0).getValueType();
5373e8d8bef9SDimitry Andric     if (OrigVT.getFixedSizeInBits() <= OptSize) {
53740b57cec5SDimitry Andric       S = Unsigned;
53750b57cec5SDimitry Andric       return true;
53760b57cec5SDimitry Andric     }
53770b57cec5SDimitry Andric   }
53780b57cec5SDimitry Andric 
53790b57cec5SDimitry Andric   return false;
53800b57cec5SDimitry Andric }
53810b57cec5SDimitry Andric 
53820b57cec5SDimitry Andric /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
53830b57cec5SDimitry Andric /// be demoted to \p OptSize bits without loss of information. If the operands
53840b57cec5SDimitry Andric /// contain a constant, it should appear as the RHS operand. The signedness of
53850b57cec5SDimitry Andric /// the operands is placed in \p IsSigned.
AreMulWideOperandsDemotable(SDValue LHS,SDValue RHS,unsigned OptSize,bool & IsSigned)53860b57cec5SDimitry Andric static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
53870b57cec5SDimitry Andric                                         unsigned OptSize,
53880b57cec5SDimitry Andric                                         bool &IsSigned) {
53890b57cec5SDimitry Andric   OperandSignedness LHSSign;
53900b57cec5SDimitry Andric 
53910b57cec5SDimitry Andric   // The LHS operand must be a demotable op
53920b57cec5SDimitry Andric   if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign))
53930b57cec5SDimitry Andric     return false;
53940b57cec5SDimitry Andric 
53950b57cec5SDimitry Andric   // We should have been able to determine the signedness from the LHS
53960b57cec5SDimitry Andric   if (LHSSign == Unknown)
53970b57cec5SDimitry Andric     return false;
53980b57cec5SDimitry Andric 
53990b57cec5SDimitry Andric   IsSigned = (LHSSign == Signed);
54000b57cec5SDimitry Andric 
54010b57cec5SDimitry Andric   // The RHS can be a demotable op or a constant
54020b57cec5SDimitry Andric   if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
54030b57cec5SDimitry Andric     const APInt &Val = CI->getAPIntValue();
54040b57cec5SDimitry Andric     if (LHSSign == Unsigned) {
54050b57cec5SDimitry Andric       return Val.isIntN(OptSize);
54060b57cec5SDimitry Andric     } else {
54070b57cec5SDimitry Andric       return Val.isSignedIntN(OptSize);
54080b57cec5SDimitry Andric     }
54090b57cec5SDimitry Andric   } else {
54100b57cec5SDimitry Andric     OperandSignedness RHSSign;
54110b57cec5SDimitry Andric     if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
54120b57cec5SDimitry Andric       return false;
54130b57cec5SDimitry Andric 
54140b57cec5SDimitry Andric     return LHSSign == RHSSign;
54150b57cec5SDimitry Andric   }
54160b57cec5SDimitry Andric }
54170b57cec5SDimitry Andric 
54180b57cec5SDimitry Andric /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
54190b57cec5SDimitry Andric /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
54200b57cec5SDimitry Andric /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift
54210b57cec5SDimitry Andric /// amount.
TryMULWIDECombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI)54220b57cec5SDimitry Andric static SDValue TryMULWIDECombine(SDNode *N,
54230b57cec5SDimitry Andric                                  TargetLowering::DAGCombinerInfo &DCI) {
54240b57cec5SDimitry Andric   EVT MulType = N->getValueType(0);
54250b57cec5SDimitry Andric   if (MulType != MVT::i32 && MulType != MVT::i64) {
54260b57cec5SDimitry Andric     return SDValue();
54270b57cec5SDimitry Andric   }
54280b57cec5SDimitry Andric 
54290b57cec5SDimitry Andric   SDLoc DL(N);
54300b57cec5SDimitry Andric   unsigned OptSize = MulType.getSizeInBits() >> 1;
54310b57cec5SDimitry Andric   SDValue LHS = N->getOperand(0);
54320b57cec5SDimitry Andric   SDValue RHS = N->getOperand(1);
54330b57cec5SDimitry Andric 
54340b57cec5SDimitry Andric   // Canonicalize the multiply so the constant (if any) is on the right
54350b57cec5SDimitry Andric   if (N->getOpcode() == ISD::MUL) {
54360b57cec5SDimitry Andric     if (isa<ConstantSDNode>(LHS)) {
54370b57cec5SDimitry Andric       std::swap(LHS, RHS);
54380b57cec5SDimitry Andric     }
54390b57cec5SDimitry Andric   }
54400b57cec5SDimitry Andric 
54410b57cec5SDimitry Andric   // If we have a SHL, determine the actual multiply amount
54420b57cec5SDimitry Andric   if (N->getOpcode() == ISD::SHL) {
54430b57cec5SDimitry Andric     ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS);
54440b57cec5SDimitry Andric     if (!ShlRHS) {
54450b57cec5SDimitry Andric       return SDValue();
54460b57cec5SDimitry Andric     }
54470b57cec5SDimitry Andric 
54480b57cec5SDimitry Andric     APInt ShiftAmt = ShlRHS->getAPIntValue();
54490b57cec5SDimitry Andric     unsigned BitWidth = MulType.getSizeInBits();
54500b57cec5SDimitry Andric     if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) {
54510b57cec5SDimitry Andric       APInt MulVal = APInt(BitWidth, 1) << ShiftAmt;
54520b57cec5SDimitry Andric       RHS = DCI.DAG.getConstant(MulVal, DL, MulType);
54530b57cec5SDimitry Andric     } else {
54540b57cec5SDimitry Andric       return SDValue();
54550b57cec5SDimitry Andric     }
54560b57cec5SDimitry Andric   }
54570b57cec5SDimitry Andric 
54580b57cec5SDimitry Andric   bool Signed;
54590b57cec5SDimitry Andric   // Verify that our operands are demotable
54600b57cec5SDimitry Andric   if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) {
54610b57cec5SDimitry Andric     return SDValue();
54620b57cec5SDimitry Andric   }
54630b57cec5SDimitry Andric 
54640b57cec5SDimitry Andric   EVT DemotedVT;
54650b57cec5SDimitry Andric   if (MulType == MVT::i32) {
54660b57cec5SDimitry Andric     DemotedVT = MVT::i16;
54670b57cec5SDimitry Andric   } else {
54680b57cec5SDimitry Andric     DemotedVT = MVT::i32;
54690b57cec5SDimitry Andric   }
54700b57cec5SDimitry Andric 
54710b57cec5SDimitry Andric   // Truncate the operands to the correct size. Note that these are just for
54720b57cec5SDimitry Andric   // type consistency and will (likely) be eliminated in later phases.
54730b57cec5SDimitry Andric   SDValue TruncLHS =
54740b57cec5SDimitry Andric     DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS);
54750b57cec5SDimitry Andric   SDValue TruncRHS =
54760b57cec5SDimitry Andric     DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS);
54770b57cec5SDimitry Andric 
54780b57cec5SDimitry Andric   unsigned Opc;
54790b57cec5SDimitry Andric   if (Signed) {
54800b57cec5SDimitry Andric     Opc = NVPTXISD::MUL_WIDE_SIGNED;
54810b57cec5SDimitry Andric   } else {
54820b57cec5SDimitry Andric     Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
54830b57cec5SDimitry Andric   }
54840b57cec5SDimitry Andric 
54850b57cec5SDimitry Andric   return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS);
54860b57cec5SDimitry Andric }
54870b57cec5SDimitry Andric 
54880b57cec5SDimitry Andric /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
PerformMULCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,CodeGenOptLevel OptLevel)54890b57cec5SDimitry Andric static SDValue PerformMULCombine(SDNode *N,
54900b57cec5SDimitry Andric                                  TargetLowering::DAGCombinerInfo &DCI,
54915f757f3fSDimitry Andric                                  CodeGenOptLevel OptLevel) {
54925f757f3fSDimitry Andric   if (OptLevel > CodeGenOptLevel::None) {
54930b57cec5SDimitry Andric     // Try mul.wide combining at OptLevel > 0
54940b57cec5SDimitry Andric     if (SDValue Ret = TryMULWIDECombine(N, DCI))
54950b57cec5SDimitry Andric       return Ret;
54960b57cec5SDimitry Andric   }
54970b57cec5SDimitry Andric 
54980b57cec5SDimitry Andric   return SDValue();
54990b57cec5SDimitry Andric }
55000b57cec5SDimitry Andric 
55010b57cec5SDimitry Andric /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
PerformSHLCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,CodeGenOptLevel OptLevel)55020b57cec5SDimitry Andric static SDValue PerformSHLCombine(SDNode *N,
55030b57cec5SDimitry Andric                                  TargetLowering::DAGCombinerInfo &DCI,
55045f757f3fSDimitry Andric                                  CodeGenOptLevel OptLevel) {
55055f757f3fSDimitry Andric   if (OptLevel > CodeGenOptLevel::None) {
55060b57cec5SDimitry Andric     // Try mul.wide combining at OptLevel > 0
55070b57cec5SDimitry Andric     if (SDValue Ret = TryMULWIDECombine(N, DCI))
55080b57cec5SDimitry Andric       return Ret;
55090b57cec5SDimitry Andric   }
55100b57cec5SDimitry Andric 
55110b57cec5SDimitry Andric   return SDValue();
55120b57cec5SDimitry Andric }
55130b57cec5SDimitry Andric 
PerformSETCCCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,unsigned int SmVersion)55140b57cec5SDimitry Andric static SDValue PerformSETCCCombine(SDNode *N,
55155f757f3fSDimitry Andric                                    TargetLowering::DAGCombinerInfo &DCI,
55165f757f3fSDimitry Andric                                    unsigned int SmVersion) {
55170b57cec5SDimitry Andric   EVT CCType = N->getValueType(0);
55180b57cec5SDimitry Andric   SDValue A = N->getOperand(0);
55190b57cec5SDimitry Andric   SDValue B = N->getOperand(1);
55200b57cec5SDimitry Andric 
55215f757f3fSDimitry Andric   EVT AType = A.getValueType();
55225f757f3fSDimitry Andric   if (!(CCType == MVT::v2i1 && (AType == MVT::v2f16 || AType == MVT::v2bf16)))
55235f757f3fSDimitry Andric     return SDValue();
55245f757f3fSDimitry Andric 
55255f757f3fSDimitry Andric   if (A.getValueType() == MVT::v2bf16 && SmVersion < 90)
55260b57cec5SDimitry Andric     return SDValue();
55270b57cec5SDimitry Andric 
55280b57cec5SDimitry Andric   SDLoc DL(N);
55290b57cec5SDimitry Andric   // setp.f16x2 returns two scalar predicates, which we need to
55300b57cec5SDimitry Andric   // convert back to v2i1. The returned result will be scalarized by
55310b57cec5SDimitry Andric   // the legalizer, but the comparison will remain a single vector
55320b57cec5SDimitry Andric   // instruction.
55335f757f3fSDimitry Andric   SDValue CCNode = DCI.DAG.getNode(
55345f757f3fSDimitry Andric       A.getValueType() == MVT::v2f16 ? NVPTXISD::SETP_F16X2
55355f757f3fSDimitry Andric                                      : NVPTXISD::SETP_BF16X2,
55365f757f3fSDimitry Andric       DL, DCI.DAG.getVTList(MVT::i1, MVT::i1), {A, B, N->getOperand(2)});
55370b57cec5SDimitry Andric   return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, CCType, CCNode.getValue(0),
55380b57cec5SDimitry Andric                          CCNode.getValue(1));
55390b57cec5SDimitry Andric }
55400b57cec5SDimitry Andric 
PerformEXTRACTCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI)55415f757f3fSDimitry Andric static SDValue PerformEXTRACTCombine(SDNode *N,
55425f757f3fSDimitry Andric                                      TargetLowering::DAGCombinerInfo &DCI) {
55435f757f3fSDimitry Andric   SDValue Vector = N->getOperand(0);
55445f757f3fSDimitry Andric   SDLoc DL(N);
55455f757f3fSDimitry Andric   EVT VectorVT = Vector.getValueType();
55465f757f3fSDimitry Andric   if (Vector->getOpcode() == ISD::LOAD && VectorVT.isSimple() &&
55475f757f3fSDimitry Andric       IsPTXVectorType(VectorVT.getSimpleVT()))
55485f757f3fSDimitry Andric     return SDValue(); // Native vector loads already combine nicely w/
55495f757f3fSDimitry Andric                       // extract_vector_elt, except for v4i8.
55505f757f3fSDimitry Andric   // Don't mess with singletons or v2*16 types, we already handle them OK.
55515f757f3fSDimitry Andric   if (VectorVT.getVectorNumElements() == 1 || Isv2x16VT(VectorVT) ||
55525f757f3fSDimitry Andric       VectorVT == MVT::v4i8)
55535f757f3fSDimitry Andric     return SDValue();
55545f757f3fSDimitry Andric 
55555f757f3fSDimitry Andric   uint64_t VectorBits = VectorVT.getSizeInBits();
55565f757f3fSDimitry Andric   // We only handle the types we can extract in-register.
55575f757f3fSDimitry Andric   if (!(VectorBits == 16 || VectorBits == 32 || VectorBits == 64))
55585f757f3fSDimitry Andric     return SDValue();
55595f757f3fSDimitry Andric 
55605f757f3fSDimitry Andric   ConstantSDNode *Index = dyn_cast<ConstantSDNode>(N->getOperand(1));
55615f757f3fSDimitry Andric   // Index == 0 is handled by generic DAG combiner.
55625f757f3fSDimitry Andric   if (!Index || Index->getZExtValue() == 0)
55635f757f3fSDimitry Andric     return SDValue();
55645f757f3fSDimitry Andric 
55655f757f3fSDimitry Andric   MVT IVT = MVT::getIntegerVT(VectorBits);
55665f757f3fSDimitry Andric   EVT EltVT = VectorVT.getVectorElementType();
55675f757f3fSDimitry Andric   EVT EltIVT = EltVT.changeTypeToInteger();
55685f757f3fSDimitry Andric   uint64_t EltBits = EltVT.getScalarSizeInBits();
55695f757f3fSDimitry Andric 
55705f757f3fSDimitry Andric   SDValue Result = DCI.DAG.getNode(
55715f757f3fSDimitry Andric       ISD::TRUNCATE, DL, EltIVT,
55725f757f3fSDimitry Andric       DCI.DAG.getNode(
55735f757f3fSDimitry Andric           ISD::SRA, DL, IVT, DCI.DAG.getNode(ISD::BITCAST, DL, IVT, Vector),
55745f757f3fSDimitry Andric           DCI.DAG.getConstant(Index->getZExtValue() * EltBits, DL, IVT)));
55755f757f3fSDimitry Andric 
55765f757f3fSDimitry Andric   // If element has non-integer type, bitcast it back to the expected type.
55775f757f3fSDimitry Andric   if (EltVT != EltIVT)
55785f757f3fSDimitry Andric     Result = DCI.DAG.getNode(ISD::BITCAST, DL, EltVT, Result);
55795f757f3fSDimitry Andric   // Past legalizer, we may need to extent i8 -> i16 to match the register type.
55805f757f3fSDimitry Andric   if (EltVT != N->getValueType(0))
55815f757f3fSDimitry Andric     Result = DCI.DAG.getNode(ISD::ANY_EXTEND, DL, N->getValueType(0), Result);
55825f757f3fSDimitry Andric 
55835f757f3fSDimitry Andric   return Result;
55845f757f3fSDimitry Andric }
55855f757f3fSDimitry Andric 
PerformVSELECTCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI)55865f757f3fSDimitry Andric static SDValue PerformVSELECTCombine(SDNode *N,
55875f757f3fSDimitry Andric                                      TargetLowering::DAGCombinerInfo &DCI) {
55885f757f3fSDimitry Andric   SDValue VA = N->getOperand(1);
55895f757f3fSDimitry Andric   EVT VectorVT = VA.getValueType();
55905f757f3fSDimitry Andric   if (VectorVT != MVT::v4i8)
55915f757f3fSDimitry Andric     return SDValue();
55925f757f3fSDimitry Andric 
55935f757f3fSDimitry Andric   // We need to split vselect into individual per-element operations Because we
55945f757f3fSDimitry Andric   // use BFE/BFI instruction for byte extraction/insertion, we do end up with
55955f757f3fSDimitry Andric   // 32-bit values, so we may as well do comparison as i32 to avoid conversions
55965f757f3fSDimitry Andric   // to/from i16 normally used for i8 values.
55975f757f3fSDimitry Andric   SmallVector<SDValue, 4> E;
55985f757f3fSDimitry Andric   SDLoc DL(N);
55995f757f3fSDimitry Andric   SDValue VCond = N->getOperand(0);
56005f757f3fSDimitry Andric   SDValue VB = N->getOperand(2);
56015f757f3fSDimitry Andric   for (int I = 0; I < 4; ++I) {
56025f757f3fSDimitry Andric     SDValue C = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i1, VCond,
56035f757f3fSDimitry Andric                                 DCI.DAG.getConstant(I, DL, MVT::i32));
56045f757f3fSDimitry Andric     SDValue EA = DCI.DAG.getAnyExtOrTrunc(
56055f757f3fSDimitry Andric         DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, VA,
56065f757f3fSDimitry Andric                         DCI.DAG.getConstant(I, DL, MVT::i32)),
56075f757f3fSDimitry Andric         DL, MVT::i32);
56085f757f3fSDimitry Andric     SDValue EB = DCI.DAG.getAnyExtOrTrunc(
56095f757f3fSDimitry Andric         DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i8, VB,
56105f757f3fSDimitry Andric                         DCI.DAG.getConstant(I, DL, MVT::i32)),
56115f757f3fSDimitry Andric         DL, MVT::i32);
56125f757f3fSDimitry Andric     E.push_back(DCI.DAG.getAnyExtOrTrunc(
56135f757f3fSDimitry Andric         DCI.DAG.getNode(ISD::SELECT, DL, MVT::i32, C, EA, EB), DL, MVT::i8));
56145f757f3fSDimitry Andric   }
56155f757f3fSDimitry Andric   return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i8, E);
56165f757f3fSDimitry Andric }
56175f757f3fSDimitry Andric 
PerformLOADCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI)56185f757f3fSDimitry Andric static SDValue PerformLOADCombine(SDNode *N,
56195f757f3fSDimitry Andric                                   TargetLowering::DAGCombinerInfo &DCI) {
56205f757f3fSDimitry Andric   SelectionDAG &DAG = DCI.DAG;
56215f757f3fSDimitry Andric   LoadSDNode *LD = cast<LoadSDNode>(N);
56225f757f3fSDimitry Andric 
56235f757f3fSDimitry Andric   // Lower a v16i8 load into a LoadV4 operation with i32 results instead of
56245f757f3fSDimitry Andric   // letting ReplaceLoadVector split it into smaller loads during legalization.
56255f757f3fSDimitry Andric   // This is done at dag-combine1 time, so that vector operations with i8
56265f757f3fSDimitry Andric   // elements can be optimised away instead of being needlessly split during
56275f757f3fSDimitry Andric   // legalization, which involves storing to the stack and loading it back.
56285f757f3fSDimitry Andric   EVT VT = N->getValueType(0);
56295f757f3fSDimitry Andric   if (VT != MVT::v16i8)
56305f757f3fSDimitry Andric     return SDValue();
56315f757f3fSDimitry Andric 
56325f757f3fSDimitry Andric   SDLoc DL(N);
56335f757f3fSDimitry Andric 
56345f757f3fSDimitry Andric   // Create a v4i32 vector load operation, effectively <4 x v4i8>.
56355f757f3fSDimitry Andric   unsigned Opc = NVPTXISD::LoadV4;
56365f757f3fSDimitry Andric   EVT NewVT = MVT::v4i32;
56375f757f3fSDimitry Andric   EVT EltVT = NewVT.getVectorElementType();
56385f757f3fSDimitry Andric   unsigned NumElts = NewVT.getVectorNumElements();
56395f757f3fSDimitry Andric   EVT RetVTs[] = {EltVT, EltVT, EltVT, EltVT, MVT::Other};
56405f757f3fSDimitry Andric   SDVTList RetVTList = DAG.getVTList(RetVTs);
56415f757f3fSDimitry Andric   SmallVector<SDValue, 8> Ops(N->ops());
56425f757f3fSDimitry Andric   Ops.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL));
56435f757f3fSDimitry Andric   SDValue NewLoad = DAG.getMemIntrinsicNode(Opc, DL, RetVTList, Ops, NewVT,
56445f757f3fSDimitry Andric                                             LD->getMemOperand());
56455f757f3fSDimitry Andric   SDValue NewChain = NewLoad.getValue(NumElts);
56465f757f3fSDimitry Andric 
56475f757f3fSDimitry Andric   // Create a vector of the same type returned by the original load.
56485f757f3fSDimitry Andric   SmallVector<SDValue, 4> Elts;
56495f757f3fSDimitry Andric   for (unsigned i = 0; i < NumElts; i++)
56505f757f3fSDimitry Andric     Elts.push_back(NewLoad.getValue(i));
56515f757f3fSDimitry Andric   return DCI.DAG.getMergeValues(
56525f757f3fSDimitry Andric       {DCI.DAG.getBitcast(VT, DCI.DAG.getBuildVector(NewVT, DL, Elts)),
56535f757f3fSDimitry Andric        NewChain},
56545f757f3fSDimitry Andric       DL);
56555f757f3fSDimitry Andric }
56565f757f3fSDimitry Andric 
PerformDAGCombine(SDNode * N,DAGCombinerInfo & DCI) const56570b57cec5SDimitry Andric SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
56580b57cec5SDimitry Andric                                                DAGCombinerInfo &DCI) const {
56595f757f3fSDimitry Andric   CodeGenOptLevel OptLevel = getTargetMachine().getOptLevel();
56600b57cec5SDimitry Andric   switch (N->getOpcode()) {
56610b57cec5SDimitry Andric     default: break;
56620b57cec5SDimitry Andric     case ISD::ADD:
56630b57cec5SDimitry Andric     case ISD::FADD:
56640b57cec5SDimitry Andric       return PerformADDCombine(N, DCI, STI, OptLevel);
56650b57cec5SDimitry Andric     case ISD::MUL:
56660b57cec5SDimitry Andric       return PerformMULCombine(N, DCI, OptLevel);
56670b57cec5SDimitry Andric     case ISD::SHL:
56680b57cec5SDimitry Andric       return PerformSHLCombine(N, DCI, OptLevel);
56690b57cec5SDimitry Andric     case ISD::AND:
56700b57cec5SDimitry Andric       return PerformANDCombine(N, DCI);
56710b57cec5SDimitry Andric     case ISD::UREM:
56720b57cec5SDimitry Andric     case ISD::SREM:
56730b57cec5SDimitry Andric       return PerformREMCombine(N, DCI, OptLevel);
56740b57cec5SDimitry Andric     case ISD::SETCC:
56755f757f3fSDimitry Andric       return PerformSETCCCombine(N, DCI, STI.getSmVersion());
56765f757f3fSDimitry Andric     case ISD::LOAD:
56775f757f3fSDimitry Andric       return PerformLOADCombine(N, DCI);
567881ad6265SDimitry Andric     case NVPTXISD::StoreRetval:
567981ad6265SDimitry Andric     case NVPTXISD::StoreRetvalV2:
568081ad6265SDimitry Andric     case NVPTXISD::StoreRetvalV4:
568181ad6265SDimitry Andric       return PerformStoreRetvalCombine(N);
56825f757f3fSDimitry Andric     case ISD::EXTRACT_VECTOR_ELT:
56835f757f3fSDimitry Andric       return PerformEXTRACTCombine(N, DCI);
56845f757f3fSDimitry Andric     case ISD::VSELECT:
56855f757f3fSDimitry Andric       return PerformVSELECTCombine(N, DCI);
56860b57cec5SDimitry Andric   }
56870b57cec5SDimitry Andric   return SDValue();
56880b57cec5SDimitry Andric }
56890b57cec5SDimitry Andric 
56900b57cec5SDimitry Andric /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
ReplaceLoadVector(SDNode * N,SelectionDAG & DAG,SmallVectorImpl<SDValue> & Results)56910b57cec5SDimitry Andric static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
56920b57cec5SDimitry Andric                               SmallVectorImpl<SDValue> &Results) {
56930b57cec5SDimitry Andric   EVT ResVT = N->getValueType(0);
56940b57cec5SDimitry Andric   SDLoc DL(N);
56950b57cec5SDimitry Andric 
56960b57cec5SDimitry Andric   assert(ResVT.isVector() && "Vector load must have vector type");
56970b57cec5SDimitry Andric 
56980b57cec5SDimitry Andric   // We only handle "native" vector sizes for now, e.g. <4 x double> is not
56990b57cec5SDimitry Andric   // legal.  We can (and should) split that into 2 loads of <2 x double> here
57000b57cec5SDimitry Andric   // but I'm leaving that as a TODO for now.
57010b57cec5SDimitry Andric   assert(ResVT.isSimple() && "Can only handle simple types");
57020b57cec5SDimitry Andric   switch (ResVT.getSimpleVT().SimpleTy) {
57030b57cec5SDimitry Andric   default:
57040b57cec5SDimitry Andric     return;
57050b57cec5SDimitry Andric   case MVT::v2i8:
57060b57cec5SDimitry Andric   case MVT::v2i16:
57070b57cec5SDimitry Andric   case MVT::v2i32:
57080b57cec5SDimitry Andric   case MVT::v2i64:
57090b57cec5SDimitry Andric   case MVT::v2f16:
57100b57cec5SDimitry Andric   case MVT::v2f32:
57110b57cec5SDimitry Andric   case MVT::v2f64:
57120b57cec5SDimitry Andric   case MVT::v4i8:
57130b57cec5SDimitry Andric   case MVT::v4i16:
57140b57cec5SDimitry Andric   case MVT::v4i32:
57150b57cec5SDimitry Andric   case MVT::v4f16:
57160b57cec5SDimitry Andric   case MVT::v4f32:
57170b57cec5SDimitry Andric   case MVT::v8f16:  // <4 x f16x2>
57185f757f3fSDimitry Andric   case MVT::v8bf16: // <4 x bf16x2>
57195f757f3fSDimitry Andric   case MVT::v8i16:  // <4 x i16x2>
57200b57cec5SDimitry Andric     // This is a "native" vector type
57210b57cec5SDimitry Andric     break;
57220b57cec5SDimitry Andric   }
57230b57cec5SDimitry Andric 
57240b57cec5SDimitry Andric   LoadSDNode *LD = cast<LoadSDNode>(N);
57250b57cec5SDimitry Andric 
57265ffd83dbSDimitry Andric   Align Alignment = LD->getAlign();
57270b57cec5SDimitry Andric   auto &TD = DAG.getDataLayout();
572806c3fb27SDimitry Andric   Align PrefAlign =
572906c3fb27SDimitry Andric       TD.getPrefTypeAlign(LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
57305ffd83dbSDimitry Andric   if (Alignment < PrefAlign) {
57310b57cec5SDimitry Andric     // This load is not sufficiently aligned, so bail out and let this vector
57320b57cec5SDimitry Andric     // load be scalarized.  Note that we may still be able to emit smaller
57330b57cec5SDimitry Andric     // vector loads.  For example, if we are loading a <4 x float> with an
57340b57cec5SDimitry Andric     // alignment of 8, this check will fail but the legalizer will try again
57350b57cec5SDimitry Andric     // with 2 x <2 x float>, which will succeed with an alignment of 8.
57360b57cec5SDimitry Andric     return;
57370b57cec5SDimitry Andric   }
57380b57cec5SDimitry Andric 
57390b57cec5SDimitry Andric   EVT EltVT = ResVT.getVectorElementType();
57400b57cec5SDimitry Andric   unsigned NumElts = ResVT.getVectorNumElements();
57410b57cec5SDimitry Andric 
57420b57cec5SDimitry Andric   // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
57430b57cec5SDimitry Andric   // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
57440b57cec5SDimitry Andric   // loaded type to i16 and propagate the "real" type as the memory type.
57450b57cec5SDimitry Andric   bool NeedTrunc = false;
57460b57cec5SDimitry Andric   if (EltVT.getSizeInBits() < 16) {
57470b57cec5SDimitry Andric     EltVT = MVT::i16;
57480b57cec5SDimitry Andric     NeedTrunc = true;
57490b57cec5SDimitry Andric   }
57500b57cec5SDimitry Andric 
57510b57cec5SDimitry Andric   unsigned Opcode = 0;
57520b57cec5SDimitry Andric   SDVTList LdResVTs;
57535f757f3fSDimitry Andric   bool Load16x2 = false;
57540b57cec5SDimitry Andric 
57550b57cec5SDimitry Andric   switch (NumElts) {
57560b57cec5SDimitry Andric   default:
57570b57cec5SDimitry Andric     return;
57580b57cec5SDimitry Andric   case 2:
57590b57cec5SDimitry Andric     Opcode = NVPTXISD::LoadV2;
57600b57cec5SDimitry Andric     LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
57610b57cec5SDimitry Andric     break;
57620b57cec5SDimitry Andric   case 4: {
57630b57cec5SDimitry Andric     Opcode = NVPTXISD::LoadV4;
57640b57cec5SDimitry Andric     EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
57650b57cec5SDimitry Andric     LdResVTs = DAG.getVTList(ListVTs);
57660b57cec5SDimitry Andric     break;
57670b57cec5SDimitry Andric   }
57680b57cec5SDimitry Andric   case 8: {
57690b57cec5SDimitry Andric     // v8f16 is a special case. PTX doesn't have ld.v8.f16
57700b57cec5SDimitry Andric     // instruction. Instead, we split the vector into v2f16 chunks and
57710b57cec5SDimitry Andric     // load them with ld.v4.b32.
57725f757f3fSDimitry Andric     assert(Is16bitsType(EltVT.getSimpleVT()) && "Unsupported v8 vector type.");
57735f757f3fSDimitry Andric     Load16x2 = true;
57740b57cec5SDimitry Andric     Opcode = NVPTXISD::LoadV4;
57755f757f3fSDimitry Andric     EVT VVT;
57765f757f3fSDimitry Andric     switch (EltVT.getSimpleVT().SimpleTy) {
57775f757f3fSDimitry Andric     case MVT::f16:
57785f757f3fSDimitry Andric       VVT = MVT::v2f16;
57795f757f3fSDimitry Andric       break;
57805f757f3fSDimitry Andric     case MVT::bf16:
57815f757f3fSDimitry Andric       VVT = MVT::v2bf16;
57825f757f3fSDimitry Andric       break;
57835f757f3fSDimitry Andric     case MVT::i16:
57845f757f3fSDimitry Andric       VVT = MVT::v2i16;
57855f757f3fSDimitry Andric       break;
57865f757f3fSDimitry Andric     default:
57875f757f3fSDimitry Andric       llvm_unreachable("Unsupported v8 vector type.");
57885f757f3fSDimitry Andric     }
5789bdd1243dSDimitry Andric     EVT ListVTs[] = {VVT, VVT, VVT, VVT, MVT::Other};
57900b57cec5SDimitry Andric     LdResVTs = DAG.getVTList(ListVTs);
57910b57cec5SDimitry Andric     break;
57920b57cec5SDimitry Andric   }
57930b57cec5SDimitry Andric   }
57940b57cec5SDimitry Andric 
57950b57cec5SDimitry Andric   // Copy regular operands
57960b57cec5SDimitry Andric   SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end());
57970b57cec5SDimitry Andric 
57980b57cec5SDimitry Andric   // The select routine does not have access to the LoadSDNode instance, so
57990b57cec5SDimitry Andric   // pass along the extension information
58000b57cec5SDimitry Andric   OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL));
58010b57cec5SDimitry Andric 
58020b57cec5SDimitry Andric   SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
58030b57cec5SDimitry Andric                                           LD->getMemoryVT(),
58040b57cec5SDimitry Andric                                           LD->getMemOperand());
58050b57cec5SDimitry Andric 
58060b57cec5SDimitry Andric   SmallVector<SDValue, 8> ScalarRes;
58075f757f3fSDimitry Andric   if (Load16x2) {
58080b57cec5SDimitry Andric     // Split v2f16 subvectors back into individual elements.
58090b57cec5SDimitry Andric     NumElts /= 2;
58100b57cec5SDimitry Andric     for (unsigned i = 0; i < NumElts; ++i) {
58110b57cec5SDimitry Andric       SDValue SubVector = NewLD.getValue(i);
58120b57cec5SDimitry Andric       SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector,
58130b57cec5SDimitry Andric                                DAG.getIntPtrConstant(0, DL));
58140b57cec5SDimitry Andric       SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector,
58150b57cec5SDimitry Andric                                DAG.getIntPtrConstant(1, DL));
58160b57cec5SDimitry Andric       ScalarRes.push_back(E0);
58170b57cec5SDimitry Andric       ScalarRes.push_back(E1);
58180b57cec5SDimitry Andric     }
58190b57cec5SDimitry Andric   } else {
58200b57cec5SDimitry Andric     for (unsigned i = 0; i < NumElts; ++i) {
58210b57cec5SDimitry Andric       SDValue Res = NewLD.getValue(i);
58220b57cec5SDimitry Andric       if (NeedTrunc)
58230b57cec5SDimitry Andric         Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
58240b57cec5SDimitry Andric       ScalarRes.push_back(Res);
58250b57cec5SDimitry Andric     }
58260b57cec5SDimitry Andric   }
58270b57cec5SDimitry Andric 
58280b57cec5SDimitry Andric   SDValue LoadChain = NewLD.getValue(NumElts);
58290b57cec5SDimitry Andric 
58300b57cec5SDimitry Andric   SDValue BuildVec = DAG.getBuildVector(ResVT, DL, ScalarRes);
58310b57cec5SDimitry Andric 
58320b57cec5SDimitry Andric   Results.push_back(BuildVec);
58330b57cec5SDimitry Andric   Results.push_back(LoadChain);
58340b57cec5SDimitry Andric }
58350b57cec5SDimitry Andric 
ReplaceINTRINSIC_W_CHAIN(SDNode * N,SelectionDAG & DAG,SmallVectorImpl<SDValue> & Results)58360b57cec5SDimitry Andric static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
58370b57cec5SDimitry Andric                                      SmallVectorImpl<SDValue> &Results) {
58380b57cec5SDimitry Andric   SDValue Chain = N->getOperand(0);
58390b57cec5SDimitry Andric   SDValue Intrin = N->getOperand(1);
58400b57cec5SDimitry Andric   SDLoc DL(N);
58410b57cec5SDimitry Andric 
58420b57cec5SDimitry Andric   // Get the intrinsic ID
58431db9f3b2SDimitry Andric   unsigned IntrinNo = Intrin.getNode()->getAsZExtVal();
58440b57cec5SDimitry Andric   switch (IntrinNo) {
58450b57cec5SDimitry Andric   default:
58460b57cec5SDimitry Andric     return;
58470b57cec5SDimitry Andric   case Intrinsic::nvvm_ldg_global_i:
58480b57cec5SDimitry Andric   case Intrinsic::nvvm_ldg_global_f:
58490b57cec5SDimitry Andric   case Intrinsic::nvvm_ldg_global_p:
58500b57cec5SDimitry Andric   case Intrinsic::nvvm_ldu_global_i:
58510b57cec5SDimitry Andric   case Intrinsic::nvvm_ldu_global_f:
58520b57cec5SDimitry Andric   case Intrinsic::nvvm_ldu_global_p: {
58530b57cec5SDimitry Andric     EVT ResVT = N->getValueType(0);
58540b57cec5SDimitry Andric 
58550b57cec5SDimitry Andric     if (ResVT.isVector()) {
58560b57cec5SDimitry Andric       // Vector LDG/LDU
58570b57cec5SDimitry Andric 
58580b57cec5SDimitry Andric       unsigned NumElts = ResVT.getVectorNumElements();
58590b57cec5SDimitry Andric       EVT EltVT = ResVT.getVectorElementType();
58600b57cec5SDimitry Andric 
58610b57cec5SDimitry Andric       // Since LDU/LDG are target nodes, we cannot rely on DAG type
58620b57cec5SDimitry Andric       // legalization.
58630b57cec5SDimitry Andric       // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
58640b57cec5SDimitry Andric       // loaded type to i16 and propagate the "real" type as the memory type.
58650b57cec5SDimitry Andric       bool NeedTrunc = false;
58660b57cec5SDimitry Andric       if (EltVT.getSizeInBits() < 16) {
58670b57cec5SDimitry Andric         EltVT = MVT::i16;
58680b57cec5SDimitry Andric         NeedTrunc = true;
58690b57cec5SDimitry Andric       }
58700b57cec5SDimitry Andric 
58710b57cec5SDimitry Andric       unsigned Opcode = 0;
58720b57cec5SDimitry Andric       SDVTList LdResVTs;
58730b57cec5SDimitry Andric 
58740b57cec5SDimitry Andric       switch (NumElts) {
58750b57cec5SDimitry Andric       default:
58760b57cec5SDimitry Andric         return;
58770b57cec5SDimitry Andric       case 2:
58780b57cec5SDimitry Andric         switch (IntrinNo) {
58790b57cec5SDimitry Andric         default:
58800b57cec5SDimitry Andric           return;
58810b57cec5SDimitry Andric         case Intrinsic::nvvm_ldg_global_i:
58820b57cec5SDimitry Andric         case Intrinsic::nvvm_ldg_global_f:
58830b57cec5SDimitry Andric         case Intrinsic::nvvm_ldg_global_p:
58840b57cec5SDimitry Andric           Opcode = NVPTXISD::LDGV2;
58850b57cec5SDimitry Andric           break;
58860b57cec5SDimitry Andric         case Intrinsic::nvvm_ldu_global_i:
58870b57cec5SDimitry Andric         case Intrinsic::nvvm_ldu_global_f:
58880b57cec5SDimitry Andric         case Intrinsic::nvvm_ldu_global_p:
58890b57cec5SDimitry Andric           Opcode = NVPTXISD::LDUV2;
58900b57cec5SDimitry Andric           break;
58910b57cec5SDimitry Andric         }
58920b57cec5SDimitry Andric         LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
58930b57cec5SDimitry Andric         break;
58940b57cec5SDimitry Andric       case 4: {
58950b57cec5SDimitry Andric         switch (IntrinNo) {
58960b57cec5SDimitry Andric         default:
58970b57cec5SDimitry Andric           return;
58980b57cec5SDimitry Andric         case Intrinsic::nvvm_ldg_global_i:
58990b57cec5SDimitry Andric         case Intrinsic::nvvm_ldg_global_f:
59000b57cec5SDimitry Andric         case Intrinsic::nvvm_ldg_global_p:
59010b57cec5SDimitry Andric           Opcode = NVPTXISD::LDGV4;
59020b57cec5SDimitry Andric           break;
59030b57cec5SDimitry Andric         case Intrinsic::nvvm_ldu_global_i:
59040b57cec5SDimitry Andric         case Intrinsic::nvvm_ldu_global_f:
59050b57cec5SDimitry Andric         case Intrinsic::nvvm_ldu_global_p:
59060b57cec5SDimitry Andric           Opcode = NVPTXISD::LDUV4;
59070b57cec5SDimitry Andric           break;
59080b57cec5SDimitry Andric         }
59090b57cec5SDimitry Andric         EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
59100b57cec5SDimitry Andric         LdResVTs = DAG.getVTList(ListVTs);
59110b57cec5SDimitry Andric         break;
59120b57cec5SDimitry Andric       }
59130b57cec5SDimitry Andric       }
59140b57cec5SDimitry Andric 
59150b57cec5SDimitry Andric       SmallVector<SDValue, 8> OtherOps;
59160b57cec5SDimitry Andric 
59170b57cec5SDimitry Andric       // Copy regular operands
59180b57cec5SDimitry Andric 
59190b57cec5SDimitry Andric       OtherOps.push_back(Chain); // Chain
59200b57cec5SDimitry Andric                                  // Skip operand 1 (intrinsic ID)
59210b57cec5SDimitry Andric       // Others
59220b57cec5SDimitry Andric       OtherOps.append(N->op_begin() + 2, N->op_end());
59230b57cec5SDimitry Andric 
59240b57cec5SDimitry Andric       MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
59250b57cec5SDimitry Andric 
59260b57cec5SDimitry Andric       SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
59270b57cec5SDimitry Andric                                               MemSD->getMemoryVT(),
59280b57cec5SDimitry Andric                                               MemSD->getMemOperand());
59290b57cec5SDimitry Andric 
59300b57cec5SDimitry Andric       SmallVector<SDValue, 4> ScalarRes;
59310b57cec5SDimitry Andric 
59320b57cec5SDimitry Andric       for (unsigned i = 0; i < NumElts; ++i) {
59330b57cec5SDimitry Andric         SDValue Res = NewLD.getValue(i);
59340b57cec5SDimitry Andric         if (NeedTrunc)
59350b57cec5SDimitry Andric           Res =
59360b57cec5SDimitry Andric               DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
59370b57cec5SDimitry Andric         ScalarRes.push_back(Res);
59380b57cec5SDimitry Andric       }
59390b57cec5SDimitry Andric 
59400b57cec5SDimitry Andric       SDValue LoadChain = NewLD.getValue(NumElts);
59410b57cec5SDimitry Andric 
59420b57cec5SDimitry Andric       SDValue BuildVec =
59430b57cec5SDimitry Andric           DAG.getBuildVector(ResVT, DL, ScalarRes);
59440b57cec5SDimitry Andric 
59450b57cec5SDimitry Andric       Results.push_back(BuildVec);
59460b57cec5SDimitry Andric       Results.push_back(LoadChain);
59470b57cec5SDimitry Andric     } else {
59480b57cec5SDimitry Andric       // i8 LDG/LDU
59490b57cec5SDimitry Andric       assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
59500b57cec5SDimitry Andric              "Custom handling of non-i8 ldu/ldg?");
59510b57cec5SDimitry Andric 
59520b57cec5SDimitry Andric       // Just copy all operands as-is
59530b57cec5SDimitry Andric       SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
59540b57cec5SDimitry Andric 
59550b57cec5SDimitry Andric       // Force output to i16
59560b57cec5SDimitry Andric       SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
59570b57cec5SDimitry Andric 
59580b57cec5SDimitry Andric       MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
59590b57cec5SDimitry Andric 
59600b57cec5SDimitry Andric       // We make sure the memory type is i8, which will be used during isel
59610b57cec5SDimitry Andric       // to select the proper instruction.
59620b57cec5SDimitry Andric       SDValue NewLD =
59630b57cec5SDimitry Andric           DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
59640b57cec5SDimitry Andric                                   MVT::i8, MemSD->getMemOperand());
59650b57cec5SDimitry Andric 
59660b57cec5SDimitry Andric       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
59670b57cec5SDimitry Andric                                     NewLD.getValue(0)));
59680b57cec5SDimitry Andric       Results.push_back(NewLD.getValue(1));
59690b57cec5SDimitry Andric     }
59700b57cec5SDimitry Andric   }
59710b57cec5SDimitry Andric   }
59720b57cec5SDimitry Andric }
59730b57cec5SDimitry Andric 
ReplaceNodeResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const59740b57cec5SDimitry Andric void NVPTXTargetLowering::ReplaceNodeResults(
59750b57cec5SDimitry Andric     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
59760b57cec5SDimitry Andric   switch (N->getOpcode()) {
59770b57cec5SDimitry Andric   default:
59780b57cec5SDimitry Andric     report_fatal_error("Unhandled custom legalization");
59790b57cec5SDimitry Andric   case ISD::LOAD:
59800b57cec5SDimitry Andric     ReplaceLoadVector(N, DAG, Results);
59810b57cec5SDimitry Andric     return;
59820b57cec5SDimitry Andric   case ISD::INTRINSIC_W_CHAIN:
59830b57cec5SDimitry Andric     ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
59840b57cec5SDimitry Andric     return;
59850b57cec5SDimitry Andric   }
59860b57cec5SDimitry Andric }
59870b57cec5SDimitry Andric 
598881ad6265SDimitry Andric NVPTXTargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst * AI) const598981ad6265SDimitry Andric NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
599081ad6265SDimitry Andric   Type *Ty = AI->getValOperand()->getType();
599181ad6265SDimitry Andric 
599281ad6265SDimitry Andric   if (AI->isFloatingPointOperation()) {
599381ad6265SDimitry Andric     if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) {
599481ad6265SDimitry Andric       if (Ty->isFloatTy())
599581ad6265SDimitry Andric         return AtomicExpansionKind::None;
599681ad6265SDimitry Andric       if (Ty->isDoubleTy() && STI.hasAtomAddF64())
599781ad6265SDimitry Andric         return AtomicExpansionKind::None;
599881ad6265SDimitry Andric     }
599981ad6265SDimitry Andric     return AtomicExpansionKind::CmpXChg;
600081ad6265SDimitry Andric   }
600181ad6265SDimitry Andric 
600281ad6265SDimitry Andric   assert(Ty->isIntegerTy() && "Ty should be integer at this point");
600381ad6265SDimitry Andric   auto ITy = cast<llvm::IntegerType>(Ty);
600481ad6265SDimitry Andric 
600581ad6265SDimitry Andric   switch (AI->getOperation()) {
600681ad6265SDimitry Andric   default:
600781ad6265SDimitry Andric     return AtomicExpansionKind::CmpXChg;
600881ad6265SDimitry Andric   case AtomicRMWInst::BinOp::And:
600981ad6265SDimitry Andric   case AtomicRMWInst::BinOp::Or:
601081ad6265SDimitry Andric   case AtomicRMWInst::BinOp::Xor:
601181ad6265SDimitry Andric   case AtomicRMWInst::BinOp::Xchg:
601281ad6265SDimitry Andric     switch (ITy->getBitWidth()) {
601381ad6265SDimitry Andric     case 8:
601481ad6265SDimitry Andric     case 16:
601581ad6265SDimitry Andric       return AtomicExpansionKind::CmpXChg;
601681ad6265SDimitry Andric     case 32:
601781ad6265SDimitry Andric       return AtomicExpansionKind::None;
601881ad6265SDimitry Andric     case 64:
601981ad6265SDimitry Andric       if (STI.hasAtomBitwise64())
602081ad6265SDimitry Andric         return AtomicExpansionKind::None;
602181ad6265SDimitry Andric       return AtomicExpansionKind::CmpXChg;
602281ad6265SDimitry Andric     default:
602381ad6265SDimitry Andric       llvm_unreachable("unsupported width encountered");
602481ad6265SDimitry Andric     }
602581ad6265SDimitry Andric   case AtomicRMWInst::BinOp::Add:
602681ad6265SDimitry Andric   case AtomicRMWInst::BinOp::Sub:
602781ad6265SDimitry Andric   case AtomicRMWInst::BinOp::Max:
602881ad6265SDimitry Andric   case AtomicRMWInst::BinOp::Min:
602981ad6265SDimitry Andric   case AtomicRMWInst::BinOp::UMax:
603081ad6265SDimitry Andric   case AtomicRMWInst::BinOp::UMin:
603181ad6265SDimitry Andric     switch (ITy->getBitWidth()) {
603281ad6265SDimitry Andric     case 8:
603381ad6265SDimitry Andric     case 16:
603481ad6265SDimitry Andric       return AtomicExpansionKind::CmpXChg;
603581ad6265SDimitry Andric     case 32:
603681ad6265SDimitry Andric       return AtomicExpansionKind::None;
603781ad6265SDimitry Andric     case 64:
603881ad6265SDimitry Andric       if (STI.hasAtomMinMax64())
603981ad6265SDimitry Andric         return AtomicExpansionKind::None;
604081ad6265SDimitry Andric       return AtomicExpansionKind::CmpXChg;
604181ad6265SDimitry Andric     default:
604281ad6265SDimitry Andric       llvm_unreachable("unsupported width encountered");
604381ad6265SDimitry Andric     }
604481ad6265SDimitry Andric   }
604581ad6265SDimitry Andric 
604681ad6265SDimitry Andric   return AtomicExpansionKind::CmpXChg;
604781ad6265SDimitry Andric }
604881ad6265SDimitry Andric 
60490b57cec5SDimitry Andric // Pin NVPTXTargetObjectFile's vtables to this file.
605081ad6265SDimitry Andric NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default;
60510b57cec5SDimitry Andric 
SelectSectionForGlobal(const GlobalObject * GO,SectionKind Kind,const TargetMachine & TM) const60520b57cec5SDimitry Andric MCSection *NVPTXTargetObjectFile::SelectSectionForGlobal(
60530b57cec5SDimitry Andric     const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
60540b57cec5SDimitry Andric   return getDataSection();
60550b57cec5SDimitry Andric }
6056