109467b48Spatrick //===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // This file defines the interfaces that NVPTX uses to lower LLVM code into a
1009467b48Spatrick // selection DAG.
1109467b48Spatrick //
1209467b48Spatrick //===----------------------------------------------------------------------===//
1309467b48Spatrick 
1409467b48Spatrick #include "NVPTXISelLowering.h"
1509467b48Spatrick #include "MCTargetDesc/NVPTXBaseInfo.h"
1609467b48Spatrick #include "NVPTX.h"
1709467b48Spatrick #include "NVPTXSubtarget.h"
1809467b48Spatrick #include "NVPTXTargetMachine.h"
1909467b48Spatrick #include "NVPTXTargetObjectFile.h"
2009467b48Spatrick #include "NVPTXUtilities.h"
2109467b48Spatrick #include "llvm/ADT/APInt.h"
2273471bf0Spatrick #include "llvm/ADT/STLExtras.h"
2309467b48Spatrick #include "llvm/ADT/SmallVector.h"
2409467b48Spatrick #include "llvm/ADT/StringRef.h"
2509467b48Spatrick #include "llvm/CodeGen/Analysis.h"
2609467b48Spatrick #include "llvm/CodeGen/MachineFunction.h"
2709467b48Spatrick #include "llvm/CodeGen/MachineMemOperand.h"
2809467b48Spatrick #include "llvm/CodeGen/SelectionDAG.h"
2909467b48Spatrick #include "llvm/CodeGen/SelectionDAGNodes.h"
3009467b48Spatrick #include "llvm/CodeGen/TargetCallingConv.h"
3109467b48Spatrick #include "llvm/CodeGen/TargetLowering.h"
3209467b48Spatrick #include "llvm/CodeGen/ValueTypes.h"
3309467b48Spatrick #include "llvm/IR/Argument.h"
3409467b48Spatrick #include "llvm/IR/Attributes.h"
3509467b48Spatrick #include "llvm/IR/Constants.h"
3609467b48Spatrick #include "llvm/IR/DataLayout.h"
3709467b48Spatrick #include "llvm/IR/DerivedTypes.h"
38*d415bd75Srobert #include "llvm/IR/FPEnv.h"
3909467b48Spatrick #include "llvm/IR/Function.h"
4009467b48Spatrick #include "llvm/IR/GlobalValue.h"
4109467b48Spatrick #include "llvm/IR/Instruction.h"
4209467b48Spatrick #include "llvm/IR/Instructions.h"
4309467b48Spatrick #include "llvm/IR/IntrinsicsNVPTX.h"
4409467b48Spatrick #include "llvm/IR/Module.h"
4509467b48Spatrick #include "llvm/IR/Type.h"
4609467b48Spatrick #include "llvm/IR/Value.h"
4709467b48Spatrick #include "llvm/Support/Casting.h"
4809467b48Spatrick #include "llvm/Support/CodeGen.h"
4909467b48Spatrick #include "llvm/Support/CommandLine.h"
5009467b48Spatrick #include "llvm/Support/ErrorHandling.h"
5109467b48Spatrick #include "llvm/Support/MachineValueType.h"
5209467b48Spatrick #include "llvm/Support/raw_ostream.h"
5309467b48Spatrick #include "llvm/Target/TargetMachine.h"
5409467b48Spatrick #include "llvm/Target/TargetOptions.h"
5509467b48Spatrick #include <algorithm>
5609467b48Spatrick #include <cassert>
57*d415bd75Srobert #include <cmath>
5809467b48Spatrick #include <cstdint>
5909467b48Spatrick #include <iterator>
6009467b48Spatrick #include <sstream>
6109467b48Spatrick #include <string>
6209467b48Spatrick #include <utility>
6309467b48Spatrick #include <vector>
6409467b48Spatrick 
6509467b48Spatrick #define DEBUG_TYPE "nvptx-lower"
6609467b48Spatrick 
6709467b48Spatrick using namespace llvm;
6809467b48Spatrick 
6973471bf0Spatrick static std::atomic<unsigned> GlobalUniqueCallSite;
7009467b48Spatrick 
7109467b48Spatrick static cl::opt<bool> sched4reg(
7209467b48Spatrick     "nvptx-sched4reg",
7309467b48Spatrick     cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
7409467b48Spatrick 
75*d415bd75Srobert static cl::opt<unsigned> FMAContractLevelOpt(
76*d415bd75Srobert     "nvptx-fma-level", cl::Hidden,
7709467b48Spatrick     cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
7809467b48Spatrick              " 1: do it  2: do it aggressively"),
7909467b48Spatrick     cl::init(2));
8009467b48Spatrick 
8109467b48Spatrick static cl::opt<int> UsePrecDivF32(
82*d415bd75Srobert     "nvptx-prec-divf32", cl::Hidden,
8309467b48Spatrick     cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
8409467b48Spatrick              " IEEE Compliant F32 div.rnd if available."),
8509467b48Spatrick     cl::init(2));
8609467b48Spatrick 
8709467b48Spatrick static cl::opt<bool> UsePrecSqrtF32(
8809467b48Spatrick     "nvptx-prec-sqrtf32", cl::Hidden,
8909467b48Spatrick     cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
9009467b48Spatrick     cl::init(true));
9109467b48Spatrick 
getDivF32Level() const9209467b48Spatrick int NVPTXTargetLowering::getDivF32Level() const {
9309467b48Spatrick   if (UsePrecDivF32.getNumOccurrences() > 0) {
9409467b48Spatrick     // If nvptx-prec-div32=N is used on the command-line, always honor it
9509467b48Spatrick     return UsePrecDivF32;
9609467b48Spatrick   } else {
9709467b48Spatrick     // Otherwise, use div.approx if fast math is enabled
9809467b48Spatrick     if (getTargetMachine().Options.UnsafeFPMath)
9909467b48Spatrick       return 0;
10009467b48Spatrick     else
10109467b48Spatrick       return 2;
10209467b48Spatrick   }
10309467b48Spatrick }
10409467b48Spatrick 
usePrecSqrtF32() const10509467b48Spatrick bool NVPTXTargetLowering::usePrecSqrtF32() const {
10609467b48Spatrick   if (UsePrecSqrtF32.getNumOccurrences() > 0) {
10709467b48Spatrick     // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
10809467b48Spatrick     return UsePrecSqrtF32;
10909467b48Spatrick   } else {
11009467b48Spatrick     // Otherwise, use sqrt.approx if fast math is enabled
11109467b48Spatrick     return !getTargetMachine().Options.UnsafeFPMath;
11209467b48Spatrick   }
11309467b48Spatrick }
11409467b48Spatrick 
useF32FTZ(const MachineFunction & MF) const11509467b48Spatrick bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const {
116097a140dSpatrick   return MF.getDenormalMode(APFloat::IEEEsingle()).Output ==
117097a140dSpatrick          DenormalMode::PreserveSign;
11809467b48Spatrick }
11909467b48Spatrick 
IsPTXVectorType(MVT VT)12009467b48Spatrick static bool IsPTXVectorType(MVT VT) {
12109467b48Spatrick   switch (VT.SimpleTy) {
12209467b48Spatrick   default:
12309467b48Spatrick     return false;
12409467b48Spatrick   case MVT::v2i1:
12509467b48Spatrick   case MVT::v4i1:
12609467b48Spatrick   case MVT::v2i8:
12709467b48Spatrick   case MVT::v4i8:
12809467b48Spatrick   case MVT::v2i16:
12909467b48Spatrick   case MVT::v4i16:
13009467b48Spatrick   case MVT::v2i32:
13109467b48Spatrick   case MVT::v4i32:
13209467b48Spatrick   case MVT::v2i64:
13309467b48Spatrick   case MVT::v2f16:
13409467b48Spatrick   case MVT::v4f16:
13509467b48Spatrick   case MVT::v8f16: // <4 x f16x2>
136*d415bd75Srobert   case MVT::v2bf16:
137*d415bd75Srobert   case MVT::v4bf16:
138*d415bd75Srobert   case MVT::v8bf16: // <4 x bf16x2>
13909467b48Spatrick   case MVT::v2f32:
14009467b48Spatrick   case MVT::v4f32:
14109467b48Spatrick   case MVT::v2f64:
14209467b48Spatrick     return true;
14309467b48Spatrick   }
14409467b48Spatrick }
14509467b48Spatrick 
14609467b48Spatrick /// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
14709467b48Spatrick /// EVTs that compose it.  Unlike ComputeValueVTs, this will break apart vectors
14809467b48Spatrick /// into their primitive components.
14909467b48Spatrick /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
15009467b48Spatrick /// same number of types as the Ins/Outs arrays in LowerFormalArguments,
15109467b48Spatrick /// LowerCall, and LowerReturn.
ComputePTXValueVTs(const TargetLowering & TLI,const DataLayout & DL,Type * Ty,SmallVectorImpl<EVT> & ValueVTs,SmallVectorImpl<uint64_t> * Offsets=nullptr,uint64_t StartingOffset=0)15209467b48Spatrick static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
15309467b48Spatrick                                Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
15409467b48Spatrick                                SmallVectorImpl<uint64_t> *Offsets = nullptr,
15509467b48Spatrick                                uint64_t StartingOffset = 0) {
15609467b48Spatrick   SmallVector<EVT, 16> TempVTs;
15709467b48Spatrick   SmallVector<uint64_t, 16> TempOffsets;
15809467b48Spatrick 
15909467b48Spatrick   // Special case for i128 - decompose to (i64, i64)
16009467b48Spatrick   if (Ty->isIntegerTy(128)) {
16109467b48Spatrick     ValueVTs.push_back(EVT(MVT::i64));
16209467b48Spatrick     ValueVTs.push_back(EVT(MVT::i64));
16309467b48Spatrick 
16409467b48Spatrick     if (Offsets) {
16509467b48Spatrick       Offsets->push_back(StartingOffset + 0);
16609467b48Spatrick       Offsets->push_back(StartingOffset + 8);
16709467b48Spatrick     }
16809467b48Spatrick 
16909467b48Spatrick     return;
17009467b48Spatrick   }
17109467b48Spatrick 
17209467b48Spatrick   // Given a struct type, recursively traverse the elements with custom ComputePTXValueVTs.
17309467b48Spatrick   if (StructType *STy = dyn_cast<StructType>(Ty)) {
17409467b48Spatrick     auto const *SL = DL.getStructLayout(STy);
17509467b48Spatrick     auto ElementNum = 0;
17609467b48Spatrick     for(auto *EI : STy->elements()) {
17709467b48Spatrick       ComputePTXValueVTs(TLI, DL, EI, ValueVTs, Offsets,
17809467b48Spatrick                          StartingOffset + SL->getElementOffset(ElementNum));
17909467b48Spatrick       ++ElementNum;
18009467b48Spatrick     }
18109467b48Spatrick     return;
18209467b48Spatrick   }
18309467b48Spatrick 
18409467b48Spatrick   ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
18509467b48Spatrick   for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
18609467b48Spatrick     EVT VT = TempVTs[i];
18709467b48Spatrick     uint64_t Off = TempOffsets[i];
18809467b48Spatrick     // Split vectors into individual elements, except for v2f16, which
18909467b48Spatrick     // we will pass as a single scalar.
19009467b48Spatrick     if (VT.isVector()) {
19109467b48Spatrick       unsigned NumElts = VT.getVectorNumElements();
19209467b48Spatrick       EVT EltVT = VT.getVectorElementType();
19309467b48Spatrick       // Vectors with an even number of f16 elements will be passed to
194*d415bd75Srobert       // us as an array of v2f16/v2bf16 elements. We must match this so we
19509467b48Spatrick       // stay in sync with Ins/Outs.
196*d415bd75Srobert       if ((EltVT == MVT::f16 || EltVT == MVT::bf16) && NumElts % 2 == 0) {
197*d415bd75Srobert         EltVT = EltVT == MVT::f16 ? MVT::v2f16 : MVT::v2bf16;
19809467b48Spatrick         NumElts /= 2;
19909467b48Spatrick       }
20009467b48Spatrick       for (unsigned j = 0; j != NumElts; ++j) {
20109467b48Spatrick         ValueVTs.push_back(EltVT);
20209467b48Spatrick         if (Offsets)
20309467b48Spatrick           Offsets->push_back(Off + j * EltVT.getStoreSize());
20409467b48Spatrick       }
20509467b48Spatrick     } else {
20609467b48Spatrick       ValueVTs.push_back(VT);
20709467b48Spatrick       if (Offsets)
20809467b48Spatrick         Offsets->push_back(Off);
20909467b48Spatrick     }
21009467b48Spatrick   }
21109467b48Spatrick }
21209467b48Spatrick 
213*d415bd75Srobert /// PromoteScalarIntegerPTX
214*d415bd75Srobert /// Used to make sure the arguments/returns are suitable for passing
215*d415bd75Srobert /// and promote them to a larger size if they're not.
216*d415bd75Srobert ///
217*d415bd75Srobert /// The promoted type is placed in \p PromoteVT if the function returns true.
PromoteScalarIntegerPTX(const EVT & VT,MVT * PromotedVT)218*d415bd75Srobert static bool PromoteScalarIntegerPTX(const EVT &VT, MVT *PromotedVT) {
219*d415bd75Srobert   if (VT.isScalarInteger()) {
220*d415bd75Srobert     switch (PowerOf2Ceil(VT.getFixedSizeInBits())) {
221*d415bd75Srobert     default:
222*d415bd75Srobert       llvm_unreachable(
223*d415bd75Srobert           "Promotion is not suitable for scalars of size larger than 64-bits");
224*d415bd75Srobert     case 1:
225*d415bd75Srobert       *PromotedVT = MVT::i1;
226*d415bd75Srobert       break;
227*d415bd75Srobert     case 2:
228*d415bd75Srobert     case 4:
229*d415bd75Srobert     case 8:
230*d415bd75Srobert       *PromotedVT = MVT::i8;
231*d415bd75Srobert       break;
232*d415bd75Srobert     case 16:
233*d415bd75Srobert       *PromotedVT = MVT::i16;
234*d415bd75Srobert       break;
235*d415bd75Srobert     case 32:
236*d415bd75Srobert       *PromotedVT = MVT::i32;
237*d415bd75Srobert       break;
238*d415bd75Srobert     case 64:
239*d415bd75Srobert       *PromotedVT = MVT::i64;
240*d415bd75Srobert       break;
241*d415bd75Srobert     }
242*d415bd75Srobert     return EVT(*PromotedVT) != VT;
243*d415bd75Srobert   }
244*d415bd75Srobert   return false;
245*d415bd75Srobert }
246*d415bd75Srobert 
24709467b48Spatrick // Check whether we can merge loads/stores of some of the pieces of a
24809467b48Spatrick // flattened function parameter or return value into a single vector
24909467b48Spatrick // load/store.
25009467b48Spatrick //
25109467b48Spatrick // The flattened parameter is represented as a list of EVTs and
25209467b48Spatrick // offsets, and the whole structure is aligned to ParamAlignment. This
25309467b48Spatrick // function determines whether we can load/store pieces of the
25409467b48Spatrick // parameter starting at index Idx using a single vectorized op of
25509467b48Spatrick // size AccessSize. If so, it returns the number of param pieces
25609467b48Spatrick // covered by the vector op. Otherwise, it returns 1.
CanMergeParamLoadStoresStartingAt(unsigned Idx,uint32_t AccessSize,const SmallVectorImpl<EVT> & ValueVTs,const SmallVectorImpl<uint64_t> & Offsets,Align ParamAlignment)25709467b48Spatrick static unsigned CanMergeParamLoadStoresStartingAt(
25809467b48Spatrick     unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs,
259097a140dSpatrick     const SmallVectorImpl<uint64_t> &Offsets, Align ParamAlignment) {
26009467b48Spatrick 
26109467b48Spatrick   // Can't vectorize if param alignment is not sufficient.
262097a140dSpatrick   if (ParamAlignment < AccessSize)
26309467b48Spatrick     return 1;
26409467b48Spatrick   // Can't vectorize if offset is not aligned.
26509467b48Spatrick   if (Offsets[Idx] & (AccessSize - 1))
26609467b48Spatrick     return 1;
26709467b48Spatrick 
26809467b48Spatrick   EVT EltVT = ValueVTs[Idx];
26909467b48Spatrick   unsigned EltSize = EltVT.getStoreSize();
27009467b48Spatrick 
27109467b48Spatrick   // Element is too large to vectorize.
27209467b48Spatrick   if (EltSize >= AccessSize)
27309467b48Spatrick     return 1;
27409467b48Spatrick 
27509467b48Spatrick   unsigned NumElts = AccessSize / EltSize;
27609467b48Spatrick   // Can't vectorize if AccessBytes if not a multiple of EltSize.
27709467b48Spatrick   if (AccessSize != EltSize * NumElts)
27809467b48Spatrick     return 1;
27909467b48Spatrick 
28009467b48Spatrick   // We don't have enough elements to vectorize.
28109467b48Spatrick   if (Idx + NumElts > ValueVTs.size())
28209467b48Spatrick     return 1;
28309467b48Spatrick 
28409467b48Spatrick   // PTX ISA can only deal with 2- and 4-element vector ops.
28509467b48Spatrick   if (NumElts != 4 && NumElts != 2)
28609467b48Spatrick     return 1;
28709467b48Spatrick 
28809467b48Spatrick   for (unsigned j = Idx + 1; j < Idx + NumElts; ++j) {
28909467b48Spatrick     // Types do not match.
29009467b48Spatrick     if (ValueVTs[j] != EltVT)
29109467b48Spatrick       return 1;
29209467b48Spatrick 
29309467b48Spatrick     // Elements are not contiguous.
29409467b48Spatrick     if (Offsets[j] - Offsets[j - 1] != EltSize)
29509467b48Spatrick       return 1;
29609467b48Spatrick   }
29709467b48Spatrick   // OK. We can vectorize ValueVTs[i..i+NumElts)
29809467b48Spatrick   return NumElts;
29909467b48Spatrick }
30009467b48Spatrick 
// Per-piece markers describing where a piece of a flattened function
// parameter or return value sits within a vectorized load/store.
enum ParamVectorizationFlags {
  PVF_INNER = 0,       // Neither first nor last: a middle element of a vector.
  PVF_FIRST = 1 << 0,  // Opens a vector access.
  PVF_LAST = 1 << 1,   // Closes a vector access.
  // A scalar both opens and closes its own 1-element "vector".
  PVF_SCALAR = PVF_FIRST | PVF_LAST
};
31009467b48Spatrick 
31109467b48Spatrick // Computes whether and how we can vectorize the loads/stores of a
31209467b48Spatrick // flattened function parameter or return value.
31309467b48Spatrick //
31409467b48Spatrick // The flattened parameter is represented as the list of ValueVTs and
31509467b48Spatrick // Offsets, and is aligned to ParamAlignment bytes. We return a vector
31609467b48Spatrick // of the same size as ValueVTs indicating how each piece should be
31709467b48Spatrick // loaded/stored (i.e. as a scalar, or as part of a vector
31809467b48Spatrick // load/store).
31909467b48Spatrick static SmallVector<ParamVectorizationFlags, 16>
VectorizePTXValueVTs(const SmallVectorImpl<EVT> & ValueVTs,const SmallVectorImpl<uint64_t> & Offsets,Align ParamAlignment,bool IsVAArg=false)32009467b48Spatrick VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
32109467b48Spatrick                      const SmallVectorImpl<uint64_t> &Offsets,
322*d415bd75Srobert                      Align ParamAlignment, bool IsVAArg = false) {
32309467b48Spatrick   // Set vector size to match ValueVTs and mark all elements as
32409467b48Spatrick   // scalars by default.
32509467b48Spatrick   SmallVector<ParamVectorizationFlags, 16> VectorInfo;
32609467b48Spatrick   VectorInfo.assign(ValueVTs.size(), PVF_SCALAR);
32709467b48Spatrick 
328*d415bd75Srobert   if (IsVAArg)
329*d415bd75Srobert     return VectorInfo;
330*d415bd75Srobert 
33109467b48Spatrick   // Check what we can vectorize using 128/64/32-bit accesses.
33209467b48Spatrick   for (int I = 0, E = ValueVTs.size(); I != E; ++I) {
33309467b48Spatrick     // Skip elements we've already processed.
33409467b48Spatrick     assert(VectorInfo[I] == PVF_SCALAR && "Unexpected vector info state.");
33509467b48Spatrick     for (unsigned AccessSize : {16, 8, 4, 2}) {
33609467b48Spatrick       unsigned NumElts = CanMergeParamLoadStoresStartingAt(
33709467b48Spatrick           I, AccessSize, ValueVTs, Offsets, ParamAlignment);
33809467b48Spatrick       // Mark vectorized elements.
33909467b48Spatrick       switch (NumElts) {
34009467b48Spatrick       default:
34109467b48Spatrick         llvm_unreachable("Unexpected return value");
34209467b48Spatrick       case 1:
34309467b48Spatrick         // Can't vectorize using this size, try next smaller size.
34409467b48Spatrick         continue;
34509467b48Spatrick       case 2:
34609467b48Spatrick         assert(I + 1 < E && "Not enough elements.");
34709467b48Spatrick         VectorInfo[I] = PVF_FIRST;
34809467b48Spatrick         VectorInfo[I + 1] = PVF_LAST;
34909467b48Spatrick         I += 1;
35009467b48Spatrick         break;
35109467b48Spatrick       case 4:
35209467b48Spatrick         assert(I + 3 < E && "Not enough elements.");
35309467b48Spatrick         VectorInfo[I] = PVF_FIRST;
35409467b48Spatrick         VectorInfo[I + 1] = PVF_INNER;
35509467b48Spatrick         VectorInfo[I + 2] = PVF_INNER;
35609467b48Spatrick         VectorInfo[I + 3] = PVF_LAST;
35709467b48Spatrick         I += 3;
35809467b48Spatrick         break;
35909467b48Spatrick       }
36009467b48Spatrick       // Break out of the inner loop because we've already succeeded
36109467b48Spatrick       // using largest possible AccessSize.
36209467b48Spatrick       break;
36309467b48Spatrick     }
36409467b48Spatrick   }
36509467b48Spatrick   return VectorInfo;
36609467b48Spatrick }
36709467b48Spatrick 
36809467b48Spatrick // NVPTXTargetLowering Constructor.
NVPTXTargetLowering(const NVPTXTargetMachine & TM,const NVPTXSubtarget & STI)36909467b48Spatrick NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
37009467b48Spatrick                                          const NVPTXSubtarget &STI)
37109467b48Spatrick     : TargetLowering(TM), nvTM(&TM), STI(STI) {
  // Always lower memset, memcpy, and memmove intrinsics to load/store
  // instructions, rather than generating calls to memset, memcpy or memmove.
37509467b48Spatrick   MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
37609467b48Spatrick   MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
37709467b48Spatrick   MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;
37809467b48Spatrick 
37909467b48Spatrick   setBooleanContents(ZeroOrNegativeOneBooleanContent);
38009467b48Spatrick   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
38109467b48Spatrick 
38209467b48Spatrick   // Jump is Expensive. Don't create extra control flow for 'and', 'or'
38309467b48Spatrick   // condition branches.
38409467b48Spatrick   setJumpIsExpensive(true);
38509467b48Spatrick 
38609467b48Spatrick   // Wide divides are _very_ slow. Try to reduce the width of the divide if
38709467b48Spatrick   // possible.
38809467b48Spatrick   addBypassSlowDiv(64, 32);
38909467b48Spatrick 
39009467b48Spatrick   // By default, use the Source scheduling
39109467b48Spatrick   if (sched4reg)
39209467b48Spatrick     setSchedulingPreference(Sched::RegPressure);
39309467b48Spatrick   else
39409467b48Spatrick     setSchedulingPreference(Sched::Source);
39509467b48Spatrick 
39609467b48Spatrick   auto setFP16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action,
39709467b48Spatrick                                     LegalizeAction NoF16Action) {
39809467b48Spatrick     setOperationAction(Op, VT, STI.allowFP16Math() ? Action : NoF16Action);
39909467b48Spatrick   };
40009467b48Spatrick 
40109467b48Spatrick   addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
40209467b48Spatrick   addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
40309467b48Spatrick   addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
40409467b48Spatrick   addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
40509467b48Spatrick   addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
40609467b48Spatrick   addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
40709467b48Spatrick   addRegisterClass(MVT::f16, &NVPTX::Float16RegsRegClass);
40809467b48Spatrick   addRegisterClass(MVT::v2f16, &NVPTX::Float16x2RegsRegClass);
409*d415bd75Srobert   addRegisterClass(MVT::bf16, &NVPTX::Float16RegsRegClass);
410*d415bd75Srobert   addRegisterClass(MVT::v2bf16, &NVPTX::Float16x2RegsRegClass);
41109467b48Spatrick 
41209467b48Spatrick   // Conversion to/from FP16/FP16x2 is always legal.
41309467b48Spatrick   setOperationAction(ISD::SINT_TO_FP, MVT::f16, Legal);
41409467b48Spatrick   setOperationAction(ISD::FP_TO_SINT, MVT::f16, Legal);
41509467b48Spatrick   setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom);
41609467b48Spatrick   setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);
41709467b48Spatrick   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f16, Expand);
41809467b48Spatrick   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f16, Expand);
41909467b48Spatrick 
42009467b48Spatrick   setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote);
42109467b48Spatrick   setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand);
42209467b48Spatrick 
42309467b48Spatrick   // Operations not directly supported by NVPTX.
42409467b48Spatrick   for (MVT VT : {MVT::f16, MVT::v2f16, MVT::f32, MVT::f64, MVT::i1, MVT::i8,
42509467b48Spatrick                  MVT::i16, MVT::i32, MVT::i64}) {
42609467b48Spatrick     setOperationAction(ISD::SELECT_CC, VT, Expand);
42709467b48Spatrick     setOperationAction(ISD::BR_CC, VT, Expand);
42809467b48Spatrick   }
42909467b48Spatrick 
43009467b48Spatrick   // Some SIGN_EXTEND_INREG can be done using cvt instruction.
43109467b48Spatrick   // For others we will expand to a SHL/SRA pair.
43209467b48Spatrick   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal);
43309467b48Spatrick   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
43409467b48Spatrick   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
43509467b48Spatrick   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
43609467b48Spatrick   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
43709467b48Spatrick 
43809467b48Spatrick   setOperationAction(ISD::SHL_PARTS, MVT::i32  , Custom);
43909467b48Spatrick   setOperationAction(ISD::SRA_PARTS, MVT::i32  , Custom);
44009467b48Spatrick   setOperationAction(ISD::SRL_PARTS, MVT::i32  , Custom);
44109467b48Spatrick   setOperationAction(ISD::SHL_PARTS, MVT::i64  , Custom);
44209467b48Spatrick   setOperationAction(ISD::SRA_PARTS, MVT::i64  , Custom);
44309467b48Spatrick   setOperationAction(ISD::SRL_PARTS, MVT::i64  , Custom);
44409467b48Spatrick 
44509467b48Spatrick   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
44609467b48Spatrick   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
44709467b48Spatrick 
44809467b48Spatrick   // TODO: we may consider expanding ROTL/ROTR on older GPUs.  Currently on GPUs
44909467b48Spatrick   // that don't have h/w rotation we lower them to multi-instruction assembly.
45009467b48Spatrick   // See ROT*_sw in NVPTXIntrInfo.td
45109467b48Spatrick   setOperationAction(ISD::ROTL, MVT::i64, Legal);
45209467b48Spatrick   setOperationAction(ISD::ROTR, MVT::i64, Legal);
45309467b48Spatrick   setOperationAction(ISD::ROTL, MVT::i32, Legal);
45409467b48Spatrick   setOperationAction(ISD::ROTR, MVT::i32, Legal);
45509467b48Spatrick 
45609467b48Spatrick   setOperationAction(ISD::ROTL, MVT::i16, Expand);
45709467b48Spatrick   setOperationAction(ISD::ROTR, MVT::i16, Expand);
45809467b48Spatrick   setOperationAction(ISD::ROTL, MVT::i8, Expand);
45909467b48Spatrick   setOperationAction(ISD::ROTR, MVT::i8, Expand);
46009467b48Spatrick   setOperationAction(ISD::BSWAP, MVT::i16, Expand);
46109467b48Spatrick   setOperationAction(ISD::BSWAP, MVT::i32, Expand);
46209467b48Spatrick   setOperationAction(ISD::BSWAP, MVT::i64, Expand);
46309467b48Spatrick 
46409467b48Spatrick   // Indirect branch is not supported.
46509467b48Spatrick   // This also disables Jump Table creation.
46609467b48Spatrick   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
46709467b48Spatrick   setOperationAction(ISD::BRIND, MVT::Other, Expand);
46809467b48Spatrick 
46909467b48Spatrick   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
47009467b48Spatrick   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
47109467b48Spatrick 
47209467b48Spatrick   // We want to legalize constant related memmove and memcopy
47309467b48Spatrick   // intrinsics.
47409467b48Spatrick   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
47509467b48Spatrick 
47609467b48Spatrick   // Turn FP extload into load/fpextend
47709467b48Spatrick   setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
47809467b48Spatrick   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
47909467b48Spatrick   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
48009467b48Spatrick   setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
48109467b48Spatrick   setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
48209467b48Spatrick   setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
48309467b48Spatrick   setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
48409467b48Spatrick   setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
48509467b48Spatrick   setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
48609467b48Spatrick   // Turn FP truncstore into trunc + store.
48709467b48Spatrick   // FIXME: vector types should also be expanded
48809467b48Spatrick   setTruncStoreAction(MVT::f32, MVT::f16, Expand);
48909467b48Spatrick   setTruncStoreAction(MVT::f64, MVT::f16, Expand);
49009467b48Spatrick   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
49109467b48Spatrick 
49209467b48Spatrick   // PTX does not support load / store predicate registers
49309467b48Spatrick   setOperationAction(ISD::LOAD, MVT::i1, Custom);
49409467b48Spatrick   setOperationAction(ISD::STORE, MVT::i1, Custom);
49509467b48Spatrick 
49609467b48Spatrick   for (MVT VT : MVT::integer_valuetypes()) {
49709467b48Spatrick     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
49809467b48Spatrick     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
49909467b48Spatrick     setTruncStoreAction(VT, MVT::i1, Expand);
50009467b48Spatrick   }
50109467b48Spatrick 
50209467b48Spatrick   // This is legal in NVPTX
50309467b48Spatrick   setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
50409467b48Spatrick   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
50509467b48Spatrick   setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
506*d415bd75Srobert   setOperationAction(ISD::ConstantFP, MVT::bf16, Legal);
50709467b48Spatrick 
50809467b48Spatrick   // TRAP can be lowered to PTX trap
50909467b48Spatrick   setOperationAction(ISD::TRAP, MVT::Other, Legal);
51009467b48Spatrick 
51109467b48Spatrick   // Register custom handling for vector loads/stores
51209467b48Spatrick   for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
51309467b48Spatrick     if (IsPTXVectorType(VT)) {
51409467b48Spatrick       setOperationAction(ISD::LOAD, VT, Custom);
51509467b48Spatrick       setOperationAction(ISD::STORE, VT, Custom);
51609467b48Spatrick       setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
51709467b48Spatrick     }
51809467b48Spatrick   }
51909467b48Spatrick 
520*d415bd75Srobert   // Support varargs.
521*d415bd75Srobert   setOperationAction(ISD::VASTART, MVT::Other, Custom);
522*d415bd75Srobert   setOperationAction(ISD::VAARG, MVT::Other, Custom);
523*d415bd75Srobert   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
524*d415bd75Srobert   setOperationAction(ISD::VAEND, MVT::Other, Expand);
525*d415bd75Srobert 
52609467b48Spatrick   // Custom handling for i8 intrinsics
52709467b48Spatrick   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
52809467b48Spatrick 
52909467b48Spatrick   for (const auto& Ty : {MVT::i16, MVT::i32, MVT::i64}) {
53009467b48Spatrick     setOperationAction(ISD::ABS,  Ty, Legal);
53109467b48Spatrick     setOperationAction(ISD::SMIN, Ty, Legal);
53209467b48Spatrick     setOperationAction(ISD::SMAX, Ty, Legal);
53309467b48Spatrick     setOperationAction(ISD::UMIN, Ty, Legal);
53409467b48Spatrick     setOperationAction(ISD::UMAX, Ty, Legal);
53509467b48Spatrick 
53609467b48Spatrick     setOperationAction(ISD::CTPOP, Ty, Legal);
53709467b48Spatrick     setOperationAction(ISD::CTLZ, Ty, Legal);
53809467b48Spatrick   }
53909467b48Spatrick 
540*d415bd75Srobert   setOperationAction(ISD::ADDC, MVT::i32, Legal);
541*d415bd75Srobert   setOperationAction(ISD::ADDE, MVT::i32, Legal);
542*d415bd75Srobert   setOperationAction(ISD::SUBC, MVT::i32, Legal);
543*d415bd75Srobert   setOperationAction(ISD::SUBE, MVT::i32, Legal);
544*d415bd75Srobert   if (STI.getPTXVersion() >= 43) {
545*d415bd75Srobert     setOperationAction(ISD::ADDC, MVT::i64, Legal);
546*d415bd75Srobert     setOperationAction(ISD::ADDE, MVT::i64, Legal);
547*d415bd75Srobert     setOperationAction(ISD::SUBC, MVT::i64, Legal);
548*d415bd75Srobert     setOperationAction(ISD::SUBE, MVT::i64, Legal);
549*d415bd75Srobert   }
550*d415bd75Srobert 
55109467b48Spatrick   setOperationAction(ISD::CTTZ, MVT::i16, Expand);
55209467b48Spatrick   setOperationAction(ISD::CTTZ, MVT::i32, Expand);
55309467b48Spatrick   setOperationAction(ISD::CTTZ, MVT::i64, Expand);
55409467b48Spatrick 
55509467b48Spatrick   // PTX does not directly support SELP of i1, so promote to i32 first
55609467b48Spatrick   setOperationAction(ISD::SELECT, MVT::i1, Custom);
55709467b48Spatrick 
55809467b48Spatrick   // PTX cannot multiply two i64s in a single instruction.
55909467b48Spatrick   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
56009467b48Spatrick   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
56109467b48Spatrick 
56209467b48Spatrick   // We have some custom DAG combine patterns for these nodes
563*d415bd75Srobert   setTargetDAGCombine({ISD::ADD, ISD::AND, ISD::FADD, ISD::MUL, ISD::SHL,
564*d415bd75Srobert                        ISD::SREM, ISD::UREM});
56509467b48Spatrick 
56609467b48Spatrick   // setcc for f16x2 needs special handling to prevent legalizer's
56709467b48Spatrick   // attempt to scalarize it due to v2i1 not being legal.
56809467b48Spatrick   if (STI.allowFP16Math())
56909467b48Spatrick     setTargetDAGCombine(ISD::SETCC);
57009467b48Spatrick 
57109467b48Spatrick   // Promote fp16 arithmetic if fp16 hardware isn't available or the
57209467b48Spatrick   // user passed --nvptx-no-fp16-math. The flag is useful because,
57309467b48Spatrick   // although sm_53+ GPUs have some sort of FP16 support in
57409467b48Spatrick   // hardware, only sm_53 and sm_60 have a full implementation. Others
57509467b48Spatrick   // only have a token amount of hardware and are likely to run faster
57609467b48Spatrick   // by using fp32 units instead.
57709467b48Spatrick   for (const auto &Op : {ISD::FADD, ISD::FMUL, ISD::FSUB, ISD::FMA}) {
57809467b48Spatrick     setFP16OperationAction(Op, MVT::f16, Legal, Promote);
57909467b48Spatrick     setFP16OperationAction(Op, MVT::v2f16, Legal, Expand);
58009467b48Spatrick   }
58109467b48Spatrick 
582*d415bd75Srobert   // f16/f16x2 neg was introduced in PTX 60, SM_53.
583*d415bd75Srobert   const bool IsFP16FP16x2NegAvailable = STI.getSmVersion() >= 53 &&
584*d415bd75Srobert                                         STI.getPTXVersion() >= 60 &&
585*d415bd75Srobert                                         STI.allowFP16Math();
586*d415bd75Srobert   for (const auto &VT : {MVT::f16, MVT::v2f16})
587*d415bd75Srobert     setOperationAction(ISD::FNEG, VT,
588*d415bd75Srobert                        IsFP16FP16x2NegAvailable ? Legal : Expand);
58909467b48Spatrick 
59009467b48Spatrick   // (would be) Library functions.
59109467b48Spatrick 
59209467b48Spatrick   // These map to conversion instructions for scalar FP types.
59309467b48Spatrick   for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,
594*d415bd75Srobert                          ISD::FROUNDEVEN, ISD::FTRUNC}) {
59509467b48Spatrick     setOperationAction(Op, MVT::f16, Legal);
59609467b48Spatrick     setOperationAction(Op, MVT::f32, Legal);
59709467b48Spatrick     setOperationAction(Op, MVT::f64, Legal);
59809467b48Spatrick     setOperationAction(Op, MVT::v2f16, Expand);
59909467b48Spatrick   }
60009467b48Spatrick 
60109467b48Spatrick   setOperationAction(ISD::FROUND, MVT::f16, Promote);
60209467b48Spatrick   setOperationAction(ISD::FROUND, MVT::v2f16, Expand);
60309467b48Spatrick   setOperationAction(ISD::FROUND, MVT::f32, Custom);
60409467b48Spatrick   setOperationAction(ISD::FROUND, MVT::f64, Custom);
60509467b48Spatrick 
60609467b48Spatrick 
60709467b48Spatrick   // 'Expand' implements FCOPYSIGN without calling an external library.
60809467b48Spatrick   setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
60909467b48Spatrick   setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand);
61009467b48Spatrick   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
61109467b48Spatrick   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
61209467b48Spatrick 
61309467b48Spatrick   // These map to corresponding instructions for f32/f64. f16 must be
61409467b48Spatrick   // promoted to f32. v2f16 is expanded to f16, which is then promoted
61509467b48Spatrick   // to f32.
616*d415bd75Srobert   for (const auto &Op :
617*d415bd75Srobert        {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FABS}) {
61809467b48Spatrick     setOperationAction(Op, MVT::f16, Promote);
61909467b48Spatrick     setOperationAction(Op, MVT::f32, Legal);
62009467b48Spatrick     setOperationAction(Op, MVT::f64, Legal);
62109467b48Spatrick     setOperationAction(Op, MVT::v2f16, Expand);
62209467b48Spatrick   }
623*d415bd75Srobert   // max.f16, max.f16x2 and max.NaN are supported on sm_80+.
624*d415bd75Srobert   auto GetMinMaxAction = [&](LegalizeAction NotSm80Action) {
625*d415bd75Srobert     bool IsAtLeastSm80 = STI.getSmVersion() >= 80 && STI.getPTXVersion() >= 70;
626*d415bd75Srobert     return IsAtLeastSm80 ? Legal : NotSm80Action;
627*d415bd75Srobert   };
628*d415bd75Srobert   for (const auto &Op : {ISD::FMINNUM, ISD::FMAXNUM}) {
629*d415bd75Srobert     setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Promote), Promote);
630*d415bd75Srobert     setOperationAction(Op, MVT::f32, Legal);
631*d415bd75Srobert     setOperationAction(Op, MVT::f64, Legal);
632*d415bd75Srobert     setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
633*d415bd75Srobert   }
634*d415bd75Srobert   for (const auto &Op : {ISD::FMINIMUM, ISD::FMAXIMUM}) {
635*d415bd75Srobert     setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Expand), Expand);
636*d415bd75Srobert     setOperationAction(Op, MVT::f32, GetMinMaxAction(Expand));
637*d415bd75Srobert     setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
638*d415bd75Srobert   }
63909467b48Spatrick 
64009467b48Spatrick   // No FEXP2, FLOG2.  The PTX ex2 and log2 functions are always approximate.
64109467b48Spatrick   // No FPOW or FREM in PTX.
64209467b48Spatrick 
64309467b48Spatrick   // Now deduce the information based on the above mentioned
64409467b48Spatrick   // actions
64509467b48Spatrick   computeRegisterProperties(STI.getRegisterInfo());
646*d415bd75Srobert 
647*d415bd75Srobert   setMinCmpXchgSizeInBits(32);
64809467b48Spatrick }
64909467b48Spatrick 
getTargetNodeName(unsigned Opcode) const65009467b48Spatrick const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
65109467b48Spatrick   switch ((NVPTXISD::NodeType)Opcode) {
65209467b48Spatrick   case NVPTXISD::FIRST_NUMBER:
65309467b48Spatrick     break;
65409467b48Spatrick   case NVPTXISD::CALL:
65509467b48Spatrick     return "NVPTXISD::CALL";
65609467b48Spatrick   case NVPTXISD::RET_FLAG:
65709467b48Spatrick     return "NVPTXISD::RET_FLAG";
65809467b48Spatrick   case NVPTXISD::LOAD_PARAM:
65909467b48Spatrick     return "NVPTXISD::LOAD_PARAM";
66009467b48Spatrick   case NVPTXISD::Wrapper:
66109467b48Spatrick     return "NVPTXISD::Wrapper";
66209467b48Spatrick   case NVPTXISD::DeclareParam:
66309467b48Spatrick     return "NVPTXISD::DeclareParam";
66409467b48Spatrick   case NVPTXISD::DeclareScalarParam:
66509467b48Spatrick     return "NVPTXISD::DeclareScalarParam";
66609467b48Spatrick   case NVPTXISD::DeclareRet:
66709467b48Spatrick     return "NVPTXISD::DeclareRet";
66809467b48Spatrick   case NVPTXISD::DeclareScalarRet:
66909467b48Spatrick     return "NVPTXISD::DeclareScalarRet";
67009467b48Spatrick   case NVPTXISD::DeclareRetParam:
67109467b48Spatrick     return "NVPTXISD::DeclareRetParam";
67209467b48Spatrick   case NVPTXISD::PrintCall:
67309467b48Spatrick     return "NVPTXISD::PrintCall";
67409467b48Spatrick   case NVPTXISD::PrintConvergentCall:
67509467b48Spatrick     return "NVPTXISD::PrintConvergentCall";
67609467b48Spatrick   case NVPTXISD::PrintCallUni:
67709467b48Spatrick     return "NVPTXISD::PrintCallUni";
67809467b48Spatrick   case NVPTXISD::PrintConvergentCallUni:
67909467b48Spatrick     return "NVPTXISD::PrintConvergentCallUni";
68009467b48Spatrick   case NVPTXISD::LoadParam:
68109467b48Spatrick     return "NVPTXISD::LoadParam";
68209467b48Spatrick   case NVPTXISD::LoadParamV2:
68309467b48Spatrick     return "NVPTXISD::LoadParamV2";
68409467b48Spatrick   case NVPTXISD::LoadParamV4:
68509467b48Spatrick     return "NVPTXISD::LoadParamV4";
68609467b48Spatrick   case NVPTXISD::StoreParam:
68709467b48Spatrick     return "NVPTXISD::StoreParam";
68809467b48Spatrick   case NVPTXISD::StoreParamV2:
68909467b48Spatrick     return "NVPTXISD::StoreParamV2";
69009467b48Spatrick   case NVPTXISD::StoreParamV4:
69109467b48Spatrick     return "NVPTXISD::StoreParamV4";
69209467b48Spatrick   case NVPTXISD::StoreParamS32:
69309467b48Spatrick     return "NVPTXISD::StoreParamS32";
69409467b48Spatrick   case NVPTXISD::StoreParamU32:
69509467b48Spatrick     return "NVPTXISD::StoreParamU32";
69609467b48Spatrick   case NVPTXISD::CallArgBegin:
69709467b48Spatrick     return "NVPTXISD::CallArgBegin";
69809467b48Spatrick   case NVPTXISD::CallArg:
69909467b48Spatrick     return "NVPTXISD::CallArg";
70009467b48Spatrick   case NVPTXISD::LastCallArg:
70109467b48Spatrick     return "NVPTXISD::LastCallArg";
70209467b48Spatrick   case NVPTXISD::CallArgEnd:
70309467b48Spatrick     return "NVPTXISD::CallArgEnd";
70409467b48Spatrick   case NVPTXISD::CallVoid:
70509467b48Spatrick     return "NVPTXISD::CallVoid";
70609467b48Spatrick   case NVPTXISD::CallVal:
70709467b48Spatrick     return "NVPTXISD::CallVal";
70809467b48Spatrick   case NVPTXISD::CallSymbol:
70909467b48Spatrick     return "NVPTXISD::CallSymbol";
71009467b48Spatrick   case NVPTXISD::Prototype:
71109467b48Spatrick     return "NVPTXISD::Prototype";
71209467b48Spatrick   case NVPTXISD::MoveParam:
71309467b48Spatrick     return "NVPTXISD::MoveParam";
71409467b48Spatrick   case NVPTXISD::StoreRetval:
71509467b48Spatrick     return "NVPTXISD::StoreRetval";
71609467b48Spatrick   case NVPTXISD::StoreRetvalV2:
71709467b48Spatrick     return "NVPTXISD::StoreRetvalV2";
71809467b48Spatrick   case NVPTXISD::StoreRetvalV4:
71909467b48Spatrick     return "NVPTXISD::StoreRetvalV4";
72009467b48Spatrick   case NVPTXISD::PseudoUseParam:
72109467b48Spatrick     return "NVPTXISD::PseudoUseParam";
72209467b48Spatrick   case NVPTXISD::RETURN:
72309467b48Spatrick     return "NVPTXISD::RETURN";
72409467b48Spatrick   case NVPTXISD::CallSeqBegin:
72509467b48Spatrick     return "NVPTXISD::CallSeqBegin";
72609467b48Spatrick   case NVPTXISD::CallSeqEnd:
72709467b48Spatrick     return "NVPTXISD::CallSeqEnd";
72809467b48Spatrick   case NVPTXISD::CallPrototype:
72909467b48Spatrick     return "NVPTXISD::CallPrototype";
73009467b48Spatrick   case NVPTXISD::ProxyReg:
73109467b48Spatrick     return "NVPTXISD::ProxyReg";
73209467b48Spatrick   case NVPTXISD::LoadV2:
73309467b48Spatrick     return "NVPTXISD::LoadV2";
73409467b48Spatrick   case NVPTXISD::LoadV4:
73509467b48Spatrick     return "NVPTXISD::LoadV4";
73609467b48Spatrick   case NVPTXISD::LDGV2:
73709467b48Spatrick     return "NVPTXISD::LDGV2";
73809467b48Spatrick   case NVPTXISD::LDGV4:
73909467b48Spatrick     return "NVPTXISD::LDGV4";
74009467b48Spatrick   case NVPTXISD::LDUV2:
74109467b48Spatrick     return "NVPTXISD::LDUV2";
74209467b48Spatrick   case NVPTXISD::LDUV4:
74309467b48Spatrick     return "NVPTXISD::LDUV4";
74409467b48Spatrick   case NVPTXISD::StoreV2:
74509467b48Spatrick     return "NVPTXISD::StoreV2";
74609467b48Spatrick   case NVPTXISD::StoreV4:
74709467b48Spatrick     return "NVPTXISD::StoreV4";
74809467b48Spatrick   case NVPTXISD::FUN_SHFL_CLAMP:
74909467b48Spatrick     return "NVPTXISD::FUN_SHFL_CLAMP";
75009467b48Spatrick   case NVPTXISD::FUN_SHFR_CLAMP:
75109467b48Spatrick     return "NVPTXISD::FUN_SHFR_CLAMP";
75209467b48Spatrick   case NVPTXISD::IMAD:
75309467b48Spatrick     return "NVPTXISD::IMAD";
75409467b48Spatrick   case NVPTXISD::SETP_F16X2:
75509467b48Spatrick     return "NVPTXISD::SETP_F16X2";
75609467b48Spatrick   case NVPTXISD::Dummy:
75709467b48Spatrick     return "NVPTXISD::Dummy";
75809467b48Spatrick   case NVPTXISD::MUL_WIDE_SIGNED:
75909467b48Spatrick     return "NVPTXISD::MUL_WIDE_SIGNED";
76009467b48Spatrick   case NVPTXISD::MUL_WIDE_UNSIGNED:
76109467b48Spatrick     return "NVPTXISD::MUL_WIDE_UNSIGNED";
76209467b48Spatrick   case NVPTXISD::Tex1DFloatS32:        return "NVPTXISD::Tex1DFloatS32";
76309467b48Spatrick   case NVPTXISD::Tex1DFloatFloat:      return "NVPTXISD::Tex1DFloatFloat";
76409467b48Spatrick   case NVPTXISD::Tex1DFloatFloatLevel:
76509467b48Spatrick     return "NVPTXISD::Tex1DFloatFloatLevel";
76609467b48Spatrick   case NVPTXISD::Tex1DFloatFloatGrad:
76709467b48Spatrick     return "NVPTXISD::Tex1DFloatFloatGrad";
76809467b48Spatrick   case NVPTXISD::Tex1DS32S32:          return "NVPTXISD::Tex1DS32S32";
76909467b48Spatrick   case NVPTXISD::Tex1DS32Float:        return "NVPTXISD::Tex1DS32Float";
77009467b48Spatrick   case NVPTXISD::Tex1DS32FloatLevel:
77109467b48Spatrick     return "NVPTXISD::Tex1DS32FloatLevel";
77209467b48Spatrick   case NVPTXISD::Tex1DS32FloatGrad:
77309467b48Spatrick     return "NVPTXISD::Tex1DS32FloatGrad";
77409467b48Spatrick   case NVPTXISD::Tex1DU32S32:          return "NVPTXISD::Tex1DU32S32";
77509467b48Spatrick   case NVPTXISD::Tex1DU32Float:        return "NVPTXISD::Tex1DU32Float";
77609467b48Spatrick   case NVPTXISD::Tex1DU32FloatLevel:
77709467b48Spatrick     return "NVPTXISD::Tex1DU32FloatLevel";
77809467b48Spatrick   case NVPTXISD::Tex1DU32FloatGrad:
77909467b48Spatrick     return "NVPTXISD::Tex1DU32FloatGrad";
78009467b48Spatrick   case NVPTXISD::Tex1DArrayFloatS32:   return "NVPTXISD::Tex1DArrayFloatS32";
78109467b48Spatrick   case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
78209467b48Spatrick   case NVPTXISD::Tex1DArrayFloatFloatLevel:
78309467b48Spatrick     return "NVPTXISD::Tex1DArrayFloatFloatLevel";
78409467b48Spatrick   case NVPTXISD::Tex1DArrayFloatFloatGrad:
78509467b48Spatrick     return "NVPTXISD::Tex1DArrayFloatFloatGrad";
78609467b48Spatrick   case NVPTXISD::Tex1DArrayS32S32:     return "NVPTXISD::Tex1DArrayS32S32";
78709467b48Spatrick   case NVPTXISD::Tex1DArrayS32Float:   return "NVPTXISD::Tex1DArrayS32Float";
78809467b48Spatrick   case NVPTXISD::Tex1DArrayS32FloatLevel:
78909467b48Spatrick     return "NVPTXISD::Tex1DArrayS32FloatLevel";
79009467b48Spatrick   case NVPTXISD::Tex1DArrayS32FloatGrad:
79109467b48Spatrick     return "NVPTXISD::Tex1DArrayS32FloatGrad";
79209467b48Spatrick   case NVPTXISD::Tex1DArrayU32S32:     return "NVPTXISD::Tex1DArrayU32S32";
79309467b48Spatrick   case NVPTXISD::Tex1DArrayU32Float:   return "NVPTXISD::Tex1DArrayU32Float";
79409467b48Spatrick   case NVPTXISD::Tex1DArrayU32FloatLevel:
79509467b48Spatrick     return "NVPTXISD::Tex1DArrayU32FloatLevel";
79609467b48Spatrick   case NVPTXISD::Tex1DArrayU32FloatGrad:
79709467b48Spatrick     return "NVPTXISD::Tex1DArrayU32FloatGrad";
79809467b48Spatrick   case NVPTXISD::Tex2DFloatS32:        return "NVPTXISD::Tex2DFloatS32";
79909467b48Spatrick   case NVPTXISD::Tex2DFloatFloat:      return "NVPTXISD::Tex2DFloatFloat";
80009467b48Spatrick   case NVPTXISD::Tex2DFloatFloatLevel:
80109467b48Spatrick     return "NVPTXISD::Tex2DFloatFloatLevel";
80209467b48Spatrick   case NVPTXISD::Tex2DFloatFloatGrad:
80309467b48Spatrick     return "NVPTXISD::Tex2DFloatFloatGrad";
80409467b48Spatrick   case NVPTXISD::Tex2DS32S32:          return "NVPTXISD::Tex2DS32S32";
80509467b48Spatrick   case NVPTXISD::Tex2DS32Float:        return "NVPTXISD::Tex2DS32Float";
80609467b48Spatrick   case NVPTXISD::Tex2DS32FloatLevel:
80709467b48Spatrick     return "NVPTXISD::Tex2DS32FloatLevel";
80809467b48Spatrick   case NVPTXISD::Tex2DS32FloatGrad:
80909467b48Spatrick     return "NVPTXISD::Tex2DS32FloatGrad";
81009467b48Spatrick   case NVPTXISD::Tex2DU32S32:          return "NVPTXISD::Tex2DU32S32";
81109467b48Spatrick   case NVPTXISD::Tex2DU32Float:        return "NVPTXISD::Tex2DU32Float";
81209467b48Spatrick   case NVPTXISD::Tex2DU32FloatLevel:
81309467b48Spatrick     return "NVPTXISD::Tex2DU32FloatLevel";
81409467b48Spatrick   case NVPTXISD::Tex2DU32FloatGrad:
81509467b48Spatrick     return "NVPTXISD::Tex2DU32FloatGrad";
81609467b48Spatrick   case NVPTXISD::Tex2DArrayFloatS32:   return "NVPTXISD::Tex2DArrayFloatS32";
81709467b48Spatrick   case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
81809467b48Spatrick   case NVPTXISD::Tex2DArrayFloatFloatLevel:
81909467b48Spatrick     return "NVPTXISD::Tex2DArrayFloatFloatLevel";
82009467b48Spatrick   case NVPTXISD::Tex2DArrayFloatFloatGrad:
82109467b48Spatrick     return "NVPTXISD::Tex2DArrayFloatFloatGrad";
82209467b48Spatrick   case NVPTXISD::Tex2DArrayS32S32:     return "NVPTXISD::Tex2DArrayS32S32";
82309467b48Spatrick   case NVPTXISD::Tex2DArrayS32Float:   return "NVPTXISD::Tex2DArrayS32Float";
82409467b48Spatrick   case NVPTXISD::Tex2DArrayS32FloatLevel:
82509467b48Spatrick     return "NVPTXISD::Tex2DArrayS32FloatLevel";
82609467b48Spatrick   case NVPTXISD::Tex2DArrayS32FloatGrad:
82709467b48Spatrick     return "NVPTXISD::Tex2DArrayS32FloatGrad";
82809467b48Spatrick   case NVPTXISD::Tex2DArrayU32S32:     return "NVPTXISD::Tex2DArrayU32S32";
82909467b48Spatrick   case NVPTXISD::Tex2DArrayU32Float:   return "NVPTXISD::Tex2DArrayU32Float";
83009467b48Spatrick   case NVPTXISD::Tex2DArrayU32FloatLevel:
83109467b48Spatrick     return "NVPTXISD::Tex2DArrayU32FloatLevel";
83209467b48Spatrick   case NVPTXISD::Tex2DArrayU32FloatGrad:
83309467b48Spatrick     return "NVPTXISD::Tex2DArrayU32FloatGrad";
83409467b48Spatrick   case NVPTXISD::Tex3DFloatS32:        return "NVPTXISD::Tex3DFloatS32";
83509467b48Spatrick   case NVPTXISD::Tex3DFloatFloat:      return "NVPTXISD::Tex3DFloatFloat";
83609467b48Spatrick   case NVPTXISD::Tex3DFloatFloatLevel:
83709467b48Spatrick     return "NVPTXISD::Tex3DFloatFloatLevel";
83809467b48Spatrick   case NVPTXISD::Tex3DFloatFloatGrad:
83909467b48Spatrick     return "NVPTXISD::Tex3DFloatFloatGrad";
84009467b48Spatrick   case NVPTXISD::Tex3DS32S32:          return "NVPTXISD::Tex3DS32S32";
84109467b48Spatrick   case NVPTXISD::Tex3DS32Float:        return "NVPTXISD::Tex3DS32Float";
84209467b48Spatrick   case NVPTXISD::Tex3DS32FloatLevel:
84309467b48Spatrick     return "NVPTXISD::Tex3DS32FloatLevel";
84409467b48Spatrick   case NVPTXISD::Tex3DS32FloatGrad:
84509467b48Spatrick     return "NVPTXISD::Tex3DS32FloatGrad";
84609467b48Spatrick   case NVPTXISD::Tex3DU32S32:          return "NVPTXISD::Tex3DU32S32";
84709467b48Spatrick   case NVPTXISD::Tex3DU32Float:        return "NVPTXISD::Tex3DU32Float";
84809467b48Spatrick   case NVPTXISD::Tex3DU32FloatLevel:
84909467b48Spatrick     return "NVPTXISD::Tex3DU32FloatLevel";
85009467b48Spatrick   case NVPTXISD::Tex3DU32FloatGrad:
85109467b48Spatrick     return "NVPTXISD::Tex3DU32FloatGrad";
85209467b48Spatrick   case NVPTXISD::TexCubeFloatFloat:      return "NVPTXISD::TexCubeFloatFloat";
85309467b48Spatrick   case NVPTXISD::TexCubeFloatFloatLevel:
85409467b48Spatrick     return "NVPTXISD::TexCubeFloatFloatLevel";
85509467b48Spatrick   case NVPTXISD::TexCubeS32Float:        return "NVPTXISD::TexCubeS32Float";
85609467b48Spatrick   case NVPTXISD::TexCubeS32FloatLevel:
85709467b48Spatrick     return "NVPTXISD::TexCubeS32FloatLevel";
85809467b48Spatrick   case NVPTXISD::TexCubeU32Float:        return "NVPTXISD::TexCubeU32Float";
85909467b48Spatrick   case NVPTXISD::TexCubeU32FloatLevel:
86009467b48Spatrick     return "NVPTXISD::TexCubeU32FloatLevel";
86109467b48Spatrick   case NVPTXISD::TexCubeArrayFloatFloat:
86209467b48Spatrick     return "NVPTXISD::TexCubeArrayFloatFloat";
86309467b48Spatrick   case NVPTXISD::TexCubeArrayFloatFloatLevel:
86409467b48Spatrick     return "NVPTXISD::TexCubeArrayFloatFloatLevel";
86509467b48Spatrick   case NVPTXISD::TexCubeArrayS32Float:
86609467b48Spatrick     return "NVPTXISD::TexCubeArrayS32Float";
86709467b48Spatrick   case NVPTXISD::TexCubeArrayS32FloatLevel:
86809467b48Spatrick     return "NVPTXISD::TexCubeArrayS32FloatLevel";
86909467b48Spatrick   case NVPTXISD::TexCubeArrayU32Float:
87009467b48Spatrick     return "NVPTXISD::TexCubeArrayU32Float";
87109467b48Spatrick   case NVPTXISD::TexCubeArrayU32FloatLevel:
87209467b48Spatrick     return "NVPTXISD::TexCubeArrayU32FloatLevel";
87309467b48Spatrick   case NVPTXISD::Tld4R2DFloatFloat:
87409467b48Spatrick     return "NVPTXISD::Tld4R2DFloatFloat";
87509467b48Spatrick   case NVPTXISD::Tld4G2DFloatFloat:
87609467b48Spatrick     return "NVPTXISD::Tld4G2DFloatFloat";
87709467b48Spatrick   case NVPTXISD::Tld4B2DFloatFloat:
87809467b48Spatrick     return "NVPTXISD::Tld4B2DFloatFloat";
87909467b48Spatrick   case NVPTXISD::Tld4A2DFloatFloat:
88009467b48Spatrick     return "NVPTXISD::Tld4A2DFloatFloat";
88109467b48Spatrick   case NVPTXISD::Tld4R2DS64Float:
88209467b48Spatrick     return "NVPTXISD::Tld4R2DS64Float";
88309467b48Spatrick   case NVPTXISD::Tld4G2DS64Float:
88409467b48Spatrick     return "NVPTXISD::Tld4G2DS64Float";
88509467b48Spatrick   case NVPTXISD::Tld4B2DS64Float:
88609467b48Spatrick     return "NVPTXISD::Tld4B2DS64Float";
88709467b48Spatrick   case NVPTXISD::Tld4A2DS64Float:
88809467b48Spatrick     return "NVPTXISD::Tld4A2DS64Float";
88909467b48Spatrick   case NVPTXISD::Tld4R2DU64Float:
89009467b48Spatrick     return "NVPTXISD::Tld4R2DU64Float";
89109467b48Spatrick   case NVPTXISD::Tld4G2DU64Float:
89209467b48Spatrick     return "NVPTXISD::Tld4G2DU64Float";
89309467b48Spatrick   case NVPTXISD::Tld4B2DU64Float:
89409467b48Spatrick     return "NVPTXISD::Tld4B2DU64Float";
89509467b48Spatrick   case NVPTXISD::Tld4A2DU64Float:
89609467b48Spatrick     return "NVPTXISD::Tld4A2DU64Float";
89709467b48Spatrick 
89809467b48Spatrick   case NVPTXISD::TexUnified1DFloatS32:
89909467b48Spatrick     return "NVPTXISD::TexUnified1DFloatS32";
90009467b48Spatrick   case NVPTXISD::TexUnified1DFloatFloat:
90109467b48Spatrick     return "NVPTXISD::TexUnified1DFloatFloat";
90209467b48Spatrick   case NVPTXISD::TexUnified1DFloatFloatLevel:
90309467b48Spatrick     return "NVPTXISD::TexUnified1DFloatFloatLevel";
90409467b48Spatrick   case NVPTXISD::TexUnified1DFloatFloatGrad:
90509467b48Spatrick     return "NVPTXISD::TexUnified1DFloatFloatGrad";
90609467b48Spatrick   case NVPTXISD::TexUnified1DS32S32:
90709467b48Spatrick     return "NVPTXISD::TexUnified1DS32S32";
90809467b48Spatrick   case NVPTXISD::TexUnified1DS32Float:
90909467b48Spatrick     return "NVPTXISD::TexUnified1DS32Float";
91009467b48Spatrick   case NVPTXISD::TexUnified1DS32FloatLevel:
91109467b48Spatrick     return "NVPTXISD::TexUnified1DS32FloatLevel";
91209467b48Spatrick   case NVPTXISD::TexUnified1DS32FloatGrad:
91309467b48Spatrick     return "NVPTXISD::TexUnified1DS32FloatGrad";
91409467b48Spatrick   case NVPTXISD::TexUnified1DU32S32:
91509467b48Spatrick     return "NVPTXISD::TexUnified1DU32S32";
91609467b48Spatrick   case NVPTXISD::TexUnified1DU32Float:
91709467b48Spatrick     return "NVPTXISD::TexUnified1DU32Float";
91809467b48Spatrick   case NVPTXISD::TexUnified1DU32FloatLevel:
91909467b48Spatrick     return "NVPTXISD::TexUnified1DU32FloatLevel";
92009467b48Spatrick   case NVPTXISD::TexUnified1DU32FloatGrad:
92109467b48Spatrick     return "NVPTXISD::TexUnified1DU32FloatGrad";
92209467b48Spatrick   case NVPTXISD::TexUnified1DArrayFloatS32:
92309467b48Spatrick     return "NVPTXISD::TexUnified1DArrayFloatS32";
92409467b48Spatrick   case NVPTXISD::TexUnified1DArrayFloatFloat:
92509467b48Spatrick     return "NVPTXISD::TexUnified1DArrayFloatFloat";
92609467b48Spatrick   case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
92709467b48Spatrick     return "NVPTXISD::TexUnified1DArrayFloatFloatLevel";
92809467b48Spatrick   case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
92909467b48Spatrick     return "NVPTXISD::TexUnified1DArrayFloatFloatGrad";
93009467b48Spatrick   case NVPTXISD::TexUnified1DArrayS32S32:
93109467b48Spatrick     return "NVPTXISD::TexUnified1DArrayS32S32";
93209467b48Spatrick   case NVPTXISD::TexUnified1DArrayS32Float:
93309467b48Spatrick     return "NVPTXISD::TexUnified1DArrayS32Float";
93409467b48Spatrick   case NVPTXISD::TexUnified1DArrayS32FloatLevel:
93509467b48Spatrick     return "NVPTXISD::TexUnified1DArrayS32FloatLevel";
93609467b48Spatrick   case NVPTXISD::TexUnified1DArrayS32FloatGrad:
93709467b48Spatrick     return "NVPTXISD::TexUnified1DArrayS32FloatGrad";
93809467b48Spatrick   case NVPTXISD::TexUnified1DArrayU32S32:
93909467b48Spatrick     return "NVPTXISD::TexUnified1DArrayU32S32";
94009467b48Spatrick   case NVPTXISD::TexUnified1DArrayU32Float:
94109467b48Spatrick     return "NVPTXISD::TexUnified1DArrayU32Float";
94209467b48Spatrick   case NVPTXISD::TexUnified1DArrayU32FloatLevel:
94309467b48Spatrick     return "NVPTXISD::TexUnified1DArrayU32FloatLevel";
94409467b48Spatrick   case NVPTXISD::TexUnified1DArrayU32FloatGrad:
94509467b48Spatrick     return "NVPTXISD::TexUnified1DArrayU32FloatGrad";
94609467b48Spatrick   case NVPTXISD::TexUnified2DFloatS32:
94709467b48Spatrick     return "NVPTXISD::TexUnified2DFloatS32";
94809467b48Spatrick   case NVPTXISD::TexUnified2DFloatFloat:
94909467b48Spatrick     return "NVPTXISD::TexUnified2DFloatFloat";
95009467b48Spatrick   case NVPTXISD::TexUnified2DFloatFloatLevel:
95109467b48Spatrick     return "NVPTXISD::TexUnified2DFloatFloatLevel";
95209467b48Spatrick   case NVPTXISD::TexUnified2DFloatFloatGrad:
95309467b48Spatrick     return "NVPTXISD::TexUnified2DFloatFloatGrad";
95409467b48Spatrick   case NVPTXISD::TexUnified2DS32S32:
95509467b48Spatrick     return "NVPTXISD::TexUnified2DS32S32";
95609467b48Spatrick   case NVPTXISD::TexUnified2DS32Float:
95709467b48Spatrick     return "NVPTXISD::TexUnified2DS32Float";
95809467b48Spatrick   case NVPTXISD::TexUnified2DS32FloatLevel:
95909467b48Spatrick     return "NVPTXISD::TexUnified2DS32FloatLevel";
96009467b48Spatrick   case NVPTXISD::TexUnified2DS32FloatGrad:
96109467b48Spatrick     return "NVPTXISD::TexUnified2DS32FloatGrad";
96209467b48Spatrick   case NVPTXISD::TexUnified2DU32S32:
96309467b48Spatrick     return "NVPTXISD::TexUnified2DU32S32";
96409467b48Spatrick   case NVPTXISD::TexUnified2DU32Float:
96509467b48Spatrick     return "NVPTXISD::TexUnified2DU32Float";
96609467b48Spatrick   case NVPTXISD::TexUnified2DU32FloatLevel:
96709467b48Spatrick     return "NVPTXISD::TexUnified2DU32FloatLevel";
96809467b48Spatrick   case NVPTXISD::TexUnified2DU32FloatGrad:
96909467b48Spatrick     return "NVPTXISD::TexUnified2DU32FloatGrad";
97009467b48Spatrick   case NVPTXISD::TexUnified2DArrayFloatS32:
97109467b48Spatrick     return "NVPTXISD::TexUnified2DArrayFloatS32";
97209467b48Spatrick   case NVPTXISD::TexUnified2DArrayFloatFloat:
97309467b48Spatrick     return "NVPTXISD::TexUnified2DArrayFloatFloat";
97409467b48Spatrick   case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
97509467b48Spatrick     return "NVPTXISD::TexUnified2DArrayFloatFloatLevel";
97609467b48Spatrick   case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
97709467b48Spatrick     return "NVPTXISD::TexUnified2DArrayFloatFloatGrad";
97809467b48Spatrick   case NVPTXISD::TexUnified2DArrayS32S32:
97909467b48Spatrick     return "NVPTXISD::TexUnified2DArrayS32S32";
98009467b48Spatrick   case NVPTXISD::TexUnified2DArrayS32Float:
98109467b48Spatrick     return "NVPTXISD::TexUnified2DArrayS32Float";
98209467b48Spatrick   case NVPTXISD::TexUnified2DArrayS32FloatLevel:
98309467b48Spatrick     return "NVPTXISD::TexUnified2DArrayS32FloatLevel";
98409467b48Spatrick   case NVPTXISD::TexUnified2DArrayS32FloatGrad:
98509467b48Spatrick     return "NVPTXISD::TexUnified2DArrayS32FloatGrad";
98609467b48Spatrick   case NVPTXISD::TexUnified2DArrayU32S32:
98709467b48Spatrick     return "NVPTXISD::TexUnified2DArrayU32S32";
98809467b48Spatrick   case NVPTXISD::TexUnified2DArrayU32Float:
98909467b48Spatrick     return "NVPTXISD::TexUnified2DArrayU32Float";
99009467b48Spatrick   case NVPTXISD::TexUnified2DArrayU32FloatLevel:
99109467b48Spatrick     return "NVPTXISD::TexUnified2DArrayU32FloatLevel";
99209467b48Spatrick   case NVPTXISD::TexUnified2DArrayU32FloatGrad:
99309467b48Spatrick     return "NVPTXISD::TexUnified2DArrayU32FloatGrad";
99409467b48Spatrick   case NVPTXISD::TexUnified3DFloatS32:
99509467b48Spatrick     return "NVPTXISD::TexUnified3DFloatS32";
99609467b48Spatrick   case NVPTXISD::TexUnified3DFloatFloat:
99709467b48Spatrick     return "NVPTXISD::TexUnified3DFloatFloat";
99809467b48Spatrick   case NVPTXISD::TexUnified3DFloatFloatLevel:
99909467b48Spatrick     return "NVPTXISD::TexUnified3DFloatFloatLevel";
100009467b48Spatrick   case NVPTXISD::TexUnified3DFloatFloatGrad:
100109467b48Spatrick     return "NVPTXISD::TexUnified3DFloatFloatGrad";
100209467b48Spatrick   case NVPTXISD::TexUnified3DS32S32:
100309467b48Spatrick     return "NVPTXISD::TexUnified3DS32S32";
100409467b48Spatrick   case NVPTXISD::TexUnified3DS32Float:
100509467b48Spatrick     return "NVPTXISD::TexUnified3DS32Float";
100609467b48Spatrick   case NVPTXISD::TexUnified3DS32FloatLevel:
100709467b48Spatrick     return "NVPTXISD::TexUnified3DS32FloatLevel";
100809467b48Spatrick   case NVPTXISD::TexUnified3DS32FloatGrad:
100909467b48Spatrick     return "NVPTXISD::TexUnified3DS32FloatGrad";
101009467b48Spatrick   case NVPTXISD::TexUnified3DU32S32:
101109467b48Spatrick     return "NVPTXISD::TexUnified3DU32S32";
101209467b48Spatrick   case NVPTXISD::TexUnified3DU32Float:
101309467b48Spatrick     return "NVPTXISD::TexUnified3DU32Float";
101409467b48Spatrick   case NVPTXISD::TexUnified3DU32FloatLevel:
101509467b48Spatrick     return "NVPTXISD::TexUnified3DU32FloatLevel";
101609467b48Spatrick   case NVPTXISD::TexUnified3DU32FloatGrad:
101709467b48Spatrick     return "NVPTXISD::TexUnified3DU32FloatGrad";
101809467b48Spatrick   case NVPTXISD::TexUnifiedCubeFloatFloat:
101909467b48Spatrick     return "NVPTXISD::TexUnifiedCubeFloatFloat";
102009467b48Spatrick   case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
102109467b48Spatrick     return "NVPTXISD::TexUnifiedCubeFloatFloatLevel";
102209467b48Spatrick   case NVPTXISD::TexUnifiedCubeS32Float:
102309467b48Spatrick     return "NVPTXISD::TexUnifiedCubeS32Float";
102409467b48Spatrick   case NVPTXISD::TexUnifiedCubeS32FloatLevel:
102509467b48Spatrick     return "NVPTXISD::TexUnifiedCubeS32FloatLevel";
102609467b48Spatrick   case NVPTXISD::TexUnifiedCubeU32Float:
102709467b48Spatrick     return "NVPTXISD::TexUnifiedCubeU32Float";
102809467b48Spatrick   case NVPTXISD::TexUnifiedCubeU32FloatLevel:
102909467b48Spatrick     return "NVPTXISD::TexUnifiedCubeU32FloatLevel";
103009467b48Spatrick   case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
103109467b48Spatrick     return "NVPTXISD::TexUnifiedCubeArrayFloatFloat";
103209467b48Spatrick   case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
103309467b48Spatrick     return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel";
103409467b48Spatrick   case NVPTXISD::TexUnifiedCubeArrayS32Float:
103509467b48Spatrick     return "NVPTXISD::TexUnifiedCubeArrayS32Float";
103609467b48Spatrick   case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
103709467b48Spatrick     return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel";
103809467b48Spatrick   case NVPTXISD::TexUnifiedCubeArrayU32Float:
103909467b48Spatrick     return "NVPTXISD::TexUnifiedCubeArrayU32Float";
104009467b48Spatrick   case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
104109467b48Spatrick     return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
104209467b48Spatrick   case NVPTXISD::Tld4UnifiedR2DFloatFloat:
104309467b48Spatrick     return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
104409467b48Spatrick   case NVPTXISD::Tld4UnifiedG2DFloatFloat:
104509467b48Spatrick     return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
104609467b48Spatrick   case NVPTXISD::Tld4UnifiedB2DFloatFloat:
104709467b48Spatrick     return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
104809467b48Spatrick   case NVPTXISD::Tld4UnifiedA2DFloatFloat:
104909467b48Spatrick     return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
105009467b48Spatrick   case NVPTXISD::Tld4UnifiedR2DS64Float:
105109467b48Spatrick     return "NVPTXISD::Tld4UnifiedR2DS64Float";
105209467b48Spatrick   case NVPTXISD::Tld4UnifiedG2DS64Float:
105309467b48Spatrick     return "NVPTXISD::Tld4UnifiedG2DS64Float";
105409467b48Spatrick   case NVPTXISD::Tld4UnifiedB2DS64Float:
105509467b48Spatrick     return "NVPTXISD::Tld4UnifiedB2DS64Float";
105609467b48Spatrick   case NVPTXISD::Tld4UnifiedA2DS64Float:
105709467b48Spatrick     return "NVPTXISD::Tld4UnifiedA2DS64Float";
105809467b48Spatrick   case NVPTXISD::Tld4UnifiedR2DU64Float:
105909467b48Spatrick     return "NVPTXISD::Tld4UnifiedR2DU64Float";
106009467b48Spatrick   case NVPTXISD::Tld4UnifiedG2DU64Float:
106109467b48Spatrick     return "NVPTXISD::Tld4UnifiedG2DU64Float";
106209467b48Spatrick   case NVPTXISD::Tld4UnifiedB2DU64Float:
106309467b48Spatrick     return "NVPTXISD::Tld4UnifiedB2DU64Float";
106409467b48Spatrick   case NVPTXISD::Tld4UnifiedA2DU64Float:
106509467b48Spatrick     return "NVPTXISD::Tld4UnifiedA2DU64Float";
106609467b48Spatrick 
106709467b48Spatrick   case NVPTXISD::Suld1DI8Clamp:          return "NVPTXISD::Suld1DI8Clamp";
106809467b48Spatrick   case NVPTXISD::Suld1DI16Clamp:         return "NVPTXISD::Suld1DI16Clamp";
106909467b48Spatrick   case NVPTXISD::Suld1DI32Clamp:         return "NVPTXISD::Suld1DI32Clamp";
107009467b48Spatrick   case NVPTXISD::Suld1DI64Clamp:         return "NVPTXISD::Suld1DI64Clamp";
107109467b48Spatrick   case NVPTXISD::Suld1DV2I8Clamp:        return "NVPTXISD::Suld1DV2I8Clamp";
107209467b48Spatrick   case NVPTXISD::Suld1DV2I16Clamp:       return "NVPTXISD::Suld1DV2I16Clamp";
107309467b48Spatrick   case NVPTXISD::Suld1DV2I32Clamp:       return "NVPTXISD::Suld1DV2I32Clamp";
107409467b48Spatrick   case NVPTXISD::Suld1DV2I64Clamp:       return "NVPTXISD::Suld1DV2I64Clamp";
107509467b48Spatrick   case NVPTXISD::Suld1DV4I8Clamp:        return "NVPTXISD::Suld1DV4I8Clamp";
107609467b48Spatrick   case NVPTXISD::Suld1DV4I16Clamp:       return "NVPTXISD::Suld1DV4I16Clamp";
107709467b48Spatrick   case NVPTXISD::Suld1DV4I32Clamp:       return "NVPTXISD::Suld1DV4I32Clamp";
107809467b48Spatrick 
107909467b48Spatrick   case NVPTXISD::Suld1DArrayI8Clamp:   return "NVPTXISD::Suld1DArrayI8Clamp";
108009467b48Spatrick   case NVPTXISD::Suld1DArrayI16Clamp:  return "NVPTXISD::Suld1DArrayI16Clamp";
108109467b48Spatrick   case NVPTXISD::Suld1DArrayI32Clamp:  return "NVPTXISD::Suld1DArrayI32Clamp";
108209467b48Spatrick   case NVPTXISD::Suld1DArrayI64Clamp:  return "NVPTXISD::Suld1DArrayI64Clamp";
108309467b48Spatrick   case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp";
108409467b48Spatrick   case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp";
108509467b48Spatrick   case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp";
108609467b48Spatrick   case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp";
108709467b48Spatrick   case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp";
108809467b48Spatrick   case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp";
108909467b48Spatrick   case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp";
109009467b48Spatrick 
109109467b48Spatrick   case NVPTXISD::Suld2DI8Clamp:          return "NVPTXISD::Suld2DI8Clamp";
109209467b48Spatrick   case NVPTXISD::Suld2DI16Clamp:         return "NVPTXISD::Suld2DI16Clamp";
109309467b48Spatrick   case NVPTXISD::Suld2DI32Clamp:         return "NVPTXISD::Suld2DI32Clamp";
109409467b48Spatrick   case NVPTXISD::Suld2DI64Clamp:         return "NVPTXISD::Suld2DI64Clamp";
109509467b48Spatrick   case NVPTXISD::Suld2DV2I8Clamp:        return "NVPTXISD::Suld2DV2I8Clamp";
109609467b48Spatrick   case NVPTXISD::Suld2DV2I16Clamp:       return "NVPTXISD::Suld2DV2I16Clamp";
109709467b48Spatrick   case NVPTXISD::Suld2DV2I32Clamp:       return "NVPTXISD::Suld2DV2I32Clamp";
109809467b48Spatrick   case NVPTXISD::Suld2DV2I64Clamp:       return "NVPTXISD::Suld2DV2I64Clamp";
109909467b48Spatrick   case NVPTXISD::Suld2DV4I8Clamp:        return "NVPTXISD::Suld2DV4I8Clamp";
110009467b48Spatrick   case NVPTXISD::Suld2DV4I16Clamp:       return "NVPTXISD::Suld2DV4I16Clamp";
110109467b48Spatrick   case NVPTXISD::Suld2DV4I32Clamp:       return "NVPTXISD::Suld2DV4I32Clamp";
110209467b48Spatrick 
110309467b48Spatrick   case NVPTXISD::Suld2DArrayI8Clamp:   return "NVPTXISD::Suld2DArrayI8Clamp";
110409467b48Spatrick   case NVPTXISD::Suld2DArrayI16Clamp:  return "NVPTXISD::Suld2DArrayI16Clamp";
110509467b48Spatrick   case NVPTXISD::Suld2DArrayI32Clamp:  return "NVPTXISD::Suld2DArrayI32Clamp";
110609467b48Spatrick   case NVPTXISD::Suld2DArrayI64Clamp:  return "NVPTXISD::Suld2DArrayI64Clamp";
110709467b48Spatrick   case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp";
110809467b48Spatrick   case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp";
110909467b48Spatrick   case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp";
111009467b48Spatrick   case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp";
111109467b48Spatrick   case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp";
111209467b48Spatrick   case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp";
111309467b48Spatrick   case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp";
111409467b48Spatrick 
111509467b48Spatrick   case NVPTXISD::Suld3DI8Clamp:          return "NVPTXISD::Suld3DI8Clamp";
111609467b48Spatrick   case NVPTXISD::Suld3DI16Clamp:         return "NVPTXISD::Suld3DI16Clamp";
111709467b48Spatrick   case NVPTXISD::Suld3DI32Clamp:         return "NVPTXISD::Suld3DI32Clamp";
111809467b48Spatrick   case NVPTXISD::Suld3DI64Clamp:         return "NVPTXISD::Suld3DI64Clamp";
111909467b48Spatrick   case NVPTXISD::Suld3DV2I8Clamp:        return "NVPTXISD::Suld3DV2I8Clamp";
112009467b48Spatrick   case NVPTXISD::Suld3DV2I16Clamp:       return "NVPTXISD::Suld3DV2I16Clamp";
112109467b48Spatrick   case NVPTXISD::Suld3DV2I32Clamp:       return "NVPTXISD::Suld3DV2I32Clamp";
112209467b48Spatrick   case NVPTXISD::Suld3DV2I64Clamp:       return "NVPTXISD::Suld3DV2I64Clamp";
112309467b48Spatrick   case NVPTXISD::Suld3DV4I8Clamp:        return "NVPTXISD::Suld3DV4I8Clamp";
112409467b48Spatrick   case NVPTXISD::Suld3DV4I16Clamp:       return "NVPTXISD::Suld3DV4I16Clamp";
112509467b48Spatrick   case NVPTXISD::Suld3DV4I32Clamp:       return "NVPTXISD::Suld3DV4I32Clamp";
112609467b48Spatrick 
112709467b48Spatrick   case NVPTXISD::Suld1DI8Trap:          return "NVPTXISD::Suld1DI8Trap";
112809467b48Spatrick   case NVPTXISD::Suld1DI16Trap:         return "NVPTXISD::Suld1DI16Trap";
112909467b48Spatrick   case NVPTXISD::Suld1DI32Trap:         return "NVPTXISD::Suld1DI32Trap";
113009467b48Spatrick   case NVPTXISD::Suld1DI64Trap:         return "NVPTXISD::Suld1DI64Trap";
113109467b48Spatrick   case NVPTXISD::Suld1DV2I8Trap:        return "NVPTXISD::Suld1DV2I8Trap";
113209467b48Spatrick   case NVPTXISD::Suld1DV2I16Trap:       return "NVPTXISD::Suld1DV2I16Trap";
113309467b48Spatrick   case NVPTXISD::Suld1DV2I32Trap:       return "NVPTXISD::Suld1DV2I32Trap";
113409467b48Spatrick   case NVPTXISD::Suld1DV2I64Trap:       return "NVPTXISD::Suld1DV2I64Trap";
113509467b48Spatrick   case NVPTXISD::Suld1DV4I8Trap:        return "NVPTXISD::Suld1DV4I8Trap";
113609467b48Spatrick   case NVPTXISD::Suld1DV4I16Trap:       return "NVPTXISD::Suld1DV4I16Trap";
113709467b48Spatrick   case NVPTXISD::Suld1DV4I32Trap:       return "NVPTXISD::Suld1DV4I32Trap";
113809467b48Spatrick 
113909467b48Spatrick   case NVPTXISD::Suld1DArrayI8Trap:     return "NVPTXISD::Suld1DArrayI8Trap";
114009467b48Spatrick   case NVPTXISD::Suld1DArrayI16Trap:    return "NVPTXISD::Suld1DArrayI16Trap";
114109467b48Spatrick   case NVPTXISD::Suld1DArrayI32Trap:    return "NVPTXISD::Suld1DArrayI32Trap";
114209467b48Spatrick   case NVPTXISD::Suld1DArrayI64Trap:    return "NVPTXISD::Suld1DArrayI64Trap";
114309467b48Spatrick   case NVPTXISD::Suld1DArrayV2I8Trap:   return "NVPTXISD::Suld1DArrayV2I8Trap";
114409467b48Spatrick   case NVPTXISD::Suld1DArrayV2I16Trap:  return "NVPTXISD::Suld1DArrayV2I16Trap";
114509467b48Spatrick   case NVPTXISD::Suld1DArrayV2I32Trap:  return "NVPTXISD::Suld1DArrayV2I32Trap";
114609467b48Spatrick   case NVPTXISD::Suld1DArrayV2I64Trap:  return "NVPTXISD::Suld1DArrayV2I64Trap";
114709467b48Spatrick   case NVPTXISD::Suld1DArrayV4I8Trap:   return "NVPTXISD::Suld1DArrayV4I8Trap";
114809467b48Spatrick   case NVPTXISD::Suld1DArrayV4I16Trap:  return "NVPTXISD::Suld1DArrayV4I16Trap";
114909467b48Spatrick   case NVPTXISD::Suld1DArrayV4I32Trap:  return "NVPTXISD::Suld1DArrayV4I32Trap";
115009467b48Spatrick 
115109467b48Spatrick   case NVPTXISD::Suld2DI8Trap:          return "NVPTXISD::Suld2DI8Trap";
115209467b48Spatrick   case NVPTXISD::Suld2DI16Trap:         return "NVPTXISD::Suld2DI16Trap";
115309467b48Spatrick   case NVPTXISD::Suld2DI32Trap:         return "NVPTXISD::Suld2DI32Trap";
115409467b48Spatrick   case NVPTXISD::Suld2DI64Trap:         return "NVPTXISD::Suld2DI64Trap";
115509467b48Spatrick   case NVPTXISD::Suld2DV2I8Trap:        return "NVPTXISD::Suld2DV2I8Trap";
115609467b48Spatrick   case NVPTXISD::Suld2DV2I16Trap:       return "NVPTXISD::Suld2DV2I16Trap";
115709467b48Spatrick   case NVPTXISD::Suld2DV2I32Trap:       return "NVPTXISD::Suld2DV2I32Trap";
115809467b48Spatrick   case NVPTXISD::Suld2DV2I64Trap:       return "NVPTXISD::Suld2DV2I64Trap";
115909467b48Spatrick   case NVPTXISD::Suld2DV4I8Trap:        return "NVPTXISD::Suld2DV4I8Trap";
116009467b48Spatrick   case NVPTXISD::Suld2DV4I16Trap:       return "NVPTXISD::Suld2DV4I16Trap";
116109467b48Spatrick   case NVPTXISD::Suld2DV4I32Trap:       return "NVPTXISD::Suld2DV4I32Trap";
116209467b48Spatrick 
116309467b48Spatrick   case NVPTXISD::Suld2DArrayI8Trap:     return "NVPTXISD::Suld2DArrayI8Trap";
116409467b48Spatrick   case NVPTXISD::Suld2DArrayI16Trap:    return "NVPTXISD::Suld2DArrayI16Trap";
116509467b48Spatrick   case NVPTXISD::Suld2DArrayI32Trap:    return "NVPTXISD::Suld2DArrayI32Trap";
116609467b48Spatrick   case NVPTXISD::Suld2DArrayI64Trap:    return "NVPTXISD::Suld2DArrayI64Trap";
116709467b48Spatrick   case NVPTXISD::Suld2DArrayV2I8Trap:   return "NVPTXISD::Suld2DArrayV2I8Trap";
116809467b48Spatrick   case NVPTXISD::Suld2DArrayV2I16Trap:  return "NVPTXISD::Suld2DArrayV2I16Trap";
116909467b48Spatrick   case NVPTXISD::Suld2DArrayV2I32Trap:  return "NVPTXISD::Suld2DArrayV2I32Trap";
117009467b48Spatrick   case NVPTXISD::Suld2DArrayV2I64Trap:  return "NVPTXISD::Suld2DArrayV2I64Trap";
117109467b48Spatrick   case NVPTXISD::Suld2DArrayV4I8Trap:   return "NVPTXISD::Suld2DArrayV4I8Trap";
117209467b48Spatrick   case NVPTXISD::Suld2DArrayV4I16Trap:  return "NVPTXISD::Suld2DArrayV4I16Trap";
117309467b48Spatrick   case NVPTXISD::Suld2DArrayV4I32Trap:  return "NVPTXISD::Suld2DArrayV4I32Trap";
117409467b48Spatrick 
117509467b48Spatrick   case NVPTXISD::Suld3DI8Trap:          return "NVPTXISD::Suld3DI8Trap";
117609467b48Spatrick   case NVPTXISD::Suld3DI16Trap:         return "NVPTXISD::Suld3DI16Trap";
117709467b48Spatrick   case NVPTXISD::Suld3DI32Trap:         return "NVPTXISD::Suld3DI32Trap";
117809467b48Spatrick   case NVPTXISD::Suld3DI64Trap:         return "NVPTXISD::Suld3DI64Trap";
117909467b48Spatrick   case NVPTXISD::Suld3DV2I8Trap:        return "NVPTXISD::Suld3DV2I8Trap";
118009467b48Spatrick   case NVPTXISD::Suld3DV2I16Trap:       return "NVPTXISD::Suld3DV2I16Trap";
118109467b48Spatrick   case NVPTXISD::Suld3DV2I32Trap:       return "NVPTXISD::Suld3DV2I32Trap";
118209467b48Spatrick   case NVPTXISD::Suld3DV2I64Trap:       return "NVPTXISD::Suld3DV2I64Trap";
118309467b48Spatrick   case NVPTXISD::Suld3DV4I8Trap:        return "NVPTXISD::Suld3DV4I8Trap";
118409467b48Spatrick   case NVPTXISD::Suld3DV4I16Trap:       return "NVPTXISD::Suld3DV4I16Trap";
118509467b48Spatrick   case NVPTXISD::Suld3DV4I32Trap:       return "NVPTXISD::Suld3DV4I32Trap";
118609467b48Spatrick 
118709467b48Spatrick   case NVPTXISD::Suld1DI8Zero:          return "NVPTXISD::Suld1DI8Zero";
118809467b48Spatrick   case NVPTXISD::Suld1DI16Zero:         return "NVPTXISD::Suld1DI16Zero";
118909467b48Spatrick   case NVPTXISD::Suld1DI32Zero:         return "NVPTXISD::Suld1DI32Zero";
119009467b48Spatrick   case NVPTXISD::Suld1DI64Zero:         return "NVPTXISD::Suld1DI64Zero";
119109467b48Spatrick   case NVPTXISD::Suld1DV2I8Zero:        return "NVPTXISD::Suld1DV2I8Zero";
119209467b48Spatrick   case NVPTXISD::Suld1DV2I16Zero:       return "NVPTXISD::Suld1DV2I16Zero";
119309467b48Spatrick   case NVPTXISD::Suld1DV2I32Zero:       return "NVPTXISD::Suld1DV2I32Zero";
119409467b48Spatrick   case NVPTXISD::Suld1DV2I64Zero:       return "NVPTXISD::Suld1DV2I64Zero";
119509467b48Spatrick   case NVPTXISD::Suld1DV4I8Zero:        return "NVPTXISD::Suld1DV4I8Zero";
119609467b48Spatrick   case NVPTXISD::Suld1DV4I16Zero:       return "NVPTXISD::Suld1DV4I16Zero";
119709467b48Spatrick   case NVPTXISD::Suld1DV4I32Zero:       return "NVPTXISD::Suld1DV4I32Zero";
119809467b48Spatrick 
119909467b48Spatrick   case NVPTXISD::Suld1DArrayI8Zero:     return "NVPTXISD::Suld1DArrayI8Zero";
120009467b48Spatrick   case NVPTXISD::Suld1DArrayI16Zero:    return "NVPTXISD::Suld1DArrayI16Zero";
120109467b48Spatrick   case NVPTXISD::Suld1DArrayI32Zero:    return "NVPTXISD::Suld1DArrayI32Zero";
120209467b48Spatrick   case NVPTXISD::Suld1DArrayI64Zero:    return "NVPTXISD::Suld1DArrayI64Zero";
120309467b48Spatrick   case NVPTXISD::Suld1DArrayV2I8Zero:   return "NVPTXISD::Suld1DArrayV2I8Zero";
120409467b48Spatrick   case NVPTXISD::Suld1DArrayV2I16Zero:  return "NVPTXISD::Suld1DArrayV2I16Zero";
120509467b48Spatrick   case NVPTXISD::Suld1DArrayV2I32Zero:  return "NVPTXISD::Suld1DArrayV2I32Zero";
120609467b48Spatrick   case NVPTXISD::Suld1DArrayV2I64Zero:  return "NVPTXISD::Suld1DArrayV2I64Zero";
120709467b48Spatrick   case NVPTXISD::Suld1DArrayV4I8Zero:   return "NVPTXISD::Suld1DArrayV4I8Zero";
120809467b48Spatrick   case NVPTXISD::Suld1DArrayV4I16Zero:  return "NVPTXISD::Suld1DArrayV4I16Zero";
120909467b48Spatrick   case NVPTXISD::Suld1DArrayV4I32Zero:  return "NVPTXISD::Suld1DArrayV4I32Zero";
121009467b48Spatrick 
121109467b48Spatrick   case NVPTXISD::Suld2DI8Zero:          return "NVPTXISD::Suld2DI8Zero";
121209467b48Spatrick   case NVPTXISD::Suld2DI16Zero:         return "NVPTXISD::Suld2DI16Zero";
121309467b48Spatrick   case NVPTXISD::Suld2DI32Zero:         return "NVPTXISD::Suld2DI32Zero";
121409467b48Spatrick   case NVPTXISD::Suld2DI64Zero:         return "NVPTXISD::Suld2DI64Zero";
121509467b48Spatrick   case NVPTXISD::Suld2DV2I8Zero:        return "NVPTXISD::Suld2DV2I8Zero";
121609467b48Spatrick   case NVPTXISD::Suld2DV2I16Zero:       return "NVPTXISD::Suld2DV2I16Zero";
121709467b48Spatrick   case NVPTXISD::Suld2DV2I32Zero:       return "NVPTXISD::Suld2DV2I32Zero";
121809467b48Spatrick   case NVPTXISD::Suld2DV2I64Zero:       return "NVPTXISD::Suld2DV2I64Zero";
121909467b48Spatrick   case NVPTXISD::Suld2DV4I8Zero:        return "NVPTXISD::Suld2DV4I8Zero";
122009467b48Spatrick   case NVPTXISD::Suld2DV4I16Zero:       return "NVPTXISD::Suld2DV4I16Zero";
122109467b48Spatrick   case NVPTXISD::Suld2DV4I32Zero:       return "NVPTXISD::Suld2DV4I32Zero";
122209467b48Spatrick 
122309467b48Spatrick   case NVPTXISD::Suld2DArrayI8Zero:     return "NVPTXISD::Suld2DArrayI8Zero";
122409467b48Spatrick   case NVPTXISD::Suld2DArrayI16Zero:    return "NVPTXISD::Suld2DArrayI16Zero";
122509467b48Spatrick   case NVPTXISD::Suld2DArrayI32Zero:    return "NVPTXISD::Suld2DArrayI32Zero";
122609467b48Spatrick   case NVPTXISD::Suld2DArrayI64Zero:    return "NVPTXISD::Suld2DArrayI64Zero";
122709467b48Spatrick   case NVPTXISD::Suld2DArrayV2I8Zero:   return "NVPTXISD::Suld2DArrayV2I8Zero";
122809467b48Spatrick   case NVPTXISD::Suld2DArrayV2I16Zero:  return "NVPTXISD::Suld2DArrayV2I16Zero";
122909467b48Spatrick   case NVPTXISD::Suld2DArrayV2I32Zero:  return "NVPTXISD::Suld2DArrayV2I32Zero";
123009467b48Spatrick   case NVPTXISD::Suld2DArrayV2I64Zero:  return "NVPTXISD::Suld2DArrayV2I64Zero";
123109467b48Spatrick   case NVPTXISD::Suld2DArrayV4I8Zero:   return "NVPTXISD::Suld2DArrayV4I8Zero";
123209467b48Spatrick   case NVPTXISD::Suld2DArrayV4I16Zero:  return "NVPTXISD::Suld2DArrayV4I16Zero";
123309467b48Spatrick   case NVPTXISD::Suld2DArrayV4I32Zero:  return "NVPTXISD::Suld2DArrayV4I32Zero";
123409467b48Spatrick 
123509467b48Spatrick   case NVPTXISD::Suld3DI8Zero:          return "NVPTXISD::Suld3DI8Zero";
123609467b48Spatrick   case NVPTXISD::Suld3DI16Zero:         return "NVPTXISD::Suld3DI16Zero";
123709467b48Spatrick   case NVPTXISD::Suld3DI32Zero:         return "NVPTXISD::Suld3DI32Zero";
123809467b48Spatrick   case NVPTXISD::Suld3DI64Zero:         return "NVPTXISD::Suld3DI64Zero";
123909467b48Spatrick   case NVPTXISD::Suld3DV2I8Zero:        return "NVPTXISD::Suld3DV2I8Zero";
124009467b48Spatrick   case NVPTXISD::Suld3DV2I16Zero:       return "NVPTXISD::Suld3DV2I16Zero";
124109467b48Spatrick   case NVPTXISD::Suld3DV2I32Zero:       return "NVPTXISD::Suld3DV2I32Zero";
124209467b48Spatrick   case NVPTXISD::Suld3DV2I64Zero:       return "NVPTXISD::Suld3DV2I64Zero";
124309467b48Spatrick   case NVPTXISD::Suld3DV4I8Zero:        return "NVPTXISD::Suld3DV4I8Zero";
124409467b48Spatrick   case NVPTXISD::Suld3DV4I16Zero:       return "NVPTXISD::Suld3DV4I16Zero";
124509467b48Spatrick   case NVPTXISD::Suld3DV4I32Zero:       return "NVPTXISD::Suld3DV4I32Zero";
124609467b48Spatrick   }
124709467b48Spatrick   return nullptr;
124809467b48Spatrick }
124909467b48Spatrick 
125009467b48Spatrick TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(MVT VT) const125109467b48Spatrick NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
125273471bf0Spatrick   if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
125373471bf0Spatrick       VT.getScalarType() == MVT::i1)
125409467b48Spatrick     return TypeSplitVector;
125509467b48Spatrick   if (VT == MVT::v2f16)
125609467b48Spatrick     return TypeLegal;
125709467b48Spatrick   return TargetLoweringBase::getPreferredVectorAction(VT);
125809467b48Spatrick }
125909467b48Spatrick 
getSqrtEstimate(SDValue Operand,SelectionDAG & DAG,int Enabled,int & ExtraSteps,bool & UseOneConst,bool Reciprocal) const126009467b48Spatrick SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
126109467b48Spatrick                                              int Enabled, int &ExtraSteps,
126209467b48Spatrick                                              bool &UseOneConst,
126309467b48Spatrick                                              bool Reciprocal) const {
126409467b48Spatrick   if (!(Enabled == ReciprocalEstimate::Enabled ||
126509467b48Spatrick         (Enabled == ReciprocalEstimate::Unspecified && !usePrecSqrtF32())))
126609467b48Spatrick     return SDValue();
126709467b48Spatrick 
126809467b48Spatrick   if (ExtraSteps == ReciprocalEstimate::Unspecified)
126909467b48Spatrick     ExtraSteps = 0;
127009467b48Spatrick 
127109467b48Spatrick   SDLoc DL(Operand);
127209467b48Spatrick   EVT VT = Operand.getValueType();
127309467b48Spatrick   bool Ftz = useF32FTZ(DAG.getMachineFunction());
127409467b48Spatrick 
127509467b48Spatrick   auto MakeIntrinsicCall = [&](Intrinsic::ID IID) {
127609467b48Spatrick     return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
127709467b48Spatrick                        DAG.getConstant(IID, DL, MVT::i32), Operand);
127809467b48Spatrick   };
127909467b48Spatrick 
128009467b48Spatrick   // The sqrt and rsqrt refinement processes assume we always start out with an
128109467b48Spatrick   // approximation of the rsqrt.  Therefore, if we're going to do any refinement
128209467b48Spatrick   // (i.e. ExtraSteps > 0), we must return an rsqrt.  But if we're *not* doing
128309467b48Spatrick   // any refinement, we must return a regular sqrt.
128409467b48Spatrick   if (Reciprocal || ExtraSteps > 0) {
128509467b48Spatrick     if (VT == MVT::f32)
128609467b48Spatrick       return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_rsqrt_approx_ftz_f
128709467b48Spatrick                                    : Intrinsic::nvvm_rsqrt_approx_f);
128809467b48Spatrick     else if (VT == MVT::f64)
128909467b48Spatrick       return MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d);
129009467b48Spatrick     else
129109467b48Spatrick       return SDValue();
129209467b48Spatrick   } else {
129309467b48Spatrick     if (VT == MVT::f32)
129409467b48Spatrick       return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f
129509467b48Spatrick                                    : Intrinsic::nvvm_sqrt_approx_f);
129609467b48Spatrick     else {
129709467b48Spatrick       // There's no sqrt.approx.f64 instruction, so we emit
129809467b48Spatrick       // reciprocal(rsqrt(x)).  This is faster than
129909467b48Spatrick       // select(x == 0, 0, x * rsqrt(x)).  (In fact, it's faster than plain
130009467b48Spatrick       // x * rsqrt(x).)
130109467b48Spatrick       return DAG.getNode(
130209467b48Spatrick           ISD::INTRINSIC_WO_CHAIN, DL, VT,
130309467b48Spatrick           DAG.getConstant(Intrinsic::nvvm_rcp_approx_ftz_d, DL, MVT::i32),
130409467b48Spatrick           MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d));
130509467b48Spatrick     }
130609467b48Spatrick   }
130709467b48Spatrick }
130809467b48Spatrick 
130909467b48Spatrick SDValue
LowerGlobalAddress(SDValue Op,SelectionDAG & DAG) const131009467b48Spatrick NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
131109467b48Spatrick   SDLoc dl(Op);
131209467b48Spatrick   const GlobalAddressSDNode *GAN = cast<GlobalAddressSDNode>(Op);
131309467b48Spatrick   auto PtrVT = getPointerTy(DAG.getDataLayout(), GAN->getAddressSpace());
131409467b48Spatrick   Op = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, PtrVT);
131509467b48Spatrick   return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
131609467b48Spatrick }
131709467b48Spatrick 
getPrototype(const DataLayout & DL,Type * retTy,const ArgListTy & Args,const SmallVectorImpl<ISD::OutputArg> & Outs,MaybeAlign retAlignment,std::optional<std::pair<unsigned,const APInt &>> VAInfo,const CallBase & CB,unsigned UniqueCallSite) const131809467b48Spatrick std::string NVPTXTargetLowering::getPrototype(
131909467b48Spatrick     const DataLayout &DL, Type *retTy, const ArgListTy &Args,
1320097a140dSpatrick     const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign retAlignment,
1321*d415bd75Srobert     std::optional<std::pair<unsigned, const APInt &>> VAInfo,
132273471bf0Spatrick     const CallBase &CB, unsigned UniqueCallSite) const {
132309467b48Spatrick   auto PtrVT = getPointerTy(DL);
132409467b48Spatrick 
132509467b48Spatrick   bool isABI = (STI.getSmVersion() >= 20);
132609467b48Spatrick   assert(isABI && "Non-ABI compilation is not supported");
132709467b48Spatrick   if (!isABI)
132809467b48Spatrick     return "";
132909467b48Spatrick 
1330*d415bd75Srobert   std::string Prototype;
1331*d415bd75Srobert   raw_string_ostream O(Prototype);
133273471bf0Spatrick   O << "prototype_" << UniqueCallSite << " : .callprototype ";
133309467b48Spatrick 
133409467b48Spatrick   if (retTy->getTypeID() == Type::VoidTyID) {
133509467b48Spatrick     O << "()";
133609467b48Spatrick   } else {
133709467b48Spatrick     O << "(";
133809467b48Spatrick     if (retTy->isFloatingPointTy() || (retTy->isIntegerTy() && !retTy->isIntegerTy(128))) {
133909467b48Spatrick       unsigned size = 0;
134009467b48Spatrick       if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
134109467b48Spatrick         size = ITy->getBitWidth();
134209467b48Spatrick       } else {
134309467b48Spatrick         assert(retTy->isFloatingPointTy() &&
134409467b48Spatrick                "Floating point type expected here");
134509467b48Spatrick         size = retTy->getPrimitiveSizeInBits();
134609467b48Spatrick       }
134709467b48Spatrick       // PTX ABI requires all scalar return values to be at least 32
134809467b48Spatrick       // bits in size.  fp16 normally uses .b16 as its storage type in
134909467b48Spatrick       // PTX, so its size must be adjusted here, too.
1350*d415bd75Srobert       size = promoteScalarArgumentSize(size);
135109467b48Spatrick 
135209467b48Spatrick       O << ".param .b" << size << " _";
135309467b48Spatrick     } else if (isa<PointerType>(retTy)) {
135409467b48Spatrick       O << ".param .b" << PtrVT.getSizeInBits() << " _";
135509467b48Spatrick     } else if (retTy->isAggregateType() || retTy->isVectorTy() ||
135609467b48Spatrick                retTy->isIntegerTy(128)) {
1357097a140dSpatrick       O << ".param .align " << (retAlignment ? retAlignment->value() : 0)
1358097a140dSpatrick         << " .b8 _[" << DL.getTypeAllocSize(retTy) << "]";
135909467b48Spatrick     } else {
136009467b48Spatrick       llvm_unreachable("Unknown return type");
136109467b48Spatrick     }
136209467b48Spatrick     O << ") ";
136309467b48Spatrick   }
136409467b48Spatrick   O << "_ (";
136509467b48Spatrick 
136609467b48Spatrick   bool first = true;
136709467b48Spatrick 
1368*d415bd75Srobert   const Function *F = CB.getFunction();
1369*d415bd75Srobert   unsigned NumArgs = VAInfo ? VAInfo->first : Args.size();
1370*d415bd75Srobert   for (unsigned i = 0, OIdx = 0; i != NumArgs; ++i, ++OIdx) {
137109467b48Spatrick     Type *Ty = Args[i].Ty;
137209467b48Spatrick     if (!first) {
137309467b48Spatrick       O << ", ";
137409467b48Spatrick     }
137509467b48Spatrick     first = false;
137609467b48Spatrick 
137709467b48Spatrick     if (!Outs[OIdx].Flags.isByVal()) {
137809467b48Spatrick       if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
1379*d415bd75Srobert         unsigned ParamAlign = 0;
1380097a140dSpatrick         const CallInst *CallI = cast<CallInst>(&CB);
138109467b48Spatrick         // +1 because index 0 is reserved for return type alignment
1382*d415bd75Srobert         if (!getAlign(*CallI, i + 1, ParamAlign))
1383*d415bd75Srobert           ParamAlign = getFunctionParamOptimizedAlign(F, Ty, DL).value();
1384*d415bd75Srobert         O << ".param .align " << ParamAlign << " .b8 ";
138509467b48Spatrick         O << "_";
1386*d415bd75Srobert         O << "[" << DL.getTypeAllocSize(Ty) << "]";
138709467b48Spatrick         // update the index for Outs
138809467b48Spatrick         SmallVector<EVT, 16> vtparts;
138909467b48Spatrick         ComputeValueVTs(*this, DL, Ty, vtparts);
139009467b48Spatrick         if (unsigned len = vtparts.size())
139109467b48Spatrick           OIdx += len - 1;
139209467b48Spatrick         continue;
139309467b48Spatrick       }
139409467b48Spatrick       // i8 types in IR will be i16 types in SDAG
139509467b48Spatrick       assert((getValueType(DL, Ty) == Outs[OIdx].VT ||
139609467b48Spatrick               (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
139709467b48Spatrick              "type mismatch between callee prototype and arguments");
139809467b48Spatrick       // scalar type
139909467b48Spatrick       unsigned sz = 0;
140009467b48Spatrick       if (isa<IntegerType>(Ty)) {
140109467b48Spatrick         sz = cast<IntegerType>(Ty)->getBitWidth();
1402*d415bd75Srobert         sz = promoteScalarArgumentSize(sz);
140309467b48Spatrick       } else if (isa<PointerType>(Ty)) {
140409467b48Spatrick         sz = PtrVT.getSizeInBits();
140509467b48Spatrick       } else if (Ty->isHalfTy())
140609467b48Spatrick         // PTX ABI requires all scalar parameters to be at least 32
140709467b48Spatrick         // bits in size.  fp16 normally uses .b16 as its storage type
140809467b48Spatrick         // in PTX, so its size must be adjusted here, too.
140909467b48Spatrick         sz = 32;
141009467b48Spatrick       else
141109467b48Spatrick         sz = Ty->getPrimitiveSizeInBits();
141209467b48Spatrick       O << ".param .b" << sz << " ";
141309467b48Spatrick       O << "_";
141409467b48Spatrick       continue;
141509467b48Spatrick     }
141609467b48Spatrick 
1417*d415bd75Srobert     Type *ETy = Args[i].IndirectType;
1418*d415bd75Srobert     Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
1419*d415bd75Srobert     Align ParamByValAlign =
1420*d415bd75Srobert         getFunctionByValParamAlign(F, ETy, InitialAlign, DL);
1421*d415bd75Srobert 
1422*d415bd75Srobert     O << ".param .align " << ParamByValAlign.value() << " .b8 ";
142309467b48Spatrick     O << "_";
1424*d415bd75Srobert     O << "[" << Outs[OIdx].Flags.getByValSize() << "]";
142509467b48Spatrick   }
1426*d415bd75Srobert 
1427*d415bd75Srobert   if (VAInfo)
1428*d415bd75Srobert     O << (first ? "" : ",") << " .param .align " << VAInfo->second
1429*d415bd75Srobert       << " .b8 _[]\n";
1430*d415bd75Srobert   O << ")";
1431*d415bd75Srobert   if (shouldEmitPTXNoReturn(&CB, *nvTM))
1432*d415bd75Srobert     O << " .noreturn";
1433*d415bd75Srobert   O << ";";
1434*d415bd75Srobert 
1435*d415bd75Srobert   return Prototype;
143609467b48Spatrick }
143709467b48Spatrick 
getArgumentAlignment(SDValue Callee,const CallBase * CB,Type * Ty,unsigned Idx,const DataLayout & DL) const1438097a140dSpatrick Align NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
1439097a140dSpatrick                                                 const CallBase *CB, Type *Ty,
1440097a140dSpatrick                                                 unsigned Idx,
144109467b48Spatrick                                                 const DataLayout &DL) const {
1442097a140dSpatrick   if (!CB) {
144309467b48Spatrick     // CallSite is zero, fallback to ABI type alignment
1444097a140dSpatrick     return DL.getABITypeAlign(Ty);
144509467b48Spatrick   }
144609467b48Spatrick 
1447097a140dSpatrick   unsigned Alignment = 0;
1448097a140dSpatrick   const Function *DirectCallee = CB->getCalledFunction();
144909467b48Spatrick 
145009467b48Spatrick   if (!DirectCallee) {
145109467b48Spatrick     // We don't have a direct function symbol, but that may be because of
145209467b48Spatrick     // constant cast instructions in the call.
145309467b48Spatrick 
145409467b48Spatrick     // With bitcast'd call targets, the instruction will be the call
1455097a140dSpatrick     if (const auto *CI = dyn_cast<CallInst>(CB)) {
145609467b48Spatrick       // Check if we have call alignment metadata
1457097a140dSpatrick       if (getAlign(*CI, Idx, Alignment))
1458097a140dSpatrick         return Align(Alignment);
145909467b48Spatrick     }
1460*d415bd75Srobert     DirectCallee = getMaybeBitcastedCallee(CB);
146109467b48Spatrick   }
146209467b48Spatrick 
146309467b48Spatrick   // Check for function alignment information if we found that the
146409467b48Spatrick   // ultimate target is a Function
1465*d415bd75Srobert   if (DirectCallee) {
1466097a140dSpatrick     if (getAlign(*DirectCallee, Idx, Alignment))
1467097a140dSpatrick       return Align(Alignment);
1468*d415bd75Srobert     // If alignment information is not available, fall back to the
1469*d415bd75Srobert     // default function param optimized type alignment
1470*d415bd75Srobert     return getFunctionParamOptimizedAlign(DirectCallee, Ty, DL);
1471*d415bd75Srobert   }
147209467b48Spatrick 
1473*d415bd75Srobert   // Call is indirect, fall back to the ABI type alignment
1474097a140dSpatrick   return DL.getABITypeAlign(Ty);
147509467b48Spatrick }
147609467b48Spatrick 
/// Lower an outgoing call into the NVPTX call sequence.
///
/// The nodes are created in this order, each threaded through the chain and
/// glued to its predecessor: CALLSEQ_START; for every argument a
/// DeclareParam / DeclareScalarParam followed by StoreParam[V2|V4] nodes; an
/// optional DeclareRet / DeclareRetParam when the call produces values; for
/// indirect calls a CallPrototype; PrintCall* ; CallVoid; CallArgBegin;
/// CallArg / LastCallArg; CallArgEnd; for indirect calls a Prototype;
/// LoadParam[V2|V4] nodes for the result; CALLSEQ_END; and finally one
/// ProxyReg per returned part, whose (possibly truncated) value is appended
/// to \p InVals.
///
/// All variadic arguments are stored into one shared byte-array param whose
/// size is patched in once every argument has been processed.
///
/// Reports a fatal error for variadic calls when the PTX version is below 60
/// or the SM version is below 30. Always sets CLI.IsTailCall to false.
///
/// \param CLI    Description of the call being lowered.
/// \param InVals Receives the values produced by the call.
/// \returns The chain after the last ProxyReg (or after CALLSEQ_END when the
///          call produces no values).
LowerCall(TargetLowering::CallLoweringInfo & CLI,SmallVectorImpl<SDValue> & InVals) const147709467b48Spatrick SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
147809467b48Spatrick                                        SmallVectorImpl<SDValue> &InVals) const {
1479*d415bd75Srobert 
1480*d415bd75Srobert   if (CLI.IsVarArg && (STI.getPTXVersion() < 60 || STI.getSmVersion() < 30))
1481*d415bd75Srobert     report_fatal_error(
1482*d415bd75Srobert         "Support for variadic functions (unsized array parameter) introduced "
1483*d415bd75Srobert         "in PTX ISA version 6.0 and requires target sm_30.");
1484*d415bd75Srobert 
148509467b48Spatrick   SelectionDAG &DAG = CLI.DAG;
148609467b48Spatrick   SDLoc dl = CLI.DL;
148709467b48Spatrick   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
148809467b48Spatrick   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
148909467b48Spatrick   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
149009467b48Spatrick   SDValue Chain = CLI.Chain;
149109467b48Spatrick   SDValue Callee = CLI.Callee;
149209467b48Spatrick   bool &isTailCall = CLI.IsTailCall;
149309467b48Spatrick   ArgListTy &Args = CLI.getArgs();
149409467b48Spatrick   Type *RetTy = CLI.RetTy;
1495097a140dSpatrick   const CallBase *CB = CLI.CB;
149609467b48Spatrick   const DataLayout &DL = DAG.getDataLayout();
149709467b48Spatrick 
149809467b48Spatrick   bool isABI = (STI.getSmVersion() >= 20);
149909467b48Spatrick   assert(isABI && "Non-ABI compilation is not supported");
  // Bail out without lowering anything if the assert above is compiled out.
150009467b48Spatrick   if (!isABI)
150109467b48Spatrick     return Chain;
150209467b48Spatrick 
1503*d415bd75Srobert   // Variadic arguments.
1504*d415bd75Srobert   //
1505*d415bd75Srobert   // Normally, for each argument, we declare a param scalar or a param
1506*d415bd75Srobert   // byte array in the .param space, and store the argument value to that
1507*d415bd75Srobert   // param scalar or array starting at offset 0.
1508*d415bd75Srobert   //
1509*d415bd75Srobert   // In the case of the first variadic argument, we declare a vararg byte array
1510*d415bd75Srobert   // with a placeholder size. The exact size of this array isn't known at this
1511*d415bd75Srobert   // point, so it'll be patched later. All the variadic arguments will be stored to this
1512*d415bd75Srobert   // array at a certain offset (which gets tracked by 'VAOffset'). The offset is
1513*d415bd75Srobert   // initially set to 0, so it can be used for non-variadic arguments (which use
1514*d415bd75Srobert   // 0 offset) to simplify the code.
1515*d415bd75Srobert   //
1516*d415bd75Srobert   // After all vararg is processed, 'VAOffset' holds the size of the
1517*d415bd75Srobert   // vararg byte array.
1518*d415bd75Srobert 
1519*d415bd75Srobert   SDValue VADeclareParam;                 // vararg byte array
1520*d415bd75Srobert   unsigned FirstVAArg = CLI.NumFixedArgs; // position of the first variadic
1521*d415bd75Srobert   unsigned VAOffset = 0;                  // current offset in the param array
1522*d415bd75Srobert 
152373471bf0Spatrick   unsigned UniqueCallSite = GlobalUniqueCallSite.fetch_add(1);
  // Keep the chain as it was before CALLSEQ_START: the loads that read the
  // contents of byval arguments below are chained to it rather than to the
  // call sequence.
1524*d415bd75Srobert   SDValue TempChain = Chain;
152573471bf0Spatrick   Chain = DAG.getCALLSEQ_START(Chain, UniqueCallSite, 0, dl);
152609467b48Spatrick   SDValue InFlag = Chain.getValue(1);
152709467b48Spatrick 
1528*d415bd75Srobert   unsigned ParamCount = 0;
152909467b48Spatrick   // Args.size() and Outs.size() need not match.
153009467b48Spatrick   // Outs.size() will be larger
153109467b48Spatrick   //   * if there is an aggregate argument with multiple fields (each field
153209467b48Spatrick   //     showing up separately in Outs)
153309467b48Spatrick   //   * if there is a vector argument with more than typical vector-length
153409467b48Spatrick   //     elements (generally if more than 4) where each vector element is
153509467b48Spatrick   //     individually present in Outs.
153609467b48Spatrick   // So a different index should be used for indexing into Outs/OutVals.
153709467b48Spatrick   // See similar issue in LowerFormalArguments.
153809467b48Spatrick   unsigned OIdx = 0;
153909467b48Spatrick   // Declare the .params or .reg need to pass values
154009467b48Spatrick   // to the function
154109467b48Spatrick   for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
154209467b48Spatrick     EVT VT = Outs[OIdx].VT;
154309467b48Spatrick     Type *Ty = Args[i].Ty;
1544*d415bd75Srobert     bool IsVAArg = (i >= CLI.NumFixedArgs);
1545*d415bd75Srobert     bool IsByVal = Outs[OIdx].Flags.isByVal();
154609467b48Spatrick 
154709467b48Spatrick     SmallVector<EVT, 16> VTs;
154809467b48Spatrick     SmallVector<uint64_t, 16> Offsets;
1549*d415bd75Srobert 
1550*d415bd75Srobert     assert((!IsByVal || Args[i].IndirectType) &&
1551*d415bd75Srobert            "byval arg must have indirect type");
1552*d415bd75Srobert     Type *ETy = (IsByVal ? Args[i].IndirectType : Ty);
    // Part offsets start at the current VAOffset, except for byval arguments,
    // whose part offsets start at 0 (VAOffset is added when storing them).
1553*d415bd75Srobert     ComputePTXValueVTs(*this, DL, ETy, VTs, &Offsets, IsByVal ? 0 : VAOffset);
1554*d415bd75Srobert 
1555*d415bd75Srobert     Align ArgAlign;
1556*d415bd75Srobert     if (IsByVal) {
1557*d415bd75Srobert       // The ByValAlign in the Outs[OIdx].Flags is always set at this point,
1558*d415bd75Srobert       // so we don't need to worry whether it's naturally aligned or not.
1559*d415bd75Srobert       // See TargetLowering::LowerCallTo().
1560*d415bd75Srobert       Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
      // NOTE(review): CB is dereferenced here without a null check, while the
      // comment further down says the call site is null for libcalls.
      // Presumably libcalls never pass byval arguments -- confirm.
1561*d415bd75Srobert       ArgAlign = getFunctionByValParamAlign(CB->getCalledFunction(), ETy,
1562*d415bd75Srobert                                             InitialAlign, DL);
1563*d415bd75Srobert       if (IsVAArg)
1564*d415bd75Srobert         VAOffset = alignTo(VAOffset, ArgAlign);
1565*d415bd75Srobert     } else {
1566*d415bd75Srobert       ArgAlign = getArgumentAlignment(Callee, CB, Ty, ParamCount + 1, DL);
1567*d415bd75Srobert     }
1568*d415bd75Srobert 
1569*d415bd75Srobert     unsigned TypeSize =
1570*d415bd75Srobert         (IsByVal ? Outs[OIdx].Flags.getByValSize() : DL.getTypeAllocSize(Ty));
157109467b48Spatrick     SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1572*d415bd75Srobert 
157309467b48Spatrick     bool NeedAlign; // Does argument declaration specify alignment?
1574*d415bd75Srobert     if (IsVAArg) {
      // The shared vararg array is declared only once, when the first
      // variadic argument is reached. Its size operand (1 here) is a
      // placeholder that is replaced by the final VAOffset after the loop.
1575*d415bd75Srobert       if (ParamCount == FirstVAArg) {
1576*d415bd75Srobert         SDValue DeclareParamOps[] = {
1577*d415bd75Srobert             Chain, DAG.getConstant(STI.getMaxRequiredAlignment(), dl, MVT::i32),
1578*d415bd75Srobert             DAG.getConstant(ParamCount, dl, MVT::i32),
1579*d415bd75Srobert             DAG.getConstant(1, dl, MVT::i32), InFlag};
1580*d415bd75Srobert         VADeclareParam = Chain = DAG.getNode(NVPTXISD::DeclareParam, dl,
1581*d415bd75Srobert                                              DeclareParamVTs, DeclareParamOps);
1582*d415bd75Srobert       }
1583*d415bd75Srobert       NeedAlign = IsByVal || Ty->isAggregateType() || Ty->isVectorTy() ||
1584*d415bd75Srobert                   Ty->isIntegerTy(128);
1585*d415bd75Srobert     } else if (IsByVal || Ty->isAggregateType() || Ty->isVectorTy() ||
1586*d415bd75Srobert                Ty->isIntegerTy(128)) {
158709467b48Spatrick       // declare .param .align <align> .b8 .param<n>[<size>];
158809467b48Spatrick       SDValue DeclareParamOps[] = {
1589097a140dSpatrick           Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32),
1590*d415bd75Srobert           DAG.getConstant(ParamCount, dl, MVT::i32),
1591*d415bd75Srobert           DAG.getConstant(TypeSize, dl, MVT::i32), InFlag};
159209467b48Spatrick       Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
159309467b48Spatrick                           DeclareParamOps);
159409467b48Spatrick       NeedAlign = true;
159509467b48Spatrick     } else {
159609467b48Spatrick       // declare .param .b<size> .param<n>;
1597*d415bd75Srobert       if (VT.isInteger() || VT.isFloatingPoint()) {
159809467b48Spatrick         // PTX ABI requires integral types to be at least 32 bits in
159909467b48Spatrick         // size. FP16 is loaded/stored using i16, so it's handled
160009467b48Spatrick         // here as well.
1601*d415bd75Srobert         TypeSize = promoteScalarArgumentSize(TypeSize * 8) / 8;
160209467b48Spatrick       }
160309467b48Spatrick       SDValue DeclareScalarParamOps[] = {
1604*d415bd75Srobert           Chain, DAG.getConstant(ParamCount, dl, MVT::i32),
1605*d415bd75Srobert           DAG.getConstant(TypeSize * 8, dl, MVT::i32),
160609467b48Spatrick           DAG.getConstant(0, dl, MVT::i32), InFlag};
160709467b48Spatrick       Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
160809467b48Spatrick                           DeclareScalarParamOps);
160909467b48Spatrick       NeedAlign = false;
161009467b48Spatrick     }
161109467b48Spatrick     InFlag = Chain.getValue(1);
161209467b48Spatrick 
161309467b48Spatrick     // PTX Interoperability Guide 3.3(A): [Integer] Values shorter
161409467b48Spatrick     // than 32-bits are sign extended or zero extended, depending on
161509467b48Spatrick     // whether they are signed or unsigned types. This case applies
161609467b48Spatrick     // only to scalar parameters and not to aggregate values.
161709467b48Spatrick     bool ExtendIntegerParam =
161809467b48Spatrick         Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32;
161909467b48Spatrick 
1620*d415bd75Srobert     auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign, IsVAArg);
162109467b48Spatrick     SmallVector<SDValue, 6> StoreOperands;
162209467b48Spatrick     for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
1623*d415bd75Srobert       EVT EltVT = VTs[j];
1624*d415bd75Srobert       int CurOffset = Offsets[j];
1625*d415bd75Srobert       MaybeAlign PartAlign;
1626*d415bd75Srobert       if (NeedAlign)
1627*d415bd75Srobert         PartAlign = commonAlignment(ArgAlign, CurOffset);
1628*d415bd75Srobert 
162909467b48Spatrick       // New store.
163009467b48Spatrick       if (VectorInfo[j] & PVF_FIRST) {
163109467b48Spatrick         assert(StoreOperands.empty() && "Unfinished preceding store.");
163209467b48Spatrick         StoreOperands.push_back(Chain);
        // Every variadic argument is stored into the param declared for the
        // first variadic argument.
1633*d415bd75Srobert         StoreOperands.push_back(
1634*d415bd75Srobert             DAG.getConstant(IsVAArg ? FirstVAArg : ParamCount, dl, MVT::i32));
1635*d415bd75Srobert         StoreOperands.push_back(DAG.getConstant(
1636*d415bd75Srobert             IsByVal ? CurOffset + VAOffset : (IsVAArg ? VAOffset : CurOffset),
1637*d415bd75Srobert             dl, MVT::i32));
163809467b48Spatrick       }
163909467b48Spatrick 
164009467b48Spatrick       SDValue StVal = OutVals[OIdx];
1641*d415bd75Srobert 
1642*d415bd75Srobert       MVT PromotedVT;
1643*d415bd75Srobert       if (PromoteScalarIntegerPTX(EltVT, &PromotedVT)) {
1644*d415bd75Srobert         EltVT = EVT(PromotedVT);
1645*d415bd75Srobert       }
1646*d415bd75Srobert       if (PromoteScalarIntegerPTX(StVal.getValueType(), &PromotedVT)) {
1647*d415bd75Srobert         llvm::ISD::NodeType Ext =
1648*d415bd75Srobert             Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1649*d415bd75Srobert         StVal = DAG.getNode(Ext, dl, PromotedVT, StVal);
1650*d415bd75Srobert       }
1651*d415bd75Srobert 
1652*d415bd75Srobert       if (IsByVal) {
        // For byval arguments the outgoing value is used as a base address;
        // load the part at CurOffset from it and store the loaded value.
1653*d415bd75Srobert         auto PtrVT = getPointerTy(DL);
1654*d415bd75Srobert         SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StVal,
1655*d415bd75Srobert                                       DAG.getConstant(CurOffset, dl, PtrVT));
1656*d415bd75Srobert         StVal = DAG.getLoad(EltVT, dl, TempChain, srcAddr, MachinePointerInfo(),
1657*d415bd75Srobert                             PartAlign);
1658*d415bd75Srobert       } else if (ExtendIntegerParam) {
165909467b48Spatrick         assert(VTs.size() == 1 && "Scalar can't have multiple parts.");
166009467b48Spatrick         // zext/sext to i32
166109467b48Spatrick         StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
166209467b48Spatrick                                                       : ISD::ZERO_EXTEND,
166309467b48Spatrick                             dl, MVT::i32, StVal);
1664*d415bd75Srobert       }
1665*d415bd75Srobert 
1666*d415bd75Srobert       if (!ExtendIntegerParam && EltVT.getSizeInBits() < 16) {
166709467b48Spatrick         // Use 16-bit registers for small stores as it's the
166809467b48Spatrick         // smallest general purpose register size supported by NVPTX.
166909467b48Spatrick         StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
167009467b48Spatrick       }
167109467b48Spatrick 
167209467b48Spatrick       // Record the value to store.
167309467b48Spatrick       StoreOperands.push_back(StVal);
167409467b48Spatrick 
167509467b48Spatrick       if (VectorInfo[j] & PVF_LAST) {
        // StoreOperands holds chain, param index and offset ahead of the
        // values, hence the "- 3".
167609467b48Spatrick         unsigned NumElts = StoreOperands.size() - 3;
167709467b48Spatrick         NVPTXISD::NodeType Op;
167809467b48Spatrick         switch (NumElts) {
167909467b48Spatrick         case 1:
168009467b48Spatrick           Op = NVPTXISD::StoreParam;
168109467b48Spatrick           break;
168209467b48Spatrick         case 2:
168309467b48Spatrick           Op = NVPTXISD::StoreParamV2;
168409467b48Spatrick           break;
168509467b48Spatrick         case 4:
168609467b48Spatrick           Op = NVPTXISD::StoreParamV4;
168709467b48Spatrick           break;
168809467b48Spatrick         default:
168909467b48Spatrick           llvm_unreachable("Invalid vector info.");
169009467b48Spatrick         }
169109467b48Spatrick 
169209467b48Spatrick         StoreOperands.push_back(InFlag);
169309467b48Spatrick 
169409467b48Spatrick         // Adjust type of the store op if we've extended the scalar
169509467b48Spatrick         // parameter value.
1696*d415bd75Srobert         EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT;
169709467b48Spatrick 
169809467b48Spatrick         Chain = DAG.getMemIntrinsicNode(
169909467b48Spatrick             Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
1700*d415bd75Srobert             TheStoreType, MachinePointerInfo(), PartAlign,
170109467b48Spatrick             MachineMemOperand::MOStore);
170209467b48Spatrick         InFlag = Chain.getValue(1);
170309467b48Spatrick 
170409467b48Spatrick         // Cleanup.
170509467b48Spatrick         StoreOperands.clear();
1706*d415bd75Srobert 
1707*d415bd75Srobert         // TODO: We may need to support vector types that can be passed
1708*d415bd75Srobert         // as scalars in variadic arguments.
1709*d415bd75Srobert         if (!IsByVal && IsVAArg) {
1710*d415bd75Srobert           assert(NumElts == 1 &&
1711*d415bd75Srobert                  "Vectorization is expected to be disabled for variadics.");
1712*d415bd75Srobert           VAOffset += DL.getTypeAllocSize(
1713*d415bd75Srobert               TheStoreType.getTypeForEVT(*DAG.getContext()));
171409467b48Spatrick         }
1715*d415bd75Srobert       }
      // Advance to the next Outs/OutVals entry for every part of a non-byval
      // argument; a byval argument stays on its single entry.
1716*d415bd75Srobert       if (!IsByVal)
171709467b48Spatrick         ++OIdx;
171809467b48Spatrick     }
171909467b48Spatrick     assert(StoreOperands.empty() && "Unfinished parameter store.");
    // Undo the last increment above; the outer loop header advances OIdx
    // once more on its own.
1720*d415bd75Srobert     if (!IsByVal && VTs.size() > 0)
172109467b48Spatrick       --OIdx;
1722*d415bd75Srobert     ++ParamCount;
1723*d415bd75Srobert     if (IsByVal && IsVAArg)
1724*d415bd75Srobert       VAOffset += TypeSize;
172509467b48Spatrick   }
172609467b48Spatrick 
172709467b48Spatrick   GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
1728*d415bd75Srobert   MaybeAlign retAlignment = std::nullopt;
172909467b48Spatrick 
173009467b48Spatrick   // Handle Result
173109467b48Spatrick   if (Ins.size() > 0) {
173209467b48Spatrick     SmallVector<EVT, 16> resvtparts;
173309467b48Spatrick     ComputeValueVTs(*this, DL, RetTy, resvtparts);
173409467b48Spatrick 
173509467b48Spatrick     // Declare
173609467b48Spatrick     //  .param .align 16 .b8 retval0[<size-in-bytes>], or
173709467b48Spatrick     //  .param .b<size-in-bits> retval0
173809467b48Spatrick     unsigned resultsz = DL.getTypeAllocSizeInBits(RetTy);
173909467b48Spatrick     // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
174009467b48Spatrick     // these three types to match the logic in
174109467b48Spatrick     // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
174209467b48Spatrick     // Plus, this behavior is consistent with nvcc's.
174309467b48Spatrick     if (RetTy->isFloatingPointTy() || RetTy->isPointerTy() ||
174409467b48Spatrick         (RetTy->isIntegerTy() && !RetTy->isIntegerTy(128))) {
1745*d415bd75Srobert       resultsz = promoteScalarArgumentSize(resultsz);
174609467b48Spatrick       SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
174709467b48Spatrick       SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
174809467b48Spatrick                                   DAG.getConstant(resultsz, dl, MVT::i32),
174909467b48Spatrick                                   DAG.getConstant(0, dl, MVT::i32), InFlag };
175009467b48Spatrick       Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
175109467b48Spatrick                           DeclareRetOps);
175209467b48Spatrick       InFlag = Chain.getValue(1);
175309467b48Spatrick     } else {
1754097a140dSpatrick       retAlignment = getArgumentAlignment(Callee, CB, RetTy, 0, DL);
1755097a140dSpatrick       assert(retAlignment && "retAlignment is guaranteed to be set");
175609467b48Spatrick       SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1757097a140dSpatrick       SDValue DeclareRetOps[] = {
1758097a140dSpatrick           Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32),
175909467b48Spatrick           DAG.getConstant(resultsz / 8, dl, MVT::i32),
176009467b48Spatrick           DAG.getConstant(0, dl, MVT::i32), InFlag};
176109467b48Spatrick       Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
176209467b48Spatrick                           DeclareRetOps);
176309467b48Spatrick       InFlag = Chain.getValue(1);
176409467b48Spatrick     }
176509467b48Spatrick   }
176609467b48Spatrick 
1767*d415bd75Srobert   bool HasVAArgs = CLI.IsVarArg && (CLI.Args.size() > CLI.NumFixedArgs);
1768*d415bd75Srobert   // Set the size of the vararg param byte array if the callee is a variadic
1769*d415bd75Srobert   // function and the variadic part is not empty.
1770*d415bd75Srobert   if (HasVAArgs) {
    // Rebuild the declaration with the same operands except operand 3 (the
    // size), which becomes the accumulated VAOffset.
1771*d415bd75Srobert     SDValue DeclareParamOps[] = {
1772*d415bd75Srobert         VADeclareParam.getOperand(0), VADeclareParam.getOperand(1),
1773*d415bd75Srobert         VADeclareParam.getOperand(2), DAG.getConstant(VAOffset, dl, MVT::i32),
1774*d415bd75Srobert         VADeclareParam.getOperand(4)};
1775*d415bd75Srobert     DAG.MorphNodeTo(VADeclareParam.getNode(), VADeclareParam.getOpcode(),
1776*d415bd75Srobert                     VADeclareParam->getVTList(), DeclareParamOps);
1777*d415bd75Srobert   }
1778*d415bd75Srobert 
177909467b48Spatrick   // Both indirect calls and libcalls have nullptr Func. In order to distinguish
178009467b48Spatrick   // between them we must rely on the call site value which is valid for
178109467b48Spatrick   // indirect calls but is always null for libcalls.
1782097a140dSpatrick   bool isIndirectCall = !Func && CB;
178309467b48Spatrick 
178409467b48Spatrick   if (isa<ExternalSymbolSDNode>(Callee)) {
178509467b48Spatrick     Function* CalleeFunc = nullptr;
178609467b48Spatrick 
178709467b48Spatrick     // Try to find the callee in the current module.
178809467b48Spatrick     Callee = DAG.getSymbolFunctionGlobalAddress(Callee, &CalleeFunc);
178909467b48Spatrick     assert(CalleeFunc != nullptr && "Libcall callee must be set.");
179009467b48Spatrick 
179109467b48Spatrick     // Set the "libcall callee" attribute to indicate that the function
179209467b48Spatrick     // must always have a declaration.
179309467b48Spatrick     CalleeFunc->addFnAttr("nvptx-libcall-callee", "true");
179409467b48Spatrick   }
179509467b48Spatrick 
179609467b48Spatrick   if (isIndirectCall) {
179709467b48Spatrick     // This is indirect function call case : PTX requires a prototype of the
179809467b48Spatrick     // form
179909467b48Spatrick     // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
180009467b48Spatrick     // to be emitted, and the label has to used as the last arg of call
180109467b48Spatrick     // instruction.
180209467b48Spatrick     // The prototype is embedded in a string and put as the operand for a
180309467b48Spatrick     // CallPrototype SDNode which will print out to the value of the string.
180409467b48Spatrick     SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1805*d415bd75Srobert     std::string Proto = getPrototype(
1806*d415bd75Srobert         DL, RetTy, Args, Outs, retAlignment,
1807*d415bd75Srobert         HasVAArgs
1808*d415bd75Srobert             ? std::optional<std::pair<unsigned, const APInt &>>(std::make_pair(
1809*d415bd75Srobert                   CLI.NumFixedArgs,
1810*d415bd75Srobert                   cast<ConstantSDNode>(VADeclareParam->getOperand(1))
1811*d415bd75Srobert                       ->getAPIntValue()))
1812*d415bd75Srobert             : std::nullopt,
1813*d415bd75Srobert         *CB, UniqueCallSite);
1814*d415bd75Srobert     const char *ProtoStr = nvTM->getStrPool().save(Proto).data();
181509467b48Spatrick     SDValue ProtoOps[] = {
1816*d415bd75Srobert         Chain,
1817*d415bd75Srobert         DAG.getTargetExternalSymbol(ProtoStr, MVT::i32),
1818*d415bd75Srobert         InFlag,
181909467b48Spatrick     };
182009467b48Spatrick     Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
182109467b48Spatrick     InFlag = Chain.getValue(1);
182209467b48Spatrick   }
182309467b48Spatrick   // Op to just print "call"
182409467b48Spatrick   SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
182509467b48Spatrick   SDValue PrintCallOps[] = {
182609467b48Spatrick     Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag
182709467b48Spatrick   };
182809467b48Spatrick   // We model convergent calls as separate opcodes.
182909467b48Spatrick   unsigned Opcode = isIndirectCall ? NVPTXISD::PrintCall : NVPTXISD::PrintCallUni;
183009467b48Spatrick   if (CLI.IsConvergent)
183109467b48Spatrick     Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni
183209467b48Spatrick                                               : NVPTXISD::PrintConvergentCall;
183309467b48Spatrick   Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps);
183409467b48Spatrick   InFlag = Chain.getValue(1);
183509467b48Spatrick 
183609467b48Spatrick   // Ops to print out the function name
183709467b48Spatrick   SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
183809467b48Spatrick   SDValue CallVoidOps[] = { Chain, Callee, InFlag };
183909467b48Spatrick   Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
184009467b48Spatrick   InFlag = Chain.getValue(1);
184109467b48Spatrick 
184209467b48Spatrick   // Ops to print out the param list
184309467b48Spatrick   SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
184409467b48Spatrick   SDValue CallArgBeginOps[] = { Chain, InFlag };
184509467b48Spatrick   Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
184609467b48Spatrick                       CallArgBeginOps);
184709467b48Spatrick   InFlag = Chain.getValue(1);
184809467b48Spatrick 
  // All variadic arguments share a single param, so at most
  // NumFixedArgs + 1 params are listed.
1849*d415bd75Srobert   for (unsigned i = 0, e = std::min(CLI.NumFixedArgs + 1, ParamCount); i != e;
1850*d415bd75Srobert        ++i) {
185109467b48Spatrick     unsigned opcode;
185209467b48Spatrick     if (i == (e - 1))
185309467b48Spatrick       opcode = NVPTXISD::LastCallArg;
185409467b48Spatrick     else
185509467b48Spatrick       opcode = NVPTXISD::CallArg;
185609467b48Spatrick     SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
185709467b48Spatrick     SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
185809467b48Spatrick                              DAG.getConstant(i, dl, MVT::i32), InFlag };
185909467b48Spatrick     Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
186009467b48Spatrick     InFlag = Chain.getValue(1);
186109467b48Spatrick   }
186209467b48Spatrick   SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
186309467b48Spatrick   SDValue CallArgEndOps[] = { Chain,
186409467b48Spatrick                               DAG.getConstant(isIndirectCall ? 0 : 1, dl, MVT::i32),
186509467b48Spatrick                               InFlag };
186609467b48Spatrick   Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
186709467b48Spatrick   InFlag = Chain.getValue(1);
186809467b48Spatrick 
186909467b48Spatrick   if (isIndirectCall) {
187009467b48Spatrick     SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
187173471bf0Spatrick     SDValue PrototypeOps[] = {
187273471bf0Spatrick         Chain, DAG.getConstant(UniqueCallSite, dl, MVT::i32), InFlag};
187309467b48Spatrick     Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
187409467b48Spatrick     InFlag = Chain.getValue(1);
187509467b48Spatrick   }
187609467b48Spatrick 
  // Loaded result values, and for each one the type to truncate it back to
  // (or no value when no truncation is needed). The two vectors are indexed
  // in parallel.
187709467b48Spatrick   SmallVector<SDValue, 16> ProxyRegOps;
1878*d415bd75Srobert   SmallVector<std::optional<MVT>, 16> ProxyRegTruncates;
187909467b48Spatrick 
188009467b48Spatrick   // Generate loads from param memory/moves from registers for result
188109467b48Spatrick   if (Ins.size() > 0) {
188209467b48Spatrick     SmallVector<EVT, 16> VTs;
188309467b48Spatrick     SmallVector<uint64_t, 16> Offsets;
188409467b48Spatrick     ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0);
188509467b48Spatrick     assert(VTs.size() == Ins.size() && "Bad value decomposition");
188609467b48Spatrick 
1887097a140dSpatrick     Align RetAlign = getArgumentAlignment(Callee, CB, RetTy, 0, DL);
188809467b48Spatrick     auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
188909467b48Spatrick 
189009467b48Spatrick     SmallVector<EVT, 6> LoadVTs;
189109467b48Spatrick     int VecIdx = -1; // Index of the first element of the vector.
189209467b48Spatrick 
189309467b48Spatrick     // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
189409467b48Spatrick     // 32-bits are sign extended or zero extended, depending on whether
189509467b48Spatrick     // they are signed or unsigned types.
189609467b48Spatrick     bool ExtendIntegerRetVal =
189709467b48Spatrick         RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
189809467b48Spatrick 
189909467b48Spatrick     for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
190009467b48Spatrick       bool needTruncate = false;
190109467b48Spatrick       EVT TheLoadType = VTs[i];
190209467b48Spatrick       EVT EltType = Ins[i].VT;
1903097a140dSpatrick       Align EltAlign = commonAlignment(RetAlign, Offsets[i]);
1904*d415bd75Srobert       MVT PromotedVT;
1905*d415bd75Srobert 
1906*d415bd75Srobert       if (PromoteScalarIntegerPTX(TheLoadType, &PromotedVT)) {
1907*d415bd75Srobert         TheLoadType = EVT(PromotedVT);
1908*d415bd75Srobert         EltType = EVT(PromotedVT);
1909*d415bd75Srobert         needTruncate = true;
1910*d415bd75Srobert       }
1911*d415bd75Srobert 
191209467b48Spatrick       if (ExtendIntegerRetVal) {
191309467b48Spatrick         TheLoadType = MVT::i32;
191409467b48Spatrick         EltType = MVT::i32;
191509467b48Spatrick         needTruncate = true;
191609467b48Spatrick       } else if (TheLoadType.getSizeInBits() < 16) {
191709467b48Spatrick         if (VTs[i].isInteger())
191809467b48Spatrick           needTruncate = true;
191909467b48Spatrick         EltType = MVT::i16;
192009467b48Spatrick       }
192109467b48Spatrick 
192209467b48Spatrick       // Record index of the very first element of the vector.
192309467b48Spatrick       if (VectorInfo[i] & PVF_FIRST) {
192409467b48Spatrick         assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list.");
192509467b48Spatrick         VecIdx = i;
192609467b48Spatrick       }
192709467b48Spatrick 
192809467b48Spatrick       LoadVTs.push_back(EltType);
192909467b48Spatrick 
193009467b48Spatrick       if (VectorInfo[i] & PVF_LAST) {
193109467b48Spatrick         unsigned NumElts = LoadVTs.size();
193209467b48Spatrick         LoadVTs.push_back(MVT::Other);
193309467b48Spatrick         LoadVTs.push_back(MVT::Glue);
193409467b48Spatrick         NVPTXISD::NodeType Op;
193509467b48Spatrick         switch (NumElts) {
193609467b48Spatrick         case 1:
193709467b48Spatrick           Op = NVPTXISD::LoadParam;
193809467b48Spatrick           break;
193909467b48Spatrick         case 2:
194009467b48Spatrick           Op = NVPTXISD::LoadParamV2;
194109467b48Spatrick           break;
194209467b48Spatrick         case 4:
194309467b48Spatrick           Op = NVPTXISD::LoadParamV4;
194409467b48Spatrick           break;
194509467b48Spatrick         default:
194609467b48Spatrick           llvm_unreachable("Invalid vector info.");
194709467b48Spatrick         }
194809467b48Spatrick 
194909467b48Spatrick         SDValue LoadOperands[] = {
195009467b48Spatrick             Chain, DAG.getConstant(1, dl, MVT::i32),
195109467b48Spatrick             DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InFlag};
195209467b48Spatrick         SDValue RetVal = DAG.getMemIntrinsicNode(
195309467b48Spatrick             Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType,
195409467b48Spatrick             MachinePointerInfo(), EltAlign,
195509467b48Spatrick             MachineMemOperand::MOLoad);
195609467b48Spatrick 
195709467b48Spatrick         for (unsigned j = 0; j < NumElts; ++j) {
195809467b48Spatrick           ProxyRegOps.push_back(RetVal.getValue(j));
195909467b48Spatrick 
196009467b48Spatrick           if (needTruncate)
1961*d415bd75Srobert             ProxyRegTruncates.push_back(std::optional<MVT>(Ins[VecIdx + j].VT));
196209467b48Spatrick           else
1963*d415bd75Srobert             ProxyRegTruncates.push_back(std::optional<MVT>());
196409467b48Spatrick         }
196509467b48Spatrick 
        // The load's results are the NumElts values, then the chain, then
        // the glue.
196609467b48Spatrick         Chain = RetVal.getValue(NumElts);
196709467b48Spatrick         InFlag = RetVal.getValue(NumElts + 1);
196809467b48Spatrick 
196909467b48Spatrick         // Cleanup
197009467b48Spatrick         VecIdx = -1;
197109467b48Spatrick         LoadVTs.clear();
197209467b48Spatrick       }
197309467b48Spatrick     }
197409467b48Spatrick   }
197509467b48Spatrick 
1976*d415bd75Srobert   Chain =
1977*d415bd75Srobert       DAG.getCALLSEQ_END(Chain, UniqueCallSite, UniqueCallSite + 1, InFlag, dl);
197809467b48Spatrick   InFlag = Chain.getValue(1);
197909467b48Spatrick 
198009467b48Spatrick   // Append ProxyReg instructions to the chain to make sure that `callseq_end`
198109467b48Spatrick   // will not get lost. Otherwise, during libcalls expansion, the nodes can become
198209467b48Spatrick   // dangling.
198309467b48Spatrick   for (unsigned i = 0; i < ProxyRegOps.size(); ++i) {
198409467b48Spatrick     SDValue Ret = DAG.getNode(
198509467b48Spatrick       NVPTXISD::ProxyReg, dl,
198609467b48Spatrick       DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other, MVT::Glue),
198709467b48Spatrick       { Chain, ProxyRegOps[i], InFlag }
198809467b48Spatrick     );
198909467b48Spatrick 
199009467b48Spatrick     Chain = Ret.getValue(1);
199109467b48Spatrick     InFlag = Ret.getValue(2);
199209467b48Spatrick 
1993*d415bd75Srobert     if (ProxyRegTruncates[i]) {
1994*d415bd75Srobert       Ret = DAG.getNode(ISD::TRUNCATE, dl, *ProxyRegTruncates[i], Ret);
199509467b48Spatrick     }
199609467b48Spatrick 
199709467b48Spatrick     InVals.push_back(Ret);
199809467b48Spatrick   }
199909467b48Spatrick 
200009467b48Spatrick   // set isTailCall to false for now, until we figure out how to express
200109467b48Spatrick   // tail call optimization in PTX
200209467b48Spatrick   isTailCall = false;
200309467b48Spatrick   return Chain;
200409467b48Spatrick }
200509467b48Spatrick 
200609467b48Spatrick // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
200709467b48Spatrick // (see LegalizeDAG.cpp). This is slow and uses local memory.
200809467b48Spatrick // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
200909467b48Spatrick SDValue
LowerCONCAT_VECTORS(SDValue Op,SelectionDAG & DAG) const201009467b48Spatrick NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
201109467b48Spatrick   SDNode *Node = Op.getNode();
201209467b48Spatrick   SDLoc dl(Node);
201309467b48Spatrick   SmallVector<SDValue, 8> Ops;
201409467b48Spatrick   unsigned NumOperands = Node->getNumOperands();
201509467b48Spatrick   for (unsigned i = 0; i < NumOperands; ++i) {
201609467b48Spatrick     SDValue SubOp = Node->getOperand(i);
201709467b48Spatrick     EVT VVT = SubOp.getNode()->getValueType(0);
201809467b48Spatrick     EVT EltVT = VVT.getVectorElementType();
201909467b48Spatrick     unsigned NumSubElem = VVT.getVectorNumElements();
202009467b48Spatrick     for (unsigned j = 0; j < NumSubElem; ++j) {
202109467b48Spatrick       Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
202209467b48Spatrick                                 DAG.getIntPtrConstant(j, dl)));
202309467b48Spatrick     }
202409467b48Spatrick   }
202509467b48Spatrick   return DAG.getBuildVector(Node->getValueType(0), dl, Ops);
202609467b48Spatrick }
202709467b48Spatrick 
202809467b48Spatrick // We can init constant f16x2 with a single .b32 move.  Normally it
202909467b48Spatrick // would get lowered as two constant loads and vector-packing move.
203009467b48Spatrick //        mov.b16         %h1, 0x4000;
203109467b48Spatrick //        mov.b16         %h2, 0x3C00;
203209467b48Spatrick //        mov.b32         %hh2, {%h2, %h1};
203309467b48Spatrick // Instead we want just a constant move:
203409467b48Spatrick //        mov.b32         %hh2, 0x40003C00
203509467b48Spatrick //
203609467b48Spatrick // This results in better SASS code with CUDA 7.x. Ptxas in CUDA 8.0
203709467b48Spatrick // generates good SASS in both cases.
LowerBUILD_VECTOR(SDValue Op,SelectionDAG & DAG) const203809467b48Spatrick SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op,
203909467b48Spatrick                                                SelectionDAG &DAG) const {
204009467b48Spatrick   if (!(Op->getValueType(0) == MVT::v2f16 &&
204109467b48Spatrick         isa<ConstantFPSDNode>(Op->getOperand(0)) &&
204209467b48Spatrick         isa<ConstantFPSDNode>(Op->getOperand(1))))
204309467b48Spatrick     return Op;
204409467b48Spatrick 
204509467b48Spatrick   APInt E0 =
204609467b48Spatrick       cast<ConstantFPSDNode>(Op->getOperand(0))->getValueAPF().bitcastToAPInt();
204709467b48Spatrick   APInt E1 =
204809467b48Spatrick       cast<ConstantFPSDNode>(Op->getOperand(1))->getValueAPF().bitcastToAPInt();
204909467b48Spatrick   SDValue Const =
205009467b48Spatrick       DAG.getConstant(E1.zext(32).shl(16) | E0.zext(32), SDLoc(Op), MVT::i32);
205109467b48Spatrick   return DAG.getNode(ISD::BITCAST, SDLoc(Op), MVT::v2f16, Const);
205209467b48Spatrick }
205309467b48Spatrick 
LowerEXTRACT_VECTOR_ELT(SDValue Op,SelectionDAG & DAG) const205409467b48Spatrick SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
205509467b48Spatrick                                                      SelectionDAG &DAG) const {
205609467b48Spatrick   SDValue Index = Op->getOperand(1);
205709467b48Spatrick   // Constant index will be matched by tablegen.
205809467b48Spatrick   if (isa<ConstantSDNode>(Index.getNode()))
205909467b48Spatrick     return Op;
206009467b48Spatrick 
206109467b48Spatrick   // Extract individual elements and select one of them.
206209467b48Spatrick   SDValue Vector = Op->getOperand(0);
206309467b48Spatrick   EVT VectorVT = Vector.getValueType();
206409467b48Spatrick   assert(VectorVT == MVT::v2f16 && "Unexpected vector type.");
206509467b48Spatrick   EVT EltVT = VectorVT.getVectorElementType();
206609467b48Spatrick 
206709467b48Spatrick   SDLoc dl(Op.getNode());
206809467b48Spatrick   SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector,
206909467b48Spatrick                            DAG.getIntPtrConstant(0, dl));
207009467b48Spatrick   SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector,
207109467b48Spatrick                            DAG.getIntPtrConstant(1, dl));
207209467b48Spatrick   return DAG.getSelectCC(dl, Index, DAG.getIntPtrConstant(0, dl), E0, E1,
207309467b48Spatrick                          ISD::CondCode::SETEQ);
207409467b48Spatrick }
207509467b48Spatrick 
207609467b48Spatrick /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
207709467b48Spatrick /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
207809467b48Spatrick ///    amount, or
207909467b48Spatrick /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
208009467b48Spatrick ///    amount.
LowerShiftRightParts(SDValue Op,SelectionDAG & DAG) const208109467b48Spatrick SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
208209467b48Spatrick                                                   SelectionDAG &DAG) const {
208309467b48Spatrick   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
208409467b48Spatrick   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
208509467b48Spatrick 
208609467b48Spatrick   EVT VT = Op.getValueType();
208709467b48Spatrick   unsigned VTBits = VT.getSizeInBits();
208809467b48Spatrick   SDLoc dl(Op);
208909467b48Spatrick   SDValue ShOpLo = Op.getOperand(0);
209009467b48Spatrick   SDValue ShOpHi = Op.getOperand(1);
209109467b48Spatrick   SDValue ShAmt  = Op.getOperand(2);
209209467b48Spatrick   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
209309467b48Spatrick 
209409467b48Spatrick   if (VTBits == 32 && STI.getSmVersion() >= 35) {
209509467b48Spatrick     // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
209609467b48Spatrick     // {dHi, dLo} = {aHi, aLo} >> Amt
209709467b48Spatrick     //   dHi = aHi >> Amt
209809467b48Spatrick     //   dLo = shf.r.clamp aLo, aHi, Amt
209909467b48Spatrick 
210009467b48Spatrick     SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
210109467b48Spatrick     SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi,
210209467b48Spatrick                              ShAmt);
210309467b48Spatrick 
210409467b48Spatrick     SDValue Ops[2] = { Lo, Hi };
210509467b48Spatrick     return DAG.getMergeValues(Ops, dl);
210609467b48Spatrick   }
210709467b48Spatrick   else {
210809467b48Spatrick     // {dHi, dLo} = {aHi, aLo} >> Amt
210909467b48Spatrick     // - if (Amt>=size) then
211009467b48Spatrick     //      dLo = aHi >> (Amt-size)
211109467b48Spatrick     //      dHi = aHi >> Amt (this is either all 0 or all 1)
211209467b48Spatrick     //   else
211309467b48Spatrick     //      dLo = (aLo >>logic Amt) | (aHi << (size-Amt))
211409467b48Spatrick     //      dHi = aHi >> Amt
211509467b48Spatrick 
211609467b48Spatrick     SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
211709467b48Spatrick                                    DAG.getConstant(VTBits, dl, MVT::i32),
211809467b48Spatrick                                    ShAmt);
211909467b48Spatrick     SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
212009467b48Spatrick     SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
212109467b48Spatrick                                      DAG.getConstant(VTBits, dl, MVT::i32));
212209467b48Spatrick     SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
212309467b48Spatrick     SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
212409467b48Spatrick     SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
212509467b48Spatrick 
212609467b48Spatrick     SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
212709467b48Spatrick                                DAG.getConstant(VTBits, dl, MVT::i32),
212809467b48Spatrick                                ISD::SETGE);
212909467b48Spatrick     SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
213009467b48Spatrick     SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
213109467b48Spatrick 
213209467b48Spatrick     SDValue Ops[2] = { Lo, Hi };
213309467b48Spatrick     return DAG.getMergeValues(Ops, dl);
213409467b48Spatrick   }
213509467b48Spatrick }
213609467b48Spatrick 
213709467b48Spatrick /// LowerShiftLeftParts - Lower SHL_PARTS, which
213809467b48Spatrick /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
213909467b48Spatrick ///    amount, or
214009467b48Spatrick /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
214109467b48Spatrick ///    amount.
LowerShiftLeftParts(SDValue Op,SelectionDAG & DAG) const214209467b48Spatrick SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
214309467b48Spatrick                                                  SelectionDAG &DAG) const {
214409467b48Spatrick   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
214509467b48Spatrick   assert(Op.getOpcode() == ISD::SHL_PARTS);
214609467b48Spatrick 
214709467b48Spatrick   EVT VT = Op.getValueType();
214809467b48Spatrick   unsigned VTBits = VT.getSizeInBits();
214909467b48Spatrick   SDLoc dl(Op);
215009467b48Spatrick   SDValue ShOpLo = Op.getOperand(0);
215109467b48Spatrick   SDValue ShOpHi = Op.getOperand(1);
215209467b48Spatrick   SDValue ShAmt  = Op.getOperand(2);
215309467b48Spatrick 
215409467b48Spatrick   if (VTBits == 32 && STI.getSmVersion() >= 35) {
215509467b48Spatrick     // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
215609467b48Spatrick     // {dHi, dLo} = {aHi, aLo} << Amt
215709467b48Spatrick     //   dHi = shf.l.clamp aLo, aHi, Amt
215809467b48Spatrick     //   dLo = aLo << Amt
215909467b48Spatrick 
216009467b48Spatrick     SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
216109467b48Spatrick                              ShAmt);
216209467b48Spatrick     SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
216309467b48Spatrick 
216409467b48Spatrick     SDValue Ops[2] = { Lo, Hi };
216509467b48Spatrick     return DAG.getMergeValues(Ops, dl);
216609467b48Spatrick   }
216709467b48Spatrick   else {
216809467b48Spatrick     // {dHi, dLo} = {aHi, aLo} << Amt
216909467b48Spatrick     // - if (Amt>=size) then
217009467b48Spatrick     //      dLo = aLo << Amt (all 0)
217109467b48Spatrick     //      dLo = aLo << (Amt-size)
217209467b48Spatrick     //   else
217309467b48Spatrick     //      dLo = aLo << Amt
217409467b48Spatrick     //      dHi = (aHi << Amt) | (aLo >> (size-Amt))
217509467b48Spatrick 
217609467b48Spatrick     SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
217709467b48Spatrick                                    DAG.getConstant(VTBits, dl, MVT::i32),
217809467b48Spatrick                                    ShAmt);
217909467b48Spatrick     SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
218009467b48Spatrick     SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
218109467b48Spatrick                                      DAG.getConstant(VTBits, dl, MVT::i32));
218209467b48Spatrick     SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
218309467b48Spatrick     SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
218409467b48Spatrick     SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
218509467b48Spatrick 
218609467b48Spatrick     SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
218709467b48Spatrick                                DAG.getConstant(VTBits, dl, MVT::i32),
218809467b48Spatrick                                ISD::SETGE);
218909467b48Spatrick     SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
219009467b48Spatrick     SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
219109467b48Spatrick 
219209467b48Spatrick     SDValue Ops[2] = { Lo, Hi };
219309467b48Spatrick     return DAG.getMergeValues(Ops, dl);
219409467b48Spatrick   }
219509467b48Spatrick }
219609467b48Spatrick 
LowerFROUND(SDValue Op,SelectionDAG & DAG) const219709467b48Spatrick SDValue NVPTXTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
219809467b48Spatrick   EVT VT = Op.getValueType();
219909467b48Spatrick 
220009467b48Spatrick   if (VT == MVT::f32)
220109467b48Spatrick     return LowerFROUND32(Op, DAG);
220209467b48Spatrick 
220309467b48Spatrick   if (VT == MVT::f64)
220409467b48Spatrick     return LowerFROUND64(Op, DAG);
220509467b48Spatrick 
220609467b48Spatrick   llvm_unreachable("unhandled type");
220709467b48Spatrick }
220809467b48Spatrick 
220909467b48Spatrick // This is the the rounding method used in CUDA libdevice in C like code:
221009467b48Spatrick // float roundf(float A)
221109467b48Spatrick // {
221209467b48Spatrick //   float RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f));
221309467b48Spatrick //   RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA;
221409467b48Spatrick //   return abs(A) < 0.5 ? (float)(int)A : RoundedA;
221509467b48Spatrick // }
LowerFROUND32(SDValue Op,SelectionDAG & DAG) const221609467b48Spatrick SDValue NVPTXTargetLowering::LowerFROUND32(SDValue Op,
221709467b48Spatrick                                            SelectionDAG &DAG) const {
221809467b48Spatrick   SDLoc SL(Op);
221909467b48Spatrick   SDValue A = Op.getOperand(0);
222009467b48Spatrick   EVT VT = Op.getValueType();
222109467b48Spatrick 
222209467b48Spatrick   SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A);
222309467b48Spatrick 
222409467b48Spatrick   // RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f))
222509467b48Spatrick   SDValue Bitcast  = DAG.getNode(ISD::BITCAST, SL, MVT::i32, A);
222609467b48Spatrick   const int SignBitMask = 0x80000000;
222709467b48Spatrick   SDValue Sign = DAG.getNode(ISD::AND, SL, MVT::i32, Bitcast,
222809467b48Spatrick                              DAG.getConstant(SignBitMask, SL, MVT::i32));
222909467b48Spatrick   const int PointFiveInBits = 0x3F000000;
223009467b48Spatrick   SDValue PointFiveWithSignRaw =
223109467b48Spatrick       DAG.getNode(ISD::OR, SL, MVT::i32, Sign,
223209467b48Spatrick                   DAG.getConstant(PointFiveInBits, SL, MVT::i32));
223309467b48Spatrick   SDValue PointFiveWithSign =
223409467b48Spatrick       DAG.getNode(ISD::BITCAST, SL, VT, PointFiveWithSignRaw);
223509467b48Spatrick   SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, A, PointFiveWithSign);
223609467b48Spatrick   SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA);
223709467b48Spatrick 
223809467b48Spatrick   // RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA;
223909467b48Spatrick   EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
224009467b48Spatrick   SDValue IsLarge =
224109467b48Spatrick       DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 23.0), SL, VT),
224209467b48Spatrick                    ISD::SETOGT);
224309467b48Spatrick   RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA);
224409467b48Spatrick 
224509467b48Spatrick   // return abs(A) < 0.5 ? (float)(int)A : RoundedA;
224609467b48Spatrick   SDValue IsSmall =DAG.getSetCC(SL, SetCCVT, AbsA,
224709467b48Spatrick                                 DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT);
224809467b48Spatrick   SDValue RoundedAForSmallA = DAG.getNode(ISD::FTRUNC, SL, VT, A);
224909467b48Spatrick   return DAG.getNode(ISD::SELECT, SL, VT, IsSmall, RoundedAForSmallA, RoundedA);
225009467b48Spatrick }
225109467b48Spatrick 
225209467b48Spatrick // The implementation of round(double) is similar to that of round(float) in
225309467b48Spatrick // that they both separate the value range into three regions and use a method
225409467b48Spatrick // specific to the region to round the values. However, round(double) first
225509467b48Spatrick // calculates the round of the absolute value and then adds the sign back while
225609467b48Spatrick // round(float) directly rounds the value with sign.
LowerFROUND64(SDValue Op,SelectionDAG & DAG) const225709467b48Spatrick SDValue NVPTXTargetLowering::LowerFROUND64(SDValue Op,
225809467b48Spatrick                                            SelectionDAG &DAG) const {
225909467b48Spatrick   SDLoc SL(Op);
226009467b48Spatrick   SDValue A = Op.getOperand(0);
226109467b48Spatrick   EVT VT = Op.getValueType();
226209467b48Spatrick 
226309467b48Spatrick   SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A);
226409467b48Spatrick 
226509467b48Spatrick   // double RoundedA = (double) (int) (abs(A) + 0.5f);
226609467b48Spatrick   SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, AbsA,
226709467b48Spatrick                                   DAG.getConstantFP(0.5, SL, VT));
226809467b48Spatrick   SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA);
226909467b48Spatrick 
227009467b48Spatrick   // RoundedA = abs(A) < 0.5 ? (double)0 : RoundedA;
227109467b48Spatrick   EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
227209467b48Spatrick   SDValue IsSmall =DAG.getSetCC(SL, SetCCVT, AbsA,
227309467b48Spatrick                                 DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT);
227409467b48Spatrick   RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsSmall,
227509467b48Spatrick                          DAG.getConstantFP(0, SL, VT),
227609467b48Spatrick                          RoundedA);
227709467b48Spatrick 
227809467b48Spatrick   // Add sign to rounded_A
227909467b48Spatrick   RoundedA = DAG.getNode(ISD::FCOPYSIGN, SL, VT, RoundedA, A);
228009467b48Spatrick   DAG.getNode(ISD::FTRUNC, SL, VT, A);
228109467b48Spatrick 
228209467b48Spatrick   // RoundedA = abs(A) > 0x1.0p52 ? A : RoundedA;
228309467b48Spatrick   SDValue IsLarge =
228409467b48Spatrick       DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 52.0), SL, VT),
228509467b48Spatrick                    ISD::SETOGT);
228609467b48Spatrick   return DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA);
228709467b48Spatrick }
228809467b48Spatrick 
228909467b48Spatrick 
229009467b48Spatrick 
229109467b48Spatrick SDValue
LowerOperation(SDValue Op,SelectionDAG & DAG) const229209467b48Spatrick NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
229309467b48Spatrick   switch (Op.getOpcode()) {
229409467b48Spatrick   case ISD::RETURNADDR:
229509467b48Spatrick     return SDValue();
229609467b48Spatrick   case ISD::FRAMEADDR:
229709467b48Spatrick     return SDValue();
229809467b48Spatrick   case ISD::GlobalAddress:
229909467b48Spatrick     return LowerGlobalAddress(Op, DAG);
230009467b48Spatrick   case ISD::INTRINSIC_W_CHAIN:
230109467b48Spatrick     return Op;
230209467b48Spatrick   case ISD::BUILD_VECTOR:
230309467b48Spatrick     return LowerBUILD_VECTOR(Op, DAG);
230409467b48Spatrick   case ISD::EXTRACT_SUBVECTOR:
230509467b48Spatrick     return Op;
230609467b48Spatrick   case ISD::EXTRACT_VECTOR_ELT:
230709467b48Spatrick     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
230809467b48Spatrick   case ISD::CONCAT_VECTORS:
230909467b48Spatrick     return LowerCONCAT_VECTORS(Op, DAG);
231009467b48Spatrick   case ISD::STORE:
231109467b48Spatrick     return LowerSTORE(Op, DAG);
231209467b48Spatrick   case ISD::LOAD:
231309467b48Spatrick     return LowerLOAD(Op, DAG);
231409467b48Spatrick   case ISD::SHL_PARTS:
231509467b48Spatrick     return LowerShiftLeftParts(Op, DAG);
231609467b48Spatrick   case ISD::SRA_PARTS:
231709467b48Spatrick   case ISD::SRL_PARTS:
231809467b48Spatrick     return LowerShiftRightParts(Op, DAG);
231909467b48Spatrick   case ISD::SELECT:
232009467b48Spatrick     return LowerSelect(Op, DAG);
232109467b48Spatrick   case ISD::FROUND:
232209467b48Spatrick     return LowerFROUND(Op, DAG);
2323*d415bd75Srobert   case ISD::VAARG:
2324*d415bd75Srobert     return LowerVAARG(Op, DAG);
2325*d415bd75Srobert   case ISD::VASTART:
2326*d415bd75Srobert     return LowerVASTART(Op, DAG);
232709467b48Spatrick   default:
232809467b48Spatrick     llvm_unreachable("Custom lowering not defined for operation");
232909467b48Spatrick   }
233009467b48Spatrick }
233109467b48Spatrick 
2332*d415bd75Srobert // This function is almost a copy of SelectionDAG::expandVAArg().
2333*d415bd75Srobert // The only diff is that this one produces loads from local address space.
LowerVAARG(SDValue Op,SelectionDAG & DAG) const2334*d415bd75Srobert SDValue NVPTXTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
2335*d415bd75Srobert   const TargetLowering *TLI = STI.getTargetLowering();
2336*d415bd75Srobert   SDLoc DL(Op);
2337*d415bd75Srobert 
2338*d415bd75Srobert   SDNode *Node = Op.getNode();
2339*d415bd75Srobert   const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2340*d415bd75Srobert   EVT VT = Node->getValueType(0);
2341*d415bd75Srobert   auto *Ty = VT.getTypeForEVT(*DAG.getContext());
2342*d415bd75Srobert   SDValue Tmp1 = Node->getOperand(0);
2343*d415bd75Srobert   SDValue Tmp2 = Node->getOperand(1);
2344*d415bd75Srobert   const MaybeAlign MA(Node->getConstantOperandVal(3));
2345*d415bd75Srobert 
2346*d415bd75Srobert   SDValue VAListLoad = DAG.getLoad(TLI->getPointerTy(DAG.getDataLayout()), DL,
2347*d415bd75Srobert                                    Tmp1, Tmp2, MachinePointerInfo(V));
2348*d415bd75Srobert   SDValue VAList = VAListLoad;
2349*d415bd75Srobert 
2350*d415bd75Srobert   if (MA && *MA > TLI->getMinStackArgumentAlignment()) {
2351*d415bd75Srobert     VAList = DAG.getNode(
2352*d415bd75Srobert         ISD::ADD, DL, VAList.getValueType(), VAList,
2353*d415bd75Srobert         DAG.getConstant(MA->value() - 1, DL, VAList.getValueType()));
2354*d415bd75Srobert 
2355*d415bd75Srobert     VAList = DAG.getNode(
2356*d415bd75Srobert         ISD::AND, DL, VAList.getValueType(), VAList,
2357*d415bd75Srobert         DAG.getConstant(-(int64_t)MA->value(), DL, VAList.getValueType()));
2358*d415bd75Srobert   }
2359*d415bd75Srobert 
2360*d415bd75Srobert   // Increment the pointer, VAList, to the next vaarg
2361*d415bd75Srobert   Tmp1 = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList,
2362*d415bd75Srobert                      DAG.getConstant(DAG.getDataLayout().getTypeAllocSize(Ty),
2363*d415bd75Srobert                                      DL, VAList.getValueType()));
2364*d415bd75Srobert 
2365*d415bd75Srobert   // Store the incremented VAList to the legalized pointer
2366*d415bd75Srobert   Tmp1 = DAG.getStore(VAListLoad.getValue(1), DL, Tmp1, Tmp2,
2367*d415bd75Srobert                       MachinePointerInfo(V));
2368*d415bd75Srobert 
2369*d415bd75Srobert   const Value *SrcV =
2370*d415bd75Srobert       Constant::getNullValue(PointerType::get(Ty, ADDRESS_SPACE_LOCAL));
2371*d415bd75Srobert 
2372*d415bd75Srobert   // Load the actual argument out of the pointer VAList
2373*d415bd75Srobert   return DAG.getLoad(VT, DL, Tmp1, VAList, MachinePointerInfo(SrcV));
2374*d415bd75Srobert }
2375*d415bd75Srobert 
LowerVASTART(SDValue Op,SelectionDAG & DAG) const2376*d415bd75Srobert SDValue NVPTXTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
2377*d415bd75Srobert   const TargetLowering *TLI = STI.getTargetLowering();
2378*d415bd75Srobert   SDLoc DL(Op);
2379*d415bd75Srobert   EVT PtrVT = TLI->getPointerTy(DAG.getDataLayout());
2380*d415bd75Srobert 
2381*d415bd75Srobert   // Store the address of unsized array <function>_vararg[] in the ap object.
2382*d415bd75Srobert   SDValue Arg = getParamSymbol(DAG, /* vararg */ -1, PtrVT);
2383*d415bd75Srobert   SDValue VAReg = DAG.getNode(NVPTXISD::Wrapper, DL, PtrVT, Arg);
2384*d415bd75Srobert 
2385*d415bd75Srobert   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2386*d415bd75Srobert   return DAG.getStore(Op.getOperand(0), DL, VAReg, Op.getOperand(1),
2387*d415bd75Srobert                       MachinePointerInfo(SV));
2388*d415bd75Srobert }
2389*d415bd75Srobert 
LowerSelect(SDValue Op,SelectionDAG & DAG) const239009467b48Spatrick SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
239109467b48Spatrick   SDValue Op0 = Op->getOperand(0);
239209467b48Spatrick   SDValue Op1 = Op->getOperand(1);
239309467b48Spatrick   SDValue Op2 = Op->getOperand(2);
239409467b48Spatrick   SDLoc DL(Op.getNode());
239509467b48Spatrick 
239609467b48Spatrick   assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
239709467b48Spatrick 
239809467b48Spatrick   Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
239909467b48Spatrick   Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
240009467b48Spatrick   SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
240109467b48Spatrick   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);
240209467b48Spatrick 
240309467b48Spatrick   return Trunc;
240409467b48Spatrick }
240509467b48Spatrick 
LowerLOAD(SDValue Op,SelectionDAG & DAG) const240609467b48Spatrick SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
240709467b48Spatrick   if (Op.getValueType() == MVT::i1)
240809467b48Spatrick     return LowerLOADi1(Op, DAG);
240909467b48Spatrick 
241009467b48Spatrick   // v2f16 is legal, so we can't rely on legalizer to handle unaligned
241109467b48Spatrick   // loads and have to handle it here.
241209467b48Spatrick   if (Op.getValueType() == MVT::v2f16) {
241309467b48Spatrick     LoadSDNode *Load = cast<LoadSDNode>(Op);
241409467b48Spatrick     EVT MemVT = Load->getMemoryVT();
241509467b48Spatrick     if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
241609467b48Spatrick                                         MemVT, *Load->getMemOperand())) {
241709467b48Spatrick       SDValue Ops[2];
241809467b48Spatrick       std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
241909467b48Spatrick       return DAG.getMergeValues(Ops, SDLoc(Op));
242009467b48Spatrick     }
242109467b48Spatrick   }
242209467b48Spatrick 
242309467b48Spatrick   return SDValue();
242409467b48Spatrick }
242509467b48Spatrick 
242609467b48Spatrick // v = ld i1* addr
242709467b48Spatrick //   =>
242809467b48Spatrick // v1 = ld i8* addr (-> i16)
242909467b48Spatrick // v = trunc i16 to i1
LowerLOADi1(SDValue Op,SelectionDAG & DAG) const243009467b48Spatrick SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
243109467b48Spatrick   SDNode *Node = Op.getNode();
243209467b48Spatrick   LoadSDNode *LD = cast<LoadSDNode>(Node);
243309467b48Spatrick   SDLoc dl(Node);
243409467b48Spatrick   assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
243509467b48Spatrick   assert(Node->getValueType(0) == MVT::i1 &&
243609467b48Spatrick          "Custom lowering for i1 load only");
243709467b48Spatrick   SDValue newLD = DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
2438*d415bd75Srobert                               LD->getPointerInfo(), LD->getAlign(),
243909467b48Spatrick                               LD->getMemOperand()->getFlags());
244009467b48Spatrick   SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
244109467b48Spatrick   // The legalizer (the caller) is expecting two values from the legalized
244209467b48Spatrick   // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
244309467b48Spatrick   // in LegalizeDAG.cpp which also uses MergeValues.
244409467b48Spatrick   SDValue Ops[] = { result, LD->getChain() };
244509467b48Spatrick   return DAG.getMergeValues(Ops, dl);
244609467b48Spatrick }
244709467b48Spatrick 
LowerSTORE(SDValue Op,SelectionDAG & DAG) const244809467b48Spatrick SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
244909467b48Spatrick   StoreSDNode *Store = cast<StoreSDNode>(Op);
245009467b48Spatrick   EVT VT = Store->getMemoryVT();
245109467b48Spatrick 
245209467b48Spatrick   if (VT == MVT::i1)
245309467b48Spatrick     return LowerSTOREi1(Op, DAG);
245409467b48Spatrick 
245509467b48Spatrick   // v2f16 is legal, so we can't rely on legalizer to handle unaligned
245609467b48Spatrick   // stores and have to handle it here.
245709467b48Spatrick   if (VT == MVT::v2f16 &&
245809467b48Spatrick       !allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
245909467b48Spatrick                                       VT, *Store->getMemOperand()))
246009467b48Spatrick     return expandUnalignedStore(Store, DAG);
246109467b48Spatrick 
246209467b48Spatrick   if (VT.isVector())
246309467b48Spatrick     return LowerSTOREVector(Op, DAG);
246409467b48Spatrick 
246509467b48Spatrick   return SDValue();
246609467b48Spatrick }
246709467b48Spatrick 
246809467b48Spatrick SDValue
LowerSTOREVector(SDValue Op,SelectionDAG & DAG) const246909467b48Spatrick NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
247009467b48Spatrick   SDNode *N = Op.getNode();
247109467b48Spatrick   SDValue Val = N->getOperand(1);
247209467b48Spatrick   SDLoc DL(N);
247309467b48Spatrick   EVT ValVT = Val.getValueType();
247409467b48Spatrick 
247509467b48Spatrick   if (ValVT.isVector()) {
247609467b48Spatrick     // We only handle "native" vector sizes for now, e.g. <4 x double> is not
247709467b48Spatrick     // legal.  We can (and should) split that into 2 stores of <2 x double> here
247809467b48Spatrick     // but I'm leaving that as a TODO for now.
247909467b48Spatrick     if (!ValVT.isSimple())
248009467b48Spatrick       return SDValue();
248109467b48Spatrick     switch (ValVT.getSimpleVT().SimpleTy) {
248209467b48Spatrick     default:
248309467b48Spatrick       return SDValue();
248409467b48Spatrick     case MVT::v2i8:
248509467b48Spatrick     case MVT::v2i16:
248609467b48Spatrick     case MVT::v2i32:
248709467b48Spatrick     case MVT::v2i64:
248809467b48Spatrick     case MVT::v2f16:
2489*d415bd75Srobert     case MVT::v2bf16:
249009467b48Spatrick     case MVT::v2f32:
249109467b48Spatrick     case MVT::v2f64:
249209467b48Spatrick     case MVT::v4i8:
249309467b48Spatrick     case MVT::v4i16:
249409467b48Spatrick     case MVT::v4i32:
249509467b48Spatrick     case MVT::v4f16:
2496*d415bd75Srobert     case MVT::v4bf16:
249709467b48Spatrick     case MVT::v4f32:
249809467b48Spatrick     case MVT::v8f16: // <4 x f16x2>
2499*d415bd75Srobert     case MVT::v8bf16: // <4 x bf16x2>
250009467b48Spatrick       // This is a "native" vector type
250109467b48Spatrick       break;
250209467b48Spatrick     }
250309467b48Spatrick 
250409467b48Spatrick     MemSDNode *MemSD = cast<MemSDNode>(N);
250509467b48Spatrick     const DataLayout &TD = DAG.getDataLayout();
250609467b48Spatrick 
2507097a140dSpatrick     Align Alignment = MemSD->getAlign();
2508097a140dSpatrick     Align PrefAlign =
2509097a140dSpatrick         TD.getPrefTypeAlign(ValVT.getTypeForEVT(*DAG.getContext()));
2510097a140dSpatrick     if (Alignment < PrefAlign) {
251109467b48Spatrick       // This store is not sufficiently aligned, so bail out and let this vector
251209467b48Spatrick       // store be scalarized.  Note that we may still be able to emit smaller
251309467b48Spatrick       // vector stores.  For example, if we are storing a <4 x float> with an
251409467b48Spatrick       // alignment of 8, this check will fail but the legalizer will try again
251509467b48Spatrick       // with 2 x <2 x float>, which will succeed with an alignment of 8.
251609467b48Spatrick       return SDValue();
251709467b48Spatrick     }
251809467b48Spatrick 
251909467b48Spatrick     unsigned Opcode = 0;
252009467b48Spatrick     EVT EltVT = ValVT.getVectorElementType();
252109467b48Spatrick     unsigned NumElts = ValVT.getVectorNumElements();
252209467b48Spatrick 
252309467b48Spatrick     // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
252409467b48Spatrick     // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
252509467b48Spatrick     // stored type to i16 and propagate the "real" type as the memory type.
252609467b48Spatrick     bool NeedExt = false;
252709467b48Spatrick     if (EltVT.getSizeInBits() < 16)
252809467b48Spatrick       NeedExt = true;
252909467b48Spatrick 
253009467b48Spatrick     bool StoreF16x2 = false;
253109467b48Spatrick     switch (NumElts) {
253209467b48Spatrick     default:
253309467b48Spatrick       return SDValue();
253409467b48Spatrick     case 2:
253509467b48Spatrick       Opcode = NVPTXISD::StoreV2;
253609467b48Spatrick       break;
253709467b48Spatrick     case 4:
253809467b48Spatrick       Opcode = NVPTXISD::StoreV4;
253909467b48Spatrick       break;
254009467b48Spatrick     case 8:
254109467b48Spatrick       // v8f16 is a special case. PTX doesn't have st.v8.f16
254209467b48Spatrick       // instruction. Instead, we split the vector into v2f16 chunks and
254309467b48Spatrick       // store them with st.v4.b32.
2544*d415bd75Srobert       assert((EltVT == MVT::f16 || EltVT == MVT::bf16) &&
2545*d415bd75Srobert              "Wrong type for the vector.");
254609467b48Spatrick       Opcode = NVPTXISD::StoreV4;
254709467b48Spatrick       StoreF16x2 = true;
254809467b48Spatrick       break;
254909467b48Spatrick     }
255009467b48Spatrick 
255109467b48Spatrick     SmallVector<SDValue, 8> Ops;
255209467b48Spatrick 
255309467b48Spatrick     // First is the chain
255409467b48Spatrick     Ops.push_back(N->getOperand(0));
255509467b48Spatrick 
255609467b48Spatrick     if (StoreF16x2) {
255709467b48Spatrick       // Combine f16,f16 -> v2f16
255809467b48Spatrick       NumElts /= 2;
255909467b48Spatrick       for (unsigned i = 0; i < NumElts; ++i) {
256009467b48Spatrick         SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val,
256109467b48Spatrick                                  DAG.getIntPtrConstant(i * 2, DL));
256209467b48Spatrick         SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val,
256309467b48Spatrick                                  DAG.getIntPtrConstant(i * 2 + 1, DL));
256409467b48Spatrick         SDValue V2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f16, E0, E1);
256509467b48Spatrick         Ops.push_back(V2);
256609467b48Spatrick       }
256709467b48Spatrick     } else {
256809467b48Spatrick       // Then the split values
256909467b48Spatrick       for (unsigned i = 0; i < NumElts; ++i) {
257009467b48Spatrick         SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
257109467b48Spatrick                                      DAG.getIntPtrConstant(i, DL));
257209467b48Spatrick         if (NeedExt)
257309467b48Spatrick           ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
257409467b48Spatrick         Ops.push_back(ExtVal);
257509467b48Spatrick       }
257609467b48Spatrick     }
257709467b48Spatrick 
257809467b48Spatrick     // Then any remaining arguments
257909467b48Spatrick     Ops.append(N->op_begin() + 2, N->op_end());
258009467b48Spatrick 
258109467b48Spatrick     SDValue NewSt =
258209467b48Spatrick         DAG.getMemIntrinsicNode(Opcode, DL, DAG.getVTList(MVT::Other), Ops,
258309467b48Spatrick                                 MemSD->getMemoryVT(), MemSD->getMemOperand());
258409467b48Spatrick 
258509467b48Spatrick     // return DCI.CombineTo(N, NewSt, true);
258609467b48Spatrick     return NewSt;
258709467b48Spatrick   }
258809467b48Spatrick 
258909467b48Spatrick   return SDValue();
259009467b48Spatrick }
259109467b48Spatrick 
259209467b48Spatrick // st i1 v, addr
259309467b48Spatrick //    =>
259409467b48Spatrick // v1 = zxt v to i16
259509467b48Spatrick // st.u8 i16, addr
LowerSTOREi1(SDValue Op,SelectionDAG & DAG) const259609467b48Spatrick SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
259709467b48Spatrick   SDNode *Node = Op.getNode();
259809467b48Spatrick   SDLoc dl(Node);
259909467b48Spatrick   StoreSDNode *ST = cast<StoreSDNode>(Node);
260009467b48Spatrick   SDValue Tmp1 = ST->getChain();
260109467b48Spatrick   SDValue Tmp2 = ST->getBasePtr();
260209467b48Spatrick   SDValue Tmp3 = ST->getValue();
260309467b48Spatrick   assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
260409467b48Spatrick   Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3);
260509467b48Spatrick   SDValue Result =
260609467b48Spatrick       DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8,
2607*d415bd75Srobert                         ST->getAlign(), ST->getMemOperand()->getFlags());
260809467b48Spatrick   return Result;
260909467b48Spatrick }
261009467b48Spatrick 
2611*d415bd75Srobert // This creates target external symbol for a function parameter.
2612*d415bd75Srobert // Name of the symbol is composed from its index and the function name.
2613*d415bd75Srobert // Negative index corresponds to special parameter (unsized array) used for
2614*d415bd75Srobert // passing variable arguments.
getParamSymbol(SelectionDAG & DAG,int idx,EVT v) const2615*d415bd75Srobert SDValue NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx,
2616*d415bd75Srobert                                             EVT v) const {
261709467b48Spatrick   std::string ParamSym;
261809467b48Spatrick   raw_string_ostream ParamStr(ParamSym);
261909467b48Spatrick 
2620*d415bd75Srobert   ParamStr << DAG.getMachineFunction().getName();
262109467b48Spatrick 
2622*d415bd75Srobert   if (idx < 0)
2623*d415bd75Srobert     ParamStr << "_vararg";
2624*d415bd75Srobert   else
2625*d415bd75Srobert     ParamStr << "_param_" << idx;
262609467b48Spatrick 
2627*d415bd75Srobert   StringRef SavedStr =
2628*d415bd75Srobert     nvTM->getStrPool().save(ParamSym);
2629*d415bd75Srobert   return DAG.getTargetExternalSymbol(SavedStr.data(), v);
263009467b48Spatrick }
263109467b48Spatrick 
LowerFormalArguments(SDValue Chain,CallingConv::ID CallConv,bool isVarArg,const SmallVectorImpl<ISD::InputArg> & Ins,const SDLoc & dl,SelectionDAG & DAG,SmallVectorImpl<SDValue> & InVals) const263209467b48Spatrick SDValue NVPTXTargetLowering::LowerFormalArguments(
263309467b48Spatrick     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
263409467b48Spatrick     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
263509467b48Spatrick     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
263609467b48Spatrick   MachineFunction &MF = DAG.getMachineFunction();
263709467b48Spatrick   const DataLayout &DL = DAG.getDataLayout();
263809467b48Spatrick   auto PtrVT = getPointerTy(DAG.getDataLayout());
263909467b48Spatrick 
264009467b48Spatrick   const Function *F = &MF.getFunction();
264109467b48Spatrick   const AttributeList &PAL = F->getAttributes();
264209467b48Spatrick   const TargetLowering *TLI = STI.getTargetLowering();
264309467b48Spatrick 
264409467b48Spatrick   SDValue Root = DAG.getRoot();
264509467b48Spatrick   std::vector<SDValue> OutChains;
264609467b48Spatrick 
264709467b48Spatrick   bool isABI = (STI.getSmVersion() >= 20);
264809467b48Spatrick   assert(isABI && "Non-ABI compilation is not supported");
264909467b48Spatrick   if (!isABI)
265009467b48Spatrick     return Chain;
265109467b48Spatrick 
265209467b48Spatrick   std::vector<Type *> argTypes;
265309467b48Spatrick   std::vector<const Argument *> theArgs;
265409467b48Spatrick   for (const Argument &I : F->args()) {
265509467b48Spatrick     theArgs.push_back(&I);
265609467b48Spatrick     argTypes.push_back(I.getType());
265709467b48Spatrick   }
265809467b48Spatrick   // argTypes.size() (or theArgs.size()) and Ins.size() need not match.
265909467b48Spatrick   // Ins.size() will be larger
266009467b48Spatrick   //   * if there is an aggregate argument with multiple fields (each field
266109467b48Spatrick   //     showing up separately in Ins)
266209467b48Spatrick   //   * if there is a vector argument with more than typical vector-length
266309467b48Spatrick   //     elements (generally if more than 4) where each vector element is
266409467b48Spatrick   //     individually present in Ins.
266509467b48Spatrick   // So a different index should be used for indexing into Ins.
266609467b48Spatrick   // See similar issue in LowerCall.
266709467b48Spatrick   unsigned InsIdx = 0;
266809467b48Spatrick 
266909467b48Spatrick   int idx = 0;
267009467b48Spatrick   for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
267109467b48Spatrick     Type *Ty = argTypes[i];
267209467b48Spatrick 
267309467b48Spatrick     if (theArgs[i]->use_empty()) {
267409467b48Spatrick       // argument is dead
267509467b48Spatrick       if (Ty->isAggregateType() || Ty->isIntegerTy(128)) {
267609467b48Spatrick         SmallVector<EVT, 16> vtparts;
267709467b48Spatrick 
267809467b48Spatrick         ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts);
267909467b48Spatrick         assert(vtparts.size() > 0 && "empty aggregate type not expected");
268009467b48Spatrick         for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
268109467b48Spatrick              ++parti) {
268209467b48Spatrick           InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
268309467b48Spatrick           ++InsIdx;
268409467b48Spatrick         }
268509467b48Spatrick         if (vtparts.size() > 0)
268609467b48Spatrick           --InsIdx;
268709467b48Spatrick         continue;
268809467b48Spatrick       }
268909467b48Spatrick       if (Ty->isVectorTy()) {
269009467b48Spatrick         EVT ObjectVT = getValueType(DL, Ty);
269109467b48Spatrick         unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
269209467b48Spatrick         for (unsigned parti = 0; parti < NumRegs; ++parti) {
269309467b48Spatrick           InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
269409467b48Spatrick           ++InsIdx;
269509467b48Spatrick         }
269609467b48Spatrick         if (NumRegs > 0)
269709467b48Spatrick           --InsIdx;
269809467b48Spatrick         continue;
269909467b48Spatrick       }
270009467b48Spatrick       InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
270109467b48Spatrick       continue;
270209467b48Spatrick     }
270309467b48Spatrick 
270409467b48Spatrick     // In the following cases, assign a node order of "idx+1"
270509467b48Spatrick     // to newly created nodes. The SDNodes for params have to
270609467b48Spatrick     // appear in the same order as their order of appearance
270709467b48Spatrick     // in the original function. "idx+1" holds that order.
2708*d415bd75Srobert     if (!PAL.hasParamAttr(i, Attribute::ByVal)) {
270909467b48Spatrick       bool aggregateIsPacked = false;
271009467b48Spatrick       if (StructType *STy = dyn_cast<StructType>(Ty))
271109467b48Spatrick         aggregateIsPacked = STy->isPacked();
271209467b48Spatrick 
271309467b48Spatrick       SmallVector<EVT, 16> VTs;
271409467b48Spatrick       SmallVector<uint64_t, 16> Offsets;
271509467b48Spatrick       ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0);
271609467b48Spatrick       assert(VTs.size() > 0 && "Unexpected empty type.");
271709467b48Spatrick       auto VectorInfo =
2718097a140dSpatrick           VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlign(Ty));
271909467b48Spatrick 
272009467b48Spatrick       SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
272109467b48Spatrick       int VecIdx = -1; // Index of the first element of the current vector.
272209467b48Spatrick       for (unsigned parti = 0, parte = VTs.size(); parti != parte; ++parti) {
272309467b48Spatrick         if (VectorInfo[parti] & PVF_FIRST) {
272409467b48Spatrick           assert(VecIdx == -1 && "Orphaned vector.");
272509467b48Spatrick           VecIdx = parti;
272609467b48Spatrick         }
272709467b48Spatrick 
272809467b48Spatrick         // That's the last element of this store op.
272909467b48Spatrick         if (VectorInfo[parti] & PVF_LAST) {
273009467b48Spatrick           unsigned NumElts = parti - VecIdx + 1;
273109467b48Spatrick           EVT EltVT = VTs[parti];
273209467b48Spatrick           // i1 is loaded/stored as i8.
273309467b48Spatrick           EVT LoadVT = EltVT;
273409467b48Spatrick           if (EltVT == MVT::i1)
273509467b48Spatrick             LoadVT = MVT::i8;
273609467b48Spatrick           else if (EltVT == MVT::v2f16)
273709467b48Spatrick             // getLoad needs a vector type, but it can't handle
273809467b48Spatrick             // vectors which contain v2f16 elements. So we must load
273909467b48Spatrick             // using i32 here and then bitcast back.
274009467b48Spatrick             LoadVT = MVT::i32;
274109467b48Spatrick 
274209467b48Spatrick           EVT VecVT = EVT::getVectorVT(F->getContext(), LoadVT, NumElts);
274309467b48Spatrick           SDValue VecAddr =
274409467b48Spatrick               DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
274509467b48Spatrick                           DAG.getConstant(Offsets[VecIdx], dl, PtrVT));
274609467b48Spatrick           Value *srcValue = Constant::getNullValue(PointerType::get(
274709467b48Spatrick               EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
2748*d415bd75Srobert           SDValue P = DAG.getLoad(VecVT, dl, Root, VecAddr,
2749*d415bd75Srobert                                   MachinePointerInfo(srcValue),
2750*d415bd75Srobert                                   MaybeAlign(aggregateIsPacked ? 1 : 0),
275109467b48Spatrick                                   MachineMemOperand::MODereferenceable |
275209467b48Spatrick                                       MachineMemOperand::MOInvariant);
275309467b48Spatrick           if (P.getNode())
275409467b48Spatrick             P.getNode()->setIROrder(idx + 1);
275509467b48Spatrick           for (unsigned j = 0; j < NumElts; ++j) {
275609467b48Spatrick             SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LoadVT, P,
275709467b48Spatrick                                       DAG.getIntPtrConstant(j, dl));
275809467b48Spatrick             // We've loaded i1 as an i8 and now must truncate it back to i1
275909467b48Spatrick             if (EltVT == MVT::i1)
276009467b48Spatrick               Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Elt);
276109467b48Spatrick             // v2f16 was loaded as an i32. Now we must bitcast it back.
276209467b48Spatrick             else if (EltVT == MVT::v2f16)
276309467b48Spatrick               Elt = DAG.getNode(ISD::BITCAST, dl, MVT::v2f16, Elt);
2764*d415bd75Srobert 
2765*d415bd75Srobert             // If a promoted integer type is used, truncate down to the original
2766*d415bd75Srobert             MVT PromotedVT;
2767*d415bd75Srobert             if (PromoteScalarIntegerPTX(EltVT, &PromotedVT)) {
2768*d415bd75Srobert               Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
2769*d415bd75Srobert             }
2770*d415bd75Srobert 
277109467b48Spatrick             // Extend the element if necessary (e.g. an i8 is loaded
277209467b48Spatrick             // into an i16 register)
277309467b48Spatrick             if (Ins[InsIdx].VT.isInteger() &&
277473471bf0Spatrick                 Ins[InsIdx].VT.getFixedSizeInBits() >
277573471bf0Spatrick                     LoadVT.getFixedSizeInBits()) {
277609467b48Spatrick               unsigned Extend = Ins[InsIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
277709467b48Spatrick                                                            : ISD::ZERO_EXTEND;
277809467b48Spatrick               Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt);
277909467b48Spatrick             }
278009467b48Spatrick             InVals.push_back(Elt);
278109467b48Spatrick           }
278209467b48Spatrick 
278309467b48Spatrick           // Reset vector tracking state.
278409467b48Spatrick           VecIdx = -1;
278509467b48Spatrick         }
278609467b48Spatrick         ++InsIdx;
278709467b48Spatrick       }
278809467b48Spatrick       if (VTs.size() > 0)
278909467b48Spatrick         --InsIdx;
279009467b48Spatrick       continue;
279109467b48Spatrick     }
279209467b48Spatrick 
279309467b48Spatrick     // Param has ByVal attribute
279409467b48Spatrick     // Return MoveParam(param symbol).
279509467b48Spatrick     // Ideally, the param symbol can be returned directly,
279609467b48Spatrick     // but when SDNode builder decides to use it in a CopyToReg(),
279709467b48Spatrick     // machine instruction fails because TargetExternalSymbol
279809467b48Spatrick     // (not lowered) is target dependent, and CopyToReg assumes
279909467b48Spatrick     // the source is lowered.
280009467b48Spatrick     EVT ObjectVT = getValueType(DL, Ty);
280109467b48Spatrick     assert(ObjectVT == Ins[InsIdx].VT &&
280209467b48Spatrick            "Ins type did not match function type");
280309467b48Spatrick     SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
280409467b48Spatrick     SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
280509467b48Spatrick     if (p.getNode())
280609467b48Spatrick       p.getNode()->setIROrder(idx + 1);
280709467b48Spatrick     InVals.push_back(p);
280809467b48Spatrick   }
280909467b48Spatrick 
281009467b48Spatrick   if (!OutChains.empty())
281109467b48Spatrick     DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));
281209467b48Spatrick 
281309467b48Spatrick   return Chain;
281409467b48Spatrick }
281509467b48Spatrick 
281609467b48Spatrick SDValue
LowerReturn(SDValue Chain,CallingConv::ID CallConv,bool isVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,const SmallVectorImpl<SDValue> & OutVals,const SDLoc & dl,SelectionDAG & DAG) const281709467b48Spatrick NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
281809467b48Spatrick                                  bool isVarArg,
281909467b48Spatrick                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
282009467b48Spatrick                                  const SmallVectorImpl<SDValue> &OutVals,
282109467b48Spatrick                                  const SDLoc &dl, SelectionDAG &DAG) const {
2822*d415bd75Srobert   const MachineFunction &MF = DAG.getMachineFunction();
2823*d415bd75Srobert   const Function &F = MF.getFunction();
282409467b48Spatrick   Type *RetTy = MF.getFunction().getReturnType();
282509467b48Spatrick 
282609467b48Spatrick   bool isABI = (STI.getSmVersion() >= 20);
282709467b48Spatrick   assert(isABI && "Non-ABI compilation is not supported");
282809467b48Spatrick   if (!isABI)
282909467b48Spatrick     return Chain;
283009467b48Spatrick 
283173471bf0Spatrick   const DataLayout &DL = DAG.getDataLayout();
2832*d415bd75Srobert   SmallVector<SDValue, 16> PromotedOutVals;
283309467b48Spatrick   SmallVector<EVT, 16> VTs;
283409467b48Spatrick   SmallVector<uint64_t, 16> Offsets;
283509467b48Spatrick   ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets);
283609467b48Spatrick   assert(VTs.size() == OutVals.size() && "Bad return value decomposition");
283709467b48Spatrick 
2838*d415bd75Srobert   for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
2839*d415bd75Srobert     SDValue PromotedOutVal = OutVals[i];
2840*d415bd75Srobert     MVT PromotedVT;
2841*d415bd75Srobert     if (PromoteScalarIntegerPTX(VTs[i], &PromotedVT)) {
2842*d415bd75Srobert       VTs[i] = EVT(PromotedVT);
2843*d415bd75Srobert     }
2844*d415bd75Srobert     if (PromoteScalarIntegerPTX(PromotedOutVal.getValueType(), &PromotedVT)) {
2845*d415bd75Srobert       llvm::ISD::NodeType Ext =
2846*d415bd75Srobert           Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2847*d415bd75Srobert       PromotedOutVal = DAG.getNode(Ext, dl, PromotedVT, PromotedOutVal);
2848*d415bd75Srobert     }
2849*d415bd75Srobert     PromotedOutVals.push_back(PromotedOutVal);
2850*d415bd75Srobert   }
2851*d415bd75Srobert 
285209467b48Spatrick   auto VectorInfo = VectorizePTXValueVTs(
2853*d415bd75Srobert       VTs, Offsets,
2854*d415bd75Srobert       RetTy->isSized() ? getFunctionParamOptimizedAlign(&F, RetTy, DL)
2855*d415bd75Srobert                        : Align(1));
285609467b48Spatrick 
285709467b48Spatrick   // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
285809467b48Spatrick   // 32-bits are sign extended or zero extended, depending on whether
285909467b48Spatrick   // they are signed or unsigned types.
286009467b48Spatrick   bool ExtendIntegerRetVal =
286109467b48Spatrick       RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
286209467b48Spatrick 
286309467b48Spatrick   SmallVector<SDValue, 6> StoreOperands;
286409467b48Spatrick   for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
286509467b48Spatrick     // New load/store. Record chain and offset operands.
286609467b48Spatrick     if (VectorInfo[i] & PVF_FIRST) {
286709467b48Spatrick       assert(StoreOperands.empty() && "Orphaned operand list.");
286809467b48Spatrick       StoreOperands.push_back(Chain);
286909467b48Spatrick       StoreOperands.push_back(DAG.getConstant(Offsets[i], dl, MVT::i32));
287009467b48Spatrick     }
287109467b48Spatrick 
2872*d415bd75Srobert     SDValue OutVal = OutVals[i];
2873*d415bd75Srobert     SDValue RetVal = PromotedOutVals[i];
2874*d415bd75Srobert 
287509467b48Spatrick     if (ExtendIntegerRetVal) {
287609467b48Spatrick       RetVal = DAG.getNode(Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND
287709467b48Spatrick                                                   : ISD::ZERO_EXTEND,
287809467b48Spatrick                            dl, MVT::i32, RetVal);
2879*d415bd75Srobert     } else if (OutVal.getValueSizeInBits() < 16) {
288009467b48Spatrick       // Use 16-bit registers for small load-stores as it's the
288109467b48Spatrick       // smallest general purpose register size supported by NVPTX.
288209467b48Spatrick       RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal);
288309467b48Spatrick     }
288409467b48Spatrick 
288509467b48Spatrick     // Record the value to return.
288609467b48Spatrick     StoreOperands.push_back(RetVal);
288709467b48Spatrick 
288809467b48Spatrick     // That's the last element of this store op.
288909467b48Spatrick     if (VectorInfo[i] & PVF_LAST) {
289009467b48Spatrick       NVPTXISD::NodeType Op;
289109467b48Spatrick       unsigned NumElts = StoreOperands.size() - 2;
289209467b48Spatrick       switch (NumElts) {
289309467b48Spatrick       case 1:
289409467b48Spatrick         Op = NVPTXISD::StoreRetval;
289509467b48Spatrick         break;
289609467b48Spatrick       case 2:
289709467b48Spatrick         Op = NVPTXISD::StoreRetvalV2;
289809467b48Spatrick         break;
289909467b48Spatrick       case 4:
290009467b48Spatrick         Op = NVPTXISD::StoreRetvalV4;
290109467b48Spatrick         break;
290209467b48Spatrick       default:
290309467b48Spatrick         llvm_unreachable("Invalid vector info.");
290409467b48Spatrick       }
290509467b48Spatrick 
290609467b48Spatrick       // Adjust type of load/store op if we've extended the scalar
290709467b48Spatrick       // return value.
290809467b48Spatrick       EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];
2909097a140dSpatrick       Chain = DAG.getMemIntrinsicNode(
2910097a140dSpatrick           Op, dl, DAG.getVTList(MVT::Other), StoreOperands, TheStoreType,
2911097a140dSpatrick           MachinePointerInfo(), Align(1), MachineMemOperand::MOStore);
291209467b48Spatrick       // Cleanup vector state.
291309467b48Spatrick       StoreOperands.clear();
291409467b48Spatrick     }
291509467b48Spatrick   }
291609467b48Spatrick 
291709467b48Spatrick   return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
291809467b48Spatrick }
291909467b48Spatrick 
LowerAsmOperandForConstraint(SDValue Op,std::string & Constraint,std::vector<SDValue> & Ops,SelectionDAG & DAG) const292009467b48Spatrick void NVPTXTargetLowering::LowerAsmOperandForConstraint(
292109467b48Spatrick     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
292209467b48Spatrick     SelectionDAG &DAG) const {
292309467b48Spatrick   if (Constraint.length() > 1)
292409467b48Spatrick     return;
292509467b48Spatrick   else
292609467b48Spatrick     TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
292709467b48Spatrick }
292809467b48Spatrick 
getOpcForTextureInstr(unsigned Intrinsic)292909467b48Spatrick static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
293009467b48Spatrick   switch (Intrinsic) {
293109467b48Spatrick   default:
293209467b48Spatrick     return 0;
293309467b48Spatrick 
293409467b48Spatrick   case Intrinsic::nvvm_tex_1d_v4f32_s32:
293509467b48Spatrick     return NVPTXISD::Tex1DFloatS32;
293609467b48Spatrick   case Intrinsic::nvvm_tex_1d_v4f32_f32:
293709467b48Spatrick     return NVPTXISD::Tex1DFloatFloat;
293809467b48Spatrick   case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
293909467b48Spatrick     return NVPTXISD::Tex1DFloatFloatLevel;
294009467b48Spatrick   case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
294109467b48Spatrick     return NVPTXISD::Tex1DFloatFloatGrad;
294209467b48Spatrick   case Intrinsic::nvvm_tex_1d_v4s32_s32:
294309467b48Spatrick     return NVPTXISD::Tex1DS32S32;
294409467b48Spatrick   case Intrinsic::nvvm_tex_1d_v4s32_f32:
294509467b48Spatrick     return NVPTXISD::Tex1DS32Float;
294609467b48Spatrick   case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
294709467b48Spatrick     return NVPTXISD::Tex1DS32FloatLevel;
294809467b48Spatrick   case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
294909467b48Spatrick     return NVPTXISD::Tex1DS32FloatGrad;
295009467b48Spatrick   case Intrinsic::nvvm_tex_1d_v4u32_s32:
295109467b48Spatrick     return NVPTXISD::Tex1DU32S32;
295209467b48Spatrick   case Intrinsic::nvvm_tex_1d_v4u32_f32:
295309467b48Spatrick     return NVPTXISD::Tex1DU32Float;
295409467b48Spatrick   case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
295509467b48Spatrick     return NVPTXISD::Tex1DU32FloatLevel;
295609467b48Spatrick   case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
295709467b48Spatrick     return NVPTXISD::Tex1DU32FloatGrad;
295809467b48Spatrick 
295909467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
296009467b48Spatrick     return NVPTXISD::Tex1DArrayFloatS32;
296109467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
296209467b48Spatrick     return NVPTXISD::Tex1DArrayFloatFloat;
296309467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
296409467b48Spatrick     return NVPTXISD::Tex1DArrayFloatFloatLevel;
296509467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
296609467b48Spatrick     return NVPTXISD::Tex1DArrayFloatFloatGrad;
296709467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
296809467b48Spatrick     return NVPTXISD::Tex1DArrayS32S32;
296909467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
297009467b48Spatrick     return NVPTXISD::Tex1DArrayS32Float;
297109467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
297209467b48Spatrick     return NVPTXISD::Tex1DArrayS32FloatLevel;
297309467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
297409467b48Spatrick     return NVPTXISD::Tex1DArrayS32FloatGrad;
297509467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
297609467b48Spatrick     return NVPTXISD::Tex1DArrayU32S32;
297709467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
297809467b48Spatrick     return NVPTXISD::Tex1DArrayU32Float;
297909467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
298009467b48Spatrick     return NVPTXISD::Tex1DArrayU32FloatLevel;
298109467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
298209467b48Spatrick     return NVPTXISD::Tex1DArrayU32FloatGrad;
298309467b48Spatrick 
298409467b48Spatrick   case Intrinsic::nvvm_tex_2d_v4f32_s32:
298509467b48Spatrick     return NVPTXISD::Tex2DFloatS32;
298609467b48Spatrick   case Intrinsic::nvvm_tex_2d_v4f32_f32:
298709467b48Spatrick     return NVPTXISD::Tex2DFloatFloat;
298809467b48Spatrick   case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
298909467b48Spatrick     return NVPTXISD::Tex2DFloatFloatLevel;
299009467b48Spatrick   case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
299109467b48Spatrick     return NVPTXISD::Tex2DFloatFloatGrad;
299209467b48Spatrick   case Intrinsic::nvvm_tex_2d_v4s32_s32:
299309467b48Spatrick     return NVPTXISD::Tex2DS32S32;
299409467b48Spatrick   case Intrinsic::nvvm_tex_2d_v4s32_f32:
299509467b48Spatrick     return NVPTXISD::Tex2DS32Float;
299609467b48Spatrick   case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
299709467b48Spatrick     return NVPTXISD::Tex2DS32FloatLevel;
299809467b48Spatrick   case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
299909467b48Spatrick     return NVPTXISD::Tex2DS32FloatGrad;
300009467b48Spatrick   case Intrinsic::nvvm_tex_2d_v4u32_s32:
300109467b48Spatrick     return NVPTXISD::Tex2DU32S32;
300209467b48Spatrick   case Intrinsic::nvvm_tex_2d_v4u32_f32:
300309467b48Spatrick     return NVPTXISD::Tex2DU32Float;
300409467b48Spatrick   case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
300509467b48Spatrick     return NVPTXISD::Tex2DU32FloatLevel;
300609467b48Spatrick   case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
300709467b48Spatrick     return NVPTXISD::Tex2DU32FloatGrad;
300809467b48Spatrick 
300909467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
301009467b48Spatrick     return NVPTXISD::Tex2DArrayFloatS32;
301109467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
301209467b48Spatrick     return NVPTXISD::Tex2DArrayFloatFloat;
301309467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
301409467b48Spatrick     return NVPTXISD::Tex2DArrayFloatFloatLevel;
301509467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
301609467b48Spatrick     return NVPTXISD::Tex2DArrayFloatFloatGrad;
301709467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
301809467b48Spatrick     return NVPTXISD::Tex2DArrayS32S32;
301909467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
302009467b48Spatrick     return NVPTXISD::Tex2DArrayS32Float;
302109467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
302209467b48Spatrick     return NVPTXISD::Tex2DArrayS32FloatLevel;
302309467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
302409467b48Spatrick     return NVPTXISD::Tex2DArrayS32FloatGrad;
302509467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
302609467b48Spatrick     return NVPTXISD::Tex2DArrayU32S32;
302709467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
302809467b48Spatrick     return NVPTXISD::Tex2DArrayU32Float;
302909467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
303009467b48Spatrick     return NVPTXISD::Tex2DArrayU32FloatLevel;
303109467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
303209467b48Spatrick     return NVPTXISD::Tex2DArrayU32FloatGrad;
303309467b48Spatrick 
303409467b48Spatrick   case Intrinsic::nvvm_tex_3d_v4f32_s32:
303509467b48Spatrick     return NVPTXISD::Tex3DFloatS32;
303609467b48Spatrick   case Intrinsic::nvvm_tex_3d_v4f32_f32:
303709467b48Spatrick     return NVPTXISD::Tex3DFloatFloat;
303809467b48Spatrick   case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
303909467b48Spatrick     return NVPTXISD::Tex3DFloatFloatLevel;
304009467b48Spatrick   case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
304109467b48Spatrick     return NVPTXISD::Tex3DFloatFloatGrad;
304209467b48Spatrick   case Intrinsic::nvvm_tex_3d_v4s32_s32:
304309467b48Spatrick     return NVPTXISD::Tex3DS32S32;
304409467b48Spatrick   case Intrinsic::nvvm_tex_3d_v4s32_f32:
304509467b48Spatrick     return NVPTXISD::Tex3DS32Float;
304609467b48Spatrick   case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
304709467b48Spatrick     return NVPTXISD::Tex3DS32FloatLevel;
304809467b48Spatrick   case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
304909467b48Spatrick     return NVPTXISD::Tex3DS32FloatGrad;
305009467b48Spatrick   case Intrinsic::nvvm_tex_3d_v4u32_s32:
305109467b48Spatrick     return NVPTXISD::Tex3DU32S32;
305209467b48Spatrick   case Intrinsic::nvvm_tex_3d_v4u32_f32:
305309467b48Spatrick     return NVPTXISD::Tex3DU32Float;
305409467b48Spatrick   case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
305509467b48Spatrick     return NVPTXISD::Tex3DU32FloatLevel;
305609467b48Spatrick   case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
305709467b48Spatrick     return NVPTXISD::Tex3DU32FloatGrad;
305809467b48Spatrick 
305909467b48Spatrick   case Intrinsic::nvvm_tex_cube_v4f32_f32:
306009467b48Spatrick     return NVPTXISD::TexCubeFloatFloat;
306109467b48Spatrick   case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
306209467b48Spatrick     return NVPTXISD::TexCubeFloatFloatLevel;
306309467b48Spatrick   case Intrinsic::nvvm_tex_cube_v4s32_f32:
306409467b48Spatrick     return NVPTXISD::TexCubeS32Float;
306509467b48Spatrick   case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
306609467b48Spatrick     return NVPTXISD::TexCubeS32FloatLevel;
306709467b48Spatrick   case Intrinsic::nvvm_tex_cube_v4u32_f32:
306809467b48Spatrick     return NVPTXISD::TexCubeU32Float;
306909467b48Spatrick   case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
307009467b48Spatrick     return NVPTXISD::TexCubeU32FloatLevel;
307109467b48Spatrick 
307209467b48Spatrick   case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
307309467b48Spatrick     return NVPTXISD::TexCubeArrayFloatFloat;
307409467b48Spatrick   case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
307509467b48Spatrick     return NVPTXISD::TexCubeArrayFloatFloatLevel;
307609467b48Spatrick   case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
307709467b48Spatrick     return NVPTXISD::TexCubeArrayS32Float;
307809467b48Spatrick   case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
307909467b48Spatrick     return NVPTXISD::TexCubeArrayS32FloatLevel;
308009467b48Spatrick   case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
308109467b48Spatrick     return NVPTXISD::TexCubeArrayU32Float;
308209467b48Spatrick   case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
308309467b48Spatrick     return NVPTXISD::TexCubeArrayU32FloatLevel;
308409467b48Spatrick 
308509467b48Spatrick   case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
308609467b48Spatrick     return NVPTXISD::Tld4R2DFloatFloat;
308709467b48Spatrick   case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
308809467b48Spatrick     return NVPTXISD::Tld4G2DFloatFloat;
308909467b48Spatrick   case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
309009467b48Spatrick     return NVPTXISD::Tld4B2DFloatFloat;
309109467b48Spatrick   case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
309209467b48Spatrick     return NVPTXISD::Tld4A2DFloatFloat;
309309467b48Spatrick   case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
309409467b48Spatrick     return NVPTXISD::Tld4R2DS64Float;
309509467b48Spatrick   case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
309609467b48Spatrick     return NVPTXISD::Tld4G2DS64Float;
309709467b48Spatrick   case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
309809467b48Spatrick     return NVPTXISD::Tld4B2DS64Float;
309909467b48Spatrick   case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
310009467b48Spatrick     return NVPTXISD::Tld4A2DS64Float;
310109467b48Spatrick   case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
310209467b48Spatrick     return NVPTXISD::Tld4R2DU64Float;
310309467b48Spatrick   case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
310409467b48Spatrick     return NVPTXISD::Tld4G2DU64Float;
310509467b48Spatrick   case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
310609467b48Spatrick     return NVPTXISD::Tld4B2DU64Float;
310709467b48Spatrick   case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
310809467b48Spatrick     return NVPTXISD::Tld4A2DU64Float;
310909467b48Spatrick 
311009467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
311109467b48Spatrick     return NVPTXISD::TexUnified1DFloatS32;
311209467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
311309467b48Spatrick     return NVPTXISD::TexUnified1DFloatFloat;
311409467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
311509467b48Spatrick     return NVPTXISD::TexUnified1DFloatFloatLevel;
311609467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
311709467b48Spatrick     return NVPTXISD::TexUnified1DFloatFloatGrad;
311809467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
311909467b48Spatrick     return NVPTXISD::TexUnified1DS32S32;
312009467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
312109467b48Spatrick     return NVPTXISD::TexUnified1DS32Float;
312209467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
312309467b48Spatrick     return NVPTXISD::TexUnified1DS32FloatLevel;
312409467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
312509467b48Spatrick     return NVPTXISD::TexUnified1DS32FloatGrad;
312609467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
312709467b48Spatrick     return NVPTXISD::TexUnified1DU32S32;
312809467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
312909467b48Spatrick     return NVPTXISD::TexUnified1DU32Float;
313009467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
313109467b48Spatrick     return NVPTXISD::TexUnified1DU32FloatLevel;
313209467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
313309467b48Spatrick     return NVPTXISD::TexUnified1DU32FloatGrad;
313409467b48Spatrick 
313509467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
313609467b48Spatrick     return NVPTXISD::TexUnified1DArrayFloatS32;
313709467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
313809467b48Spatrick     return NVPTXISD::TexUnified1DArrayFloatFloat;
313909467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
314009467b48Spatrick     return NVPTXISD::TexUnified1DArrayFloatFloatLevel;
314109467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
314209467b48Spatrick     return NVPTXISD::TexUnified1DArrayFloatFloatGrad;
314309467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
314409467b48Spatrick     return NVPTXISD::TexUnified1DArrayS32S32;
314509467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
314609467b48Spatrick     return NVPTXISD::TexUnified1DArrayS32Float;
314709467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
314809467b48Spatrick     return NVPTXISD::TexUnified1DArrayS32FloatLevel;
314909467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
315009467b48Spatrick     return NVPTXISD::TexUnified1DArrayS32FloatGrad;
315109467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
315209467b48Spatrick     return NVPTXISD::TexUnified1DArrayU32S32;
315309467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
315409467b48Spatrick     return NVPTXISD::TexUnified1DArrayU32Float;
315509467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
315609467b48Spatrick     return NVPTXISD::TexUnified1DArrayU32FloatLevel;
315709467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
315809467b48Spatrick     return NVPTXISD::TexUnified1DArrayU32FloatGrad;
315909467b48Spatrick 
316009467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
316109467b48Spatrick     return NVPTXISD::TexUnified2DFloatS32;
316209467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
316309467b48Spatrick     return NVPTXISD::TexUnified2DFloatFloat;
316409467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
316509467b48Spatrick     return NVPTXISD::TexUnified2DFloatFloatLevel;
316609467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
316709467b48Spatrick     return NVPTXISD::TexUnified2DFloatFloatGrad;
316809467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
316909467b48Spatrick     return NVPTXISD::TexUnified2DS32S32;
317009467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
317109467b48Spatrick     return NVPTXISD::TexUnified2DS32Float;
317209467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
317309467b48Spatrick     return NVPTXISD::TexUnified2DS32FloatLevel;
317409467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
317509467b48Spatrick     return NVPTXISD::TexUnified2DS32FloatGrad;
317609467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
317709467b48Spatrick     return NVPTXISD::TexUnified2DU32S32;
317809467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
317909467b48Spatrick     return NVPTXISD::TexUnified2DU32Float;
318009467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
318109467b48Spatrick     return NVPTXISD::TexUnified2DU32FloatLevel;
318209467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
318309467b48Spatrick     return NVPTXISD::TexUnified2DU32FloatGrad;
318409467b48Spatrick 
318509467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
318609467b48Spatrick     return NVPTXISD::TexUnified2DArrayFloatS32;
318709467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
318809467b48Spatrick     return NVPTXISD::TexUnified2DArrayFloatFloat;
318909467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
319009467b48Spatrick     return NVPTXISD::TexUnified2DArrayFloatFloatLevel;
319109467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
319209467b48Spatrick     return NVPTXISD::TexUnified2DArrayFloatFloatGrad;
319309467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
319409467b48Spatrick     return NVPTXISD::TexUnified2DArrayS32S32;
319509467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
319609467b48Spatrick     return NVPTXISD::TexUnified2DArrayS32Float;
319709467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
319809467b48Spatrick     return NVPTXISD::TexUnified2DArrayS32FloatLevel;
319909467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
320009467b48Spatrick     return NVPTXISD::TexUnified2DArrayS32FloatGrad;
320109467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
320209467b48Spatrick     return NVPTXISD::TexUnified2DArrayU32S32;
320309467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
320409467b48Spatrick     return NVPTXISD::TexUnified2DArrayU32Float;
320509467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
320609467b48Spatrick     return NVPTXISD::TexUnified2DArrayU32FloatLevel;
320709467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
320809467b48Spatrick     return NVPTXISD::TexUnified2DArrayU32FloatGrad;
320909467b48Spatrick 
321009467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
321109467b48Spatrick     return NVPTXISD::TexUnified3DFloatS32;
321209467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
321309467b48Spatrick     return NVPTXISD::TexUnified3DFloatFloat;
321409467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
321509467b48Spatrick     return NVPTXISD::TexUnified3DFloatFloatLevel;
321609467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
321709467b48Spatrick     return NVPTXISD::TexUnified3DFloatFloatGrad;
321809467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
321909467b48Spatrick     return NVPTXISD::TexUnified3DS32S32;
322009467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
322109467b48Spatrick     return NVPTXISD::TexUnified3DS32Float;
322209467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
322309467b48Spatrick     return NVPTXISD::TexUnified3DS32FloatLevel;
322409467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
322509467b48Spatrick     return NVPTXISD::TexUnified3DS32FloatGrad;
322609467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
322709467b48Spatrick     return NVPTXISD::TexUnified3DU32S32;
322809467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
322909467b48Spatrick     return NVPTXISD::TexUnified3DU32Float;
323009467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
323109467b48Spatrick     return NVPTXISD::TexUnified3DU32FloatLevel;
323209467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
323309467b48Spatrick     return NVPTXISD::TexUnified3DU32FloatGrad;
323409467b48Spatrick 
323509467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
323609467b48Spatrick     return NVPTXISD::TexUnifiedCubeFloatFloat;
323709467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
323809467b48Spatrick     return NVPTXISD::TexUnifiedCubeFloatFloatLevel;
323909467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
324009467b48Spatrick     return NVPTXISD::TexUnifiedCubeS32Float;
324109467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
324209467b48Spatrick     return NVPTXISD::TexUnifiedCubeS32FloatLevel;
324309467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
324409467b48Spatrick     return NVPTXISD::TexUnifiedCubeU32Float;
324509467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
324609467b48Spatrick     return NVPTXISD::TexUnifiedCubeU32FloatLevel;
324709467b48Spatrick 
324809467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
324909467b48Spatrick     return NVPTXISD::TexUnifiedCubeArrayFloatFloat;
325009467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
325109467b48Spatrick     return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel;
325209467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
325309467b48Spatrick     return NVPTXISD::TexUnifiedCubeArrayS32Float;
325409467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
325509467b48Spatrick     return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel;
325609467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
325709467b48Spatrick     return NVPTXISD::TexUnifiedCubeArrayU32Float;
325809467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
325909467b48Spatrick     return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;
326009467b48Spatrick 
326109467b48Spatrick   case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
326209467b48Spatrick     return NVPTXISD::Tld4UnifiedR2DFloatFloat;
326309467b48Spatrick   case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
326409467b48Spatrick     return NVPTXISD::Tld4UnifiedG2DFloatFloat;
326509467b48Spatrick   case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
326609467b48Spatrick     return NVPTXISD::Tld4UnifiedB2DFloatFloat;
326709467b48Spatrick   case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
326809467b48Spatrick     return NVPTXISD::Tld4UnifiedA2DFloatFloat;
326909467b48Spatrick   case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
327009467b48Spatrick     return NVPTXISD::Tld4UnifiedR2DS64Float;
327109467b48Spatrick   case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
327209467b48Spatrick     return NVPTXISD::Tld4UnifiedG2DS64Float;
327309467b48Spatrick   case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
327409467b48Spatrick     return NVPTXISD::Tld4UnifiedB2DS64Float;
327509467b48Spatrick   case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
327609467b48Spatrick     return NVPTXISD::Tld4UnifiedA2DS64Float;
327709467b48Spatrick   case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
327809467b48Spatrick     return NVPTXISD::Tld4UnifiedR2DU64Float;
327909467b48Spatrick   case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
328009467b48Spatrick     return NVPTXISD::Tld4UnifiedG2DU64Float;
328109467b48Spatrick   case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
328209467b48Spatrick     return NVPTXISD::Tld4UnifiedB2DU64Float;
328309467b48Spatrick   case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
328409467b48Spatrick     return NVPTXISD::Tld4UnifiedA2DU64Float;
328509467b48Spatrick   }
328609467b48Spatrick }
328709467b48Spatrick 
getOpcForSurfaceInstr(unsigned Intrinsic)328809467b48Spatrick static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
328909467b48Spatrick   switch (Intrinsic) {
329009467b48Spatrick   default:
329109467b48Spatrick     return 0;
329209467b48Spatrick   case Intrinsic::nvvm_suld_1d_i8_clamp:
329309467b48Spatrick     return NVPTXISD::Suld1DI8Clamp;
329409467b48Spatrick   case Intrinsic::nvvm_suld_1d_i16_clamp:
329509467b48Spatrick     return NVPTXISD::Suld1DI16Clamp;
329609467b48Spatrick   case Intrinsic::nvvm_suld_1d_i32_clamp:
329709467b48Spatrick     return NVPTXISD::Suld1DI32Clamp;
329809467b48Spatrick   case Intrinsic::nvvm_suld_1d_i64_clamp:
329909467b48Spatrick     return NVPTXISD::Suld1DI64Clamp;
330009467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i8_clamp:
330109467b48Spatrick     return NVPTXISD::Suld1DV2I8Clamp;
330209467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i16_clamp:
330309467b48Spatrick     return NVPTXISD::Suld1DV2I16Clamp;
330409467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i32_clamp:
330509467b48Spatrick     return NVPTXISD::Suld1DV2I32Clamp;
330609467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i64_clamp:
330709467b48Spatrick     return NVPTXISD::Suld1DV2I64Clamp;
330809467b48Spatrick   case Intrinsic::nvvm_suld_1d_v4i8_clamp:
330909467b48Spatrick     return NVPTXISD::Suld1DV4I8Clamp;
331009467b48Spatrick   case Intrinsic::nvvm_suld_1d_v4i16_clamp:
331109467b48Spatrick     return NVPTXISD::Suld1DV4I16Clamp;
331209467b48Spatrick   case Intrinsic::nvvm_suld_1d_v4i32_clamp:
331309467b48Spatrick     return NVPTXISD::Suld1DV4I32Clamp;
331409467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i8_clamp:
331509467b48Spatrick     return NVPTXISD::Suld1DArrayI8Clamp;
331609467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i16_clamp:
331709467b48Spatrick     return NVPTXISD::Suld1DArrayI16Clamp;
331809467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i32_clamp:
331909467b48Spatrick     return NVPTXISD::Suld1DArrayI32Clamp;
332009467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i64_clamp:
332109467b48Spatrick     return NVPTXISD::Suld1DArrayI64Clamp;
332209467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
332309467b48Spatrick     return NVPTXISD::Suld1DArrayV2I8Clamp;
332409467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
332509467b48Spatrick     return NVPTXISD::Suld1DArrayV2I16Clamp;
332609467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
332709467b48Spatrick     return NVPTXISD::Suld1DArrayV2I32Clamp;
332809467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
332909467b48Spatrick     return NVPTXISD::Suld1DArrayV2I64Clamp;
333009467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
333109467b48Spatrick     return NVPTXISD::Suld1DArrayV4I8Clamp;
333209467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
333309467b48Spatrick     return NVPTXISD::Suld1DArrayV4I16Clamp;
333409467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
333509467b48Spatrick     return NVPTXISD::Suld1DArrayV4I32Clamp;
333609467b48Spatrick   case Intrinsic::nvvm_suld_2d_i8_clamp:
333709467b48Spatrick     return NVPTXISD::Suld2DI8Clamp;
333809467b48Spatrick   case Intrinsic::nvvm_suld_2d_i16_clamp:
333909467b48Spatrick     return NVPTXISD::Suld2DI16Clamp;
334009467b48Spatrick   case Intrinsic::nvvm_suld_2d_i32_clamp:
334109467b48Spatrick     return NVPTXISD::Suld2DI32Clamp;
334209467b48Spatrick   case Intrinsic::nvvm_suld_2d_i64_clamp:
334309467b48Spatrick     return NVPTXISD::Suld2DI64Clamp;
334409467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i8_clamp:
334509467b48Spatrick     return NVPTXISD::Suld2DV2I8Clamp;
334609467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i16_clamp:
334709467b48Spatrick     return NVPTXISD::Suld2DV2I16Clamp;
334809467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i32_clamp:
334909467b48Spatrick     return NVPTXISD::Suld2DV2I32Clamp;
335009467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i64_clamp:
335109467b48Spatrick     return NVPTXISD::Suld2DV2I64Clamp;
335209467b48Spatrick   case Intrinsic::nvvm_suld_2d_v4i8_clamp:
335309467b48Spatrick     return NVPTXISD::Suld2DV4I8Clamp;
335409467b48Spatrick   case Intrinsic::nvvm_suld_2d_v4i16_clamp:
335509467b48Spatrick     return NVPTXISD::Suld2DV4I16Clamp;
335609467b48Spatrick   case Intrinsic::nvvm_suld_2d_v4i32_clamp:
335709467b48Spatrick     return NVPTXISD::Suld2DV4I32Clamp;
335809467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i8_clamp:
335909467b48Spatrick     return NVPTXISD::Suld2DArrayI8Clamp;
336009467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i16_clamp:
336109467b48Spatrick     return NVPTXISD::Suld2DArrayI16Clamp;
336209467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i32_clamp:
336309467b48Spatrick     return NVPTXISD::Suld2DArrayI32Clamp;
336409467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i64_clamp:
336509467b48Spatrick     return NVPTXISD::Suld2DArrayI64Clamp;
336609467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
336709467b48Spatrick     return NVPTXISD::Suld2DArrayV2I8Clamp;
336809467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
336909467b48Spatrick     return NVPTXISD::Suld2DArrayV2I16Clamp;
337009467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
337109467b48Spatrick     return NVPTXISD::Suld2DArrayV2I32Clamp;
337209467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
337309467b48Spatrick     return NVPTXISD::Suld2DArrayV2I64Clamp;
337409467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
337509467b48Spatrick     return NVPTXISD::Suld2DArrayV4I8Clamp;
337609467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
337709467b48Spatrick     return NVPTXISD::Suld2DArrayV4I16Clamp;
337809467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
337909467b48Spatrick     return NVPTXISD::Suld2DArrayV4I32Clamp;
338009467b48Spatrick   case Intrinsic::nvvm_suld_3d_i8_clamp:
338109467b48Spatrick     return NVPTXISD::Suld3DI8Clamp;
338209467b48Spatrick   case Intrinsic::nvvm_suld_3d_i16_clamp:
338309467b48Spatrick     return NVPTXISD::Suld3DI16Clamp;
338409467b48Spatrick   case Intrinsic::nvvm_suld_3d_i32_clamp:
338509467b48Spatrick     return NVPTXISD::Suld3DI32Clamp;
338609467b48Spatrick   case Intrinsic::nvvm_suld_3d_i64_clamp:
338709467b48Spatrick     return NVPTXISD::Suld3DI64Clamp;
338809467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i8_clamp:
338909467b48Spatrick     return NVPTXISD::Suld3DV2I8Clamp;
339009467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i16_clamp:
339109467b48Spatrick     return NVPTXISD::Suld3DV2I16Clamp;
339209467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i32_clamp:
339309467b48Spatrick     return NVPTXISD::Suld3DV2I32Clamp;
339409467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i64_clamp:
339509467b48Spatrick     return NVPTXISD::Suld3DV2I64Clamp;
339609467b48Spatrick   case Intrinsic::nvvm_suld_3d_v4i8_clamp:
339709467b48Spatrick     return NVPTXISD::Suld3DV4I8Clamp;
339809467b48Spatrick   case Intrinsic::nvvm_suld_3d_v4i16_clamp:
339909467b48Spatrick     return NVPTXISD::Suld3DV4I16Clamp;
340009467b48Spatrick   case Intrinsic::nvvm_suld_3d_v4i32_clamp:
340109467b48Spatrick     return NVPTXISD::Suld3DV4I32Clamp;
340209467b48Spatrick   case Intrinsic::nvvm_suld_1d_i8_trap:
340309467b48Spatrick     return NVPTXISD::Suld1DI8Trap;
340409467b48Spatrick   case Intrinsic::nvvm_suld_1d_i16_trap:
340509467b48Spatrick     return NVPTXISD::Suld1DI16Trap;
340609467b48Spatrick   case Intrinsic::nvvm_suld_1d_i32_trap:
340709467b48Spatrick     return NVPTXISD::Suld1DI32Trap;
340809467b48Spatrick   case Intrinsic::nvvm_suld_1d_i64_trap:
340909467b48Spatrick     return NVPTXISD::Suld1DI64Trap;
341009467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i8_trap:
341109467b48Spatrick     return NVPTXISD::Suld1DV2I8Trap;
341209467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i16_trap:
341309467b48Spatrick     return NVPTXISD::Suld1DV2I16Trap;
341409467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i32_trap:
341509467b48Spatrick     return NVPTXISD::Suld1DV2I32Trap;
341609467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i64_trap:
341709467b48Spatrick     return NVPTXISD::Suld1DV2I64Trap;
341809467b48Spatrick   case Intrinsic::nvvm_suld_1d_v4i8_trap:
341909467b48Spatrick     return NVPTXISD::Suld1DV4I8Trap;
342009467b48Spatrick   case Intrinsic::nvvm_suld_1d_v4i16_trap:
342109467b48Spatrick     return NVPTXISD::Suld1DV4I16Trap;
342209467b48Spatrick   case Intrinsic::nvvm_suld_1d_v4i32_trap:
342309467b48Spatrick     return NVPTXISD::Suld1DV4I32Trap;
342409467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i8_trap:
342509467b48Spatrick     return NVPTXISD::Suld1DArrayI8Trap;
342609467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i16_trap:
342709467b48Spatrick     return NVPTXISD::Suld1DArrayI16Trap;
342809467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i32_trap:
342909467b48Spatrick     return NVPTXISD::Suld1DArrayI32Trap;
343009467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i64_trap:
343109467b48Spatrick     return NVPTXISD::Suld1DArrayI64Trap;
343209467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
343309467b48Spatrick     return NVPTXISD::Suld1DArrayV2I8Trap;
343409467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
343509467b48Spatrick     return NVPTXISD::Suld1DArrayV2I16Trap;
343609467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
343709467b48Spatrick     return NVPTXISD::Suld1DArrayV2I32Trap;
343809467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
343909467b48Spatrick     return NVPTXISD::Suld1DArrayV2I64Trap;
344009467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
344109467b48Spatrick     return NVPTXISD::Suld1DArrayV4I8Trap;
344209467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
344309467b48Spatrick     return NVPTXISD::Suld1DArrayV4I16Trap;
344409467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
344509467b48Spatrick     return NVPTXISD::Suld1DArrayV4I32Trap;
344609467b48Spatrick   case Intrinsic::nvvm_suld_2d_i8_trap:
344709467b48Spatrick     return NVPTXISD::Suld2DI8Trap;
344809467b48Spatrick   case Intrinsic::nvvm_suld_2d_i16_trap:
344909467b48Spatrick     return NVPTXISD::Suld2DI16Trap;
345009467b48Spatrick   case Intrinsic::nvvm_suld_2d_i32_trap:
345109467b48Spatrick     return NVPTXISD::Suld2DI32Trap;
345209467b48Spatrick   case Intrinsic::nvvm_suld_2d_i64_trap:
345309467b48Spatrick     return NVPTXISD::Suld2DI64Trap;
345409467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i8_trap:
345509467b48Spatrick     return NVPTXISD::Suld2DV2I8Trap;
345609467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i16_trap:
345709467b48Spatrick     return NVPTXISD::Suld2DV2I16Trap;
345809467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i32_trap:
345909467b48Spatrick     return NVPTXISD::Suld2DV2I32Trap;
346009467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i64_trap:
346109467b48Spatrick     return NVPTXISD::Suld2DV2I64Trap;
346209467b48Spatrick   case Intrinsic::nvvm_suld_2d_v4i8_trap:
346309467b48Spatrick     return NVPTXISD::Suld2DV4I8Trap;
346409467b48Spatrick   case Intrinsic::nvvm_suld_2d_v4i16_trap:
346509467b48Spatrick     return NVPTXISD::Suld2DV4I16Trap;
346609467b48Spatrick   case Intrinsic::nvvm_suld_2d_v4i32_trap:
346709467b48Spatrick     return NVPTXISD::Suld2DV4I32Trap;
346809467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i8_trap:
346909467b48Spatrick     return NVPTXISD::Suld2DArrayI8Trap;
347009467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i16_trap:
347109467b48Spatrick     return NVPTXISD::Suld2DArrayI16Trap;
347209467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i32_trap:
347309467b48Spatrick     return NVPTXISD::Suld2DArrayI32Trap;
347409467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i64_trap:
347509467b48Spatrick     return NVPTXISD::Suld2DArrayI64Trap;
347609467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
347709467b48Spatrick     return NVPTXISD::Suld2DArrayV2I8Trap;
347809467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
347909467b48Spatrick     return NVPTXISD::Suld2DArrayV2I16Trap;
348009467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
348109467b48Spatrick     return NVPTXISD::Suld2DArrayV2I32Trap;
348209467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
348309467b48Spatrick     return NVPTXISD::Suld2DArrayV2I64Trap;
348409467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
348509467b48Spatrick     return NVPTXISD::Suld2DArrayV4I8Trap;
348609467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
348709467b48Spatrick     return NVPTXISD::Suld2DArrayV4I16Trap;
348809467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
348909467b48Spatrick     return NVPTXISD::Suld2DArrayV4I32Trap;
349009467b48Spatrick   case Intrinsic::nvvm_suld_3d_i8_trap:
349109467b48Spatrick     return NVPTXISD::Suld3DI8Trap;
349209467b48Spatrick   case Intrinsic::nvvm_suld_3d_i16_trap:
349309467b48Spatrick     return NVPTXISD::Suld3DI16Trap;
349409467b48Spatrick   case Intrinsic::nvvm_suld_3d_i32_trap:
349509467b48Spatrick     return NVPTXISD::Suld3DI32Trap;
349609467b48Spatrick   case Intrinsic::nvvm_suld_3d_i64_trap:
349709467b48Spatrick     return NVPTXISD::Suld3DI64Trap;
349809467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i8_trap:
349909467b48Spatrick     return NVPTXISD::Suld3DV2I8Trap;
350009467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i16_trap:
350109467b48Spatrick     return NVPTXISD::Suld3DV2I16Trap;
350209467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i32_trap:
350309467b48Spatrick     return NVPTXISD::Suld3DV2I32Trap;
350409467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i64_trap:
350509467b48Spatrick     return NVPTXISD::Suld3DV2I64Trap;
350609467b48Spatrick   case Intrinsic::nvvm_suld_3d_v4i8_trap:
350709467b48Spatrick     return NVPTXISD::Suld3DV4I8Trap;
350809467b48Spatrick   case Intrinsic::nvvm_suld_3d_v4i16_trap:
350909467b48Spatrick     return NVPTXISD::Suld3DV4I16Trap;
351009467b48Spatrick   case Intrinsic::nvvm_suld_3d_v4i32_trap:
351109467b48Spatrick     return NVPTXISD::Suld3DV4I32Trap;
351209467b48Spatrick   case Intrinsic::nvvm_suld_1d_i8_zero:
351309467b48Spatrick     return NVPTXISD::Suld1DI8Zero;
351409467b48Spatrick   case Intrinsic::nvvm_suld_1d_i16_zero:
351509467b48Spatrick     return NVPTXISD::Suld1DI16Zero;
351609467b48Spatrick   case Intrinsic::nvvm_suld_1d_i32_zero:
351709467b48Spatrick     return NVPTXISD::Suld1DI32Zero;
351809467b48Spatrick   case Intrinsic::nvvm_suld_1d_i64_zero:
351909467b48Spatrick     return NVPTXISD::Suld1DI64Zero;
352009467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i8_zero:
352109467b48Spatrick     return NVPTXISD::Suld1DV2I8Zero;
352209467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i16_zero:
352309467b48Spatrick     return NVPTXISD::Suld1DV2I16Zero;
352409467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i32_zero:
352509467b48Spatrick     return NVPTXISD::Suld1DV2I32Zero;
352609467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i64_zero:
352709467b48Spatrick     return NVPTXISD::Suld1DV2I64Zero;
352809467b48Spatrick   case Intrinsic::nvvm_suld_1d_v4i8_zero:
352909467b48Spatrick     return NVPTXISD::Suld1DV4I8Zero;
353009467b48Spatrick   case Intrinsic::nvvm_suld_1d_v4i16_zero:
353109467b48Spatrick     return NVPTXISD::Suld1DV4I16Zero;
353209467b48Spatrick   case Intrinsic::nvvm_suld_1d_v4i32_zero:
353309467b48Spatrick     return NVPTXISD::Suld1DV4I32Zero;
353409467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i8_zero:
353509467b48Spatrick     return NVPTXISD::Suld1DArrayI8Zero;
353609467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i16_zero:
353709467b48Spatrick     return NVPTXISD::Suld1DArrayI16Zero;
353809467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i32_zero:
353909467b48Spatrick     return NVPTXISD::Suld1DArrayI32Zero;
354009467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i64_zero:
354109467b48Spatrick     return NVPTXISD::Suld1DArrayI64Zero;
354209467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
354309467b48Spatrick     return NVPTXISD::Suld1DArrayV2I8Zero;
354409467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
354509467b48Spatrick     return NVPTXISD::Suld1DArrayV2I16Zero;
354609467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
354709467b48Spatrick     return NVPTXISD::Suld1DArrayV2I32Zero;
354809467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
354909467b48Spatrick     return NVPTXISD::Suld1DArrayV2I64Zero;
355009467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
355109467b48Spatrick     return NVPTXISD::Suld1DArrayV4I8Zero;
355209467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
355309467b48Spatrick     return NVPTXISD::Suld1DArrayV4I16Zero;
355409467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
355509467b48Spatrick     return NVPTXISD::Suld1DArrayV4I32Zero;
355609467b48Spatrick   case Intrinsic::nvvm_suld_2d_i8_zero:
355709467b48Spatrick     return NVPTXISD::Suld2DI8Zero;
355809467b48Spatrick   case Intrinsic::nvvm_suld_2d_i16_zero:
355909467b48Spatrick     return NVPTXISD::Suld2DI16Zero;
356009467b48Spatrick   case Intrinsic::nvvm_suld_2d_i32_zero:
356109467b48Spatrick     return NVPTXISD::Suld2DI32Zero;
356209467b48Spatrick   case Intrinsic::nvvm_suld_2d_i64_zero:
356309467b48Spatrick     return NVPTXISD::Suld2DI64Zero;
356409467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i8_zero:
356509467b48Spatrick     return NVPTXISD::Suld2DV2I8Zero;
356609467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i16_zero:
356709467b48Spatrick     return NVPTXISD::Suld2DV2I16Zero;
356809467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i32_zero:
356909467b48Spatrick     return NVPTXISD::Suld2DV2I32Zero;
357009467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i64_zero:
357109467b48Spatrick     return NVPTXISD::Suld2DV2I64Zero;
357209467b48Spatrick   case Intrinsic::nvvm_suld_2d_v4i8_zero:
357309467b48Spatrick     return NVPTXISD::Suld2DV4I8Zero;
357409467b48Spatrick   case Intrinsic::nvvm_suld_2d_v4i16_zero:
357509467b48Spatrick     return NVPTXISD::Suld2DV4I16Zero;
357609467b48Spatrick   case Intrinsic::nvvm_suld_2d_v4i32_zero:
357709467b48Spatrick     return NVPTXISD::Suld2DV4I32Zero;
357809467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i8_zero:
357909467b48Spatrick     return NVPTXISD::Suld2DArrayI8Zero;
358009467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i16_zero:
358109467b48Spatrick     return NVPTXISD::Suld2DArrayI16Zero;
358209467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i32_zero:
358309467b48Spatrick     return NVPTXISD::Suld2DArrayI32Zero;
358409467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i64_zero:
358509467b48Spatrick     return NVPTXISD::Suld2DArrayI64Zero;
358609467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
358709467b48Spatrick     return NVPTXISD::Suld2DArrayV2I8Zero;
358809467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
358909467b48Spatrick     return NVPTXISD::Suld2DArrayV2I16Zero;
359009467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
359109467b48Spatrick     return NVPTXISD::Suld2DArrayV2I32Zero;
359209467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
359309467b48Spatrick     return NVPTXISD::Suld2DArrayV2I64Zero;
359409467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
359509467b48Spatrick     return NVPTXISD::Suld2DArrayV4I8Zero;
359609467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
359709467b48Spatrick     return NVPTXISD::Suld2DArrayV4I16Zero;
359809467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
359909467b48Spatrick     return NVPTXISD::Suld2DArrayV4I32Zero;
360009467b48Spatrick   case Intrinsic::nvvm_suld_3d_i8_zero:
360109467b48Spatrick     return NVPTXISD::Suld3DI8Zero;
360209467b48Spatrick   case Intrinsic::nvvm_suld_3d_i16_zero:
360309467b48Spatrick     return NVPTXISD::Suld3DI16Zero;
360409467b48Spatrick   case Intrinsic::nvvm_suld_3d_i32_zero:
360509467b48Spatrick     return NVPTXISD::Suld3DI32Zero;
360609467b48Spatrick   case Intrinsic::nvvm_suld_3d_i64_zero:
360709467b48Spatrick     return NVPTXISD::Suld3DI64Zero;
360809467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i8_zero:
360909467b48Spatrick     return NVPTXISD::Suld3DV2I8Zero;
361009467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i16_zero:
361109467b48Spatrick     return NVPTXISD::Suld3DV2I16Zero;
361209467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i32_zero:
361309467b48Spatrick     return NVPTXISD::Suld3DV2I32Zero;
361409467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i64_zero:
361509467b48Spatrick     return NVPTXISD::Suld3DV2I64Zero;
361609467b48Spatrick   case Intrinsic::nvvm_suld_3d_v4i8_zero:
361709467b48Spatrick     return NVPTXISD::Suld3DV4I8Zero;
361809467b48Spatrick   case Intrinsic::nvvm_suld_3d_v4i16_zero:
361909467b48Spatrick     return NVPTXISD::Suld3DV4I16Zero;
362009467b48Spatrick   case Intrinsic::nvvm_suld_3d_v4i32_zero:
362109467b48Spatrick     return NVPTXISD::Suld3DV4I32Zero;
362209467b48Spatrick   }
362309467b48Spatrick }
362409467b48Spatrick 
362509467b48Spatrick // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
362609467b48Spatrick // TgtMemIntrinsic because we need information that is only available in
362709467b48Spatrick // the "Value" type of the destination pointer; in particular, the
362809467b48Spatrick // address space information.
getTgtMemIntrinsic(IntrinsicInfo & Info,const CallInst & I,MachineFunction & MF,unsigned Intrinsic) const363009467b48Spatrick bool NVPTXTargetLowering::getTgtMemIntrinsic(
363109467b48Spatrick     IntrinsicInfo &Info, const CallInst &I,
363209467b48Spatrick     MachineFunction &MF, unsigned Intrinsic) const {
363309467b48Spatrick   switch (Intrinsic) {
363409467b48Spatrick   default:
363509467b48Spatrick     return false;
363609467b48Spatrick   case Intrinsic::nvvm_match_all_sync_i32p:
363709467b48Spatrick   case Intrinsic::nvvm_match_all_sync_i64p:
363809467b48Spatrick     Info.opc = ISD::INTRINSIC_W_CHAIN;
363909467b48Spatrick     // memVT is bogus. These intrinsics have IntrInaccessibleMemOnly attribute
364009467b48Spatrick     // in order to model data exchange with other threads, but perform no real
364109467b48Spatrick     // memory accesses.
364209467b48Spatrick     Info.memVT = MVT::i1;
364309467b48Spatrick 
364409467b48Spatrick     // Our result depends on both our and other thread's arguments.
364509467b48Spatrick     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
364609467b48Spatrick     return true;
364709467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col:
364809467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row:
364909467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride:
365009467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride:
365109467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col:
365209467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row:
365309467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride:
365409467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride:
365509467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col:
365609467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row:
365709467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride:
365809467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride:
365909467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col:
366009467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row:
366109467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride:
366209467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride:
366309467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col:
366409467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row:
366509467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride:
366609467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride:
366709467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col:
366809467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row:
366909467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride:
367009467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride: {
367109467b48Spatrick     Info.opc = ISD::INTRINSIC_W_CHAIN;
367209467b48Spatrick     Info.memVT = MVT::v8f16;
367309467b48Spatrick     Info.ptrVal = I.getArgOperand(0);
367409467b48Spatrick     Info.offset = 0;
367509467b48Spatrick     Info.flags = MachineMemOperand::MOLoad;
367609467b48Spatrick     Info.align = Align(16);
367709467b48Spatrick     return true;
367809467b48Spatrick   }
367909467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col:
368009467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col_stride:
368109467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col_stride:
368209467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col:
368309467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row:
368409467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row_stride:
368509467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row_stride:
368609467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row:
368773471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col:
368873471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col_stride:
368973471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row:
369073471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row_stride:
369109467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col:
369209467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col_stride:
369309467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col_stride:
369409467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col:
369509467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row:
369609467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row_stride:
369709467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row_stride:
369873471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row:
369973471bf0Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col:
370073471bf0Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col_stride:
370173471bf0Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row:
370273471bf0Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row_stride: {
370309467b48Spatrick     Info.opc = ISD::INTRINSIC_W_CHAIN;
370409467b48Spatrick     Info.memVT = MVT::v2i32;
370509467b48Spatrick     Info.ptrVal = I.getArgOperand(0);
370609467b48Spatrick     Info.offset = 0;
370709467b48Spatrick     Info.flags = MachineMemOperand::MOLoad;
370809467b48Spatrick     Info.align = Align(8);
370909467b48Spatrick     return true;
371009467b48Spatrick   }
371109467b48Spatrick 
371209467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col:
371309467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col_stride:
371409467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col_stride:
371509467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col:
371609467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row:
371709467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row_stride:
371809467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row_stride:
371909467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row:
372073471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col:
372173471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col_stride:
372273471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row:
372373471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row_stride:
372473471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col:
372573471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col_stride:
372673471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row:
372773471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row_stride:
372809467b48Spatrick 
372909467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col:
373009467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col_stride:
373109467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col_stride:
373209467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col:
373309467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row:
373409467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row_stride:
373509467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row_stride:
373673471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row:
373773471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col:
373873471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col_stride:
373973471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row:
374073471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row_stride:
374173471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col:
374273471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col_stride:
374373471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row:
3744*d415bd75Srobert   case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row_stride:
3745*d415bd75Srobert   case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_b16:
3746*d415bd75Srobert   case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_trans_b16: {
374709467b48Spatrick     Info.opc = ISD::INTRINSIC_W_CHAIN;
374809467b48Spatrick     Info.memVT = MVT::v4i32;
374909467b48Spatrick     Info.ptrVal = I.getArgOperand(0);
375009467b48Spatrick     Info.offset = 0;
375109467b48Spatrick     Info.flags = MachineMemOperand::MOLoad;
375209467b48Spatrick     Info.align = Align(16);
375309467b48Spatrick     return true;
375409467b48Spatrick   }
375509467b48Spatrick 
375609467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col:
375709467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col_stride:
375809467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col_stride:
375909467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col:
376009467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row:
376109467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row_stride:
376209467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row_stride:
376309467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row:
376409467b48Spatrick 
376509467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col:
376609467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col_stride:
376709467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col_stride:
376809467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col:
376909467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row:
377009467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row_stride:
377109467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row_stride:
377209467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row:
377309467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row:
377409467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row_stride:
377509467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col:
377609467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col_stride:
377709467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row:
377809467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row_stride:
377909467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row_stride:
378009467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row:
378109467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col:
378209467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col_stride:
378309467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col_stride:
3784*d415bd75Srobert   case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col:
3785*d415bd75Srobert   case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_b16:
3786*d415bd75Srobert   case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_trans_b16: {
378709467b48Spatrick     Info.opc = ISD::INTRINSIC_W_CHAIN;
378809467b48Spatrick     Info.memVT = MVT::i32;
378909467b48Spatrick     Info.ptrVal = I.getArgOperand(0);
379009467b48Spatrick     Info.offset = 0;
379109467b48Spatrick     Info.flags = MachineMemOperand::MOLoad;
379209467b48Spatrick     Info.align = Align(4);
379309467b48Spatrick     return true;
379409467b48Spatrick   }
379509467b48Spatrick 
379609467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col:
379709467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row:
379809467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride:
379909467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride:
380009467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col:
380109467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row:
380209467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride:
380309467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride:
380409467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col:
380509467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row:
380609467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride:
380709467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride: {
380809467b48Spatrick     Info.opc = ISD::INTRINSIC_W_CHAIN;
380909467b48Spatrick     Info.memVT = MVT::v4f16;
381009467b48Spatrick     Info.ptrVal = I.getArgOperand(0);
381109467b48Spatrick     Info.offset = 0;
381209467b48Spatrick     Info.flags = MachineMemOperand::MOLoad;
381309467b48Spatrick     Info.align = Align(16);
381409467b48Spatrick     return true;
381509467b48Spatrick   }
381609467b48Spatrick 
381709467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col:
381809467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row:
381909467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride:
382009467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride:
382109467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col:
382209467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row:
382309467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride:
382409467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride:
382509467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col:
382609467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row:
382709467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride:
382873471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride:
382973471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col:
383073471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row:
383173471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col_stride:
383273471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row_stride: {
383309467b48Spatrick     Info.opc = ISD::INTRINSIC_W_CHAIN;
383409467b48Spatrick     Info.memVT = MVT::v8f32;
383509467b48Spatrick     Info.ptrVal = I.getArgOperand(0);
383609467b48Spatrick     Info.offset = 0;
383709467b48Spatrick     Info.flags = MachineMemOperand::MOLoad;
383809467b48Spatrick     Info.align = Align(16);
383909467b48Spatrick     return true;
384009467b48Spatrick   }
384109467b48Spatrick 
384273471bf0Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col:
384373471bf0Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col_stride:
384473471bf0Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row:
384573471bf0Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row_stride:
384673471bf0Spatrick 
384773471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col:
384873471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col_stride:
384973471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row:
385073471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row_stride:
385173471bf0Spatrick 
385209467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col:
385309467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col_stride:
385409467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row:
385509467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row_stride:
385609467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col:
385709467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col_stride:
385809467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row:
385909467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row_stride:
386009467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col:
386109467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col_stride:
386209467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row:
386309467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row_stride: {
386409467b48Spatrick     Info.opc = ISD::INTRINSIC_W_CHAIN;
386509467b48Spatrick     Info.memVT = MVT::v8i32;
386609467b48Spatrick     Info.ptrVal = I.getArgOperand(0);
386709467b48Spatrick     Info.offset = 0;
386809467b48Spatrick     Info.flags = MachineMemOperand::MOLoad;
386909467b48Spatrick     Info.align = Align(16);
387009467b48Spatrick     return true;
387109467b48Spatrick   }
387209467b48Spatrick 
387309467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col:
387409467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col_stride:
387509467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row:
387609467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row_stride:
387709467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col:
387809467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col_stride:
387909467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row:
3880*d415bd75Srobert   case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row_stride:
3881*d415bd75Srobert   case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_b16:
3882*d415bd75Srobert   case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_trans_b16: {
388309467b48Spatrick     Info.opc = ISD::INTRINSIC_W_CHAIN;
388409467b48Spatrick     Info.memVT = MVT::v2i32;
388509467b48Spatrick     Info.ptrVal = I.getArgOperand(0);
388609467b48Spatrick     Info.offset = 0;
388709467b48Spatrick     Info.flags = MachineMemOperand::MOLoad;
388809467b48Spatrick     Info.align = Align(8);
388909467b48Spatrick     return true;
389009467b48Spatrick   }
389109467b48Spatrick 
389273471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col:
389373471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col_stride:
389473471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row:
389573471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row_stride:
389673471bf0Spatrick 
389773471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col:
389873471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col_stride:
389973471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row:
390073471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row_stride: {
390173471bf0Spatrick     Info.opc = ISD::INTRINSIC_W_CHAIN;
390273471bf0Spatrick     Info.memVT = MVT::f64;
390373471bf0Spatrick     Info.ptrVal = I.getArgOperand(0);
390473471bf0Spatrick     Info.offset = 0;
390573471bf0Spatrick     Info.flags = MachineMemOperand::MOLoad;
390673471bf0Spatrick     Info.align = Align(8);
390773471bf0Spatrick     return true;
390873471bf0Spatrick   }
390973471bf0Spatrick 
391073471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col:
391173471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col_stride:
391273471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row:
391373471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row_stride: {
391473471bf0Spatrick     Info.opc = ISD::INTRINSIC_W_CHAIN;
391573471bf0Spatrick     Info.memVT = MVT::v2f64;
391673471bf0Spatrick     Info.ptrVal = I.getArgOperand(0);
391773471bf0Spatrick     Info.offset = 0;
391873471bf0Spatrick     Info.flags = MachineMemOperand::MOLoad;
391973471bf0Spatrick     Info.align = Align(16);
392073471bf0Spatrick     return true;
392173471bf0Spatrick   }
392273471bf0Spatrick 
392309467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col:
392409467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row:
392509467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride:
392609467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride:
392709467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col:
392809467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row:
392909467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride:
393009467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride:
393109467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col:
393209467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row:
393309467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride:
393409467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride: {
393509467b48Spatrick     Info.opc = ISD::INTRINSIC_VOID;
393609467b48Spatrick     Info.memVT = MVT::v4f16;
393709467b48Spatrick     Info.ptrVal = I.getArgOperand(0);
393809467b48Spatrick     Info.offset = 0;
393909467b48Spatrick     Info.flags = MachineMemOperand::MOStore;
394009467b48Spatrick     Info.align = Align(16);
394109467b48Spatrick     return true;
394209467b48Spatrick   }
394309467b48Spatrick 
394409467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col:
394509467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row:
394609467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride:
394709467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride:
394809467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col:
394909467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row:
395009467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride:
395109467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride:
395209467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col:
395309467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row:
395409467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride:
395573471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride:
395673471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col:
395773471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row:
395873471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col_stride:
395973471bf0Spatrick   case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row_stride: {
396009467b48Spatrick     Info.opc = ISD::INTRINSIC_VOID;
396109467b48Spatrick     Info.memVT = MVT::v8f32;
396209467b48Spatrick     Info.ptrVal = I.getArgOperand(0);
396309467b48Spatrick     Info.offset = 0;
396409467b48Spatrick     Info.flags = MachineMemOperand::MOStore;
396509467b48Spatrick     Info.align = Align(16);
396609467b48Spatrick     return true;
396709467b48Spatrick   }
396809467b48Spatrick 
396909467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col:
397009467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col_stride:
397109467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row:
397209467b48Spatrick   case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row_stride:
397309467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col:
397409467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col_stride:
397509467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row:
397609467b48Spatrick   case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row_stride:
397709467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col:
397809467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col_stride:
397909467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row:
398009467b48Spatrick   case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row_stride: {
398109467b48Spatrick     Info.opc = ISD::INTRINSIC_VOID;
398209467b48Spatrick     Info.memVT = MVT::v8i32;
398309467b48Spatrick     Info.ptrVal = I.getArgOperand(0);
398409467b48Spatrick     Info.offset = 0;
398509467b48Spatrick     Info.flags = MachineMemOperand::MOStore;
398609467b48Spatrick     Info.align = Align(16);
398709467b48Spatrick     return true;
398809467b48Spatrick   }
398909467b48Spatrick 
399009467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col:
399109467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col_stride:
399209467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row:
399309467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row_stride:
399409467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col:
399509467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col_stride:
399609467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row:
399709467b48Spatrick   case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row_stride: {
399809467b48Spatrick     Info.opc = ISD::INTRINSIC_VOID;
399909467b48Spatrick     Info.memVT = MVT::v2i32;
400009467b48Spatrick     Info.ptrVal = I.getArgOperand(0);
400109467b48Spatrick     Info.offset = 0;
400209467b48Spatrick     Info.flags = MachineMemOperand::MOStore;
400309467b48Spatrick     Info.align = Align(8);
400409467b48Spatrick     return true;
400509467b48Spatrick   }
400609467b48Spatrick 
400773471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col:
400873471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col_stride:
400973471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row:
401073471bf0Spatrick   case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row_stride: {
401173471bf0Spatrick     Info.opc = ISD::INTRINSIC_VOID;
401273471bf0Spatrick     Info.memVT = MVT::v2f64;
401373471bf0Spatrick     Info.ptrVal = I.getArgOperand(0);
401473471bf0Spatrick     Info.offset = 0;
401573471bf0Spatrick     Info.flags = MachineMemOperand::MOStore;
401673471bf0Spatrick     Info.align = Align(16);
401773471bf0Spatrick     return true;
401873471bf0Spatrick   }
401973471bf0Spatrick 
402009467b48Spatrick   case Intrinsic::nvvm_atomic_load_inc_32:
402109467b48Spatrick   case Intrinsic::nvvm_atomic_load_dec_32:
402209467b48Spatrick 
402309467b48Spatrick   case Intrinsic::nvvm_atomic_add_gen_f_cta:
402409467b48Spatrick   case Intrinsic::nvvm_atomic_add_gen_f_sys:
402509467b48Spatrick   case Intrinsic::nvvm_atomic_add_gen_i_cta:
402609467b48Spatrick   case Intrinsic::nvvm_atomic_add_gen_i_sys:
402709467b48Spatrick   case Intrinsic::nvvm_atomic_and_gen_i_cta:
402809467b48Spatrick   case Intrinsic::nvvm_atomic_and_gen_i_sys:
402909467b48Spatrick   case Intrinsic::nvvm_atomic_cas_gen_i_cta:
403009467b48Spatrick   case Intrinsic::nvvm_atomic_cas_gen_i_sys:
403109467b48Spatrick   case Intrinsic::nvvm_atomic_dec_gen_i_cta:
403209467b48Spatrick   case Intrinsic::nvvm_atomic_dec_gen_i_sys:
403309467b48Spatrick   case Intrinsic::nvvm_atomic_inc_gen_i_cta:
403409467b48Spatrick   case Intrinsic::nvvm_atomic_inc_gen_i_sys:
403509467b48Spatrick   case Intrinsic::nvvm_atomic_max_gen_i_cta:
403609467b48Spatrick   case Intrinsic::nvvm_atomic_max_gen_i_sys:
403709467b48Spatrick   case Intrinsic::nvvm_atomic_min_gen_i_cta:
403809467b48Spatrick   case Intrinsic::nvvm_atomic_min_gen_i_sys:
403909467b48Spatrick   case Intrinsic::nvvm_atomic_or_gen_i_cta:
404009467b48Spatrick   case Intrinsic::nvvm_atomic_or_gen_i_sys:
404109467b48Spatrick   case Intrinsic::nvvm_atomic_exch_gen_i_cta:
404209467b48Spatrick   case Intrinsic::nvvm_atomic_exch_gen_i_sys:
404309467b48Spatrick   case Intrinsic::nvvm_atomic_xor_gen_i_cta:
404409467b48Spatrick   case Intrinsic::nvvm_atomic_xor_gen_i_sys: {
404509467b48Spatrick     auto &DL = I.getModule()->getDataLayout();
404609467b48Spatrick     Info.opc = ISD::INTRINSIC_W_CHAIN;
404709467b48Spatrick     Info.memVT = getValueType(DL, I.getType());
404809467b48Spatrick     Info.ptrVal = I.getArgOperand(0);
404909467b48Spatrick     Info.offset = 0;
405009467b48Spatrick     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
405109467b48Spatrick     Info.align.reset();
405209467b48Spatrick     return true;
405309467b48Spatrick   }
405409467b48Spatrick 
405509467b48Spatrick   case Intrinsic::nvvm_ldu_global_i:
405609467b48Spatrick   case Intrinsic::nvvm_ldu_global_f:
405709467b48Spatrick   case Intrinsic::nvvm_ldu_global_p: {
405809467b48Spatrick     auto &DL = I.getModule()->getDataLayout();
405909467b48Spatrick     Info.opc = ISD::INTRINSIC_W_CHAIN;
406009467b48Spatrick     if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
406109467b48Spatrick       Info.memVT = getValueType(DL, I.getType());
406209467b48Spatrick     else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
406309467b48Spatrick       Info.memVT = getPointerTy(DL);
406409467b48Spatrick     else
406509467b48Spatrick       Info.memVT = getValueType(DL, I.getType());
406609467b48Spatrick     Info.ptrVal = I.getArgOperand(0);
406709467b48Spatrick     Info.offset = 0;
406809467b48Spatrick     Info.flags = MachineMemOperand::MOLoad;
4069097a140dSpatrick     Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
407009467b48Spatrick 
407109467b48Spatrick     return true;
407209467b48Spatrick   }
407309467b48Spatrick   case Intrinsic::nvvm_ldg_global_i:
407409467b48Spatrick   case Intrinsic::nvvm_ldg_global_f:
407509467b48Spatrick   case Intrinsic::nvvm_ldg_global_p: {
407609467b48Spatrick     auto &DL = I.getModule()->getDataLayout();
407709467b48Spatrick 
407809467b48Spatrick     Info.opc = ISD::INTRINSIC_W_CHAIN;
407909467b48Spatrick     if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
408009467b48Spatrick       Info.memVT = getValueType(DL, I.getType());
408109467b48Spatrick     else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
408209467b48Spatrick       Info.memVT = getPointerTy(DL);
408309467b48Spatrick     else
408409467b48Spatrick       Info.memVT = getValueType(DL, I.getType());
408509467b48Spatrick     Info.ptrVal = I.getArgOperand(0);
408609467b48Spatrick     Info.offset = 0;
408709467b48Spatrick     Info.flags = MachineMemOperand::MOLoad;
4088097a140dSpatrick     Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
408909467b48Spatrick 
409009467b48Spatrick     return true;
409109467b48Spatrick   }
409209467b48Spatrick 
409309467b48Spatrick   case Intrinsic::nvvm_tex_1d_v4f32_s32:
409409467b48Spatrick   case Intrinsic::nvvm_tex_1d_v4f32_f32:
409509467b48Spatrick   case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
409609467b48Spatrick   case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
409709467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
409809467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
409909467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
410009467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
410109467b48Spatrick   case Intrinsic::nvvm_tex_2d_v4f32_s32:
410209467b48Spatrick   case Intrinsic::nvvm_tex_2d_v4f32_f32:
410309467b48Spatrick   case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
410409467b48Spatrick   case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
410509467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
410609467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
410709467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
410809467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
410909467b48Spatrick   case Intrinsic::nvvm_tex_3d_v4f32_s32:
411009467b48Spatrick   case Intrinsic::nvvm_tex_3d_v4f32_f32:
411109467b48Spatrick   case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
411209467b48Spatrick   case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
411309467b48Spatrick   case Intrinsic::nvvm_tex_cube_v4f32_f32:
411409467b48Spatrick   case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
411509467b48Spatrick   case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
411609467b48Spatrick   case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
411709467b48Spatrick   case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
411809467b48Spatrick   case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
411909467b48Spatrick   case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
412009467b48Spatrick   case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
412109467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
412209467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
412309467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
412409467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
412509467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
412609467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
412709467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
412809467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
412909467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
413009467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
413109467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
413209467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
413309467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
413409467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
413509467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
413609467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
413709467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
413809467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
413909467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
414009467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
414109467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
414209467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
414309467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
414409467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
414509467b48Spatrick   case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
414609467b48Spatrick   case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
414709467b48Spatrick   case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
414809467b48Spatrick   case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
414909467b48Spatrick     Info.opc = getOpcForTextureInstr(Intrinsic);
415009467b48Spatrick     Info.memVT = MVT::v4f32;
415109467b48Spatrick     Info.ptrVal = nullptr;
415209467b48Spatrick     Info.offset = 0;
415309467b48Spatrick     Info.flags = MachineMemOperand::MOLoad;
415409467b48Spatrick     Info.align = Align(16);
415509467b48Spatrick     return true;
415609467b48Spatrick 
415709467b48Spatrick   case Intrinsic::nvvm_tex_1d_v4s32_s32:
415809467b48Spatrick   case Intrinsic::nvvm_tex_1d_v4s32_f32:
415909467b48Spatrick   case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
416009467b48Spatrick   case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
416109467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
416209467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
416309467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
416409467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
416509467b48Spatrick   case Intrinsic::nvvm_tex_2d_v4s32_s32:
416609467b48Spatrick   case Intrinsic::nvvm_tex_2d_v4s32_f32:
416709467b48Spatrick   case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
416809467b48Spatrick   case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
416909467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
417009467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
417109467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
417209467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
417309467b48Spatrick   case Intrinsic::nvvm_tex_3d_v4s32_s32:
417409467b48Spatrick   case Intrinsic::nvvm_tex_3d_v4s32_f32:
417509467b48Spatrick   case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
417609467b48Spatrick   case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
417709467b48Spatrick   case Intrinsic::nvvm_tex_cube_v4s32_f32:
417809467b48Spatrick   case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
417909467b48Spatrick   case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
418009467b48Spatrick   case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
418109467b48Spatrick   case Intrinsic::nvvm_tex_cube_v4u32_f32:
418209467b48Spatrick   case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
418309467b48Spatrick   case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
418409467b48Spatrick   case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
418509467b48Spatrick   case Intrinsic::nvvm_tex_1d_v4u32_s32:
418609467b48Spatrick   case Intrinsic::nvvm_tex_1d_v4u32_f32:
418709467b48Spatrick   case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
418809467b48Spatrick   case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
418909467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
419009467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
419109467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
419209467b48Spatrick   case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
419309467b48Spatrick   case Intrinsic::nvvm_tex_2d_v4u32_s32:
419409467b48Spatrick   case Intrinsic::nvvm_tex_2d_v4u32_f32:
419509467b48Spatrick   case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
419609467b48Spatrick   case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
419709467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
419809467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
419909467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
420009467b48Spatrick   case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
420109467b48Spatrick   case Intrinsic::nvvm_tex_3d_v4u32_s32:
420209467b48Spatrick   case Intrinsic::nvvm_tex_3d_v4u32_f32:
420309467b48Spatrick   case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
420409467b48Spatrick   case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
420509467b48Spatrick   case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
420609467b48Spatrick   case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
420709467b48Spatrick   case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
420809467b48Spatrick   case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
420909467b48Spatrick   case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
421009467b48Spatrick   case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
421109467b48Spatrick   case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
421209467b48Spatrick   case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
421309467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
421409467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
421509467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
421609467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
421709467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
421809467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
421909467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
422009467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
422109467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
422209467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
422309467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
422409467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
422509467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
422609467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
422709467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
422809467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
422909467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
423009467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
423109467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
423209467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
423309467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
423409467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
423509467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
423609467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
423709467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
423809467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
423909467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
424009467b48Spatrick   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
424109467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
424209467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
424309467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
424409467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
424509467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
424609467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
424709467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
424809467b48Spatrick   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
424909467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
425009467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
425109467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
425209467b48Spatrick   case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
425309467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
425409467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
425509467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
425609467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
425709467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
425809467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
425909467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
426009467b48Spatrick   case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
426109467b48Spatrick   case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
426209467b48Spatrick   case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
426309467b48Spatrick   case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
426409467b48Spatrick   case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
426509467b48Spatrick   case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
426609467b48Spatrick   case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
426709467b48Spatrick   case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
426809467b48Spatrick   case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
426909467b48Spatrick     Info.opc = getOpcForTextureInstr(Intrinsic);
427009467b48Spatrick     Info.memVT = MVT::v4i32;
427109467b48Spatrick     Info.ptrVal = nullptr;
427209467b48Spatrick     Info.offset = 0;
427309467b48Spatrick     Info.flags = MachineMemOperand::MOLoad;
427409467b48Spatrick     Info.align = Align(16);
427509467b48Spatrick     return true;
427609467b48Spatrick 
427709467b48Spatrick   case Intrinsic::nvvm_suld_1d_i8_clamp:
427809467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i8_clamp:
427909467b48Spatrick   case Intrinsic::nvvm_suld_1d_v4i8_clamp:
428009467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i8_clamp:
428109467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
428209467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
428309467b48Spatrick   case Intrinsic::nvvm_suld_2d_i8_clamp:
428409467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i8_clamp:
428509467b48Spatrick   case Intrinsic::nvvm_suld_2d_v4i8_clamp:
428609467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i8_clamp:
428709467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
428809467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
428909467b48Spatrick   case Intrinsic::nvvm_suld_3d_i8_clamp:
429009467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i8_clamp:
429109467b48Spatrick   case Intrinsic::nvvm_suld_3d_v4i8_clamp:
429209467b48Spatrick   case Intrinsic::nvvm_suld_1d_i8_trap:
429309467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i8_trap:
429409467b48Spatrick   case Intrinsic::nvvm_suld_1d_v4i8_trap:
429509467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i8_trap:
429609467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
429709467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
429809467b48Spatrick   case Intrinsic::nvvm_suld_2d_i8_trap:
429909467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i8_trap:
430009467b48Spatrick   case Intrinsic::nvvm_suld_2d_v4i8_trap:
430109467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i8_trap:
430209467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
430309467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
430409467b48Spatrick   case Intrinsic::nvvm_suld_3d_i8_trap:
430509467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i8_trap:
430609467b48Spatrick   case Intrinsic::nvvm_suld_3d_v4i8_trap:
430709467b48Spatrick   case Intrinsic::nvvm_suld_1d_i8_zero:
430809467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i8_zero:
430909467b48Spatrick   case Intrinsic::nvvm_suld_1d_v4i8_zero:
431009467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i8_zero:
431109467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
431209467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
431309467b48Spatrick   case Intrinsic::nvvm_suld_2d_i8_zero:
431409467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i8_zero:
431509467b48Spatrick   case Intrinsic::nvvm_suld_2d_v4i8_zero:
431609467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i8_zero:
431709467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
431809467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
431909467b48Spatrick   case Intrinsic::nvvm_suld_3d_i8_zero:
432009467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i8_zero:
432109467b48Spatrick   case Intrinsic::nvvm_suld_3d_v4i8_zero:
432209467b48Spatrick     Info.opc = getOpcForSurfaceInstr(Intrinsic);
432309467b48Spatrick     Info.memVT = MVT::i8;
432409467b48Spatrick     Info.ptrVal = nullptr;
432509467b48Spatrick     Info.offset = 0;
432609467b48Spatrick     Info.flags = MachineMemOperand::MOLoad;
432709467b48Spatrick     Info.align = Align(16);
432809467b48Spatrick     return true;
432909467b48Spatrick 
433009467b48Spatrick   case Intrinsic::nvvm_suld_1d_i16_clamp:
433109467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i16_clamp:
433209467b48Spatrick   case Intrinsic::nvvm_suld_1d_v4i16_clamp:
433309467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i16_clamp:
433409467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
433509467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
433609467b48Spatrick   case Intrinsic::nvvm_suld_2d_i16_clamp:
433709467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i16_clamp:
433809467b48Spatrick   case Intrinsic::nvvm_suld_2d_v4i16_clamp:
433909467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i16_clamp:
434009467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
434109467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
434209467b48Spatrick   case Intrinsic::nvvm_suld_3d_i16_clamp:
434309467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i16_clamp:
434409467b48Spatrick   case Intrinsic::nvvm_suld_3d_v4i16_clamp:
434509467b48Spatrick   case Intrinsic::nvvm_suld_1d_i16_trap:
434609467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i16_trap:
434709467b48Spatrick   case Intrinsic::nvvm_suld_1d_v4i16_trap:
434809467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i16_trap:
434909467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
435009467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
435109467b48Spatrick   case Intrinsic::nvvm_suld_2d_i16_trap:
435209467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i16_trap:
435309467b48Spatrick   case Intrinsic::nvvm_suld_2d_v4i16_trap:
435409467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i16_trap:
435509467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
435609467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
435709467b48Spatrick   case Intrinsic::nvvm_suld_3d_i16_trap:
435809467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i16_trap:
435909467b48Spatrick   case Intrinsic::nvvm_suld_3d_v4i16_trap:
436009467b48Spatrick   case Intrinsic::nvvm_suld_1d_i16_zero:
436109467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i16_zero:
436209467b48Spatrick   case Intrinsic::nvvm_suld_1d_v4i16_zero:
436309467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i16_zero:
436409467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
436509467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
436609467b48Spatrick   case Intrinsic::nvvm_suld_2d_i16_zero:
436709467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i16_zero:
436809467b48Spatrick   case Intrinsic::nvvm_suld_2d_v4i16_zero:
436909467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i16_zero:
437009467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
437109467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
437209467b48Spatrick   case Intrinsic::nvvm_suld_3d_i16_zero:
437309467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i16_zero:
437409467b48Spatrick   case Intrinsic::nvvm_suld_3d_v4i16_zero:
437509467b48Spatrick     Info.opc = getOpcForSurfaceInstr(Intrinsic);
437609467b48Spatrick     Info.memVT = MVT::i16;
437709467b48Spatrick     Info.ptrVal = nullptr;
437809467b48Spatrick     Info.offset = 0;
437909467b48Spatrick     Info.flags = MachineMemOperand::MOLoad;
438009467b48Spatrick     Info.align = Align(16);
438109467b48Spatrick     return true;
438209467b48Spatrick 
438309467b48Spatrick   case Intrinsic::nvvm_suld_1d_i32_clamp:
438409467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i32_clamp:
438509467b48Spatrick   case Intrinsic::nvvm_suld_1d_v4i32_clamp:
438609467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i32_clamp:
438709467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
438809467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
438909467b48Spatrick   case Intrinsic::nvvm_suld_2d_i32_clamp:
439009467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i32_clamp:
439109467b48Spatrick   case Intrinsic::nvvm_suld_2d_v4i32_clamp:
439209467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i32_clamp:
439309467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
439409467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
439509467b48Spatrick   case Intrinsic::nvvm_suld_3d_i32_clamp:
439609467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i32_clamp:
439709467b48Spatrick   case Intrinsic::nvvm_suld_3d_v4i32_clamp:
439809467b48Spatrick   case Intrinsic::nvvm_suld_1d_i32_trap:
439909467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i32_trap:
440009467b48Spatrick   case Intrinsic::nvvm_suld_1d_v4i32_trap:
440109467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i32_trap:
440209467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
440309467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
440409467b48Spatrick   case Intrinsic::nvvm_suld_2d_i32_trap:
440509467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i32_trap:
440609467b48Spatrick   case Intrinsic::nvvm_suld_2d_v4i32_trap:
440709467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i32_trap:
440809467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
440909467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
441009467b48Spatrick   case Intrinsic::nvvm_suld_3d_i32_trap:
441109467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i32_trap:
441209467b48Spatrick   case Intrinsic::nvvm_suld_3d_v4i32_trap:
441309467b48Spatrick   case Intrinsic::nvvm_suld_1d_i32_zero:
441409467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i32_zero:
441509467b48Spatrick   case Intrinsic::nvvm_suld_1d_v4i32_zero:
441609467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i32_zero:
441709467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
441809467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
441909467b48Spatrick   case Intrinsic::nvvm_suld_2d_i32_zero:
442009467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i32_zero:
442109467b48Spatrick   case Intrinsic::nvvm_suld_2d_v4i32_zero:
442209467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i32_zero:
442309467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
442409467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
442509467b48Spatrick   case Intrinsic::nvvm_suld_3d_i32_zero:
442609467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i32_zero:
442709467b48Spatrick   case Intrinsic::nvvm_suld_3d_v4i32_zero:
442809467b48Spatrick     Info.opc = getOpcForSurfaceInstr(Intrinsic);
442909467b48Spatrick     Info.memVT = MVT::i32;
443009467b48Spatrick     Info.ptrVal = nullptr;
443109467b48Spatrick     Info.offset = 0;
443209467b48Spatrick     Info.flags = MachineMemOperand::MOLoad;
443309467b48Spatrick     Info.align = Align(16);
443409467b48Spatrick     return true;
443509467b48Spatrick 
443609467b48Spatrick   case Intrinsic::nvvm_suld_1d_i64_clamp:
443709467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i64_clamp:
443809467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i64_clamp:
443909467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
444009467b48Spatrick   case Intrinsic::nvvm_suld_2d_i64_clamp:
444109467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i64_clamp:
444209467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i64_clamp:
444309467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
444409467b48Spatrick   case Intrinsic::nvvm_suld_3d_i64_clamp:
444509467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i64_clamp:
444609467b48Spatrick   case Intrinsic::nvvm_suld_1d_i64_trap:
444709467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i64_trap:
444809467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i64_trap:
444909467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
445009467b48Spatrick   case Intrinsic::nvvm_suld_2d_i64_trap:
445109467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i64_trap:
445209467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i64_trap:
445309467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
445409467b48Spatrick   case Intrinsic::nvvm_suld_3d_i64_trap:
445509467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i64_trap:
445609467b48Spatrick   case Intrinsic::nvvm_suld_1d_i64_zero:
445709467b48Spatrick   case Intrinsic::nvvm_suld_1d_v2i64_zero:
445809467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_i64_zero:
445909467b48Spatrick   case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
446009467b48Spatrick   case Intrinsic::nvvm_suld_2d_i64_zero:
446109467b48Spatrick   case Intrinsic::nvvm_suld_2d_v2i64_zero:
446209467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_i64_zero:
446309467b48Spatrick   case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
446409467b48Spatrick   case Intrinsic::nvvm_suld_3d_i64_zero:
446509467b48Spatrick   case Intrinsic::nvvm_suld_3d_v2i64_zero:
446609467b48Spatrick     Info.opc = getOpcForSurfaceInstr(Intrinsic);
446709467b48Spatrick     Info.memVT = MVT::i64;
446809467b48Spatrick     Info.ptrVal = nullptr;
446909467b48Spatrick     Info.offset = 0;
447009467b48Spatrick     Info.flags = MachineMemOperand::MOLoad;
447109467b48Spatrick     Info.align = Align(16);
447209467b48Spatrick     return true;
447309467b48Spatrick   }
447409467b48Spatrick   return false;
447509467b48Spatrick }
447609467b48Spatrick 
4477*d415bd75Srobert /// getFunctionParamOptimizedAlign - since function arguments are passed via
4478*d415bd75Srobert /// .param space, we may want to increase their alignment in a way that
4479*d415bd75Srobert /// ensures that we can effectively vectorize their loads & stores. We can
4480*d415bd75Srobert /// increase alignment only if the function has internal or has private
4481*d415bd75Srobert /// linkage as for other linkage types callers may already rely on default
4482*d415bd75Srobert /// alignment. To allow using 128-bit vectorized loads/stores, this function
4483*d415bd75Srobert /// ensures that alignment is 16 or greater.
getFunctionParamOptimizedAlign(const Function * F,Type * ArgTy,const DataLayout & DL) const4484*d415bd75Srobert Align NVPTXTargetLowering::getFunctionParamOptimizedAlign(
4485*d415bd75Srobert     const Function *F, Type *ArgTy, const DataLayout &DL) const {
4486*d415bd75Srobert   const uint64_t ABITypeAlign = DL.getABITypeAlign(ArgTy).value();
4487*d415bd75Srobert 
4488*d415bd75Srobert   // If a function has linkage different from internal or private, we
4489*d415bd75Srobert   // must use default ABI alignment as external users rely on it. Same
4490*d415bd75Srobert   // for a function that may be called from a function pointer.
4491*d415bd75Srobert   if (!F || !F->hasLocalLinkage() ||
4492*d415bd75Srobert       F->hasAddressTaken(/*Users=*/nullptr,
4493*d415bd75Srobert                          /*IgnoreCallbackUses=*/false,
4494*d415bd75Srobert                          /*IgnoreAssumeLikeCalls=*/true,
4495*d415bd75Srobert                          /*IgnoreLLVMUsed=*/true))
4496*d415bd75Srobert     return Align(ABITypeAlign);
4497*d415bd75Srobert 
4498*d415bd75Srobert   assert(!isKernelFunction(*F) && "Expect kernels to have non-local linkage");
4499*d415bd75Srobert   return Align(std::max(uint64_t(16), ABITypeAlign));
4500*d415bd75Srobert }
4501*d415bd75Srobert 
4502*d415bd75Srobert /// Helper for computing alignment of a device function byval parameter.
getFunctionByValParamAlign(const Function * F,Type * ArgTy,Align InitialAlign,const DataLayout & DL) const4503*d415bd75Srobert Align NVPTXTargetLowering::getFunctionByValParamAlign(
4504*d415bd75Srobert     const Function *F, Type *ArgTy, Align InitialAlign,
4505*d415bd75Srobert     const DataLayout &DL) const {
4506*d415bd75Srobert   Align ArgAlign = InitialAlign;
4507*d415bd75Srobert   // Try to increase alignment to enhance vectorization options.
4508*d415bd75Srobert   if (F)
4509*d415bd75Srobert     ArgAlign = std::max(ArgAlign, getFunctionParamOptimizedAlign(F, ArgTy, DL));
4510*d415bd75Srobert 
4511*d415bd75Srobert   // Work around a bug in ptxas. When PTX code takes address of
4512*d415bd75Srobert   // byval parameter with alignment < 4, ptxas generates code to
4513*d415bd75Srobert   // spill argument into memory. Alas on sm_50+ ptxas generates
4514*d415bd75Srobert   // SASS code that fails with misaligned access. To work around
4515*d415bd75Srobert   // the problem, make sure that we align byval parameters by at
4516*d415bd75Srobert   // least 4.
4517*d415bd75Srobert   // TODO: this will need to be undone when we get to support multi-TU
4518*d415bd75Srobert   // device-side compilation as it breaks ABI compatibility with nvcc.
4519*d415bd75Srobert   // Hopefully ptxas bug is fixed by then.
4520*d415bd75Srobert   ArgAlign = std::max(ArgAlign, Align(4));
4521*d415bd75Srobert 
4522*d415bd75Srobert   return ArgAlign;
4523*d415bd75Srobert }
4524*d415bd75Srobert 
452509467b48Spatrick /// isLegalAddressingMode - Return true if the addressing mode represented
452609467b48Spatrick /// by AM is legal for this target, for a load/store of the specified type.
452709467b48Spatrick /// Used to guide target specific optimizations, like loop strength reduction
452809467b48Spatrick /// (LoopStrengthReduce.cpp) and memory optimization for address mode
452909467b48Spatrick /// (CodeGenPrepare.cpp)
isLegalAddressingMode(const DataLayout & DL,const AddrMode & AM,Type * Ty,unsigned AS,Instruction * I) const453009467b48Spatrick bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
453109467b48Spatrick                                                 const AddrMode &AM, Type *Ty,
453209467b48Spatrick                                                 unsigned AS, Instruction *I) const {
453309467b48Spatrick   // AddrMode - This represents an addressing mode of:
453409467b48Spatrick   //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
453509467b48Spatrick   //
453609467b48Spatrick   // The legal address modes are
453709467b48Spatrick   // - [avar]
453809467b48Spatrick   // - [areg]
453909467b48Spatrick   // - [areg+immoff]
454009467b48Spatrick   // - [immAddr]
454109467b48Spatrick 
454209467b48Spatrick   if (AM.BaseGV) {
454309467b48Spatrick     return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale;
454409467b48Spatrick   }
454509467b48Spatrick 
454609467b48Spatrick   switch (AM.Scale) {
454709467b48Spatrick   case 0: // "r", "r+i" or "i" is allowed
454809467b48Spatrick     break;
454909467b48Spatrick   case 1:
455009467b48Spatrick     if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
455109467b48Spatrick       return false;
455209467b48Spatrick     // Otherwise we have r+i.
455309467b48Spatrick     break;
455409467b48Spatrick   default:
455509467b48Spatrick     // No scale > 1 is allowed
455609467b48Spatrick     return false;
455709467b48Spatrick   }
455809467b48Spatrick   return true;
455909467b48Spatrick }
456009467b48Spatrick 
456109467b48Spatrick //===----------------------------------------------------------------------===//
456209467b48Spatrick //                         NVPTX Inline Assembly Support
456309467b48Spatrick //===----------------------------------------------------------------------===//
456409467b48Spatrick 
456509467b48Spatrick /// getConstraintType - Given a constraint letter, return the type of
456609467b48Spatrick /// constraint it is for this target.
456709467b48Spatrick NVPTXTargetLowering::ConstraintType
getConstraintType(StringRef Constraint) const456809467b48Spatrick NVPTXTargetLowering::getConstraintType(StringRef Constraint) const {
456909467b48Spatrick   if (Constraint.size() == 1) {
457009467b48Spatrick     switch (Constraint[0]) {
457109467b48Spatrick     default:
457209467b48Spatrick       break;
457309467b48Spatrick     case 'b':
457409467b48Spatrick     case 'r':
457509467b48Spatrick     case 'h':
457609467b48Spatrick     case 'c':
457709467b48Spatrick     case 'l':
457809467b48Spatrick     case 'f':
457909467b48Spatrick     case 'd':
458009467b48Spatrick     case '0':
458109467b48Spatrick     case 'N':
458209467b48Spatrick       return C_RegisterClass;
458309467b48Spatrick     }
458409467b48Spatrick   }
458509467b48Spatrick   return TargetLowering::getConstraintType(Constraint);
458609467b48Spatrick }
458709467b48Spatrick 
458809467b48Spatrick std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo * TRI,StringRef Constraint,MVT VT) const458909467b48Spatrick NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
459009467b48Spatrick                                                   StringRef Constraint,
459109467b48Spatrick                                                   MVT VT) const {
459209467b48Spatrick   if (Constraint.size() == 1) {
459309467b48Spatrick     switch (Constraint[0]) {
459409467b48Spatrick     case 'b':
459509467b48Spatrick       return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
459609467b48Spatrick     case 'c':
459709467b48Spatrick       return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
459809467b48Spatrick     case 'h':
459909467b48Spatrick       return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
460009467b48Spatrick     case 'r':
460109467b48Spatrick       return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
460209467b48Spatrick     case 'l':
460309467b48Spatrick     case 'N':
460409467b48Spatrick       return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
460509467b48Spatrick     case 'f':
460609467b48Spatrick       return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
460709467b48Spatrick     case 'd':
460809467b48Spatrick       return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
460909467b48Spatrick     }
461009467b48Spatrick   }
461109467b48Spatrick   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
461209467b48Spatrick }
461309467b48Spatrick 
461409467b48Spatrick //===----------------------------------------------------------------------===//
461509467b48Spatrick //                         NVPTX DAG Combining
461609467b48Spatrick //===----------------------------------------------------------------------===//
461709467b48Spatrick 
allowFMA(MachineFunction & MF,CodeGenOpt::Level OptLevel) const461809467b48Spatrick bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
461909467b48Spatrick                                    CodeGenOpt::Level OptLevel) const {
462009467b48Spatrick   // Always honor command-line argument
462109467b48Spatrick   if (FMAContractLevelOpt.getNumOccurrences() > 0)
462209467b48Spatrick     return FMAContractLevelOpt > 0;
462309467b48Spatrick 
462409467b48Spatrick   // Do not contract if we're not optimizing the code.
462509467b48Spatrick   if (OptLevel == 0)
462609467b48Spatrick     return false;
462709467b48Spatrick 
462809467b48Spatrick   // Honor TargetOptions flags that explicitly say fusion is okay.
462909467b48Spatrick   if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast)
463009467b48Spatrick     return true;
463109467b48Spatrick 
463209467b48Spatrick   return allowUnsafeFPMath(MF);
463309467b48Spatrick }
463409467b48Spatrick 
allowUnsafeFPMath(MachineFunction & MF) const463509467b48Spatrick bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const {
463609467b48Spatrick   // Honor TargetOptions flags that explicitly say unsafe math is okay.
463709467b48Spatrick   if (MF.getTarget().Options.UnsafeFPMath)
463809467b48Spatrick     return true;
463909467b48Spatrick 
464009467b48Spatrick   // Allow unsafe math if unsafe-fp-math attribute explicitly says so.
464109467b48Spatrick   const Function &F = MF.getFunction();
464273471bf0Spatrick   return F.getFnAttribute("unsafe-fp-math").getValueAsBool();
464309467b48Spatrick }
464409467b48Spatrick 
464509467b48Spatrick /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
464609467b48Spatrick /// operands N0 and N1.  This is a helper for PerformADDCombine that is
464709467b48Spatrick /// called with the default operands, and if that fails, with commuted
464809467b48Spatrick /// operands.
PerformADDCombineWithOperands(SDNode * N,SDValue N0,SDValue N1,TargetLowering::DAGCombinerInfo & DCI,const NVPTXSubtarget & Subtarget,CodeGenOpt::Level OptLevel)464909467b48Spatrick static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
465009467b48Spatrick                                            TargetLowering::DAGCombinerInfo &DCI,
465109467b48Spatrick                                              const NVPTXSubtarget &Subtarget,
465209467b48Spatrick                                              CodeGenOpt::Level OptLevel) {
465309467b48Spatrick   SelectionDAG  &DAG = DCI.DAG;
465409467b48Spatrick   // Skip non-integer, non-scalar case
465509467b48Spatrick   EVT VT=N0.getValueType();
465609467b48Spatrick   if (VT.isVector())
465709467b48Spatrick     return SDValue();
465809467b48Spatrick 
465909467b48Spatrick   // fold (add (mul a, b), c) -> (mad a, b, c)
466009467b48Spatrick   //
466109467b48Spatrick   if (N0.getOpcode() == ISD::MUL) {
466209467b48Spatrick     assert (VT.isInteger());
466309467b48Spatrick     // For integer:
466409467b48Spatrick     // Since integer multiply-add costs the same as integer multiply
466509467b48Spatrick     // but is more costly than integer add, do the fusion only when
466609467b48Spatrick     // the mul is only used in the add.
466709467b48Spatrick     if (OptLevel==CodeGenOpt::None || VT != MVT::i32 ||
466809467b48Spatrick         !N0.getNode()->hasOneUse())
466909467b48Spatrick       return SDValue();
467009467b48Spatrick 
467109467b48Spatrick     // Do the folding
467209467b48Spatrick     return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
467309467b48Spatrick                        N0.getOperand(0), N0.getOperand(1), N1);
467409467b48Spatrick   }
467509467b48Spatrick   else if (N0.getOpcode() == ISD::FMUL) {
467609467b48Spatrick     if (VT == MVT::f32 || VT == MVT::f64) {
467709467b48Spatrick       const auto *TLI = static_cast<const NVPTXTargetLowering *>(
467809467b48Spatrick           &DAG.getTargetLoweringInfo());
467909467b48Spatrick       if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
468009467b48Spatrick         return SDValue();
468109467b48Spatrick 
468209467b48Spatrick       // For floating point:
468309467b48Spatrick       // Do the fusion only when the mul has less than 5 uses and all
468409467b48Spatrick       // are add.
468509467b48Spatrick       // The heuristic is that if a use is not an add, then that use
468609467b48Spatrick       // cannot be fused into fma, therefore mul is still needed anyway.
468709467b48Spatrick       // If there are more than 4 uses, even if they are all add, fusing
468809467b48Spatrick       // them will increase register pressue.
468909467b48Spatrick       //
469009467b48Spatrick       int numUses = 0;
469109467b48Spatrick       int nonAddCount = 0;
4692*d415bd75Srobert       for (const SDNode *User : N0.getNode()->uses()) {
469309467b48Spatrick         numUses++;
469409467b48Spatrick         if (User->getOpcode() != ISD::FADD)
469509467b48Spatrick           ++nonAddCount;
469609467b48Spatrick       }
469709467b48Spatrick       if (numUses >= 5)
469809467b48Spatrick         return SDValue();
469909467b48Spatrick       if (nonAddCount) {
470009467b48Spatrick         int orderNo = N->getIROrder();
470109467b48Spatrick         int orderNo2 = N0.getNode()->getIROrder();
470209467b48Spatrick         // simple heuristics here for considering potential register
470309467b48Spatrick         // pressure, the logics here is that the differnce are used
470409467b48Spatrick         // to measure the distance between def and use, the longer distance
470509467b48Spatrick         // more likely cause register pressure.
470609467b48Spatrick         if (orderNo - orderNo2 < 500)
470709467b48Spatrick           return SDValue();
470809467b48Spatrick 
470909467b48Spatrick         // Now, check if at least one of the FMUL's operands is live beyond the node N,
471009467b48Spatrick         // which guarantees that the FMA will not increase register pressure at node N.
471109467b48Spatrick         bool opIsLive = false;
471209467b48Spatrick         const SDNode *left = N0.getOperand(0).getNode();
471309467b48Spatrick         const SDNode *right = N0.getOperand(1).getNode();
471409467b48Spatrick 
471509467b48Spatrick         if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
471609467b48Spatrick           opIsLive = true;
471709467b48Spatrick 
471809467b48Spatrick         if (!opIsLive)
4719*d415bd75Srobert           for (const SDNode *User : left->uses()) {
472009467b48Spatrick             int orderNo3 = User->getIROrder();
472109467b48Spatrick             if (orderNo3 > orderNo) {
472209467b48Spatrick               opIsLive = true;
472309467b48Spatrick               break;
472409467b48Spatrick             }
472509467b48Spatrick           }
472609467b48Spatrick 
472709467b48Spatrick         if (!opIsLive)
4728*d415bd75Srobert           for (const SDNode *User : right->uses()) {
472909467b48Spatrick             int orderNo3 = User->getIROrder();
473009467b48Spatrick             if (orderNo3 > orderNo) {
473109467b48Spatrick               opIsLive = true;
473209467b48Spatrick               break;
473309467b48Spatrick             }
473409467b48Spatrick           }
473509467b48Spatrick 
473609467b48Spatrick         if (!opIsLive)
473709467b48Spatrick           return SDValue();
473809467b48Spatrick       }
473909467b48Spatrick 
474009467b48Spatrick       return DAG.getNode(ISD::FMA, SDLoc(N), VT,
474109467b48Spatrick                          N0.getOperand(0), N0.getOperand(1), N1);
474209467b48Spatrick     }
474309467b48Spatrick   }
474409467b48Spatrick 
474509467b48Spatrick   return SDValue();
474609467b48Spatrick }
474709467b48Spatrick 
PerformStoreRetvalCombine(SDNode * N)4748*d415bd75Srobert static SDValue PerformStoreRetvalCombine(SDNode *N) {
4749*d415bd75Srobert   // Operands from the 2nd to the last one are the values to be stored
4750*d415bd75Srobert   for (std::size_t I = 2, OpsCount = N->ops().size(); I != OpsCount; ++I)
4751*d415bd75Srobert     if (!N->getOperand(I).isUndef())
4752*d415bd75Srobert       return SDValue();
4753*d415bd75Srobert 
4754*d415bd75Srobert   // Operand 0 is the previous value in the chain. Cannot return EntryToken
4755*d415bd75Srobert   // as the previous value will become unused and eliminated later.
4756*d415bd75Srobert   return N->getOperand(0);
4757*d415bd75Srobert }
4758*d415bd75Srobert 
475909467b48Spatrick /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
476009467b48Spatrick ///
PerformADDCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,const NVPTXSubtarget & Subtarget,CodeGenOpt::Level OptLevel)476109467b48Spatrick static SDValue PerformADDCombine(SDNode *N,
476209467b48Spatrick                                  TargetLowering::DAGCombinerInfo &DCI,
476309467b48Spatrick                                  const NVPTXSubtarget &Subtarget,
476409467b48Spatrick                                  CodeGenOpt::Level OptLevel) {
476509467b48Spatrick   SDValue N0 = N->getOperand(0);
476609467b48Spatrick   SDValue N1 = N->getOperand(1);
476709467b48Spatrick 
476809467b48Spatrick   // First try with the default operand order.
476909467b48Spatrick   if (SDValue Result =
477009467b48Spatrick           PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget, OptLevel))
477109467b48Spatrick     return Result;
477209467b48Spatrick 
477309467b48Spatrick   // If that didn't work, try again with the operands commuted.
477409467b48Spatrick   return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel);
477509467b48Spatrick }
477609467b48Spatrick 
PerformANDCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI)477709467b48Spatrick static SDValue PerformANDCombine(SDNode *N,
477809467b48Spatrick                                  TargetLowering::DAGCombinerInfo &DCI) {
477909467b48Spatrick   // The type legalizer turns a vector load of i8 values into a zextload to i16
478009467b48Spatrick   // registers, optionally ANY_EXTENDs it (if target type is integer),
478109467b48Spatrick   // and ANDs off the high 8 bits. Since we turn this load into a
478209467b48Spatrick   // target-specific DAG node, the DAG combiner fails to eliminate these AND
478309467b48Spatrick   // nodes. Do that here.
478409467b48Spatrick   SDValue Val = N->getOperand(0);
478509467b48Spatrick   SDValue Mask = N->getOperand(1);
478609467b48Spatrick 
478709467b48Spatrick   if (isa<ConstantSDNode>(Val)) {
478809467b48Spatrick     std::swap(Val, Mask);
478909467b48Spatrick   }
479009467b48Spatrick 
479109467b48Spatrick   SDValue AExt;
479209467b48Spatrick   // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
479309467b48Spatrick   if (Val.getOpcode() == ISD::ANY_EXTEND) {
479409467b48Spatrick     AExt = Val;
479509467b48Spatrick     Val = Val->getOperand(0);
479609467b48Spatrick   }
479709467b48Spatrick 
479809467b48Spatrick   if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) {
479909467b48Spatrick     Val = Val->getOperand(0);
480009467b48Spatrick   }
480109467b48Spatrick 
480209467b48Spatrick   if (Val->getOpcode() == NVPTXISD::LoadV2 ||
480309467b48Spatrick       Val->getOpcode() == NVPTXISD::LoadV4) {
480409467b48Spatrick     ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
480509467b48Spatrick     if (!MaskCnst) {
480609467b48Spatrick       // Not an AND with a constant
480709467b48Spatrick       return SDValue();
480809467b48Spatrick     }
480909467b48Spatrick 
481009467b48Spatrick     uint64_t MaskVal = MaskCnst->getZExtValue();
481109467b48Spatrick     if (MaskVal != 0xff) {
481209467b48Spatrick       // Not an AND that chops off top 8 bits
481309467b48Spatrick       return SDValue();
481409467b48Spatrick     }
481509467b48Spatrick 
481609467b48Spatrick     MemSDNode *Mem = dyn_cast<MemSDNode>(Val);
481709467b48Spatrick     if (!Mem) {
481809467b48Spatrick       // Not a MemSDNode?!?
481909467b48Spatrick       return SDValue();
482009467b48Spatrick     }
482109467b48Spatrick 
482209467b48Spatrick     EVT MemVT = Mem->getMemoryVT();
482309467b48Spatrick     if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) {
482409467b48Spatrick       // We only handle the i8 case
482509467b48Spatrick       return SDValue();
482609467b48Spatrick     }
482709467b48Spatrick 
482809467b48Spatrick     unsigned ExtType =
482909467b48Spatrick       cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))->
483009467b48Spatrick         getZExtValue();
483109467b48Spatrick     if (ExtType == ISD::SEXTLOAD) {
483209467b48Spatrick       // If for some reason the load is a sextload, the and is needed to zero
483309467b48Spatrick       // out the high 8 bits
483409467b48Spatrick       return SDValue();
483509467b48Spatrick     }
483609467b48Spatrick 
483709467b48Spatrick     bool AddTo = false;
483809467b48Spatrick     if (AExt.getNode() != nullptr) {
483909467b48Spatrick       // Re-insert the ext as a zext.
484009467b48Spatrick       Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
484109467b48Spatrick                             AExt.getValueType(), Val);
484209467b48Spatrick       AddTo = true;
484309467b48Spatrick     }
484409467b48Spatrick 
484509467b48Spatrick     // If we get here, the AND is unnecessary.  Just replace it with the load
484609467b48Spatrick     DCI.CombineTo(N, Val, AddTo);
484709467b48Spatrick   }
484809467b48Spatrick 
484909467b48Spatrick   return SDValue();
485009467b48Spatrick }
485109467b48Spatrick 
PerformREMCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,CodeGenOpt::Level OptLevel)485209467b48Spatrick static SDValue PerformREMCombine(SDNode *N,
485309467b48Spatrick                                  TargetLowering::DAGCombinerInfo &DCI,
485409467b48Spatrick                                  CodeGenOpt::Level OptLevel) {
485509467b48Spatrick   assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM);
485609467b48Spatrick 
485709467b48Spatrick   // Don't do anything at less than -O2.
485809467b48Spatrick   if (OptLevel < CodeGenOpt::Default)
485909467b48Spatrick     return SDValue();
486009467b48Spatrick 
486109467b48Spatrick   SelectionDAG &DAG = DCI.DAG;
486209467b48Spatrick   SDLoc DL(N);
486309467b48Spatrick   EVT VT = N->getValueType(0);
486409467b48Spatrick   bool IsSigned = N->getOpcode() == ISD::SREM;
486509467b48Spatrick   unsigned DivOpc = IsSigned ? ISD::SDIV : ISD::UDIV;
486609467b48Spatrick 
486709467b48Spatrick   const SDValue &Num = N->getOperand(0);
486809467b48Spatrick   const SDValue &Den = N->getOperand(1);
486909467b48Spatrick 
487009467b48Spatrick   for (const SDNode *U : Num->uses()) {
487109467b48Spatrick     if (U->getOpcode() == DivOpc && U->getOperand(0) == Num &&
487209467b48Spatrick         U->getOperand(1) == Den) {
487309467b48Spatrick       // Num % Den -> Num - (Num / Den) * Den
487409467b48Spatrick       return DAG.getNode(ISD::SUB, DL, VT, Num,
487509467b48Spatrick                          DAG.getNode(ISD::MUL, DL, VT,
487609467b48Spatrick                                      DAG.getNode(DivOpc, DL, VT, Num, Den),
487709467b48Spatrick                                      Den));
487809467b48Spatrick     }
487909467b48Spatrick   }
488009467b48Spatrick   return SDValue();
488109467b48Spatrick }
488209467b48Spatrick 
/// Signedness classification of a candidate operand.
enum OperandSignedness {
  Signed = 0,   // operand is known to be signed
  Unsigned = 1, // operand is known to be unsigned
  Unknown = 2   // signedness could not be determined
};
488809467b48Spatrick 
488909467b48Spatrick /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
489009467b48Spatrick /// that can be demoted to \p OptSize bits without loss of information. The
489109467b48Spatrick /// signedness of the operand, if determinable, is placed in \p S.
IsMulWideOperandDemotable(SDValue Op,unsigned OptSize,OperandSignedness & S)489209467b48Spatrick static bool IsMulWideOperandDemotable(SDValue Op,
489309467b48Spatrick                                       unsigned OptSize,
489409467b48Spatrick                                       OperandSignedness &S) {
489509467b48Spatrick   S = Unknown;
489609467b48Spatrick 
489709467b48Spatrick   if (Op.getOpcode() == ISD::SIGN_EXTEND ||
489809467b48Spatrick       Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
489909467b48Spatrick     EVT OrigVT = Op.getOperand(0).getValueType();
490073471bf0Spatrick     if (OrigVT.getFixedSizeInBits() <= OptSize) {
490109467b48Spatrick       S = Signed;
490209467b48Spatrick       return true;
490309467b48Spatrick     }
490409467b48Spatrick   } else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
490509467b48Spatrick     EVT OrigVT = Op.getOperand(0).getValueType();
490673471bf0Spatrick     if (OrigVT.getFixedSizeInBits() <= OptSize) {
490709467b48Spatrick       S = Unsigned;
490809467b48Spatrick       return true;
490909467b48Spatrick     }
491009467b48Spatrick   }
491109467b48Spatrick 
491209467b48Spatrick   return false;
491309467b48Spatrick }
491409467b48Spatrick 
491509467b48Spatrick /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
491609467b48Spatrick /// be demoted to \p OptSize bits without loss of information. If the operands
491709467b48Spatrick /// contain a constant, it should appear as the RHS operand. The signedness of
491809467b48Spatrick /// the operands is placed in \p IsSigned.
AreMulWideOperandsDemotable(SDValue LHS,SDValue RHS,unsigned OptSize,bool & IsSigned)491909467b48Spatrick static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
492009467b48Spatrick                                         unsigned OptSize,
492109467b48Spatrick                                         bool &IsSigned) {
492209467b48Spatrick   OperandSignedness LHSSign;
492309467b48Spatrick 
492409467b48Spatrick   // The LHS operand must be a demotable op
492509467b48Spatrick   if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign))
492609467b48Spatrick     return false;
492709467b48Spatrick 
492809467b48Spatrick   // We should have been able to determine the signedness from the LHS
492909467b48Spatrick   if (LHSSign == Unknown)
493009467b48Spatrick     return false;
493109467b48Spatrick 
493209467b48Spatrick   IsSigned = (LHSSign == Signed);
493309467b48Spatrick 
493409467b48Spatrick   // The RHS can be a demotable op or a constant
493509467b48Spatrick   if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
493609467b48Spatrick     const APInt &Val = CI->getAPIntValue();
493709467b48Spatrick     if (LHSSign == Unsigned) {
493809467b48Spatrick       return Val.isIntN(OptSize);
493909467b48Spatrick     } else {
494009467b48Spatrick       return Val.isSignedIntN(OptSize);
494109467b48Spatrick     }
494209467b48Spatrick   } else {
494309467b48Spatrick     OperandSignedness RHSSign;
494409467b48Spatrick     if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
494509467b48Spatrick       return false;
494609467b48Spatrick 
494709467b48Spatrick     return LHSSign == RHSSign;
494809467b48Spatrick   }
494909467b48Spatrick }
495009467b48Spatrick 
495109467b48Spatrick /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
495209467b48Spatrick /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
495309467b48Spatrick /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift
495409467b48Spatrick /// amount.
TryMULWIDECombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI)495509467b48Spatrick static SDValue TryMULWIDECombine(SDNode *N,
495609467b48Spatrick                                  TargetLowering::DAGCombinerInfo &DCI) {
495709467b48Spatrick   EVT MulType = N->getValueType(0);
495809467b48Spatrick   if (MulType != MVT::i32 && MulType != MVT::i64) {
495909467b48Spatrick     return SDValue();
496009467b48Spatrick   }
496109467b48Spatrick 
496209467b48Spatrick   SDLoc DL(N);
496309467b48Spatrick   unsigned OptSize = MulType.getSizeInBits() >> 1;
496409467b48Spatrick   SDValue LHS = N->getOperand(0);
496509467b48Spatrick   SDValue RHS = N->getOperand(1);
496609467b48Spatrick 
496709467b48Spatrick   // Canonicalize the multiply so the constant (if any) is on the right
496809467b48Spatrick   if (N->getOpcode() == ISD::MUL) {
496909467b48Spatrick     if (isa<ConstantSDNode>(LHS)) {
497009467b48Spatrick       std::swap(LHS, RHS);
497109467b48Spatrick     }
497209467b48Spatrick   }
497309467b48Spatrick 
497409467b48Spatrick   // If we have a SHL, determine the actual multiply amount
497509467b48Spatrick   if (N->getOpcode() == ISD::SHL) {
497609467b48Spatrick     ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS);
497709467b48Spatrick     if (!ShlRHS) {
497809467b48Spatrick       return SDValue();
497909467b48Spatrick     }
498009467b48Spatrick 
498109467b48Spatrick     APInt ShiftAmt = ShlRHS->getAPIntValue();
498209467b48Spatrick     unsigned BitWidth = MulType.getSizeInBits();
498309467b48Spatrick     if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) {
498409467b48Spatrick       APInt MulVal = APInt(BitWidth, 1) << ShiftAmt;
498509467b48Spatrick       RHS = DCI.DAG.getConstant(MulVal, DL, MulType);
498609467b48Spatrick     } else {
498709467b48Spatrick       return SDValue();
498809467b48Spatrick     }
498909467b48Spatrick   }
499009467b48Spatrick 
499109467b48Spatrick   bool Signed;
499209467b48Spatrick   // Verify that our operands are demotable
499309467b48Spatrick   if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) {
499409467b48Spatrick     return SDValue();
499509467b48Spatrick   }
499609467b48Spatrick 
499709467b48Spatrick   EVT DemotedVT;
499809467b48Spatrick   if (MulType == MVT::i32) {
499909467b48Spatrick     DemotedVT = MVT::i16;
500009467b48Spatrick   } else {
500109467b48Spatrick     DemotedVT = MVT::i32;
500209467b48Spatrick   }
500309467b48Spatrick 
500409467b48Spatrick   // Truncate the operands to the correct size. Note that these are just for
500509467b48Spatrick   // type consistency and will (likely) be eliminated in later phases.
500609467b48Spatrick   SDValue TruncLHS =
500709467b48Spatrick     DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS);
500809467b48Spatrick   SDValue TruncRHS =
500909467b48Spatrick     DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS);
501009467b48Spatrick 
501109467b48Spatrick   unsigned Opc;
501209467b48Spatrick   if (Signed) {
501309467b48Spatrick     Opc = NVPTXISD::MUL_WIDE_SIGNED;
501409467b48Spatrick   } else {
501509467b48Spatrick     Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
501609467b48Spatrick   }
501709467b48Spatrick 
501809467b48Spatrick   return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS);
501909467b48Spatrick }
502009467b48Spatrick 
502109467b48Spatrick /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
PerformMULCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,CodeGenOpt::Level OptLevel)502209467b48Spatrick static SDValue PerformMULCombine(SDNode *N,
502309467b48Spatrick                                  TargetLowering::DAGCombinerInfo &DCI,
502409467b48Spatrick                                  CodeGenOpt::Level OptLevel) {
502509467b48Spatrick   if (OptLevel > 0) {
502609467b48Spatrick     // Try mul.wide combining at OptLevel > 0
502709467b48Spatrick     if (SDValue Ret = TryMULWIDECombine(N, DCI))
502809467b48Spatrick       return Ret;
502909467b48Spatrick   }
503009467b48Spatrick 
503109467b48Spatrick   return SDValue();
503209467b48Spatrick }
503309467b48Spatrick 
503409467b48Spatrick /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
PerformSHLCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,CodeGenOpt::Level OptLevel)503509467b48Spatrick static SDValue PerformSHLCombine(SDNode *N,
503609467b48Spatrick                                  TargetLowering::DAGCombinerInfo &DCI,
503709467b48Spatrick                                  CodeGenOpt::Level OptLevel) {
503809467b48Spatrick   if (OptLevel > 0) {
503909467b48Spatrick     // Try mul.wide combining at OptLevel > 0
504009467b48Spatrick     if (SDValue Ret = TryMULWIDECombine(N, DCI))
504109467b48Spatrick       return Ret;
504209467b48Spatrick   }
504309467b48Spatrick 
504409467b48Spatrick   return SDValue();
504509467b48Spatrick }
504609467b48Spatrick 
PerformSETCCCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI)504709467b48Spatrick static SDValue PerformSETCCCombine(SDNode *N,
504809467b48Spatrick                                    TargetLowering::DAGCombinerInfo &DCI) {
504909467b48Spatrick   EVT CCType = N->getValueType(0);
505009467b48Spatrick   SDValue A = N->getOperand(0);
505109467b48Spatrick   SDValue B = N->getOperand(1);
505209467b48Spatrick 
505309467b48Spatrick   if (CCType != MVT::v2i1 || A.getValueType() != MVT::v2f16)
505409467b48Spatrick     return SDValue();
505509467b48Spatrick 
505609467b48Spatrick   SDLoc DL(N);
505709467b48Spatrick   // setp.f16x2 returns two scalar predicates, which we need to
505809467b48Spatrick   // convert back to v2i1. The returned result will be scalarized by
505909467b48Spatrick   // the legalizer, but the comparison will remain a single vector
506009467b48Spatrick   // instruction.
506109467b48Spatrick   SDValue CCNode = DCI.DAG.getNode(NVPTXISD::SETP_F16X2, DL,
506209467b48Spatrick                                    DCI.DAG.getVTList(MVT::i1, MVT::i1),
506309467b48Spatrick                                    {A, B, N->getOperand(2)});
506409467b48Spatrick   return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, CCType, CCNode.getValue(0),
506509467b48Spatrick                          CCNode.getValue(1));
506609467b48Spatrick }
506709467b48Spatrick 
PerformDAGCombine(SDNode * N,DAGCombinerInfo & DCI) const506809467b48Spatrick SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
506909467b48Spatrick                                                DAGCombinerInfo &DCI) const {
507009467b48Spatrick   CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
507109467b48Spatrick   switch (N->getOpcode()) {
507209467b48Spatrick     default: break;
507309467b48Spatrick     case ISD::ADD:
507409467b48Spatrick     case ISD::FADD:
507509467b48Spatrick       return PerformADDCombine(N, DCI, STI, OptLevel);
507609467b48Spatrick     case ISD::MUL:
507709467b48Spatrick       return PerformMULCombine(N, DCI, OptLevel);
507809467b48Spatrick     case ISD::SHL:
507909467b48Spatrick       return PerformSHLCombine(N, DCI, OptLevel);
508009467b48Spatrick     case ISD::AND:
508109467b48Spatrick       return PerformANDCombine(N, DCI);
508209467b48Spatrick     case ISD::UREM:
508309467b48Spatrick     case ISD::SREM:
508409467b48Spatrick       return PerformREMCombine(N, DCI, OptLevel);
508509467b48Spatrick     case ISD::SETCC:
508609467b48Spatrick       return PerformSETCCCombine(N, DCI);
5087*d415bd75Srobert     case NVPTXISD::StoreRetval:
5088*d415bd75Srobert     case NVPTXISD::StoreRetvalV2:
5089*d415bd75Srobert     case NVPTXISD::StoreRetvalV4:
5090*d415bd75Srobert       return PerformStoreRetvalCombine(N);
509109467b48Spatrick   }
509209467b48Spatrick   return SDValue();
509309467b48Spatrick }
509409467b48Spatrick 
/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
ReplaceLoadVector(SDNode * N,SelectionDAG & DAG,SmallVectorImpl<SDValue> & Results)509609467b48Spatrick static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
509709467b48Spatrick                               SmallVectorImpl<SDValue> &Results) {
509809467b48Spatrick   EVT ResVT = N->getValueType(0);
509909467b48Spatrick   SDLoc DL(N);
510009467b48Spatrick 
510109467b48Spatrick   assert(ResVT.isVector() && "Vector load must have vector type");
510209467b48Spatrick 
510309467b48Spatrick   // We only handle "native" vector sizes for now, e.g. <4 x double> is not
510409467b48Spatrick   // legal.  We can (and should) split that into 2 loads of <2 x double> here
510509467b48Spatrick   // but I'm leaving that as a TODO for now.
510609467b48Spatrick   assert(ResVT.isSimple() && "Can only handle simple types");
510709467b48Spatrick   switch (ResVT.getSimpleVT().SimpleTy) {
510809467b48Spatrick   default:
510909467b48Spatrick     return;
511009467b48Spatrick   case MVT::v2i8:
511109467b48Spatrick   case MVT::v2i16:
511209467b48Spatrick   case MVT::v2i32:
511309467b48Spatrick   case MVT::v2i64:
511409467b48Spatrick   case MVT::v2f16:
511509467b48Spatrick   case MVT::v2f32:
511609467b48Spatrick   case MVT::v2f64:
511709467b48Spatrick   case MVT::v4i8:
511809467b48Spatrick   case MVT::v4i16:
511909467b48Spatrick   case MVT::v4i32:
512009467b48Spatrick   case MVT::v4f16:
512109467b48Spatrick   case MVT::v4f32:
512209467b48Spatrick   case MVT::v8f16: // <4 x f16x2>
512309467b48Spatrick     // This is a "native" vector type
512409467b48Spatrick     break;
512509467b48Spatrick   }
512609467b48Spatrick 
512709467b48Spatrick   LoadSDNode *LD = cast<LoadSDNode>(N);
512809467b48Spatrick 
5129097a140dSpatrick   Align Alignment = LD->getAlign();
513009467b48Spatrick   auto &TD = DAG.getDataLayout();
5131097a140dSpatrick   Align PrefAlign = TD.getPrefTypeAlign(ResVT.getTypeForEVT(*DAG.getContext()));
5132097a140dSpatrick   if (Alignment < PrefAlign) {
513309467b48Spatrick     // This load is not sufficiently aligned, so bail out and let this vector
513409467b48Spatrick     // load be scalarized.  Note that we may still be able to emit smaller
513509467b48Spatrick     // vector loads.  For example, if we are loading a <4 x float> with an
513609467b48Spatrick     // alignment of 8, this check will fail but the legalizer will try again
513709467b48Spatrick     // with 2 x <2 x float>, which will succeed with an alignment of 8.
513809467b48Spatrick     return;
513909467b48Spatrick   }
514009467b48Spatrick 
514109467b48Spatrick   EVT EltVT = ResVT.getVectorElementType();
514209467b48Spatrick   unsigned NumElts = ResVT.getVectorNumElements();
514309467b48Spatrick 
514409467b48Spatrick   // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
514509467b48Spatrick   // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
514609467b48Spatrick   // loaded type to i16 and propagate the "real" type as the memory type.
514709467b48Spatrick   bool NeedTrunc = false;
514809467b48Spatrick   if (EltVT.getSizeInBits() < 16) {
514909467b48Spatrick     EltVT = MVT::i16;
515009467b48Spatrick     NeedTrunc = true;
515109467b48Spatrick   }
515209467b48Spatrick 
515309467b48Spatrick   unsigned Opcode = 0;
515409467b48Spatrick   SDVTList LdResVTs;
515509467b48Spatrick   bool LoadF16x2 = false;
515609467b48Spatrick 
515709467b48Spatrick   switch (NumElts) {
515809467b48Spatrick   default:
515909467b48Spatrick     return;
516009467b48Spatrick   case 2:
516109467b48Spatrick     Opcode = NVPTXISD::LoadV2;
516209467b48Spatrick     LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
516309467b48Spatrick     break;
516409467b48Spatrick   case 4: {
516509467b48Spatrick     Opcode = NVPTXISD::LoadV4;
516609467b48Spatrick     EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
516709467b48Spatrick     LdResVTs = DAG.getVTList(ListVTs);
516809467b48Spatrick     break;
516909467b48Spatrick   }
517009467b48Spatrick   case 8: {
517109467b48Spatrick     // v8f16 is a special case. PTX doesn't have ld.v8.f16
517209467b48Spatrick     // instruction. Instead, we split the vector into v2f16 chunks and
517309467b48Spatrick     // load them with ld.v4.b32.
5174*d415bd75Srobert     assert((EltVT == MVT::f16 || EltVT == MVT::bf16) &&
5175*d415bd75Srobert            "Unsupported v8 vector type.");
517609467b48Spatrick     LoadF16x2 = true;
517709467b48Spatrick     Opcode = NVPTXISD::LoadV4;
5178*d415bd75Srobert     EVT VVT = (EltVT == MVT::f16) ? MVT::v2f16 : MVT::v2bf16;
5179*d415bd75Srobert     EVT ListVTs[] = {VVT, VVT, VVT, VVT, MVT::Other};
518009467b48Spatrick     LdResVTs = DAG.getVTList(ListVTs);
518109467b48Spatrick     break;
518209467b48Spatrick   }
518309467b48Spatrick   }
518409467b48Spatrick 
518509467b48Spatrick   // Copy regular operands
518609467b48Spatrick   SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end());
518709467b48Spatrick 
518809467b48Spatrick   // The select routine does not have access to the LoadSDNode instance, so
518909467b48Spatrick   // pass along the extension information
519009467b48Spatrick   OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL));
519109467b48Spatrick 
519209467b48Spatrick   SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
519309467b48Spatrick                                           LD->getMemoryVT(),
519409467b48Spatrick                                           LD->getMemOperand());
519509467b48Spatrick 
519609467b48Spatrick   SmallVector<SDValue, 8> ScalarRes;
519709467b48Spatrick   if (LoadF16x2) {
519809467b48Spatrick     // Split v2f16 subvectors back into individual elements.
519909467b48Spatrick     NumElts /= 2;
520009467b48Spatrick     for (unsigned i = 0; i < NumElts; ++i) {
520109467b48Spatrick       SDValue SubVector = NewLD.getValue(i);
520209467b48Spatrick       SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector,
520309467b48Spatrick                                DAG.getIntPtrConstant(0, DL));
520409467b48Spatrick       SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector,
520509467b48Spatrick                                DAG.getIntPtrConstant(1, DL));
520609467b48Spatrick       ScalarRes.push_back(E0);
520709467b48Spatrick       ScalarRes.push_back(E1);
520809467b48Spatrick     }
520909467b48Spatrick   } else {
521009467b48Spatrick     for (unsigned i = 0; i < NumElts; ++i) {
521109467b48Spatrick       SDValue Res = NewLD.getValue(i);
521209467b48Spatrick       if (NeedTrunc)
521309467b48Spatrick         Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
521409467b48Spatrick       ScalarRes.push_back(Res);
521509467b48Spatrick     }
521609467b48Spatrick   }
521709467b48Spatrick 
521809467b48Spatrick   SDValue LoadChain = NewLD.getValue(NumElts);
521909467b48Spatrick 
522009467b48Spatrick   SDValue BuildVec = DAG.getBuildVector(ResVT, DL, ScalarRes);
522109467b48Spatrick 
522209467b48Spatrick   Results.push_back(BuildVec);
522309467b48Spatrick   Results.push_back(LoadChain);
522409467b48Spatrick }
522509467b48Spatrick 
ReplaceINTRINSIC_W_CHAIN(SDNode * N,SelectionDAG & DAG,SmallVectorImpl<SDValue> & Results)522609467b48Spatrick static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
522709467b48Spatrick                                      SmallVectorImpl<SDValue> &Results) {
522809467b48Spatrick   SDValue Chain = N->getOperand(0);
522909467b48Spatrick   SDValue Intrin = N->getOperand(1);
523009467b48Spatrick   SDLoc DL(N);
523109467b48Spatrick 
523209467b48Spatrick   // Get the intrinsic ID
523309467b48Spatrick   unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
523409467b48Spatrick   switch (IntrinNo) {
523509467b48Spatrick   default:
523609467b48Spatrick     return;
523709467b48Spatrick   case Intrinsic::nvvm_ldg_global_i:
523809467b48Spatrick   case Intrinsic::nvvm_ldg_global_f:
523909467b48Spatrick   case Intrinsic::nvvm_ldg_global_p:
524009467b48Spatrick   case Intrinsic::nvvm_ldu_global_i:
524109467b48Spatrick   case Intrinsic::nvvm_ldu_global_f:
524209467b48Spatrick   case Intrinsic::nvvm_ldu_global_p: {
524309467b48Spatrick     EVT ResVT = N->getValueType(0);
524409467b48Spatrick 
524509467b48Spatrick     if (ResVT.isVector()) {
524609467b48Spatrick       // Vector LDG/LDU
524709467b48Spatrick 
524809467b48Spatrick       unsigned NumElts = ResVT.getVectorNumElements();
524909467b48Spatrick       EVT EltVT = ResVT.getVectorElementType();
525009467b48Spatrick 
525109467b48Spatrick       // Since LDU/LDG are target nodes, we cannot rely on DAG type
525209467b48Spatrick       // legalization.
525309467b48Spatrick       // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
525409467b48Spatrick       // loaded type to i16 and propagate the "real" type as the memory type.
525509467b48Spatrick       bool NeedTrunc = false;
525609467b48Spatrick       if (EltVT.getSizeInBits() < 16) {
525709467b48Spatrick         EltVT = MVT::i16;
525809467b48Spatrick         NeedTrunc = true;
525909467b48Spatrick       }
526009467b48Spatrick 
526109467b48Spatrick       unsigned Opcode = 0;
526209467b48Spatrick       SDVTList LdResVTs;
526309467b48Spatrick 
526409467b48Spatrick       switch (NumElts) {
526509467b48Spatrick       default:
526609467b48Spatrick         return;
526709467b48Spatrick       case 2:
526809467b48Spatrick         switch (IntrinNo) {
526909467b48Spatrick         default:
527009467b48Spatrick           return;
527109467b48Spatrick         case Intrinsic::nvvm_ldg_global_i:
527209467b48Spatrick         case Intrinsic::nvvm_ldg_global_f:
527309467b48Spatrick         case Intrinsic::nvvm_ldg_global_p:
527409467b48Spatrick           Opcode = NVPTXISD::LDGV2;
527509467b48Spatrick           break;
527609467b48Spatrick         case Intrinsic::nvvm_ldu_global_i:
527709467b48Spatrick         case Intrinsic::nvvm_ldu_global_f:
527809467b48Spatrick         case Intrinsic::nvvm_ldu_global_p:
527909467b48Spatrick           Opcode = NVPTXISD::LDUV2;
528009467b48Spatrick           break;
528109467b48Spatrick         }
528209467b48Spatrick         LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
528309467b48Spatrick         break;
528409467b48Spatrick       case 4: {
528509467b48Spatrick         switch (IntrinNo) {
528609467b48Spatrick         default:
528709467b48Spatrick           return;
528809467b48Spatrick         case Intrinsic::nvvm_ldg_global_i:
528909467b48Spatrick         case Intrinsic::nvvm_ldg_global_f:
529009467b48Spatrick         case Intrinsic::nvvm_ldg_global_p:
529109467b48Spatrick           Opcode = NVPTXISD::LDGV4;
529209467b48Spatrick           break;
529309467b48Spatrick         case Intrinsic::nvvm_ldu_global_i:
529409467b48Spatrick         case Intrinsic::nvvm_ldu_global_f:
529509467b48Spatrick         case Intrinsic::nvvm_ldu_global_p:
529609467b48Spatrick           Opcode = NVPTXISD::LDUV4;
529709467b48Spatrick           break;
529809467b48Spatrick         }
529909467b48Spatrick         EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
530009467b48Spatrick         LdResVTs = DAG.getVTList(ListVTs);
530109467b48Spatrick         break;
530209467b48Spatrick       }
530309467b48Spatrick       }
530409467b48Spatrick 
530509467b48Spatrick       SmallVector<SDValue, 8> OtherOps;
530609467b48Spatrick 
530709467b48Spatrick       // Copy regular operands
530809467b48Spatrick 
530909467b48Spatrick       OtherOps.push_back(Chain); // Chain
531009467b48Spatrick                                  // Skip operand 1 (intrinsic ID)
531109467b48Spatrick       // Others
531209467b48Spatrick       OtherOps.append(N->op_begin() + 2, N->op_end());
531309467b48Spatrick 
531409467b48Spatrick       MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
531509467b48Spatrick 
531609467b48Spatrick       SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
531709467b48Spatrick                                               MemSD->getMemoryVT(),
531809467b48Spatrick                                               MemSD->getMemOperand());
531909467b48Spatrick 
532009467b48Spatrick       SmallVector<SDValue, 4> ScalarRes;
532109467b48Spatrick 
532209467b48Spatrick       for (unsigned i = 0; i < NumElts; ++i) {
532309467b48Spatrick         SDValue Res = NewLD.getValue(i);
532409467b48Spatrick         if (NeedTrunc)
532509467b48Spatrick           Res =
532609467b48Spatrick               DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
532709467b48Spatrick         ScalarRes.push_back(Res);
532809467b48Spatrick       }
532909467b48Spatrick 
533009467b48Spatrick       SDValue LoadChain = NewLD.getValue(NumElts);
533109467b48Spatrick 
533209467b48Spatrick       SDValue BuildVec =
533309467b48Spatrick           DAG.getBuildVector(ResVT, DL, ScalarRes);
533409467b48Spatrick 
533509467b48Spatrick       Results.push_back(BuildVec);
533609467b48Spatrick       Results.push_back(LoadChain);
533709467b48Spatrick     } else {
533809467b48Spatrick       // i8 LDG/LDU
533909467b48Spatrick       assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
534009467b48Spatrick              "Custom handling of non-i8 ldu/ldg?");
534109467b48Spatrick 
534209467b48Spatrick       // Just copy all operands as-is
534309467b48Spatrick       SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
534409467b48Spatrick 
534509467b48Spatrick       // Force output to i16
534609467b48Spatrick       SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
534709467b48Spatrick 
534809467b48Spatrick       MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
534909467b48Spatrick 
535009467b48Spatrick       // We make sure the memory type is i8, which will be used during isel
535109467b48Spatrick       // to select the proper instruction.
535209467b48Spatrick       SDValue NewLD =
535309467b48Spatrick           DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
535409467b48Spatrick                                   MVT::i8, MemSD->getMemOperand());
535509467b48Spatrick 
535609467b48Spatrick       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
535709467b48Spatrick                                     NewLD.getValue(0)));
535809467b48Spatrick       Results.push_back(NewLD.getValue(1));
535909467b48Spatrick     }
536009467b48Spatrick   }
536109467b48Spatrick   }
536209467b48Spatrick }
536309467b48Spatrick 
ReplaceNodeResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const536409467b48Spatrick void NVPTXTargetLowering::ReplaceNodeResults(
536509467b48Spatrick     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
536609467b48Spatrick   switch (N->getOpcode()) {
536709467b48Spatrick   default:
536809467b48Spatrick     report_fatal_error("Unhandled custom legalization");
536909467b48Spatrick   case ISD::LOAD:
537009467b48Spatrick     ReplaceLoadVector(N, DAG, Results);
537109467b48Spatrick     return;
537209467b48Spatrick   case ISD::INTRINSIC_W_CHAIN:
537309467b48Spatrick     ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
537409467b48Spatrick     return;
537509467b48Spatrick   }
537609467b48Spatrick }
537709467b48Spatrick 
5378*d415bd75Srobert NVPTXTargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst * AI) const5379*d415bd75Srobert NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
5380*d415bd75Srobert   Type *Ty = AI->getValOperand()->getType();
5381*d415bd75Srobert 
5382*d415bd75Srobert   if (AI->isFloatingPointOperation()) {
5383*d415bd75Srobert     if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) {
5384*d415bd75Srobert       if (Ty->isFloatTy())
5385*d415bd75Srobert         return AtomicExpansionKind::None;
5386*d415bd75Srobert       if (Ty->isDoubleTy() && STI.hasAtomAddF64())
5387*d415bd75Srobert         return AtomicExpansionKind::None;
5388*d415bd75Srobert     }
5389*d415bd75Srobert     return AtomicExpansionKind::CmpXChg;
5390*d415bd75Srobert   }
5391*d415bd75Srobert 
5392*d415bd75Srobert   assert(Ty->isIntegerTy() && "Ty should be integer at this point");
5393*d415bd75Srobert   auto ITy = cast<llvm::IntegerType>(Ty);
5394*d415bd75Srobert 
5395*d415bd75Srobert   switch (AI->getOperation()) {
5396*d415bd75Srobert   default:
5397*d415bd75Srobert     return AtomicExpansionKind::CmpXChg;
5398*d415bd75Srobert   case AtomicRMWInst::BinOp::And:
5399*d415bd75Srobert   case AtomicRMWInst::BinOp::Or:
5400*d415bd75Srobert   case AtomicRMWInst::BinOp::Xor:
5401*d415bd75Srobert   case AtomicRMWInst::BinOp::Xchg:
5402*d415bd75Srobert     switch (ITy->getBitWidth()) {
5403*d415bd75Srobert     case 8:
5404*d415bd75Srobert     case 16:
5405*d415bd75Srobert       return AtomicExpansionKind::CmpXChg;
5406*d415bd75Srobert     case 32:
5407*d415bd75Srobert       return AtomicExpansionKind::None;
5408*d415bd75Srobert     case 64:
5409*d415bd75Srobert       if (STI.hasAtomBitwise64())
5410*d415bd75Srobert         return AtomicExpansionKind::None;
5411*d415bd75Srobert       return AtomicExpansionKind::CmpXChg;
5412*d415bd75Srobert     default:
5413*d415bd75Srobert       llvm_unreachable("unsupported width encountered");
5414*d415bd75Srobert     }
5415*d415bd75Srobert   case AtomicRMWInst::BinOp::Add:
5416*d415bd75Srobert   case AtomicRMWInst::BinOp::Sub:
5417*d415bd75Srobert   case AtomicRMWInst::BinOp::Max:
5418*d415bd75Srobert   case AtomicRMWInst::BinOp::Min:
5419*d415bd75Srobert   case AtomicRMWInst::BinOp::UMax:
5420*d415bd75Srobert   case AtomicRMWInst::BinOp::UMin:
5421*d415bd75Srobert     switch (ITy->getBitWidth()) {
5422*d415bd75Srobert     case 8:
5423*d415bd75Srobert     case 16:
5424*d415bd75Srobert       return AtomicExpansionKind::CmpXChg;
5425*d415bd75Srobert     case 32:
5426*d415bd75Srobert       return AtomicExpansionKind::None;
5427*d415bd75Srobert     case 64:
5428*d415bd75Srobert       if (STI.hasAtomMinMax64())
5429*d415bd75Srobert         return AtomicExpansionKind::None;
5430*d415bd75Srobert       return AtomicExpansionKind::CmpXChg;
5431*d415bd75Srobert     default:
5432*d415bd75Srobert       llvm_unreachable("unsupported width encountered");
5433*d415bd75Srobert     }
5434*d415bd75Srobert   }
5435*d415bd75Srobert 
5436*d415bd75Srobert   return AtomicExpansionKind::CmpXChg;
5437*d415bd75Srobert }
5438*d415bd75Srobert 
543909467b48Spatrick // Pin NVPTXTargetObjectFile's vtables to this file.
5440*d415bd75Srobert NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default;
544109467b48Spatrick 
SelectSectionForGlobal(const GlobalObject * GO,SectionKind Kind,const TargetMachine & TM) const544209467b48Spatrick MCSection *NVPTXTargetObjectFile::SelectSectionForGlobal(
544309467b48Spatrick     const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
544409467b48Spatrick   return getDataSection();
544509467b48Spatrick }
5446