109467b48Spatrick //===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // This file defines the interfaces that NVPTX uses to lower LLVM code into a
1009467b48Spatrick // selection DAG.
1109467b48Spatrick //
1209467b48Spatrick //===----------------------------------------------------------------------===//
1309467b48Spatrick
1409467b48Spatrick #include "NVPTXISelLowering.h"
1509467b48Spatrick #include "MCTargetDesc/NVPTXBaseInfo.h"
1609467b48Spatrick #include "NVPTX.h"
1709467b48Spatrick #include "NVPTXSubtarget.h"
1809467b48Spatrick #include "NVPTXTargetMachine.h"
1909467b48Spatrick #include "NVPTXTargetObjectFile.h"
2009467b48Spatrick #include "NVPTXUtilities.h"
2109467b48Spatrick #include "llvm/ADT/APInt.h"
2273471bf0Spatrick #include "llvm/ADT/STLExtras.h"
2309467b48Spatrick #include "llvm/ADT/SmallVector.h"
2409467b48Spatrick #include "llvm/ADT/StringRef.h"
2509467b48Spatrick #include "llvm/CodeGen/Analysis.h"
2609467b48Spatrick #include "llvm/CodeGen/MachineFunction.h"
2709467b48Spatrick #include "llvm/CodeGen/MachineMemOperand.h"
2809467b48Spatrick #include "llvm/CodeGen/SelectionDAG.h"
2909467b48Spatrick #include "llvm/CodeGen/SelectionDAGNodes.h"
3009467b48Spatrick #include "llvm/CodeGen/TargetCallingConv.h"
3109467b48Spatrick #include "llvm/CodeGen/TargetLowering.h"
3209467b48Spatrick #include "llvm/CodeGen/ValueTypes.h"
3309467b48Spatrick #include "llvm/IR/Argument.h"
3409467b48Spatrick #include "llvm/IR/Attributes.h"
3509467b48Spatrick #include "llvm/IR/Constants.h"
3609467b48Spatrick #include "llvm/IR/DataLayout.h"
3709467b48Spatrick #include "llvm/IR/DerivedTypes.h"
38*d415bd75Srobert #include "llvm/IR/FPEnv.h"
3909467b48Spatrick #include "llvm/IR/Function.h"
4009467b48Spatrick #include "llvm/IR/GlobalValue.h"
4109467b48Spatrick #include "llvm/IR/Instruction.h"
4209467b48Spatrick #include "llvm/IR/Instructions.h"
4309467b48Spatrick #include "llvm/IR/IntrinsicsNVPTX.h"
4409467b48Spatrick #include "llvm/IR/Module.h"
4509467b48Spatrick #include "llvm/IR/Type.h"
4609467b48Spatrick #include "llvm/IR/Value.h"
4709467b48Spatrick #include "llvm/Support/Casting.h"
4809467b48Spatrick #include "llvm/Support/CodeGen.h"
4909467b48Spatrick #include "llvm/Support/CommandLine.h"
5009467b48Spatrick #include "llvm/Support/ErrorHandling.h"
5109467b48Spatrick #include "llvm/Support/MachineValueType.h"
5209467b48Spatrick #include "llvm/Support/raw_ostream.h"
5309467b48Spatrick #include "llvm/Target/TargetMachine.h"
5409467b48Spatrick #include "llvm/Target/TargetOptions.h"
5509467b48Spatrick #include <algorithm>
5609467b48Spatrick #include <cassert>
57*d415bd75Srobert #include <cmath>
5809467b48Spatrick #include <cstdint>
5909467b48Spatrick #include <iterator>
6009467b48Spatrick #include <sstream>
6109467b48Spatrick #include <string>
6209467b48Spatrick #include <utility>
6309467b48Spatrick #include <vector>
6409467b48Spatrick
6509467b48Spatrick #define DEBUG_TYPE "nvptx-lower"
6609467b48Spatrick
6709467b48Spatrick using namespace llvm;
6809467b48Spatrick
6973471bf0Spatrick static std::atomic<unsigned> GlobalUniqueCallSite;
7009467b48Spatrick
7109467b48Spatrick static cl::opt<bool> sched4reg(
7209467b48Spatrick "nvptx-sched4reg",
7309467b48Spatrick cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
7409467b48Spatrick
75*d415bd75Srobert static cl::opt<unsigned> FMAContractLevelOpt(
76*d415bd75Srobert "nvptx-fma-level", cl::Hidden,
7709467b48Spatrick cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
7809467b48Spatrick " 1: do it 2: do it aggressively"),
7909467b48Spatrick cl::init(2));
8009467b48Spatrick
8109467b48Spatrick static cl::opt<int> UsePrecDivF32(
82*d415bd75Srobert "nvptx-prec-divf32", cl::Hidden,
8309467b48Spatrick cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
8409467b48Spatrick " IEEE Compliant F32 div.rnd if available."),
8509467b48Spatrick cl::init(2));
8609467b48Spatrick
8709467b48Spatrick static cl::opt<bool> UsePrecSqrtF32(
8809467b48Spatrick "nvptx-prec-sqrtf32", cl::Hidden,
8909467b48Spatrick cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
9009467b48Spatrick cl::init(true));
9109467b48Spatrick
getDivF32Level() const9209467b48Spatrick int NVPTXTargetLowering::getDivF32Level() const {
9309467b48Spatrick if (UsePrecDivF32.getNumOccurrences() > 0) {
9409467b48Spatrick // If nvptx-prec-div32=N is used on the command-line, always honor it
9509467b48Spatrick return UsePrecDivF32;
9609467b48Spatrick } else {
9709467b48Spatrick // Otherwise, use div.approx if fast math is enabled
9809467b48Spatrick if (getTargetMachine().Options.UnsafeFPMath)
9909467b48Spatrick return 0;
10009467b48Spatrick else
10109467b48Spatrick return 2;
10209467b48Spatrick }
10309467b48Spatrick }
10409467b48Spatrick
usePrecSqrtF32() const10509467b48Spatrick bool NVPTXTargetLowering::usePrecSqrtF32() const {
10609467b48Spatrick if (UsePrecSqrtF32.getNumOccurrences() > 0) {
10709467b48Spatrick // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
10809467b48Spatrick return UsePrecSqrtF32;
10909467b48Spatrick } else {
11009467b48Spatrick // Otherwise, use sqrt.approx if fast math is enabled
11109467b48Spatrick return !getTargetMachine().Options.UnsafeFPMath;
11209467b48Spatrick }
11309467b48Spatrick }
11409467b48Spatrick
useF32FTZ(const MachineFunction & MF) const11509467b48Spatrick bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const {
116097a140dSpatrick return MF.getDenormalMode(APFloat::IEEEsingle()).Output ==
117097a140dSpatrick DenormalMode::PreserveSign;
11809467b48Spatrick }
11909467b48Spatrick
IsPTXVectorType(MVT VT)12009467b48Spatrick static bool IsPTXVectorType(MVT VT) {
12109467b48Spatrick switch (VT.SimpleTy) {
12209467b48Spatrick default:
12309467b48Spatrick return false;
12409467b48Spatrick case MVT::v2i1:
12509467b48Spatrick case MVT::v4i1:
12609467b48Spatrick case MVT::v2i8:
12709467b48Spatrick case MVT::v4i8:
12809467b48Spatrick case MVT::v2i16:
12909467b48Spatrick case MVT::v4i16:
13009467b48Spatrick case MVT::v2i32:
13109467b48Spatrick case MVT::v4i32:
13209467b48Spatrick case MVT::v2i64:
13309467b48Spatrick case MVT::v2f16:
13409467b48Spatrick case MVT::v4f16:
13509467b48Spatrick case MVT::v8f16: // <4 x f16x2>
136*d415bd75Srobert case MVT::v2bf16:
137*d415bd75Srobert case MVT::v4bf16:
138*d415bd75Srobert case MVT::v8bf16: // <4 x bf16x2>
13909467b48Spatrick case MVT::v2f32:
14009467b48Spatrick case MVT::v4f32:
14109467b48Spatrick case MVT::v2f64:
14209467b48Spatrick return true;
14309467b48Spatrick }
14409467b48Spatrick }
14509467b48Spatrick
14609467b48Spatrick /// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
14709467b48Spatrick /// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors
14809467b48Spatrick /// into their primitive components.
14909467b48Spatrick /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
15009467b48Spatrick /// same number of types as the Ins/Outs arrays in LowerFormalArguments,
15109467b48Spatrick /// LowerCall, and LowerReturn.
ComputePTXValueVTs(const TargetLowering & TLI,const DataLayout & DL,Type * Ty,SmallVectorImpl<EVT> & ValueVTs,SmallVectorImpl<uint64_t> * Offsets=nullptr,uint64_t StartingOffset=0)15209467b48Spatrick static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
15309467b48Spatrick Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
15409467b48Spatrick SmallVectorImpl<uint64_t> *Offsets = nullptr,
15509467b48Spatrick uint64_t StartingOffset = 0) {
15609467b48Spatrick SmallVector<EVT, 16> TempVTs;
15709467b48Spatrick SmallVector<uint64_t, 16> TempOffsets;
15809467b48Spatrick
15909467b48Spatrick // Special case for i128 - decompose to (i64, i64)
16009467b48Spatrick if (Ty->isIntegerTy(128)) {
16109467b48Spatrick ValueVTs.push_back(EVT(MVT::i64));
16209467b48Spatrick ValueVTs.push_back(EVT(MVT::i64));
16309467b48Spatrick
16409467b48Spatrick if (Offsets) {
16509467b48Spatrick Offsets->push_back(StartingOffset + 0);
16609467b48Spatrick Offsets->push_back(StartingOffset + 8);
16709467b48Spatrick }
16809467b48Spatrick
16909467b48Spatrick return;
17009467b48Spatrick }
17109467b48Spatrick
17209467b48Spatrick // Given a struct type, recursively traverse the elements with custom ComputePTXValueVTs.
17309467b48Spatrick if (StructType *STy = dyn_cast<StructType>(Ty)) {
17409467b48Spatrick auto const *SL = DL.getStructLayout(STy);
17509467b48Spatrick auto ElementNum = 0;
17609467b48Spatrick for(auto *EI : STy->elements()) {
17709467b48Spatrick ComputePTXValueVTs(TLI, DL, EI, ValueVTs, Offsets,
17809467b48Spatrick StartingOffset + SL->getElementOffset(ElementNum));
17909467b48Spatrick ++ElementNum;
18009467b48Spatrick }
18109467b48Spatrick return;
18209467b48Spatrick }
18309467b48Spatrick
18409467b48Spatrick ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
18509467b48Spatrick for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
18609467b48Spatrick EVT VT = TempVTs[i];
18709467b48Spatrick uint64_t Off = TempOffsets[i];
18809467b48Spatrick // Split vectors into individual elements, except for v2f16, which
18909467b48Spatrick // we will pass as a single scalar.
19009467b48Spatrick if (VT.isVector()) {
19109467b48Spatrick unsigned NumElts = VT.getVectorNumElements();
19209467b48Spatrick EVT EltVT = VT.getVectorElementType();
19309467b48Spatrick // Vectors with an even number of f16 elements will be passed to
194*d415bd75Srobert // us as an array of v2f16/v2bf16 elements. We must match this so we
19509467b48Spatrick // stay in sync with Ins/Outs.
196*d415bd75Srobert if ((EltVT == MVT::f16 || EltVT == MVT::bf16) && NumElts % 2 == 0) {
197*d415bd75Srobert EltVT = EltVT == MVT::f16 ? MVT::v2f16 : MVT::v2bf16;
19809467b48Spatrick NumElts /= 2;
19909467b48Spatrick }
20009467b48Spatrick for (unsigned j = 0; j != NumElts; ++j) {
20109467b48Spatrick ValueVTs.push_back(EltVT);
20209467b48Spatrick if (Offsets)
20309467b48Spatrick Offsets->push_back(Off + j * EltVT.getStoreSize());
20409467b48Spatrick }
20509467b48Spatrick } else {
20609467b48Spatrick ValueVTs.push_back(VT);
20709467b48Spatrick if (Offsets)
20809467b48Spatrick Offsets->push_back(Off);
20909467b48Spatrick }
21009467b48Spatrick }
21109467b48Spatrick }
21209467b48Spatrick
213*d415bd75Srobert /// PromoteScalarIntegerPTX
214*d415bd75Srobert /// Used to make sure the arguments/returns are suitable for passing
215*d415bd75Srobert /// and promote them to a larger size if they're not.
216*d415bd75Srobert ///
217*d415bd75Srobert /// The promoted type is placed in \p PromoteVT if the function returns true.
PromoteScalarIntegerPTX(const EVT & VT,MVT * PromotedVT)218*d415bd75Srobert static bool PromoteScalarIntegerPTX(const EVT &VT, MVT *PromotedVT) {
219*d415bd75Srobert if (VT.isScalarInteger()) {
220*d415bd75Srobert switch (PowerOf2Ceil(VT.getFixedSizeInBits())) {
221*d415bd75Srobert default:
222*d415bd75Srobert llvm_unreachable(
223*d415bd75Srobert "Promotion is not suitable for scalars of size larger than 64-bits");
224*d415bd75Srobert case 1:
225*d415bd75Srobert *PromotedVT = MVT::i1;
226*d415bd75Srobert break;
227*d415bd75Srobert case 2:
228*d415bd75Srobert case 4:
229*d415bd75Srobert case 8:
230*d415bd75Srobert *PromotedVT = MVT::i8;
231*d415bd75Srobert break;
232*d415bd75Srobert case 16:
233*d415bd75Srobert *PromotedVT = MVT::i16;
234*d415bd75Srobert break;
235*d415bd75Srobert case 32:
236*d415bd75Srobert *PromotedVT = MVT::i32;
237*d415bd75Srobert break;
238*d415bd75Srobert case 64:
239*d415bd75Srobert *PromotedVT = MVT::i64;
240*d415bd75Srobert break;
241*d415bd75Srobert }
242*d415bd75Srobert return EVT(*PromotedVT) != VT;
243*d415bd75Srobert }
244*d415bd75Srobert return false;
245*d415bd75Srobert }
246*d415bd75Srobert
24709467b48Spatrick // Check whether we can merge loads/stores of some of the pieces of a
24809467b48Spatrick // flattened function parameter or return value into a single vector
24909467b48Spatrick // load/store.
25009467b48Spatrick //
25109467b48Spatrick // The flattened parameter is represented as a list of EVTs and
25209467b48Spatrick // offsets, and the whole structure is aligned to ParamAlignment. This
25309467b48Spatrick // function determines whether we can load/store pieces of the
25409467b48Spatrick // parameter starting at index Idx using a single vectorized op of
25509467b48Spatrick // size AccessSize. If so, it returns the number of param pieces
25609467b48Spatrick // covered by the vector op. Otherwise, it returns 1.
CanMergeParamLoadStoresStartingAt(unsigned Idx,uint32_t AccessSize,const SmallVectorImpl<EVT> & ValueVTs,const SmallVectorImpl<uint64_t> & Offsets,Align ParamAlignment)25709467b48Spatrick static unsigned CanMergeParamLoadStoresStartingAt(
25809467b48Spatrick unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs,
259097a140dSpatrick const SmallVectorImpl<uint64_t> &Offsets, Align ParamAlignment) {
26009467b48Spatrick
26109467b48Spatrick // Can't vectorize if param alignment is not sufficient.
262097a140dSpatrick if (ParamAlignment < AccessSize)
26309467b48Spatrick return 1;
26409467b48Spatrick // Can't vectorize if offset is not aligned.
26509467b48Spatrick if (Offsets[Idx] & (AccessSize - 1))
26609467b48Spatrick return 1;
26709467b48Spatrick
26809467b48Spatrick EVT EltVT = ValueVTs[Idx];
26909467b48Spatrick unsigned EltSize = EltVT.getStoreSize();
27009467b48Spatrick
27109467b48Spatrick // Element is too large to vectorize.
27209467b48Spatrick if (EltSize >= AccessSize)
27309467b48Spatrick return 1;
27409467b48Spatrick
27509467b48Spatrick unsigned NumElts = AccessSize / EltSize;
27609467b48Spatrick // Can't vectorize if AccessBytes if not a multiple of EltSize.
27709467b48Spatrick if (AccessSize != EltSize * NumElts)
27809467b48Spatrick return 1;
27909467b48Spatrick
28009467b48Spatrick // We don't have enough elements to vectorize.
28109467b48Spatrick if (Idx + NumElts > ValueVTs.size())
28209467b48Spatrick return 1;
28309467b48Spatrick
28409467b48Spatrick // PTX ISA can only deal with 2- and 4-element vector ops.
28509467b48Spatrick if (NumElts != 4 && NumElts != 2)
28609467b48Spatrick return 1;
28709467b48Spatrick
28809467b48Spatrick for (unsigned j = Idx + 1; j < Idx + NumElts; ++j) {
28909467b48Spatrick // Types do not match.
29009467b48Spatrick if (ValueVTs[j] != EltVT)
29109467b48Spatrick return 1;
29209467b48Spatrick
29309467b48Spatrick // Elements are not contiguous.
29409467b48Spatrick if (Offsets[j] - Offsets[j - 1] != EltSize)
29509467b48Spatrick return 1;
29609467b48Spatrick }
29709467b48Spatrick // OK. We can vectorize ValueVTs[i..i+NumElts)
29809467b48Spatrick return NumElts;
29909467b48Spatrick }
30009467b48Spatrick
// Per-piece markers describing where each element of a flattened function
// parameter or return value sits within a vectorized load/store.
enum ParamVectorizationFlags {
  // Neither first nor last: an interior element of a vector.
  PVF_INNER = 0,
  // Element that opens a vector.
  PVF_FIRST = 1 << 0,
  // Element that closes a vector.
  PVF_LAST = 1 << 1,
  // A scalar is treated as a one-element vector: both first and last.
  PVF_SCALAR = PVF_FIRST | PVF_LAST
};
31009467b48Spatrick
31109467b48Spatrick // Computes whether and how we can vectorize the loads/stores of a
31209467b48Spatrick // flattened function parameter or return value.
31309467b48Spatrick //
31409467b48Spatrick // The flattened parameter is represented as the list of ValueVTs and
31509467b48Spatrick // Offsets, and is aligned to ParamAlignment bytes. We return a vector
31609467b48Spatrick // of the same size as ValueVTs indicating how each piece should be
31709467b48Spatrick // loaded/stored (i.e. as a scalar, or as part of a vector
31809467b48Spatrick // load/store).
31909467b48Spatrick static SmallVector<ParamVectorizationFlags, 16>
VectorizePTXValueVTs(const SmallVectorImpl<EVT> & ValueVTs,const SmallVectorImpl<uint64_t> & Offsets,Align ParamAlignment,bool IsVAArg=false)32009467b48Spatrick VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
32109467b48Spatrick const SmallVectorImpl<uint64_t> &Offsets,
322*d415bd75Srobert Align ParamAlignment, bool IsVAArg = false) {
32309467b48Spatrick // Set vector size to match ValueVTs and mark all elements as
32409467b48Spatrick // scalars by default.
32509467b48Spatrick SmallVector<ParamVectorizationFlags, 16> VectorInfo;
32609467b48Spatrick VectorInfo.assign(ValueVTs.size(), PVF_SCALAR);
32709467b48Spatrick
328*d415bd75Srobert if (IsVAArg)
329*d415bd75Srobert return VectorInfo;
330*d415bd75Srobert
33109467b48Spatrick // Check what we can vectorize using 128/64/32-bit accesses.
33209467b48Spatrick for (int I = 0, E = ValueVTs.size(); I != E; ++I) {
33309467b48Spatrick // Skip elements we've already processed.
33409467b48Spatrick assert(VectorInfo[I] == PVF_SCALAR && "Unexpected vector info state.");
33509467b48Spatrick for (unsigned AccessSize : {16, 8, 4, 2}) {
33609467b48Spatrick unsigned NumElts = CanMergeParamLoadStoresStartingAt(
33709467b48Spatrick I, AccessSize, ValueVTs, Offsets, ParamAlignment);
33809467b48Spatrick // Mark vectorized elements.
33909467b48Spatrick switch (NumElts) {
34009467b48Spatrick default:
34109467b48Spatrick llvm_unreachable("Unexpected return value");
34209467b48Spatrick case 1:
34309467b48Spatrick // Can't vectorize using this size, try next smaller size.
34409467b48Spatrick continue;
34509467b48Spatrick case 2:
34609467b48Spatrick assert(I + 1 < E && "Not enough elements.");
34709467b48Spatrick VectorInfo[I] = PVF_FIRST;
34809467b48Spatrick VectorInfo[I + 1] = PVF_LAST;
34909467b48Spatrick I += 1;
35009467b48Spatrick break;
35109467b48Spatrick case 4:
35209467b48Spatrick assert(I + 3 < E && "Not enough elements.");
35309467b48Spatrick VectorInfo[I] = PVF_FIRST;
35409467b48Spatrick VectorInfo[I + 1] = PVF_INNER;
35509467b48Spatrick VectorInfo[I + 2] = PVF_INNER;
35609467b48Spatrick VectorInfo[I + 3] = PVF_LAST;
35709467b48Spatrick I += 3;
35809467b48Spatrick break;
35909467b48Spatrick }
36009467b48Spatrick // Break out of the inner loop because we've already succeeded
36109467b48Spatrick // using largest possible AccessSize.
36209467b48Spatrick break;
36309467b48Spatrick }
36409467b48Spatrick }
36509467b48Spatrick return VectorInfo;
36609467b48Spatrick }
36709467b48Spatrick
36809467b48Spatrick // NVPTXTargetLowering Constructor.
NVPTXTargetLowering(const NVPTXTargetMachine & TM,const NVPTXSubtarget & STI)36909467b48Spatrick NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
37009467b48Spatrick const NVPTXSubtarget &STI)
37109467b48Spatrick : TargetLowering(TM), nvTM(&TM), STI(STI) {
37209467b48Spatrick // always lower memset, memcpy, and memmove intrinsics to load/store
37309467b48Spatrick // instructions, rather
37409467b48Spatrick // then generating calls to memset, mempcy or memmove.
37509467b48Spatrick MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
37609467b48Spatrick MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
37709467b48Spatrick MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;
37809467b48Spatrick
37909467b48Spatrick setBooleanContents(ZeroOrNegativeOneBooleanContent);
38009467b48Spatrick setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
38109467b48Spatrick
38209467b48Spatrick // Jump is Expensive. Don't create extra control flow for 'and', 'or'
38309467b48Spatrick // condition branches.
38409467b48Spatrick setJumpIsExpensive(true);
38509467b48Spatrick
38609467b48Spatrick // Wide divides are _very_ slow. Try to reduce the width of the divide if
38709467b48Spatrick // possible.
38809467b48Spatrick addBypassSlowDiv(64, 32);
38909467b48Spatrick
39009467b48Spatrick // By default, use the Source scheduling
39109467b48Spatrick if (sched4reg)
39209467b48Spatrick setSchedulingPreference(Sched::RegPressure);
39309467b48Spatrick else
39409467b48Spatrick setSchedulingPreference(Sched::Source);
39509467b48Spatrick
39609467b48Spatrick auto setFP16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action,
39709467b48Spatrick LegalizeAction NoF16Action) {
39809467b48Spatrick setOperationAction(Op, VT, STI.allowFP16Math() ? Action : NoF16Action);
39909467b48Spatrick };
40009467b48Spatrick
40109467b48Spatrick addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
40209467b48Spatrick addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
40309467b48Spatrick addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
40409467b48Spatrick addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
40509467b48Spatrick addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
40609467b48Spatrick addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
40709467b48Spatrick addRegisterClass(MVT::f16, &NVPTX::Float16RegsRegClass);
40809467b48Spatrick addRegisterClass(MVT::v2f16, &NVPTX::Float16x2RegsRegClass);
409*d415bd75Srobert addRegisterClass(MVT::bf16, &NVPTX::Float16RegsRegClass);
410*d415bd75Srobert addRegisterClass(MVT::v2bf16, &NVPTX::Float16x2RegsRegClass);
41109467b48Spatrick
41209467b48Spatrick // Conversion to/from FP16/FP16x2 is always legal.
41309467b48Spatrick setOperationAction(ISD::SINT_TO_FP, MVT::f16, Legal);
41409467b48Spatrick setOperationAction(ISD::FP_TO_SINT, MVT::f16, Legal);
41509467b48Spatrick setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom);
41609467b48Spatrick setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);
41709467b48Spatrick setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f16, Expand);
41809467b48Spatrick setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f16, Expand);
41909467b48Spatrick
42009467b48Spatrick setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote);
42109467b48Spatrick setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand);
42209467b48Spatrick
42309467b48Spatrick // Operations not directly supported by NVPTX.
42409467b48Spatrick for (MVT VT : {MVT::f16, MVT::v2f16, MVT::f32, MVT::f64, MVT::i1, MVT::i8,
42509467b48Spatrick MVT::i16, MVT::i32, MVT::i64}) {
42609467b48Spatrick setOperationAction(ISD::SELECT_CC, VT, Expand);
42709467b48Spatrick setOperationAction(ISD::BR_CC, VT, Expand);
42809467b48Spatrick }
42909467b48Spatrick
43009467b48Spatrick // Some SIGN_EXTEND_INREG can be done using cvt instruction.
43109467b48Spatrick // For others we will expand to a SHL/SRA pair.
43209467b48Spatrick setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal);
43309467b48Spatrick setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
43409467b48Spatrick setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
43509467b48Spatrick setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
43609467b48Spatrick setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
43709467b48Spatrick
43809467b48Spatrick setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom);
43909467b48Spatrick setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom);
44009467b48Spatrick setOperationAction(ISD::SRL_PARTS, MVT::i32 , Custom);
44109467b48Spatrick setOperationAction(ISD::SHL_PARTS, MVT::i64 , Custom);
44209467b48Spatrick setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom);
44309467b48Spatrick setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom);
44409467b48Spatrick
44509467b48Spatrick setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
44609467b48Spatrick setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
44709467b48Spatrick
44809467b48Spatrick // TODO: we may consider expanding ROTL/ROTR on older GPUs. Currently on GPUs
44909467b48Spatrick // that don't have h/w rotation we lower them to multi-instruction assembly.
45009467b48Spatrick // See ROT*_sw in NVPTXIntrInfo.td
45109467b48Spatrick setOperationAction(ISD::ROTL, MVT::i64, Legal);
45209467b48Spatrick setOperationAction(ISD::ROTR, MVT::i64, Legal);
45309467b48Spatrick setOperationAction(ISD::ROTL, MVT::i32, Legal);
45409467b48Spatrick setOperationAction(ISD::ROTR, MVT::i32, Legal);
45509467b48Spatrick
45609467b48Spatrick setOperationAction(ISD::ROTL, MVT::i16, Expand);
45709467b48Spatrick setOperationAction(ISD::ROTR, MVT::i16, Expand);
45809467b48Spatrick setOperationAction(ISD::ROTL, MVT::i8, Expand);
45909467b48Spatrick setOperationAction(ISD::ROTR, MVT::i8, Expand);
46009467b48Spatrick setOperationAction(ISD::BSWAP, MVT::i16, Expand);
46109467b48Spatrick setOperationAction(ISD::BSWAP, MVT::i32, Expand);
46209467b48Spatrick setOperationAction(ISD::BSWAP, MVT::i64, Expand);
46309467b48Spatrick
46409467b48Spatrick // Indirect branch is not supported.
46509467b48Spatrick // This also disables Jump Table creation.
46609467b48Spatrick setOperationAction(ISD::BR_JT, MVT::Other, Expand);
46709467b48Spatrick setOperationAction(ISD::BRIND, MVT::Other, Expand);
46809467b48Spatrick
46909467b48Spatrick setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
47009467b48Spatrick setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
47109467b48Spatrick
47209467b48Spatrick // We want to legalize constant related memmove and memcopy
47309467b48Spatrick // intrinsics.
47409467b48Spatrick setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
47509467b48Spatrick
47609467b48Spatrick // Turn FP extload into load/fpextend
47709467b48Spatrick setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
47809467b48Spatrick setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
47909467b48Spatrick setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
48009467b48Spatrick setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
48109467b48Spatrick setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
48209467b48Spatrick setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
48309467b48Spatrick setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
48409467b48Spatrick setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
48509467b48Spatrick setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
48609467b48Spatrick // Turn FP truncstore into trunc + store.
48709467b48Spatrick // FIXME: vector types should also be expanded
48809467b48Spatrick setTruncStoreAction(MVT::f32, MVT::f16, Expand);
48909467b48Spatrick setTruncStoreAction(MVT::f64, MVT::f16, Expand);
49009467b48Spatrick setTruncStoreAction(MVT::f64, MVT::f32, Expand);
49109467b48Spatrick
49209467b48Spatrick // PTX does not support load / store predicate registers
49309467b48Spatrick setOperationAction(ISD::LOAD, MVT::i1, Custom);
49409467b48Spatrick setOperationAction(ISD::STORE, MVT::i1, Custom);
49509467b48Spatrick
49609467b48Spatrick for (MVT VT : MVT::integer_valuetypes()) {
49709467b48Spatrick setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
49809467b48Spatrick setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
49909467b48Spatrick setTruncStoreAction(VT, MVT::i1, Expand);
50009467b48Spatrick }
50109467b48Spatrick
50209467b48Spatrick // This is legal in NVPTX
50309467b48Spatrick setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
50409467b48Spatrick setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
50509467b48Spatrick setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
506*d415bd75Srobert setOperationAction(ISD::ConstantFP, MVT::bf16, Legal);
50709467b48Spatrick
50809467b48Spatrick // TRAP can be lowered to PTX trap
50909467b48Spatrick setOperationAction(ISD::TRAP, MVT::Other, Legal);
51009467b48Spatrick
51109467b48Spatrick // Register custom handling for vector loads/stores
51209467b48Spatrick for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
51309467b48Spatrick if (IsPTXVectorType(VT)) {
51409467b48Spatrick setOperationAction(ISD::LOAD, VT, Custom);
51509467b48Spatrick setOperationAction(ISD::STORE, VT, Custom);
51609467b48Spatrick setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
51709467b48Spatrick }
51809467b48Spatrick }
51909467b48Spatrick
520*d415bd75Srobert // Support varargs.
521*d415bd75Srobert setOperationAction(ISD::VASTART, MVT::Other, Custom);
522*d415bd75Srobert setOperationAction(ISD::VAARG, MVT::Other, Custom);
523*d415bd75Srobert setOperationAction(ISD::VACOPY, MVT::Other, Expand);
524*d415bd75Srobert setOperationAction(ISD::VAEND, MVT::Other, Expand);
525*d415bd75Srobert
52609467b48Spatrick // Custom handling for i8 intrinsics
52709467b48Spatrick setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
52809467b48Spatrick
52909467b48Spatrick for (const auto& Ty : {MVT::i16, MVT::i32, MVT::i64}) {
53009467b48Spatrick setOperationAction(ISD::ABS, Ty, Legal);
53109467b48Spatrick setOperationAction(ISD::SMIN, Ty, Legal);
53209467b48Spatrick setOperationAction(ISD::SMAX, Ty, Legal);
53309467b48Spatrick setOperationAction(ISD::UMIN, Ty, Legal);
53409467b48Spatrick setOperationAction(ISD::UMAX, Ty, Legal);
53509467b48Spatrick
53609467b48Spatrick setOperationAction(ISD::CTPOP, Ty, Legal);
53709467b48Spatrick setOperationAction(ISD::CTLZ, Ty, Legal);
53809467b48Spatrick }
53909467b48Spatrick
540*d415bd75Srobert setOperationAction(ISD::ADDC, MVT::i32, Legal);
541*d415bd75Srobert setOperationAction(ISD::ADDE, MVT::i32, Legal);
542*d415bd75Srobert setOperationAction(ISD::SUBC, MVT::i32, Legal);
543*d415bd75Srobert setOperationAction(ISD::SUBE, MVT::i32, Legal);
544*d415bd75Srobert if (STI.getPTXVersion() >= 43) {
545*d415bd75Srobert setOperationAction(ISD::ADDC, MVT::i64, Legal);
546*d415bd75Srobert setOperationAction(ISD::ADDE, MVT::i64, Legal);
547*d415bd75Srobert setOperationAction(ISD::SUBC, MVT::i64, Legal);
548*d415bd75Srobert setOperationAction(ISD::SUBE, MVT::i64, Legal);
549*d415bd75Srobert }
550*d415bd75Srobert
55109467b48Spatrick setOperationAction(ISD::CTTZ, MVT::i16, Expand);
55209467b48Spatrick setOperationAction(ISD::CTTZ, MVT::i32, Expand);
55309467b48Spatrick setOperationAction(ISD::CTTZ, MVT::i64, Expand);
55409467b48Spatrick
55509467b48Spatrick // PTX does not directly support SELP of i1, so promote to i32 first
55609467b48Spatrick setOperationAction(ISD::SELECT, MVT::i1, Custom);
55709467b48Spatrick
55809467b48Spatrick // PTX cannot multiply two i64s in a single instruction.
55909467b48Spatrick setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
56009467b48Spatrick setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
56109467b48Spatrick
56209467b48Spatrick // We have some custom DAG combine patterns for these nodes
563*d415bd75Srobert setTargetDAGCombine({ISD::ADD, ISD::AND, ISD::FADD, ISD::MUL, ISD::SHL,
564*d415bd75Srobert ISD::SREM, ISD::UREM});
56509467b48Spatrick
56609467b48Spatrick // setcc for f16x2 needs special handling to prevent legalizer's
56709467b48Spatrick // attempt to scalarize it due to v2i1 not being legal.
56809467b48Spatrick if (STI.allowFP16Math())
56909467b48Spatrick setTargetDAGCombine(ISD::SETCC);
57009467b48Spatrick
57109467b48Spatrick // Promote fp16 arithmetic if fp16 hardware isn't available or the
57209467b48Spatrick // user passed --nvptx-no-fp16-math. The flag is useful because,
57309467b48Spatrick // although sm_53+ GPUs have some sort of FP16 support in
57409467b48Spatrick // hardware, only sm_53 and sm_60 have full implementation. Others
57509467b48Spatrick // only have token amount of hardware and are likely to run faster
57609467b48Spatrick // by using fp32 units instead.
57709467b48Spatrick for (const auto &Op : {ISD::FADD, ISD::FMUL, ISD::FSUB, ISD::FMA}) {
57809467b48Spatrick setFP16OperationAction(Op, MVT::f16, Legal, Promote);
57909467b48Spatrick setFP16OperationAction(Op, MVT::v2f16, Legal, Expand);
58009467b48Spatrick }
58109467b48Spatrick
582*d415bd75Srobert // f16/f16x2 neg was introduced in PTX 60, SM_53.
583*d415bd75Srobert const bool IsFP16FP16x2NegAvailable = STI.getSmVersion() >= 53 &&
584*d415bd75Srobert STI.getPTXVersion() >= 60 &&
585*d415bd75Srobert STI.allowFP16Math();
586*d415bd75Srobert for (const auto &VT : {MVT::f16, MVT::v2f16})
587*d415bd75Srobert setOperationAction(ISD::FNEG, VT,
588*d415bd75Srobert IsFP16FP16x2NegAvailable ? Legal : Expand);
58909467b48Spatrick
59009467b48Spatrick // (would be) Library functions.
59109467b48Spatrick
59209467b48Spatrick // These map to conversion instructions for scalar FP types.
59309467b48Spatrick for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,
594*d415bd75Srobert ISD::FROUNDEVEN, ISD::FTRUNC}) {
59509467b48Spatrick setOperationAction(Op, MVT::f16, Legal);
59609467b48Spatrick setOperationAction(Op, MVT::f32, Legal);
59709467b48Spatrick setOperationAction(Op, MVT::f64, Legal);
59809467b48Spatrick setOperationAction(Op, MVT::v2f16, Expand);
59909467b48Spatrick }
60009467b48Spatrick
60109467b48Spatrick setOperationAction(ISD::FROUND, MVT::f16, Promote);
60209467b48Spatrick setOperationAction(ISD::FROUND, MVT::v2f16, Expand);
60309467b48Spatrick setOperationAction(ISD::FROUND, MVT::f32, Custom);
60409467b48Spatrick setOperationAction(ISD::FROUND, MVT::f64, Custom);
60509467b48Spatrick
60609467b48Spatrick
60709467b48Spatrick // 'Expand' implements FCOPYSIGN without calling an external library.
60809467b48Spatrick setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
60909467b48Spatrick setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand);
61009467b48Spatrick setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
61109467b48Spatrick setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
61209467b48Spatrick
61309467b48Spatrick // These map to corresponding instructions for f32/f64. f16 must be
61409467b48Spatrick // promoted to f32. v2f16 is expanded to f16, which is then promoted
61509467b48Spatrick // to f32.
616*d415bd75Srobert for (const auto &Op :
617*d415bd75Srobert {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FABS}) {
61809467b48Spatrick setOperationAction(Op, MVT::f16, Promote);
61909467b48Spatrick setOperationAction(Op, MVT::f32, Legal);
62009467b48Spatrick setOperationAction(Op, MVT::f64, Legal);
62109467b48Spatrick setOperationAction(Op, MVT::v2f16, Expand);
62209467b48Spatrick }
623*d415bd75Srobert // max.f16, max.f16x2 and max.NaN are supported on sm_80+.
624*d415bd75Srobert auto GetMinMaxAction = [&](LegalizeAction NotSm80Action) {
625*d415bd75Srobert bool IsAtLeastSm80 = STI.getSmVersion() >= 80 && STI.getPTXVersion() >= 70;
626*d415bd75Srobert return IsAtLeastSm80 ? Legal : NotSm80Action;
627*d415bd75Srobert };
628*d415bd75Srobert for (const auto &Op : {ISD::FMINNUM, ISD::FMAXNUM}) {
629*d415bd75Srobert setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Promote), Promote);
630*d415bd75Srobert setOperationAction(Op, MVT::f32, Legal);
631*d415bd75Srobert setOperationAction(Op, MVT::f64, Legal);
632*d415bd75Srobert setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
633*d415bd75Srobert }
634*d415bd75Srobert for (const auto &Op : {ISD::FMINIMUM, ISD::FMAXIMUM}) {
635*d415bd75Srobert setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Expand), Expand);
636*d415bd75Srobert setOperationAction(Op, MVT::f32, GetMinMaxAction(Expand));
637*d415bd75Srobert setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
638*d415bd75Srobert }
63909467b48Spatrick
64009467b48Spatrick // No FEXP2, FLOG2. The PTX ex2 and log2 functions are always approximate.
64109467b48Spatrick // No FPOW or FREM in PTX.
64209467b48Spatrick
64309467b48Spatrick // Now deduce the information based on the above mentioned
64409467b48Spatrick // actions
64509467b48Spatrick computeRegisterProperties(STI.getRegisterInfo());
646*d415bd75Srobert
647*d415bd75Srobert setMinCmpXchgSizeInBits(32);
64809467b48Spatrick }
64909467b48Spatrick
getTargetNodeName(unsigned Opcode) const65009467b48Spatrick const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
65109467b48Spatrick switch ((NVPTXISD::NodeType)Opcode) {
65209467b48Spatrick case NVPTXISD::FIRST_NUMBER:
65309467b48Spatrick break;
65409467b48Spatrick case NVPTXISD::CALL:
65509467b48Spatrick return "NVPTXISD::CALL";
65609467b48Spatrick case NVPTXISD::RET_FLAG:
65709467b48Spatrick return "NVPTXISD::RET_FLAG";
65809467b48Spatrick case NVPTXISD::LOAD_PARAM:
65909467b48Spatrick return "NVPTXISD::LOAD_PARAM";
66009467b48Spatrick case NVPTXISD::Wrapper:
66109467b48Spatrick return "NVPTXISD::Wrapper";
66209467b48Spatrick case NVPTXISD::DeclareParam:
66309467b48Spatrick return "NVPTXISD::DeclareParam";
66409467b48Spatrick case NVPTXISD::DeclareScalarParam:
66509467b48Spatrick return "NVPTXISD::DeclareScalarParam";
66609467b48Spatrick case NVPTXISD::DeclareRet:
66709467b48Spatrick return "NVPTXISD::DeclareRet";
66809467b48Spatrick case NVPTXISD::DeclareScalarRet:
66909467b48Spatrick return "NVPTXISD::DeclareScalarRet";
67009467b48Spatrick case NVPTXISD::DeclareRetParam:
67109467b48Spatrick return "NVPTXISD::DeclareRetParam";
67209467b48Spatrick case NVPTXISD::PrintCall:
67309467b48Spatrick return "NVPTXISD::PrintCall";
67409467b48Spatrick case NVPTXISD::PrintConvergentCall:
67509467b48Spatrick return "NVPTXISD::PrintConvergentCall";
67609467b48Spatrick case NVPTXISD::PrintCallUni:
67709467b48Spatrick return "NVPTXISD::PrintCallUni";
67809467b48Spatrick case NVPTXISD::PrintConvergentCallUni:
67909467b48Spatrick return "NVPTXISD::PrintConvergentCallUni";
68009467b48Spatrick case NVPTXISD::LoadParam:
68109467b48Spatrick return "NVPTXISD::LoadParam";
68209467b48Spatrick case NVPTXISD::LoadParamV2:
68309467b48Spatrick return "NVPTXISD::LoadParamV2";
68409467b48Spatrick case NVPTXISD::LoadParamV4:
68509467b48Spatrick return "NVPTXISD::LoadParamV4";
68609467b48Spatrick case NVPTXISD::StoreParam:
68709467b48Spatrick return "NVPTXISD::StoreParam";
68809467b48Spatrick case NVPTXISD::StoreParamV2:
68909467b48Spatrick return "NVPTXISD::StoreParamV2";
69009467b48Spatrick case NVPTXISD::StoreParamV4:
69109467b48Spatrick return "NVPTXISD::StoreParamV4";
69209467b48Spatrick case NVPTXISD::StoreParamS32:
69309467b48Spatrick return "NVPTXISD::StoreParamS32";
69409467b48Spatrick case NVPTXISD::StoreParamU32:
69509467b48Spatrick return "NVPTXISD::StoreParamU32";
69609467b48Spatrick case NVPTXISD::CallArgBegin:
69709467b48Spatrick return "NVPTXISD::CallArgBegin";
69809467b48Spatrick case NVPTXISD::CallArg:
69909467b48Spatrick return "NVPTXISD::CallArg";
70009467b48Spatrick case NVPTXISD::LastCallArg:
70109467b48Spatrick return "NVPTXISD::LastCallArg";
70209467b48Spatrick case NVPTXISD::CallArgEnd:
70309467b48Spatrick return "NVPTXISD::CallArgEnd";
70409467b48Spatrick case NVPTXISD::CallVoid:
70509467b48Spatrick return "NVPTXISD::CallVoid";
70609467b48Spatrick case NVPTXISD::CallVal:
70709467b48Spatrick return "NVPTXISD::CallVal";
70809467b48Spatrick case NVPTXISD::CallSymbol:
70909467b48Spatrick return "NVPTXISD::CallSymbol";
71009467b48Spatrick case NVPTXISD::Prototype:
71109467b48Spatrick return "NVPTXISD::Prototype";
71209467b48Spatrick case NVPTXISD::MoveParam:
71309467b48Spatrick return "NVPTXISD::MoveParam";
71409467b48Spatrick case NVPTXISD::StoreRetval:
71509467b48Spatrick return "NVPTXISD::StoreRetval";
71609467b48Spatrick case NVPTXISD::StoreRetvalV2:
71709467b48Spatrick return "NVPTXISD::StoreRetvalV2";
71809467b48Spatrick case NVPTXISD::StoreRetvalV4:
71909467b48Spatrick return "NVPTXISD::StoreRetvalV4";
72009467b48Spatrick case NVPTXISD::PseudoUseParam:
72109467b48Spatrick return "NVPTXISD::PseudoUseParam";
72209467b48Spatrick case NVPTXISD::RETURN:
72309467b48Spatrick return "NVPTXISD::RETURN";
72409467b48Spatrick case NVPTXISD::CallSeqBegin:
72509467b48Spatrick return "NVPTXISD::CallSeqBegin";
72609467b48Spatrick case NVPTXISD::CallSeqEnd:
72709467b48Spatrick return "NVPTXISD::CallSeqEnd";
72809467b48Spatrick case NVPTXISD::CallPrototype:
72909467b48Spatrick return "NVPTXISD::CallPrototype";
73009467b48Spatrick case NVPTXISD::ProxyReg:
73109467b48Spatrick return "NVPTXISD::ProxyReg";
73209467b48Spatrick case NVPTXISD::LoadV2:
73309467b48Spatrick return "NVPTXISD::LoadV2";
73409467b48Spatrick case NVPTXISD::LoadV4:
73509467b48Spatrick return "NVPTXISD::LoadV4";
73609467b48Spatrick case NVPTXISD::LDGV2:
73709467b48Spatrick return "NVPTXISD::LDGV2";
73809467b48Spatrick case NVPTXISD::LDGV4:
73909467b48Spatrick return "NVPTXISD::LDGV4";
74009467b48Spatrick case NVPTXISD::LDUV2:
74109467b48Spatrick return "NVPTXISD::LDUV2";
74209467b48Spatrick case NVPTXISD::LDUV4:
74309467b48Spatrick return "NVPTXISD::LDUV4";
74409467b48Spatrick case NVPTXISD::StoreV2:
74509467b48Spatrick return "NVPTXISD::StoreV2";
74609467b48Spatrick case NVPTXISD::StoreV4:
74709467b48Spatrick return "NVPTXISD::StoreV4";
74809467b48Spatrick case NVPTXISD::FUN_SHFL_CLAMP:
74909467b48Spatrick return "NVPTXISD::FUN_SHFL_CLAMP";
75009467b48Spatrick case NVPTXISD::FUN_SHFR_CLAMP:
75109467b48Spatrick return "NVPTXISD::FUN_SHFR_CLAMP";
75209467b48Spatrick case NVPTXISD::IMAD:
75309467b48Spatrick return "NVPTXISD::IMAD";
75409467b48Spatrick case NVPTXISD::SETP_F16X2:
75509467b48Spatrick return "NVPTXISD::SETP_F16X2";
75609467b48Spatrick case NVPTXISD::Dummy:
75709467b48Spatrick return "NVPTXISD::Dummy";
75809467b48Spatrick case NVPTXISD::MUL_WIDE_SIGNED:
75909467b48Spatrick return "NVPTXISD::MUL_WIDE_SIGNED";
76009467b48Spatrick case NVPTXISD::MUL_WIDE_UNSIGNED:
76109467b48Spatrick return "NVPTXISD::MUL_WIDE_UNSIGNED";
76209467b48Spatrick case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32";
76309467b48Spatrick case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
76409467b48Spatrick case NVPTXISD::Tex1DFloatFloatLevel:
76509467b48Spatrick return "NVPTXISD::Tex1DFloatFloatLevel";
76609467b48Spatrick case NVPTXISD::Tex1DFloatFloatGrad:
76709467b48Spatrick return "NVPTXISD::Tex1DFloatFloatGrad";
76809467b48Spatrick case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32";
76909467b48Spatrick case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float";
77009467b48Spatrick case NVPTXISD::Tex1DS32FloatLevel:
77109467b48Spatrick return "NVPTXISD::Tex1DS32FloatLevel";
77209467b48Spatrick case NVPTXISD::Tex1DS32FloatGrad:
77309467b48Spatrick return "NVPTXISD::Tex1DS32FloatGrad";
77409467b48Spatrick case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32";
77509467b48Spatrick case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float";
77609467b48Spatrick case NVPTXISD::Tex1DU32FloatLevel:
77709467b48Spatrick return "NVPTXISD::Tex1DU32FloatLevel";
77809467b48Spatrick case NVPTXISD::Tex1DU32FloatGrad:
77909467b48Spatrick return "NVPTXISD::Tex1DU32FloatGrad";
78009467b48Spatrick case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32";
78109467b48Spatrick case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
78209467b48Spatrick case NVPTXISD::Tex1DArrayFloatFloatLevel:
78309467b48Spatrick return "NVPTXISD::Tex1DArrayFloatFloatLevel";
78409467b48Spatrick case NVPTXISD::Tex1DArrayFloatFloatGrad:
78509467b48Spatrick return "NVPTXISD::Tex1DArrayFloatFloatGrad";
78609467b48Spatrick case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32";
78709467b48Spatrick case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float";
78809467b48Spatrick case NVPTXISD::Tex1DArrayS32FloatLevel:
78909467b48Spatrick return "NVPTXISD::Tex1DArrayS32FloatLevel";
79009467b48Spatrick case NVPTXISD::Tex1DArrayS32FloatGrad:
79109467b48Spatrick return "NVPTXISD::Tex1DArrayS32FloatGrad";
79209467b48Spatrick case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32";
79309467b48Spatrick case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float";
79409467b48Spatrick case NVPTXISD::Tex1DArrayU32FloatLevel:
79509467b48Spatrick return "NVPTXISD::Tex1DArrayU32FloatLevel";
79609467b48Spatrick case NVPTXISD::Tex1DArrayU32FloatGrad:
79709467b48Spatrick return "NVPTXISD::Tex1DArrayU32FloatGrad";
79809467b48Spatrick case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32";
79909467b48Spatrick case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat";
80009467b48Spatrick case NVPTXISD::Tex2DFloatFloatLevel:
80109467b48Spatrick return "NVPTXISD::Tex2DFloatFloatLevel";
80209467b48Spatrick case NVPTXISD::Tex2DFloatFloatGrad:
80309467b48Spatrick return "NVPTXISD::Tex2DFloatFloatGrad";
80409467b48Spatrick case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32";
80509467b48Spatrick case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float";
80609467b48Spatrick case NVPTXISD::Tex2DS32FloatLevel:
80709467b48Spatrick return "NVPTXISD::Tex2DS32FloatLevel";
80809467b48Spatrick case NVPTXISD::Tex2DS32FloatGrad:
80909467b48Spatrick return "NVPTXISD::Tex2DS32FloatGrad";
81009467b48Spatrick case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32";
81109467b48Spatrick case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float";
81209467b48Spatrick case NVPTXISD::Tex2DU32FloatLevel:
81309467b48Spatrick return "NVPTXISD::Tex2DU32FloatLevel";
81409467b48Spatrick case NVPTXISD::Tex2DU32FloatGrad:
81509467b48Spatrick return "NVPTXISD::Tex2DU32FloatGrad";
81609467b48Spatrick case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32";
81709467b48Spatrick case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
81809467b48Spatrick case NVPTXISD::Tex2DArrayFloatFloatLevel:
81909467b48Spatrick return "NVPTXISD::Tex2DArrayFloatFloatLevel";
82009467b48Spatrick case NVPTXISD::Tex2DArrayFloatFloatGrad:
82109467b48Spatrick return "NVPTXISD::Tex2DArrayFloatFloatGrad";
82209467b48Spatrick case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32";
82309467b48Spatrick case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float";
82409467b48Spatrick case NVPTXISD::Tex2DArrayS32FloatLevel:
82509467b48Spatrick return "NVPTXISD::Tex2DArrayS32FloatLevel";
82609467b48Spatrick case NVPTXISD::Tex2DArrayS32FloatGrad:
82709467b48Spatrick return "NVPTXISD::Tex2DArrayS32FloatGrad";
82809467b48Spatrick case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32";
82909467b48Spatrick case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float";
83009467b48Spatrick case NVPTXISD::Tex2DArrayU32FloatLevel:
83109467b48Spatrick return "NVPTXISD::Tex2DArrayU32FloatLevel";
83209467b48Spatrick case NVPTXISD::Tex2DArrayU32FloatGrad:
83309467b48Spatrick return "NVPTXISD::Tex2DArrayU32FloatGrad";
83409467b48Spatrick case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32";
83509467b48Spatrick case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat";
83609467b48Spatrick case NVPTXISD::Tex3DFloatFloatLevel:
83709467b48Spatrick return "NVPTXISD::Tex3DFloatFloatLevel";
83809467b48Spatrick case NVPTXISD::Tex3DFloatFloatGrad:
83909467b48Spatrick return "NVPTXISD::Tex3DFloatFloatGrad";
84009467b48Spatrick case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32";
84109467b48Spatrick case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float";
84209467b48Spatrick case NVPTXISD::Tex3DS32FloatLevel:
84309467b48Spatrick return "NVPTXISD::Tex3DS32FloatLevel";
84409467b48Spatrick case NVPTXISD::Tex3DS32FloatGrad:
84509467b48Spatrick return "NVPTXISD::Tex3DS32FloatGrad";
84609467b48Spatrick case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32";
84709467b48Spatrick case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float";
84809467b48Spatrick case NVPTXISD::Tex3DU32FloatLevel:
84909467b48Spatrick return "NVPTXISD::Tex3DU32FloatLevel";
85009467b48Spatrick case NVPTXISD::Tex3DU32FloatGrad:
85109467b48Spatrick return "NVPTXISD::Tex3DU32FloatGrad";
85209467b48Spatrick case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat";
85309467b48Spatrick case NVPTXISD::TexCubeFloatFloatLevel:
85409467b48Spatrick return "NVPTXISD::TexCubeFloatFloatLevel";
85509467b48Spatrick case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float";
85609467b48Spatrick case NVPTXISD::TexCubeS32FloatLevel:
85709467b48Spatrick return "NVPTXISD::TexCubeS32FloatLevel";
85809467b48Spatrick case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float";
85909467b48Spatrick case NVPTXISD::TexCubeU32FloatLevel:
86009467b48Spatrick return "NVPTXISD::TexCubeU32FloatLevel";
86109467b48Spatrick case NVPTXISD::TexCubeArrayFloatFloat:
86209467b48Spatrick return "NVPTXISD::TexCubeArrayFloatFloat";
86309467b48Spatrick case NVPTXISD::TexCubeArrayFloatFloatLevel:
86409467b48Spatrick return "NVPTXISD::TexCubeArrayFloatFloatLevel";
86509467b48Spatrick case NVPTXISD::TexCubeArrayS32Float:
86609467b48Spatrick return "NVPTXISD::TexCubeArrayS32Float";
86709467b48Spatrick case NVPTXISD::TexCubeArrayS32FloatLevel:
86809467b48Spatrick return "NVPTXISD::TexCubeArrayS32FloatLevel";
86909467b48Spatrick case NVPTXISD::TexCubeArrayU32Float:
87009467b48Spatrick return "NVPTXISD::TexCubeArrayU32Float";
87109467b48Spatrick case NVPTXISD::TexCubeArrayU32FloatLevel:
87209467b48Spatrick return "NVPTXISD::TexCubeArrayU32FloatLevel";
87309467b48Spatrick case NVPTXISD::Tld4R2DFloatFloat:
87409467b48Spatrick return "NVPTXISD::Tld4R2DFloatFloat";
87509467b48Spatrick case NVPTXISD::Tld4G2DFloatFloat:
87609467b48Spatrick return "NVPTXISD::Tld4G2DFloatFloat";
87709467b48Spatrick case NVPTXISD::Tld4B2DFloatFloat:
87809467b48Spatrick return "NVPTXISD::Tld4B2DFloatFloat";
87909467b48Spatrick case NVPTXISD::Tld4A2DFloatFloat:
88009467b48Spatrick return "NVPTXISD::Tld4A2DFloatFloat";
88109467b48Spatrick case NVPTXISD::Tld4R2DS64Float:
88209467b48Spatrick return "NVPTXISD::Tld4R2DS64Float";
88309467b48Spatrick case NVPTXISD::Tld4G2DS64Float:
88409467b48Spatrick return "NVPTXISD::Tld4G2DS64Float";
88509467b48Spatrick case NVPTXISD::Tld4B2DS64Float:
88609467b48Spatrick return "NVPTXISD::Tld4B2DS64Float";
88709467b48Spatrick case NVPTXISD::Tld4A2DS64Float:
88809467b48Spatrick return "NVPTXISD::Tld4A2DS64Float";
88909467b48Spatrick case NVPTXISD::Tld4R2DU64Float:
89009467b48Spatrick return "NVPTXISD::Tld4R2DU64Float";
89109467b48Spatrick case NVPTXISD::Tld4G2DU64Float:
89209467b48Spatrick return "NVPTXISD::Tld4G2DU64Float";
89309467b48Spatrick case NVPTXISD::Tld4B2DU64Float:
89409467b48Spatrick return "NVPTXISD::Tld4B2DU64Float";
89509467b48Spatrick case NVPTXISD::Tld4A2DU64Float:
89609467b48Spatrick return "NVPTXISD::Tld4A2DU64Float";
89709467b48Spatrick
89809467b48Spatrick case NVPTXISD::TexUnified1DFloatS32:
89909467b48Spatrick return "NVPTXISD::TexUnified1DFloatS32";
90009467b48Spatrick case NVPTXISD::TexUnified1DFloatFloat:
90109467b48Spatrick return "NVPTXISD::TexUnified1DFloatFloat";
90209467b48Spatrick case NVPTXISD::TexUnified1DFloatFloatLevel:
90309467b48Spatrick return "NVPTXISD::TexUnified1DFloatFloatLevel";
90409467b48Spatrick case NVPTXISD::TexUnified1DFloatFloatGrad:
90509467b48Spatrick return "NVPTXISD::TexUnified1DFloatFloatGrad";
90609467b48Spatrick case NVPTXISD::TexUnified1DS32S32:
90709467b48Spatrick return "NVPTXISD::TexUnified1DS32S32";
90809467b48Spatrick case NVPTXISD::TexUnified1DS32Float:
90909467b48Spatrick return "NVPTXISD::TexUnified1DS32Float";
91009467b48Spatrick case NVPTXISD::TexUnified1DS32FloatLevel:
91109467b48Spatrick return "NVPTXISD::TexUnified1DS32FloatLevel";
91209467b48Spatrick case NVPTXISD::TexUnified1DS32FloatGrad:
91309467b48Spatrick return "NVPTXISD::TexUnified1DS32FloatGrad";
91409467b48Spatrick case NVPTXISD::TexUnified1DU32S32:
91509467b48Spatrick return "NVPTXISD::TexUnified1DU32S32";
91609467b48Spatrick case NVPTXISD::TexUnified1DU32Float:
91709467b48Spatrick return "NVPTXISD::TexUnified1DU32Float";
91809467b48Spatrick case NVPTXISD::TexUnified1DU32FloatLevel:
91909467b48Spatrick return "NVPTXISD::TexUnified1DU32FloatLevel";
92009467b48Spatrick case NVPTXISD::TexUnified1DU32FloatGrad:
92109467b48Spatrick return "NVPTXISD::TexUnified1DU32FloatGrad";
92209467b48Spatrick case NVPTXISD::TexUnified1DArrayFloatS32:
92309467b48Spatrick return "NVPTXISD::TexUnified1DArrayFloatS32";
92409467b48Spatrick case NVPTXISD::TexUnified1DArrayFloatFloat:
92509467b48Spatrick return "NVPTXISD::TexUnified1DArrayFloatFloat";
92609467b48Spatrick case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
92709467b48Spatrick return "NVPTXISD::TexUnified1DArrayFloatFloatLevel";
92809467b48Spatrick case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
92909467b48Spatrick return "NVPTXISD::TexUnified1DArrayFloatFloatGrad";
93009467b48Spatrick case NVPTXISD::TexUnified1DArrayS32S32:
93109467b48Spatrick return "NVPTXISD::TexUnified1DArrayS32S32";
93209467b48Spatrick case NVPTXISD::TexUnified1DArrayS32Float:
93309467b48Spatrick return "NVPTXISD::TexUnified1DArrayS32Float";
93409467b48Spatrick case NVPTXISD::TexUnified1DArrayS32FloatLevel:
93509467b48Spatrick return "NVPTXISD::TexUnified1DArrayS32FloatLevel";
93609467b48Spatrick case NVPTXISD::TexUnified1DArrayS32FloatGrad:
93709467b48Spatrick return "NVPTXISD::TexUnified1DArrayS32FloatGrad";
93809467b48Spatrick case NVPTXISD::TexUnified1DArrayU32S32:
93909467b48Spatrick return "NVPTXISD::TexUnified1DArrayU32S32";
94009467b48Spatrick case NVPTXISD::TexUnified1DArrayU32Float:
94109467b48Spatrick return "NVPTXISD::TexUnified1DArrayU32Float";
94209467b48Spatrick case NVPTXISD::TexUnified1DArrayU32FloatLevel:
94309467b48Spatrick return "NVPTXISD::TexUnified1DArrayU32FloatLevel";
94409467b48Spatrick case NVPTXISD::TexUnified1DArrayU32FloatGrad:
94509467b48Spatrick return "NVPTXISD::TexUnified1DArrayU32FloatGrad";
94609467b48Spatrick case NVPTXISD::TexUnified2DFloatS32:
94709467b48Spatrick return "NVPTXISD::TexUnified2DFloatS32";
94809467b48Spatrick case NVPTXISD::TexUnified2DFloatFloat:
94909467b48Spatrick return "NVPTXISD::TexUnified2DFloatFloat";
95009467b48Spatrick case NVPTXISD::TexUnified2DFloatFloatLevel:
95109467b48Spatrick return "NVPTXISD::TexUnified2DFloatFloatLevel";
95209467b48Spatrick case NVPTXISD::TexUnified2DFloatFloatGrad:
95309467b48Spatrick return "NVPTXISD::TexUnified2DFloatFloatGrad";
95409467b48Spatrick case NVPTXISD::TexUnified2DS32S32:
95509467b48Spatrick return "NVPTXISD::TexUnified2DS32S32";
95609467b48Spatrick case NVPTXISD::TexUnified2DS32Float:
95709467b48Spatrick return "NVPTXISD::TexUnified2DS32Float";
95809467b48Spatrick case NVPTXISD::TexUnified2DS32FloatLevel:
95909467b48Spatrick return "NVPTXISD::TexUnified2DS32FloatLevel";
96009467b48Spatrick case NVPTXISD::TexUnified2DS32FloatGrad:
96109467b48Spatrick return "NVPTXISD::TexUnified2DS32FloatGrad";
96209467b48Spatrick case NVPTXISD::TexUnified2DU32S32:
96309467b48Spatrick return "NVPTXISD::TexUnified2DU32S32";
96409467b48Spatrick case NVPTXISD::TexUnified2DU32Float:
96509467b48Spatrick return "NVPTXISD::TexUnified2DU32Float";
96609467b48Spatrick case NVPTXISD::TexUnified2DU32FloatLevel:
96709467b48Spatrick return "NVPTXISD::TexUnified2DU32FloatLevel";
96809467b48Spatrick case NVPTXISD::TexUnified2DU32FloatGrad:
96909467b48Spatrick return "NVPTXISD::TexUnified2DU32FloatGrad";
97009467b48Spatrick case NVPTXISD::TexUnified2DArrayFloatS32:
97109467b48Spatrick return "NVPTXISD::TexUnified2DArrayFloatS32";
97209467b48Spatrick case NVPTXISD::TexUnified2DArrayFloatFloat:
97309467b48Spatrick return "NVPTXISD::TexUnified2DArrayFloatFloat";
97409467b48Spatrick case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
97509467b48Spatrick return "NVPTXISD::TexUnified2DArrayFloatFloatLevel";
97609467b48Spatrick case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
97709467b48Spatrick return "NVPTXISD::TexUnified2DArrayFloatFloatGrad";
97809467b48Spatrick case NVPTXISD::TexUnified2DArrayS32S32:
97909467b48Spatrick return "NVPTXISD::TexUnified2DArrayS32S32";
98009467b48Spatrick case NVPTXISD::TexUnified2DArrayS32Float:
98109467b48Spatrick return "NVPTXISD::TexUnified2DArrayS32Float";
98209467b48Spatrick case NVPTXISD::TexUnified2DArrayS32FloatLevel:
98309467b48Spatrick return "NVPTXISD::TexUnified2DArrayS32FloatLevel";
98409467b48Spatrick case NVPTXISD::TexUnified2DArrayS32FloatGrad:
98509467b48Spatrick return "NVPTXISD::TexUnified2DArrayS32FloatGrad";
98609467b48Spatrick case NVPTXISD::TexUnified2DArrayU32S32:
98709467b48Spatrick return "NVPTXISD::TexUnified2DArrayU32S32";
98809467b48Spatrick case NVPTXISD::TexUnified2DArrayU32Float:
98909467b48Spatrick return "NVPTXISD::TexUnified2DArrayU32Float";
99009467b48Spatrick case NVPTXISD::TexUnified2DArrayU32FloatLevel:
99109467b48Spatrick return "NVPTXISD::TexUnified2DArrayU32FloatLevel";
99209467b48Spatrick case NVPTXISD::TexUnified2DArrayU32FloatGrad:
99309467b48Spatrick return "NVPTXISD::TexUnified2DArrayU32FloatGrad";
99409467b48Spatrick case NVPTXISD::TexUnified3DFloatS32:
99509467b48Spatrick return "NVPTXISD::TexUnified3DFloatS32";
99609467b48Spatrick case NVPTXISD::TexUnified3DFloatFloat:
99709467b48Spatrick return "NVPTXISD::TexUnified3DFloatFloat";
99809467b48Spatrick case NVPTXISD::TexUnified3DFloatFloatLevel:
99909467b48Spatrick return "NVPTXISD::TexUnified3DFloatFloatLevel";
100009467b48Spatrick case NVPTXISD::TexUnified3DFloatFloatGrad:
100109467b48Spatrick return "NVPTXISD::TexUnified3DFloatFloatGrad";
100209467b48Spatrick case NVPTXISD::TexUnified3DS32S32:
100309467b48Spatrick return "NVPTXISD::TexUnified3DS32S32";
100409467b48Spatrick case NVPTXISD::TexUnified3DS32Float:
100509467b48Spatrick return "NVPTXISD::TexUnified3DS32Float";
100609467b48Spatrick case NVPTXISD::TexUnified3DS32FloatLevel:
100709467b48Spatrick return "NVPTXISD::TexUnified3DS32FloatLevel";
100809467b48Spatrick case NVPTXISD::TexUnified3DS32FloatGrad:
100909467b48Spatrick return "NVPTXISD::TexUnified3DS32FloatGrad";
101009467b48Spatrick case NVPTXISD::TexUnified3DU32S32:
101109467b48Spatrick return "NVPTXISD::TexUnified3DU32S32";
101209467b48Spatrick case NVPTXISD::TexUnified3DU32Float:
101309467b48Spatrick return "NVPTXISD::TexUnified3DU32Float";
101409467b48Spatrick case NVPTXISD::TexUnified3DU32FloatLevel:
101509467b48Spatrick return "NVPTXISD::TexUnified3DU32FloatLevel";
101609467b48Spatrick case NVPTXISD::TexUnified3DU32FloatGrad:
101709467b48Spatrick return "NVPTXISD::TexUnified3DU32FloatGrad";
101809467b48Spatrick case NVPTXISD::TexUnifiedCubeFloatFloat:
101909467b48Spatrick return "NVPTXISD::TexUnifiedCubeFloatFloat";
102009467b48Spatrick case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
102109467b48Spatrick return "NVPTXISD::TexUnifiedCubeFloatFloatLevel";
102209467b48Spatrick case NVPTXISD::TexUnifiedCubeS32Float:
102309467b48Spatrick return "NVPTXISD::TexUnifiedCubeS32Float";
102409467b48Spatrick case NVPTXISD::TexUnifiedCubeS32FloatLevel:
102509467b48Spatrick return "NVPTXISD::TexUnifiedCubeS32FloatLevel";
102609467b48Spatrick case NVPTXISD::TexUnifiedCubeU32Float:
102709467b48Spatrick return "NVPTXISD::TexUnifiedCubeU32Float";
102809467b48Spatrick case NVPTXISD::TexUnifiedCubeU32FloatLevel:
102909467b48Spatrick return "NVPTXISD::TexUnifiedCubeU32FloatLevel";
103009467b48Spatrick case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
103109467b48Spatrick return "NVPTXISD::TexUnifiedCubeArrayFloatFloat";
103209467b48Spatrick case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
103309467b48Spatrick return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel";
103409467b48Spatrick case NVPTXISD::TexUnifiedCubeArrayS32Float:
103509467b48Spatrick return "NVPTXISD::TexUnifiedCubeArrayS32Float";
103609467b48Spatrick case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
103709467b48Spatrick return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel";
103809467b48Spatrick case NVPTXISD::TexUnifiedCubeArrayU32Float:
103909467b48Spatrick return "NVPTXISD::TexUnifiedCubeArrayU32Float";
104009467b48Spatrick case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
104109467b48Spatrick return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
104209467b48Spatrick case NVPTXISD::Tld4UnifiedR2DFloatFloat:
104309467b48Spatrick return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
104409467b48Spatrick case NVPTXISD::Tld4UnifiedG2DFloatFloat:
104509467b48Spatrick return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
104609467b48Spatrick case NVPTXISD::Tld4UnifiedB2DFloatFloat:
104709467b48Spatrick return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
104809467b48Spatrick case NVPTXISD::Tld4UnifiedA2DFloatFloat:
104909467b48Spatrick return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
105009467b48Spatrick case NVPTXISD::Tld4UnifiedR2DS64Float:
105109467b48Spatrick return "NVPTXISD::Tld4UnifiedR2DS64Float";
105209467b48Spatrick case NVPTXISD::Tld4UnifiedG2DS64Float:
105309467b48Spatrick return "NVPTXISD::Tld4UnifiedG2DS64Float";
105409467b48Spatrick case NVPTXISD::Tld4UnifiedB2DS64Float:
105509467b48Spatrick return "NVPTXISD::Tld4UnifiedB2DS64Float";
105609467b48Spatrick case NVPTXISD::Tld4UnifiedA2DS64Float:
105709467b48Spatrick return "NVPTXISD::Tld4UnifiedA2DS64Float";
105809467b48Spatrick case NVPTXISD::Tld4UnifiedR2DU64Float:
105909467b48Spatrick return "NVPTXISD::Tld4UnifiedR2DU64Float";
106009467b48Spatrick case NVPTXISD::Tld4UnifiedG2DU64Float:
106109467b48Spatrick return "NVPTXISD::Tld4UnifiedG2DU64Float";
106209467b48Spatrick case NVPTXISD::Tld4UnifiedB2DU64Float:
106309467b48Spatrick return "NVPTXISD::Tld4UnifiedB2DU64Float";
106409467b48Spatrick case NVPTXISD::Tld4UnifiedA2DU64Float:
106509467b48Spatrick return "NVPTXISD::Tld4UnifiedA2DU64Float";
106609467b48Spatrick
106709467b48Spatrick case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp";
106809467b48Spatrick case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp";
106909467b48Spatrick case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp";
107009467b48Spatrick case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp";
107109467b48Spatrick case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp";
107209467b48Spatrick case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp";
107309467b48Spatrick case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp";
107409467b48Spatrick case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp";
107509467b48Spatrick case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp";
107609467b48Spatrick case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp";
107709467b48Spatrick case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp";
107809467b48Spatrick
107909467b48Spatrick case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp";
108009467b48Spatrick case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp";
108109467b48Spatrick case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp";
108209467b48Spatrick case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp";
108309467b48Spatrick case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp";
108409467b48Spatrick case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp";
108509467b48Spatrick case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp";
108609467b48Spatrick case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp";
108709467b48Spatrick case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp";
108809467b48Spatrick case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp";
108909467b48Spatrick case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp";
109009467b48Spatrick
109109467b48Spatrick case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp";
109209467b48Spatrick case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp";
109309467b48Spatrick case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp";
109409467b48Spatrick case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp";
109509467b48Spatrick case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp";
109609467b48Spatrick case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp";
109709467b48Spatrick case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp";
109809467b48Spatrick case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp";
109909467b48Spatrick case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp";
110009467b48Spatrick case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp";
110109467b48Spatrick case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp";
110209467b48Spatrick
110309467b48Spatrick case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp";
110409467b48Spatrick case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp";
110509467b48Spatrick case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp";
110609467b48Spatrick case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp";
110709467b48Spatrick case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp";
110809467b48Spatrick case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp";
110909467b48Spatrick case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp";
111009467b48Spatrick case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp";
111109467b48Spatrick case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp";
111209467b48Spatrick case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp";
111309467b48Spatrick case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp";
111409467b48Spatrick
111509467b48Spatrick case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp";
111609467b48Spatrick case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp";
111709467b48Spatrick case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp";
111809467b48Spatrick case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp";
111909467b48Spatrick case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp";
112009467b48Spatrick case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp";
112109467b48Spatrick case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp";
112209467b48Spatrick case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp";
112309467b48Spatrick case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp";
112409467b48Spatrick case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp";
112509467b48Spatrick case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp";
112609467b48Spatrick
112709467b48Spatrick case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap";
112809467b48Spatrick case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap";
112909467b48Spatrick case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap";
113009467b48Spatrick case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap";
113109467b48Spatrick case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap";
113209467b48Spatrick case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap";
113309467b48Spatrick case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap";
113409467b48Spatrick case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap";
113509467b48Spatrick case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap";
113609467b48Spatrick case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap";
113709467b48Spatrick case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap";
113809467b48Spatrick
113909467b48Spatrick case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap";
114009467b48Spatrick case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap";
114109467b48Spatrick case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap";
114209467b48Spatrick case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap";
114309467b48Spatrick case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap";
114409467b48Spatrick case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap";
114509467b48Spatrick case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap";
114609467b48Spatrick case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap";
114709467b48Spatrick case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap";
114809467b48Spatrick case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap";
114909467b48Spatrick case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap";
115009467b48Spatrick
115109467b48Spatrick case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap";
115209467b48Spatrick case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap";
115309467b48Spatrick case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap";
115409467b48Spatrick case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap";
115509467b48Spatrick case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap";
115609467b48Spatrick case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap";
115709467b48Spatrick case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap";
115809467b48Spatrick case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap";
115909467b48Spatrick case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap";
116009467b48Spatrick case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap";
116109467b48Spatrick case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap";
116209467b48Spatrick
116309467b48Spatrick case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap";
116409467b48Spatrick case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap";
116509467b48Spatrick case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap";
116609467b48Spatrick case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap";
116709467b48Spatrick case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap";
116809467b48Spatrick case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap";
116909467b48Spatrick case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap";
117009467b48Spatrick case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap";
117109467b48Spatrick case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap";
117209467b48Spatrick case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap";
117309467b48Spatrick case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap";
117409467b48Spatrick
117509467b48Spatrick case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap";
117609467b48Spatrick case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap";
117709467b48Spatrick case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap";
117809467b48Spatrick case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap";
117909467b48Spatrick case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap";
118009467b48Spatrick case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap";
118109467b48Spatrick case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap";
118209467b48Spatrick case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap";
118309467b48Spatrick case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap";
118409467b48Spatrick case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap";
118509467b48Spatrick case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap";
118609467b48Spatrick
118709467b48Spatrick case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero";
118809467b48Spatrick case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero";
118909467b48Spatrick case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero";
119009467b48Spatrick case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero";
119109467b48Spatrick case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero";
119209467b48Spatrick case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero";
119309467b48Spatrick case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero";
119409467b48Spatrick case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero";
119509467b48Spatrick case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero";
119609467b48Spatrick case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero";
119709467b48Spatrick case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero";
119809467b48Spatrick
119909467b48Spatrick case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero";
120009467b48Spatrick case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero";
120109467b48Spatrick case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero";
120209467b48Spatrick case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero";
120309467b48Spatrick case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero";
120409467b48Spatrick case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero";
120509467b48Spatrick case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero";
120609467b48Spatrick case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero";
120709467b48Spatrick case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero";
120809467b48Spatrick case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero";
120909467b48Spatrick case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero";
121009467b48Spatrick
121109467b48Spatrick case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero";
121209467b48Spatrick case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero";
121309467b48Spatrick case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero";
121409467b48Spatrick case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero";
121509467b48Spatrick case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero";
121609467b48Spatrick case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero";
121709467b48Spatrick case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero";
121809467b48Spatrick case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero";
121909467b48Spatrick case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero";
122009467b48Spatrick case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero";
122109467b48Spatrick case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero";
122209467b48Spatrick
122309467b48Spatrick case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero";
122409467b48Spatrick case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero";
122509467b48Spatrick case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero";
122609467b48Spatrick case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero";
122709467b48Spatrick case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero";
122809467b48Spatrick case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero";
122909467b48Spatrick case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero";
123009467b48Spatrick case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero";
123109467b48Spatrick case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero";
123209467b48Spatrick case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero";
123309467b48Spatrick case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero";
123409467b48Spatrick
123509467b48Spatrick case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero";
123609467b48Spatrick case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero";
123709467b48Spatrick case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero";
123809467b48Spatrick case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero";
123909467b48Spatrick case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero";
124009467b48Spatrick case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero";
124109467b48Spatrick case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero";
124209467b48Spatrick case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero";
124309467b48Spatrick case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero";
124409467b48Spatrick case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero";
124509467b48Spatrick case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero";
124609467b48Spatrick }
124709467b48Spatrick return nullptr;
124809467b48Spatrick }
124909467b48Spatrick
125009467b48Spatrick TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(MVT VT) const125109467b48Spatrick NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
125273471bf0Spatrick if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
125373471bf0Spatrick VT.getScalarType() == MVT::i1)
125409467b48Spatrick return TypeSplitVector;
125509467b48Spatrick if (VT == MVT::v2f16)
125609467b48Spatrick return TypeLegal;
125709467b48Spatrick return TargetLoweringBase::getPreferredVectorAction(VT);
125809467b48Spatrick }
125909467b48Spatrick
getSqrtEstimate(SDValue Operand,SelectionDAG & DAG,int Enabled,int & ExtraSteps,bool & UseOneConst,bool Reciprocal) const126009467b48Spatrick SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
126109467b48Spatrick int Enabled, int &ExtraSteps,
126209467b48Spatrick bool &UseOneConst,
126309467b48Spatrick bool Reciprocal) const {
126409467b48Spatrick if (!(Enabled == ReciprocalEstimate::Enabled ||
126509467b48Spatrick (Enabled == ReciprocalEstimate::Unspecified && !usePrecSqrtF32())))
126609467b48Spatrick return SDValue();
126709467b48Spatrick
126809467b48Spatrick if (ExtraSteps == ReciprocalEstimate::Unspecified)
126909467b48Spatrick ExtraSteps = 0;
127009467b48Spatrick
127109467b48Spatrick SDLoc DL(Operand);
127209467b48Spatrick EVT VT = Operand.getValueType();
127309467b48Spatrick bool Ftz = useF32FTZ(DAG.getMachineFunction());
127409467b48Spatrick
127509467b48Spatrick auto MakeIntrinsicCall = [&](Intrinsic::ID IID) {
127609467b48Spatrick return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
127709467b48Spatrick DAG.getConstant(IID, DL, MVT::i32), Operand);
127809467b48Spatrick };
127909467b48Spatrick
128009467b48Spatrick // The sqrt and rsqrt refinement processes assume we always start out with an
128109467b48Spatrick // approximation of the rsqrt. Therefore, if we're going to do any refinement
128209467b48Spatrick // (i.e. ExtraSteps > 0), we must return an rsqrt. But if we're *not* doing
128309467b48Spatrick // any refinement, we must return a regular sqrt.
128409467b48Spatrick if (Reciprocal || ExtraSteps > 0) {
128509467b48Spatrick if (VT == MVT::f32)
128609467b48Spatrick return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_rsqrt_approx_ftz_f
128709467b48Spatrick : Intrinsic::nvvm_rsqrt_approx_f);
128809467b48Spatrick else if (VT == MVT::f64)
128909467b48Spatrick return MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d);
129009467b48Spatrick else
129109467b48Spatrick return SDValue();
129209467b48Spatrick } else {
129309467b48Spatrick if (VT == MVT::f32)
129409467b48Spatrick return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f
129509467b48Spatrick : Intrinsic::nvvm_sqrt_approx_f);
129609467b48Spatrick else {
129709467b48Spatrick // There's no sqrt.approx.f64 instruction, so we emit
129809467b48Spatrick // reciprocal(rsqrt(x)). This is faster than
129909467b48Spatrick // select(x == 0, 0, x * rsqrt(x)). (In fact, it's faster than plain
130009467b48Spatrick // x * rsqrt(x).)
130109467b48Spatrick return DAG.getNode(
130209467b48Spatrick ISD::INTRINSIC_WO_CHAIN, DL, VT,
130309467b48Spatrick DAG.getConstant(Intrinsic::nvvm_rcp_approx_ftz_d, DL, MVT::i32),
130409467b48Spatrick MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d));
130509467b48Spatrick }
130609467b48Spatrick }
130709467b48Spatrick }
130809467b48Spatrick
130909467b48Spatrick SDValue
LowerGlobalAddress(SDValue Op,SelectionDAG & DAG) const131009467b48Spatrick NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
131109467b48Spatrick SDLoc dl(Op);
131209467b48Spatrick const GlobalAddressSDNode *GAN = cast<GlobalAddressSDNode>(Op);
131309467b48Spatrick auto PtrVT = getPointerTy(DAG.getDataLayout(), GAN->getAddressSpace());
131409467b48Spatrick Op = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, PtrVT);
131509467b48Spatrick return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
131609467b48Spatrick }
131709467b48Spatrick
getPrototype(const DataLayout & DL,Type * retTy,const ArgListTy & Args,const SmallVectorImpl<ISD::OutputArg> & Outs,MaybeAlign retAlignment,std::optional<std::pair<unsigned,const APInt &>> VAInfo,const CallBase & CB,unsigned UniqueCallSite) const131809467b48Spatrick std::string NVPTXTargetLowering::getPrototype(
131909467b48Spatrick const DataLayout &DL, Type *retTy, const ArgListTy &Args,
1320097a140dSpatrick const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign retAlignment,
1321*d415bd75Srobert std::optional<std::pair<unsigned, const APInt &>> VAInfo,
132273471bf0Spatrick const CallBase &CB, unsigned UniqueCallSite) const {
132309467b48Spatrick auto PtrVT = getPointerTy(DL);
132409467b48Spatrick
132509467b48Spatrick bool isABI = (STI.getSmVersion() >= 20);
132609467b48Spatrick assert(isABI && "Non-ABI compilation is not supported");
132709467b48Spatrick if (!isABI)
132809467b48Spatrick return "";
132909467b48Spatrick
1330*d415bd75Srobert std::string Prototype;
1331*d415bd75Srobert raw_string_ostream O(Prototype);
133273471bf0Spatrick O << "prototype_" << UniqueCallSite << " : .callprototype ";
133309467b48Spatrick
133409467b48Spatrick if (retTy->getTypeID() == Type::VoidTyID) {
133509467b48Spatrick O << "()";
133609467b48Spatrick } else {
133709467b48Spatrick O << "(";
133809467b48Spatrick if (retTy->isFloatingPointTy() || (retTy->isIntegerTy() && !retTy->isIntegerTy(128))) {
133909467b48Spatrick unsigned size = 0;
134009467b48Spatrick if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
134109467b48Spatrick size = ITy->getBitWidth();
134209467b48Spatrick } else {
134309467b48Spatrick assert(retTy->isFloatingPointTy() &&
134409467b48Spatrick "Floating point type expected here");
134509467b48Spatrick size = retTy->getPrimitiveSizeInBits();
134609467b48Spatrick }
134709467b48Spatrick // PTX ABI requires all scalar return values to be at least 32
134809467b48Spatrick // bits in size. fp16 normally uses .b16 as its storage type in
134909467b48Spatrick // PTX, so its size must be adjusted here, too.
1350*d415bd75Srobert size = promoteScalarArgumentSize(size);
135109467b48Spatrick
135209467b48Spatrick O << ".param .b" << size << " _";
135309467b48Spatrick } else if (isa<PointerType>(retTy)) {
135409467b48Spatrick O << ".param .b" << PtrVT.getSizeInBits() << " _";
135509467b48Spatrick } else if (retTy->isAggregateType() || retTy->isVectorTy() ||
135609467b48Spatrick retTy->isIntegerTy(128)) {
1357097a140dSpatrick O << ".param .align " << (retAlignment ? retAlignment->value() : 0)
1358097a140dSpatrick << " .b8 _[" << DL.getTypeAllocSize(retTy) << "]";
135909467b48Spatrick } else {
136009467b48Spatrick llvm_unreachable("Unknown return type");
136109467b48Spatrick }
136209467b48Spatrick O << ") ";
136309467b48Spatrick }
136409467b48Spatrick O << "_ (";
136509467b48Spatrick
136609467b48Spatrick bool first = true;
136709467b48Spatrick
1368*d415bd75Srobert const Function *F = CB.getFunction();
1369*d415bd75Srobert unsigned NumArgs = VAInfo ? VAInfo->first : Args.size();
1370*d415bd75Srobert for (unsigned i = 0, OIdx = 0; i != NumArgs; ++i, ++OIdx) {
137109467b48Spatrick Type *Ty = Args[i].Ty;
137209467b48Spatrick if (!first) {
137309467b48Spatrick O << ", ";
137409467b48Spatrick }
137509467b48Spatrick first = false;
137609467b48Spatrick
137709467b48Spatrick if (!Outs[OIdx].Flags.isByVal()) {
137809467b48Spatrick if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
1379*d415bd75Srobert unsigned ParamAlign = 0;
1380097a140dSpatrick const CallInst *CallI = cast<CallInst>(&CB);
138109467b48Spatrick // +1 because index 0 is reserved for return type alignment
1382*d415bd75Srobert if (!getAlign(*CallI, i + 1, ParamAlign))
1383*d415bd75Srobert ParamAlign = getFunctionParamOptimizedAlign(F, Ty, DL).value();
1384*d415bd75Srobert O << ".param .align " << ParamAlign << " .b8 ";
138509467b48Spatrick O << "_";
1386*d415bd75Srobert O << "[" << DL.getTypeAllocSize(Ty) << "]";
138709467b48Spatrick // update the index for Outs
138809467b48Spatrick SmallVector<EVT, 16> vtparts;
138909467b48Spatrick ComputeValueVTs(*this, DL, Ty, vtparts);
139009467b48Spatrick if (unsigned len = vtparts.size())
139109467b48Spatrick OIdx += len - 1;
139209467b48Spatrick continue;
139309467b48Spatrick }
139409467b48Spatrick // i8 types in IR will be i16 types in SDAG
139509467b48Spatrick assert((getValueType(DL, Ty) == Outs[OIdx].VT ||
139609467b48Spatrick (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
139709467b48Spatrick "type mismatch between callee prototype and arguments");
139809467b48Spatrick // scalar type
139909467b48Spatrick unsigned sz = 0;
140009467b48Spatrick if (isa<IntegerType>(Ty)) {
140109467b48Spatrick sz = cast<IntegerType>(Ty)->getBitWidth();
1402*d415bd75Srobert sz = promoteScalarArgumentSize(sz);
140309467b48Spatrick } else if (isa<PointerType>(Ty)) {
140409467b48Spatrick sz = PtrVT.getSizeInBits();
140509467b48Spatrick } else if (Ty->isHalfTy())
140609467b48Spatrick // PTX ABI requires all scalar parameters to be at least 32
140709467b48Spatrick // bits in size. fp16 normally uses .b16 as its storage type
140809467b48Spatrick // in PTX, so its size must be adjusted here, too.
140909467b48Spatrick sz = 32;
141009467b48Spatrick else
141109467b48Spatrick sz = Ty->getPrimitiveSizeInBits();
141209467b48Spatrick O << ".param .b" << sz << " ";
141309467b48Spatrick O << "_";
141409467b48Spatrick continue;
141509467b48Spatrick }
141609467b48Spatrick
1417*d415bd75Srobert Type *ETy = Args[i].IndirectType;
1418*d415bd75Srobert Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
1419*d415bd75Srobert Align ParamByValAlign =
1420*d415bd75Srobert getFunctionByValParamAlign(F, ETy, InitialAlign, DL);
1421*d415bd75Srobert
1422*d415bd75Srobert O << ".param .align " << ParamByValAlign.value() << " .b8 ";
142309467b48Spatrick O << "_";
1424*d415bd75Srobert O << "[" << Outs[OIdx].Flags.getByValSize() << "]";
142509467b48Spatrick }
1426*d415bd75Srobert
1427*d415bd75Srobert if (VAInfo)
1428*d415bd75Srobert O << (first ? "" : ",") << " .param .align " << VAInfo->second
1429*d415bd75Srobert << " .b8 _[]\n";
1430*d415bd75Srobert O << ")";
1431*d415bd75Srobert if (shouldEmitPTXNoReturn(&CB, *nvTM))
1432*d415bd75Srobert O << " .noreturn";
1433*d415bd75Srobert O << ";";
1434*d415bd75Srobert
1435*d415bd75Srobert return Prototype;
143609467b48Spatrick }
143709467b48Spatrick
getArgumentAlignment(SDValue Callee,const CallBase * CB,Type * Ty,unsigned Idx,const DataLayout & DL) const1438097a140dSpatrick Align NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
1439097a140dSpatrick const CallBase *CB, Type *Ty,
1440097a140dSpatrick unsigned Idx,
144109467b48Spatrick const DataLayout &DL) const {
1442097a140dSpatrick if (!CB) {
144309467b48Spatrick // CallSite is zero, fallback to ABI type alignment
1444097a140dSpatrick return DL.getABITypeAlign(Ty);
144509467b48Spatrick }
144609467b48Spatrick
1447097a140dSpatrick unsigned Alignment = 0;
1448097a140dSpatrick const Function *DirectCallee = CB->getCalledFunction();
144909467b48Spatrick
145009467b48Spatrick if (!DirectCallee) {
145109467b48Spatrick // We don't have a direct function symbol, but that may be because of
145209467b48Spatrick // constant cast instructions in the call.
145309467b48Spatrick
145409467b48Spatrick // With bitcast'd call targets, the instruction will be the call
1455097a140dSpatrick if (const auto *CI = dyn_cast<CallInst>(CB)) {
145609467b48Spatrick // Check if we have call alignment metadata
1457097a140dSpatrick if (getAlign(*CI, Idx, Alignment))
1458097a140dSpatrick return Align(Alignment);
145909467b48Spatrick }
1460*d415bd75Srobert DirectCallee = getMaybeBitcastedCallee(CB);
146109467b48Spatrick }
146209467b48Spatrick
146309467b48Spatrick // Check for function alignment information if we found that the
146409467b48Spatrick // ultimate target is a Function
1465*d415bd75Srobert if (DirectCallee) {
1466097a140dSpatrick if (getAlign(*DirectCallee, Idx, Alignment))
1467097a140dSpatrick return Align(Alignment);
1468*d415bd75Srobert // If alignment information is not available, fall back to the
1469*d415bd75Srobert // default function param optimized type alignment
1470*d415bd75Srobert return getFunctionParamOptimizedAlign(DirectCallee, Ty, DL);
1471*d415bd75Srobert }
147209467b48Spatrick
1473*d415bd75Srobert // Call is indirect, fall back to the ABI type alignment
1474097a140dSpatrick return DL.getABITypeAlign(Ty);
147509467b48Spatrick }
147609467b48Spatrick
LowerCall(TargetLowering::CallLoweringInfo & CLI,SmallVectorImpl<SDValue> & InVals) const147709467b48Spatrick SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
147809467b48Spatrick SmallVectorImpl<SDValue> &InVals) const {
1479*d415bd75Srobert
1480*d415bd75Srobert if (CLI.IsVarArg && (STI.getPTXVersion() < 60 || STI.getSmVersion() < 30))
1481*d415bd75Srobert report_fatal_error(
1482*d415bd75Srobert "Support for variadic functions (unsized array parameter) introduced "
1483*d415bd75Srobert "in PTX ISA version 6.0 and requires target sm_30.");
1484*d415bd75Srobert
148509467b48Spatrick SelectionDAG &DAG = CLI.DAG;
148609467b48Spatrick SDLoc dl = CLI.DL;
148709467b48Spatrick SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
148809467b48Spatrick SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
148909467b48Spatrick SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
149009467b48Spatrick SDValue Chain = CLI.Chain;
149109467b48Spatrick SDValue Callee = CLI.Callee;
149209467b48Spatrick bool &isTailCall = CLI.IsTailCall;
149309467b48Spatrick ArgListTy &Args = CLI.getArgs();
149409467b48Spatrick Type *RetTy = CLI.RetTy;
1495097a140dSpatrick const CallBase *CB = CLI.CB;
149609467b48Spatrick const DataLayout &DL = DAG.getDataLayout();
149709467b48Spatrick
149809467b48Spatrick bool isABI = (STI.getSmVersion() >= 20);
149909467b48Spatrick assert(isABI && "Non-ABI compilation is not supported");
150009467b48Spatrick if (!isABI)
150109467b48Spatrick return Chain;
150209467b48Spatrick
1503*d415bd75Srobert // Variadic arguments.
1504*d415bd75Srobert //
1505*d415bd75Srobert // Normally, for each argument, we declare a param scalar or a param
1506*d415bd75Srobert // byte array in the .param space, and store the argument value to that
1507*d415bd75Srobert // param scalar or array starting at offset 0.
1508*d415bd75Srobert //
1509*d415bd75Srobert // In the case of the first variadic argument, we declare a vararg byte array
1510*d415bd75Srobert // with size 0. The exact size of this array isn't known at this point, so
1511*d415bd75Srobert // it'll be patched later. All the variadic arguments will be stored to this
1512*d415bd75Srobert // array at a certain offset (which gets tracked by 'VAOffset'). The offset is
1513*d415bd75Srobert // initially set to 0, so it can be used for non-variadic arguments (which use
1514*d415bd75Srobert // 0 offset) to simplify the code.
1515*d415bd75Srobert //
1516*d415bd75Srobert // After all vararg is processed, 'VAOffset' holds the size of the
1517*d415bd75Srobert // vararg byte array.
1518*d415bd75Srobert
1519*d415bd75Srobert SDValue VADeclareParam; // vararg byte array
1520*d415bd75Srobert unsigned FirstVAArg = CLI.NumFixedArgs; // position of the first variadic
1521*d415bd75Srobert unsigned VAOffset = 0; // current offset in the param array
1522*d415bd75Srobert
152373471bf0Spatrick unsigned UniqueCallSite = GlobalUniqueCallSite.fetch_add(1);
1524*d415bd75Srobert SDValue TempChain = Chain;
152573471bf0Spatrick Chain = DAG.getCALLSEQ_START(Chain, UniqueCallSite, 0, dl);
152609467b48Spatrick SDValue InFlag = Chain.getValue(1);
152709467b48Spatrick
1528*d415bd75Srobert unsigned ParamCount = 0;
152909467b48Spatrick // Args.size() and Outs.size() need not match.
153009467b48Spatrick // Outs.size() will be larger
153109467b48Spatrick // * if there is an aggregate argument with multiple fields (each field
153209467b48Spatrick // showing up separately in Outs)
153309467b48Spatrick // * if there is a vector argument with more than typical vector-length
153409467b48Spatrick // elements (generally if more than 4) where each vector element is
153509467b48Spatrick // individually present in Outs.
153609467b48Spatrick // So a different index should be used for indexing into Outs/OutVals.
153709467b48Spatrick // See similar issue in LowerFormalArguments.
153809467b48Spatrick unsigned OIdx = 0;
153909467b48Spatrick // Declare the .params or .reg need to pass values
154009467b48Spatrick // to the function
154109467b48Spatrick for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
154209467b48Spatrick EVT VT = Outs[OIdx].VT;
154309467b48Spatrick Type *Ty = Args[i].Ty;
1544*d415bd75Srobert bool IsVAArg = (i >= CLI.NumFixedArgs);
1545*d415bd75Srobert bool IsByVal = Outs[OIdx].Flags.isByVal();
154609467b48Spatrick
154709467b48Spatrick SmallVector<EVT, 16> VTs;
154809467b48Spatrick SmallVector<uint64_t, 16> Offsets;
1549*d415bd75Srobert
1550*d415bd75Srobert assert((!IsByVal || Args[i].IndirectType) &&
1551*d415bd75Srobert "byval arg must have indirect type");
1552*d415bd75Srobert Type *ETy = (IsByVal ? Args[i].IndirectType : Ty);
1553*d415bd75Srobert ComputePTXValueVTs(*this, DL, ETy, VTs, &Offsets, IsByVal ? 0 : VAOffset);
1554*d415bd75Srobert
1555*d415bd75Srobert Align ArgAlign;
1556*d415bd75Srobert if (IsByVal) {
1557*d415bd75Srobert // The ByValAlign in the Outs[OIdx].Flags is always set at this point,
1558*d415bd75Srobert // so we don't need to worry whether it's naturally aligned or not.
1559*d415bd75Srobert // See TargetLowering::LowerCallTo().
1560*d415bd75Srobert Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
1561*d415bd75Srobert ArgAlign = getFunctionByValParamAlign(CB->getCalledFunction(), ETy,
1562*d415bd75Srobert InitialAlign, DL);
1563*d415bd75Srobert if (IsVAArg)
1564*d415bd75Srobert VAOffset = alignTo(VAOffset, ArgAlign);
1565*d415bd75Srobert } else {
1566*d415bd75Srobert ArgAlign = getArgumentAlignment(Callee, CB, Ty, ParamCount + 1, DL);
1567*d415bd75Srobert }
1568*d415bd75Srobert
1569*d415bd75Srobert unsigned TypeSize =
1570*d415bd75Srobert (IsByVal ? Outs[OIdx].Flags.getByValSize() : DL.getTypeAllocSize(Ty));
157109467b48Spatrick SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1572*d415bd75Srobert
157309467b48Spatrick bool NeedAlign; // Does argument declaration specify alignment?
1574*d415bd75Srobert if (IsVAArg) {
1575*d415bd75Srobert if (ParamCount == FirstVAArg) {
1576*d415bd75Srobert SDValue DeclareParamOps[] = {
1577*d415bd75Srobert Chain, DAG.getConstant(STI.getMaxRequiredAlignment(), dl, MVT::i32),
1578*d415bd75Srobert DAG.getConstant(ParamCount, dl, MVT::i32),
1579*d415bd75Srobert DAG.getConstant(1, dl, MVT::i32), InFlag};
1580*d415bd75Srobert VADeclareParam = Chain = DAG.getNode(NVPTXISD::DeclareParam, dl,
1581*d415bd75Srobert DeclareParamVTs, DeclareParamOps);
1582*d415bd75Srobert }
1583*d415bd75Srobert NeedAlign = IsByVal || Ty->isAggregateType() || Ty->isVectorTy() ||
1584*d415bd75Srobert Ty->isIntegerTy(128);
1585*d415bd75Srobert } else if (IsByVal || Ty->isAggregateType() || Ty->isVectorTy() ||
1586*d415bd75Srobert Ty->isIntegerTy(128)) {
158709467b48Spatrick // declare .param .align <align> .b8 .param<n>[<size>];
158809467b48Spatrick SDValue DeclareParamOps[] = {
1589097a140dSpatrick Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32),
1590*d415bd75Srobert DAG.getConstant(ParamCount, dl, MVT::i32),
1591*d415bd75Srobert DAG.getConstant(TypeSize, dl, MVT::i32), InFlag};
159209467b48Spatrick Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
159309467b48Spatrick DeclareParamOps);
159409467b48Spatrick NeedAlign = true;
159509467b48Spatrick } else {
159609467b48Spatrick // declare .param .b<size> .param<n>;
1597*d415bd75Srobert if (VT.isInteger() || VT.isFloatingPoint()) {
159809467b48Spatrick // PTX ABI requires integral types to be at least 32 bits in
159909467b48Spatrick // size. FP16 is loaded/stored using i16, so it's handled
160009467b48Spatrick // here as well.
1601*d415bd75Srobert TypeSize = promoteScalarArgumentSize(TypeSize * 8) / 8;
160209467b48Spatrick }
160309467b48Spatrick SDValue DeclareScalarParamOps[] = {
1604*d415bd75Srobert Chain, DAG.getConstant(ParamCount, dl, MVT::i32),
1605*d415bd75Srobert DAG.getConstant(TypeSize * 8, dl, MVT::i32),
160609467b48Spatrick DAG.getConstant(0, dl, MVT::i32), InFlag};
160709467b48Spatrick Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
160809467b48Spatrick DeclareScalarParamOps);
160909467b48Spatrick NeedAlign = false;
161009467b48Spatrick }
161109467b48Spatrick InFlag = Chain.getValue(1);
161209467b48Spatrick
161309467b48Spatrick // PTX Interoperability Guide 3.3(A): [Integer] Values shorter
161409467b48Spatrick // than 32-bits are sign extended or zero extended, depending on
161509467b48Spatrick // whether they are signed or unsigned types. This case applies
161609467b48Spatrick // only to scalar parameters and not to aggregate values.
161709467b48Spatrick bool ExtendIntegerParam =
161809467b48Spatrick Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32;
161909467b48Spatrick
1620*d415bd75Srobert auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign, IsVAArg);
162109467b48Spatrick SmallVector<SDValue, 6> StoreOperands;
162209467b48Spatrick for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
1623*d415bd75Srobert EVT EltVT = VTs[j];
1624*d415bd75Srobert int CurOffset = Offsets[j];
1625*d415bd75Srobert MaybeAlign PartAlign;
1626*d415bd75Srobert if (NeedAlign)
1627*d415bd75Srobert PartAlign = commonAlignment(ArgAlign, CurOffset);
1628*d415bd75Srobert
162909467b48Spatrick // New store.
163009467b48Spatrick if (VectorInfo[j] & PVF_FIRST) {
163109467b48Spatrick assert(StoreOperands.empty() && "Unfinished preceding store.");
163209467b48Spatrick StoreOperands.push_back(Chain);
1633*d415bd75Srobert StoreOperands.push_back(
1634*d415bd75Srobert DAG.getConstant(IsVAArg ? FirstVAArg : ParamCount, dl, MVT::i32));
1635*d415bd75Srobert StoreOperands.push_back(DAG.getConstant(
1636*d415bd75Srobert IsByVal ? CurOffset + VAOffset : (IsVAArg ? VAOffset : CurOffset),
1637*d415bd75Srobert dl, MVT::i32));
163809467b48Spatrick }
163909467b48Spatrick
164009467b48Spatrick SDValue StVal = OutVals[OIdx];
1641*d415bd75Srobert
1642*d415bd75Srobert MVT PromotedVT;
1643*d415bd75Srobert if (PromoteScalarIntegerPTX(EltVT, &PromotedVT)) {
1644*d415bd75Srobert EltVT = EVT(PromotedVT);
1645*d415bd75Srobert }
1646*d415bd75Srobert if (PromoteScalarIntegerPTX(StVal.getValueType(), &PromotedVT)) {
1647*d415bd75Srobert llvm::ISD::NodeType Ext =
1648*d415bd75Srobert Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1649*d415bd75Srobert StVal = DAG.getNode(Ext, dl, PromotedVT, StVal);
1650*d415bd75Srobert }
1651*d415bd75Srobert
1652*d415bd75Srobert if (IsByVal) {
1653*d415bd75Srobert auto PtrVT = getPointerTy(DL);
1654*d415bd75Srobert SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StVal,
1655*d415bd75Srobert DAG.getConstant(CurOffset, dl, PtrVT));
1656*d415bd75Srobert StVal = DAG.getLoad(EltVT, dl, TempChain, srcAddr, MachinePointerInfo(),
1657*d415bd75Srobert PartAlign);
1658*d415bd75Srobert } else if (ExtendIntegerParam) {
165909467b48Spatrick assert(VTs.size() == 1 && "Scalar can't have multiple parts.");
166009467b48Spatrick // zext/sext to i32
166109467b48Spatrick StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
166209467b48Spatrick : ISD::ZERO_EXTEND,
166309467b48Spatrick dl, MVT::i32, StVal);
1664*d415bd75Srobert }
1665*d415bd75Srobert
1666*d415bd75Srobert if (!ExtendIntegerParam && EltVT.getSizeInBits() < 16) {
166709467b48Spatrick // Use 16-bit registers for small stores as it's the
166809467b48Spatrick // smallest general purpose register size supported by NVPTX.
166909467b48Spatrick StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
167009467b48Spatrick }
167109467b48Spatrick
167209467b48Spatrick // Record the value to store.
167309467b48Spatrick StoreOperands.push_back(StVal);
167409467b48Spatrick
167509467b48Spatrick if (VectorInfo[j] & PVF_LAST) {
167609467b48Spatrick unsigned NumElts = StoreOperands.size() - 3;
167709467b48Spatrick NVPTXISD::NodeType Op;
167809467b48Spatrick switch (NumElts) {
167909467b48Spatrick case 1:
168009467b48Spatrick Op = NVPTXISD::StoreParam;
168109467b48Spatrick break;
168209467b48Spatrick case 2:
168309467b48Spatrick Op = NVPTXISD::StoreParamV2;
168409467b48Spatrick break;
168509467b48Spatrick case 4:
168609467b48Spatrick Op = NVPTXISD::StoreParamV4;
168709467b48Spatrick break;
168809467b48Spatrick default:
168909467b48Spatrick llvm_unreachable("Invalid vector info.");
169009467b48Spatrick }
169109467b48Spatrick
169209467b48Spatrick StoreOperands.push_back(InFlag);
169309467b48Spatrick
169409467b48Spatrick // Adjust type of the store op if we've extended the scalar
169509467b48Spatrick // return value.
1696*d415bd75Srobert EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT;
169709467b48Spatrick
169809467b48Spatrick Chain = DAG.getMemIntrinsicNode(
169909467b48Spatrick Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
1700*d415bd75Srobert TheStoreType, MachinePointerInfo(), PartAlign,
170109467b48Spatrick MachineMemOperand::MOStore);
170209467b48Spatrick InFlag = Chain.getValue(1);
170309467b48Spatrick
170409467b48Spatrick // Cleanup.
170509467b48Spatrick StoreOperands.clear();
1706*d415bd75Srobert
1707*d415bd75Srobert // TODO: We may need to support vector types that can be passed
1708*d415bd75Srobert // as scalars in variadic arguments.
1709*d415bd75Srobert if (!IsByVal && IsVAArg) {
1710*d415bd75Srobert assert(NumElts == 1 &&
1711*d415bd75Srobert "Vectorization is expected to be disabled for variadics.");
1712*d415bd75Srobert VAOffset += DL.getTypeAllocSize(
1713*d415bd75Srobert TheStoreType.getTypeForEVT(*DAG.getContext()));
171409467b48Spatrick }
1715*d415bd75Srobert }
1716*d415bd75Srobert if (!IsByVal)
171709467b48Spatrick ++OIdx;
171809467b48Spatrick }
171909467b48Spatrick assert(StoreOperands.empty() && "Unfinished parameter store.");
1720*d415bd75Srobert if (!IsByVal && VTs.size() > 0)
172109467b48Spatrick --OIdx;
1722*d415bd75Srobert ++ParamCount;
1723*d415bd75Srobert if (IsByVal && IsVAArg)
1724*d415bd75Srobert VAOffset += TypeSize;
172509467b48Spatrick }
172609467b48Spatrick
172709467b48Spatrick GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
1728*d415bd75Srobert MaybeAlign retAlignment = std::nullopt;
172909467b48Spatrick
173009467b48Spatrick // Handle Result
173109467b48Spatrick if (Ins.size() > 0) {
173209467b48Spatrick SmallVector<EVT, 16> resvtparts;
173309467b48Spatrick ComputeValueVTs(*this, DL, RetTy, resvtparts);
173409467b48Spatrick
173509467b48Spatrick // Declare
173609467b48Spatrick // .param .align 16 .b8 retval0[<size-in-bytes>], or
173709467b48Spatrick // .param .b<size-in-bits> retval0
173809467b48Spatrick unsigned resultsz = DL.getTypeAllocSizeInBits(RetTy);
173909467b48Spatrick // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
174009467b48Spatrick // these three types to match the logic in
174109467b48Spatrick // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
174209467b48Spatrick // Plus, this behavior is consistent with nvcc's.
174309467b48Spatrick if (RetTy->isFloatingPointTy() || RetTy->isPointerTy() ||
174409467b48Spatrick (RetTy->isIntegerTy() && !RetTy->isIntegerTy(128))) {
1745*d415bd75Srobert resultsz = promoteScalarArgumentSize(resultsz);
174609467b48Spatrick SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
174709467b48Spatrick SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
174809467b48Spatrick DAG.getConstant(resultsz, dl, MVT::i32),
174909467b48Spatrick DAG.getConstant(0, dl, MVT::i32), InFlag };
175009467b48Spatrick Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
175109467b48Spatrick DeclareRetOps);
175209467b48Spatrick InFlag = Chain.getValue(1);
175309467b48Spatrick } else {
1754097a140dSpatrick retAlignment = getArgumentAlignment(Callee, CB, RetTy, 0, DL);
1755097a140dSpatrick assert(retAlignment && "retAlignment is guaranteed to be set");
175609467b48Spatrick SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1757097a140dSpatrick SDValue DeclareRetOps[] = {
1758097a140dSpatrick Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32),
175909467b48Spatrick DAG.getConstant(resultsz / 8, dl, MVT::i32),
176009467b48Spatrick DAG.getConstant(0, dl, MVT::i32), InFlag};
176109467b48Spatrick Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
176209467b48Spatrick DeclareRetOps);
176309467b48Spatrick InFlag = Chain.getValue(1);
176409467b48Spatrick }
176509467b48Spatrick }
176609467b48Spatrick
1767*d415bd75Srobert bool HasVAArgs = CLI.IsVarArg && (CLI.Args.size() > CLI.NumFixedArgs);
1768*d415bd75Srobert // Set the size of the vararg param byte array if the callee is a variadic
1769*d415bd75Srobert // function and the variadic part is not empty.
1770*d415bd75Srobert if (HasVAArgs) {
1771*d415bd75Srobert SDValue DeclareParamOps[] = {
1772*d415bd75Srobert VADeclareParam.getOperand(0), VADeclareParam.getOperand(1),
1773*d415bd75Srobert VADeclareParam.getOperand(2), DAG.getConstant(VAOffset, dl, MVT::i32),
1774*d415bd75Srobert VADeclareParam.getOperand(4)};
1775*d415bd75Srobert DAG.MorphNodeTo(VADeclareParam.getNode(), VADeclareParam.getOpcode(),
1776*d415bd75Srobert VADeclareParam->getVTList(), DeclareParamOps);
1777*d415bd75Srobert }
1778*d415bd75Srobert
177909467b48Spatrick // Both indirect calls and libcalls have nullptr Func. In order to distinguish
178009467b48Spatrick // between them we must rely on the call site value which is valid for
178109467b48Spatrick // indirect calls but is always null for libcalls.
1782097a140dSpatrick bool isIndirectCall = !Func && CB;
178309467b48Spatrick
178409467b48Spatrick if (isa<ExternalSymbolSDNode>(Callee)) {
178509467b48Spatrick Function* CalleeFunc = nullptr;
178609467b48Spatrick
178709467b48Spatrick // Try to find the callee in the current module.
178809467b48Spatrick Callee = DAG.getSymbolFunctionGlobalAddress(Callee, &CalleeFunc);
178909467b48Spatrick assert(CalleeFunc != nullptr && "Libcall callee must be set.");
179009467b48Spatrick
179109467b48Spatrick // Set the "libcall callee" attribute to indicate that the function
179209467b48Spatrick // must always have a declaration.
179309467b48Spatrick CalleeFunc->addFnAttr("nvptx-libcall-callee", "true");
179409467b48Spatrick }
179509467b48Spatrick
179609467b48Spatrick if (isIndirectCall) {
179709467b48Spatrick // This is indirect function call case : PTX requires a prototype of the
179809467b48Spatrick // form
179909467b48Spatrick // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
180009467b48Spatrick // to be emitted, and the label has to used as the last arg of call
180109467b48Spatrick // instruction.
180209467b48Spatrick // The prototype is embedded in a string and put as the operand for a
180309467b48Spatrick // CallPrototype SDNode which will print out to the value of the string.
180409467b48Spatrick SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1805*d415bd75Srobert std::string Proto = getPrototype(
1806*d415bd75Srobert DL, RetTy, Args, Outs, retAlignment,
1807*d415bd75Srobert HasVAArgs
1808*d415bd75Srobert ? std::optional<std::pair<unsigned, const APInt &>>(std::make_pair(
1809*d415bd75Srobert CLI.NumFixedArgs,
1810*d415bd75Srobert cast<ConstantSDNode>(VADeclareParam->getOperand(1))
1811*d415bd75Srobert ->getAPIntValue()))
1812*d415bd75Srobert : std::nullopt,
1813*d415bd75Srobert *CB, UniqueCallSite);
1814*d415bd75Srobert const char *ProtoStr = nvTM->getStrPool().save(Proto).data();
181509467b48Spatrick SDValue ProtoOps[] = {
1816*d415bd75Srobert Chain,
1817*d415bd75Srobert DAG.getTargetExternalSymbol(ProtoStr, MVT::i32),
1818*d415bd75Srobert InFlag,
181909467b48Spatrick };
182009467b48Spatrick Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
182109467b48Spatrick InFlag = Chain.getValue(1);
182209467b48Spatrick }
182309467b48Spatrick // Op to just print "call"
182409467b48Spatrick SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
182509467b48Spatrick SDValue PrintCallOps[] = {
182609467b48Spatrick Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag
182709467b48Spatrick };
182809467b48Spatrick // We model convergent calls as separate opcodes.
182909467b48Spatrick unsigned Opcode = isIndirectCall ? NVPTXISD::PrintCall : NVPTXISD::PrintCallUni;
183009467b48Spatrick if (CLI.IsConvergent)
183109467b48Spatrick Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni
183209467b48Spatrick : NVPTXISD::PrintConvergentCall;
183309467b48Spatrick Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps);
183409467b48Spatrick InFlag = Chain.getValue(1);
183509467b48Spatrick
183609467b48Spatrick // Ops to print out the function name
183709467b48Spatrick SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
183809467b48Spatrick SDValue CallVoidOps[] = { Chain, Callee, InFlag };
183909467b48Spatrick Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
184009467b48Spatrick InFlag = Chain.getValue(1);
184109467b48Spatrick
184209467b48Spatrick // Ops to print out the param list
184309467b48Spatrick SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
184409467b48Spatrick SDValue CallArgBeginOps[] = { Chain, InFlag };
184509467b48Spatrick Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
184609467b48Spatrick CallArgBeginOps);
184709467b48Spatrick InFlag = Chain.getValue(1);
184809467b48Spatrick
1849*d415bd75Srobert for (unsigned i = 0, e = std::min(CLI.NumFixedArgs + 1, ParamCount); i != e;
1850*d415bd75Srobert ++i) {
185109467b48Spatrick unsigned opcode;
185209467b48Spatrick if (i == (e - 1))
185309467b48Spatrick opcode = NVPTXISD::LastCallArg;
185409467b48Spatrick else
185509467b48Spatrick opcode = NVPTXISD::CallArg;
185609467b48Spatrick SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
185709467b48Spatrick SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
185809467b48Spatrick DAG.getConstant(i, dl, MVT::i32), InFlag };
185909467b48Spatrick Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
186009467b48Spatrick InFlag = Chain.getValue(1);
186109467b48Spatrick }
186209467b48Spatrick SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
186309467b48Spatrick SDValue CallArgEndOps[] = { Chain,
186409467b48Spatrick DAG.getConstant(isIndirectCall ? 0 : 1, dl, MVT::i32),
186509467b48Spatrick InFlag };
186609467b48Spatrick Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
186709467b48Spatrick InFlag = Chain.getValue(1);
186809467b48Spatrick
186909467b48Spatrick if (isIndirectCall) {
187009467b48Spatrick SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
187173471bf0Spatrick SDValue PrototypeOps[] = {
187273471bf0Spatrick Chain, DAG.getConstant(UniqueCallSite, dl, MVT::i32), InFlag};
187309467b48Spatrick Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
187409467b48Spatrick InFlag = Chain.getValue(1);
187509467b48Spatrick }
187609467b48Spatrick
187709467b48Spatrick SmallVector<SDValue, 16> ProxyRegOps;
1878*d415bd75Srobert SmallVector<std::optional<MVT>, 16> ProxyRegTruncates;
187909467b48Spatrick
188009467b48Spatrick // Generate loads from param memory/moves from registers for result
188109467b48Spatrick if (Ins.size() > 0) {
188209467b48Spatrick SmallVector<EVT, 16> VTs;
188309467b48Spatrick SmallVector<uint64_t, 16> Offsets;
188409467b48Spatrick ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0);
188509467b48Spatrick assert(VTs.size() == Ins.size() && "Bad value decomposition");
188609467b48Spatrick
1887097a140dSpatrick Align RetAlign = getArgumentAlignment(Callee, CB, RetTy, 0, DL);
188809467b48Spatrick auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
188909467b48Spatrick
189009467b48Spatrick SmallVector<EVT, 6> LoadVTs;
189109467b48Spatrick int VecIdx = -1; // Index of the first element of the vector.
189209467b48Spatrick
189309467b48Spatrick // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
189409467b48Spatrick // 32-bits are sign extended or zero extended, depending on whether
189509467b48Spatrick // they are signed or unsigned types.
189609467b48Spatrick bool ExtendIntegerRetVal =
189709467b48Spatrick RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
189809467b48Spatrick
189909467b48Spatrick for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
190009467b48Spatrick bool needTruncate = false;
190109467b48Spatrick EVT TheLoadType = VTs[i];
190209467b48Spatrick EVT EltType = Ins[i].VT;
1903097a140dSpatrick Align EltAlign = commonAlignment(RetAlign, Offsets[i]);
1904*d415bd75Srobert MVT PromotedVT;
1905*d415bd75Srobert
1906*d415bd75Srobert if (PromoteScalarIntegerPTX(TheLoadType, &PromotedVT)) {
1907*d415bd75Srobert TheLoadType = EVT(PromotedVT);
1908*d415bd75Srobert EltType = EVT(PromotedVT);
1909*d415bd75Srobert needTruncate = true;
1910*d415bd75Srobert }
1911*d415bd75Srobert
191209467b48Spatrick if (ExtendIntegerRetVal) {
191309467b48Spatrick TheLoadType = MVT::i32;
191409467b48Spatrick EltType = MVT::i32;
191509467b48Spatrick needTruncate = true;
191609467b48Spatrick } else if (TheLoadType.getSizeInBits() < 16) {
191709467b48Spatrick if (VTs[i].isInteger())
191809467b48Spatrick needTruncate = true;
191909467b48Spatrick EltType = MVT::i16;
192009467b48Spatrick }
192109467b48Spatrick
192209467b48Spatrick // Record index of the very first element of the vector.
192309467b48Spatrick if (VectorInfo[i] & PVF_FIRST) {
192409467b48Spatrick assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list.");
192509467b48Spatrick VecIdx = i;
192609467b48Spatrick }
192709467b48Spatrick
192809467b48Spatrick LoadVTs.push_back(EltType);
192909467b48Spatrick
193009467b48Spatrick if (VectorInfo[i] & PVF_LAST) {
193109467b48Spatrick unsigned NumElts = LoadVTs.size();
193209467b48Spatrick LoadVTs.push_back(MVT::Other);
193309467b48Spatrick LoadVTs.push_back(MVT::Glue);
193409467b48Spatrick NVPTXISD::NodeType Op;
193509467b48Spatrick switch (NumElts) {
193609467b48Spatrick case 1:
193709467b48Spatrick Op = NVPTXISD::LoadParam;
193809467b48Spatrick break;
193909467b48Spatrick case 2:
194009467b48Spatrick Op = NVPTXISD::LoadParamV2;
194109467b48Spatrick break;
194209467b48Spatrick case 4:
194309467b48Spatrick Op = NVPTXISD::LoadParamV4;
194409467b48Spatrick break;
194509467b48Spatrick default:
194609467b48Spatrick llvm_unreachable("Invalid vector info.");
194709467b48Spatrick }
194809467b48Spatrick
194909467b48Spatrick SDValue LoadOperands[] = {
195009467b48Spatrick Chain, DAG.getConstant(1, dl, MVT::i32),
195109467b48Spatrick DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InFlag};
195209467b48Spatrick SDValue RetVal = DAG.getMemIntrinsicNode(
195309467b48Spatrick Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType,
195409467b48Spatrick MachinePointerInfo(), EltAlign,
195509467b48Spatrick MachineMemOperand::MOLoad);
195609467b48Spatrick
195709467b48Spatrick for (unsigned j = 0; j < NumElts; ++j) {
195809467b48Spatrick ProxyRegOps.push_back(RetVal.getValue(j));
195909467b48Spatrick
196009467b48Spatrick if (needTruncate)
1961*d415bd75Srobert ProxyRegTruncates.push_back(std::optional<MVT>(Ins[VecIdx + j].VT));
196209467b48Spatrick else
1963*d415bd75Srobert ProxyRegTruncates.push_back(std::optional<MVT>());
196409467b48Spatrick }
196509467b48Spatrick
196609467b48Spatrick Chain = RetVal.getValue(NumElts);
196709467b48Spatrick InFlag = RetVal.getValue(NumElts + 1);
196809467b48Spatrick
196909467b48Spatrick // Cleanup
197009467b48Spatrick VecIdx = -1;
197109467b48Spatrick LoadVTs.clear();
197209467b48Spatrick }
197309467b48Spatrick }
197409467b48Spatrick }
197509467b48Spatrick
1976*d415bd75Srobert Chain =
1977*d415bd75Srobert DAG.getCALLSEQ_END(Chain, UniqueCallSite, UniqueCallSite + 1, InFlag, dl);
197809467b48Spatrick InFlag = Chain.getValue(1);
197909467b48Spatrick
198009467b48Spatrick // Append ProxyReg instructions to the chain to make sure that `callseq_end`
198109467b48Spatrick // will not get lost. Otherwise, during libcalls expansion, the nodes can become
198209467b48Spatrick // dangling.
198309467b48Spatrick for (unsigned i = 0; i < ProxyRegOps.size(); ++i) {
198409467b48Spatrick SDValue Ret = DAG.getNode(
198509467b48Spatrick NVPTXISD::ProxyReg, dl,
198609467b48Spatrick DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other, MVT::Glue),
198709467b48Spatrick { Chain, ProxyRegOps[i], InFlag }
198809467b48Spatrick );
198909467b48Spatrick
199009467b48Spatrick Chain = Ret.getValue(1);
199109467b48Spatrick InFlag = Ret.getValue(2);
199209467b48Spatrick
1993*d415bd75Srobert if (ProxyRegTruncates[i]) {
1994*d415bd75Srobert Ret = DAG.getNode(ISD::TRUNCATE, dl, *ProxyRegTruncates[i], Ret);
199509467b48Spatrick }
199609467b48Spatrick
199709467b48Spatrick InVals.push_back(Ret);
199809467b48Spatrick }
199909467b48Spatrick
200009467b48Spatrick // set isTailCall to false for now, until we figure out how to express
200109467b48Spatrick // tail call optimization in PTX
200209467b48Spatrick isTailCall = false;
200309467b48Spatrick return Chain;
200409467b48Spatrick }
200509467b48Spatrick
200609467b48Spatrick // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
200709467b48Spatrick // (see LegalizeDAG.cpp). This is slow and uses local memory.
200809467b48Spatrick // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
200909467b48Spatrick SDValue
LowerCONCAT_VECTORS(SDValue Op,SelectionDAG & DAG) const201009467b48Spatrick NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
201109467b48Spatrick SDNode *Node = Op.getNode();
201209467b48Spatrick SDLoc dl(Node);
201309467b48Spatrick SmallVector<SDValue, 8> Ops;
201409467b48Spatrick unsigned NumOperands = Node->getNumOperands();
201509467b48Spatrick for (unsigned i = 0; i < NumOperands; ++i) {
201609467b48Spatrick SDValue SubOp = Node->getOperand(i);
201709467b48Spatrick EVT VVT = SubOp.getNode()->getValueType(0);
201809467b48Spatrick EVT EltVT = VVT.getVectorElementType();
201909467b48Spatrick unsigned NumSubElem = VVT.getVectorNumElements();
202009467b48Spatrick for (unsigned j = 0; j < NumSubElem; ++j) {
202109467b48Spatrick Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
202209467b48Spatrick DAG.getIntPtrConstant(j, dl)));
202309467b48Spatrick }
202409467b48Spatrick }
202509467b48Spatrick return DAG.getBuildVector(Node->getValueType(0), dl, Ops);
202609467b48Spatrick }
202709467b48Spatrick
202809467b48Spatrick // We can init constant f16x2 with a single .b32 move. Normally it
202909467b48Spatrick // would get lowered as two constant loads and vector-packing move.
203009467b48Spatrick // mov.b16 %h1, 0x4000;
203109467b48Spatrick // mov.b16 %h2, 0x3C00;
203209467b48Spatrick // mov.b32 %hh2, {%h2, %h1};
203309467b48Spatrick // Instead we want just a constant move:
203409467b48Spatrick // mov.b32 %hh2, 0x40003C00
203509467b48Spatrick //
203609467b48Spatrick // This results in better SASS code with CUDA 7.x. Ptxas in CUDA 8.0
203709467b48Spatrick // generates good SASS in both cases.
LowerBUILD_VECTOR(SDValue Op,SelectionDAG & DAG) const203809467b48Spatrick SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op,
203909467b48Spatrick SelectionDAG &DAG) const {
204009467b48Spatrick if (!(Op->getValueType(0) == MVT::v2f16 &&
204109467b48Spatrick isa<ConstantFPSDNode>(Op->getOperand(0)) &&
204209467b48Spatrick isa<ConstantFPSDNode>(Op->getOperand(1))))
204309467b48Spatrick return Op;
204409467b48Spatrick
204509467b48Spatrick APInt E0 =
204609467b48Spatrick cast<ConstantFPSDNode>(Op->getOperand(0))->getValueAPF().bitcastToAPInt();
204709467b48Spatrick APInt E1 =
204809467b48Spatrick cast<ConstantFPSDNode>(Op->getOperand(1))->getValueAPF().bitcastToAPInt();
204909467b48Spatrick SDValue Const =
205009467b48Spatrick DAG.getConstant(E1.zext(32).shl(16) | E0.zext(32), SDLoc(Op), MVT::i32);
205109467b48Spatrick return DAG.getNode(ISD::BITCAST, SDLoc(Op), MVT::v2f16, Const);
205209467b48Spatrick }
205309467b48Spatrick
LowerEXTRACT_VECTOR_ELT(SDValue Op,SelectionDAG & DAG) const205409467b48Spatrick SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
205509467b48Spatrick SelectionDAG &DAG) const {
205609467b48Spatrick SDValue Index = Op->getOperand(1);
205709467b48Spatrick // Constant index will be matched by tablegen.
205809467b48Spatrick if (isa<ConstantSDNode>(Index.getNode()))
205909467b48Spatrick return Op;
206009467b48Spatrick
206109467b48Spatrick // Extract individual elements and select one of them.
206209467b48Spatrick SDValue Vector = Op->getOperand(0);
206309467b48Spatrick EVT VectorVT = Vector.getValueType();
206409467b48Spatrick assert(VectorVT == MVT::v2f16 && "Unexpected vector type.");
206509467b48Spatrick EVT EltVT = VectorVT.getVectorElementType();
206609467b48Spatrick
206709467b48Spatrick SDLoc dl(Op.getNode());
206809467b48Spatrick SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector,
206909467b48Spatrick DAG.getIntPtrConstant(0, dl));
207009467b48Spatrick SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector,
207109467b48Spatrick DAG.getIntPtrConstant(1, dl));
207209467b48Spatrick return DAG.getSelectCC(dl, Index, DAG.getIntPtrConstant(0, dl), E0, E1,
207309467b48Spatrick ISD::CondCode::SETEQ);
207409467b48Spatrick }
207509467b48Spatrick
207609467b48Spatrick /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
207709467b48Spatrick /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
207809467b48Spatrick /// amount, or
207909467b48Spatrick /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
208009467b48Spatrick /// amount.
LowerShiftRightParts(SDValue Op,SelectionDAG & DAG) const208109467b48Spatrick SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
208209467b48Spatrick SelectionDAG &DAG) const {
208309467b48Spatrick assert(Op.getNumOperands() == 3 && "Not a double-shift!");
208409467b48Spatrick assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
208509467b48Spatrick
208609467b48Spatrick EVT VT = Op.getValueType();
208709467b48Spatrick unsigned VTBits = VT.getSizeInBits();
208809467b48Spatrick SDLoc dl(Op);
208909467b48Spatrick SDValue ShOpLo = Op.getOperand(0);
209009467b48Spatrick SDValue ShOpHi = Op.getOperand(1);
209109467b48Spatrick SDValue ShAmt = Op.getOperand(2);
209209467b48Spatrick unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
209309467b48Spatrick
209409467b48Spatrick if (VTBits == 32 && STI.getSmVersion() >= 35) {
209509467b48Spatrick // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
209609467b48Spatrick // {dHi, dLo} = {aHi, aLo} >> Amt
209709467b48Spatrick // dHi = aHi >> Amt
209809467b48Spatrick // dLo = shf.r.clamp aLo, aHi, Amt
209909467b48Spatrick
210009467b48Spatrick SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
210109467b48Spatrick SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi,
210209467b48Spatrick ShAmt);
210309467b48Spatrick
210409467b48Spatrick SDValue Ops[2] = { Lo, Hi };
210509467b48Spatrick return DAG.getMergeValues(Ops, dl);
210609467b48Spatrick }
210709467b48Spatrick else {
210809467b48Spatrick // {dHi, dLo} = {aHi, aLo} >> Amt
210909467b48Spatrick // - if (Amt>=size) then
211009467b48Spatrick // dLo = aHi >> (Amt-size)
211109467b48Spatrick // dHi = aHi >> Amt (this is either all 0 or all 1)
211209467b48Spatrick // else
211309467b48Spatrick // dLo = (aLo >>logic Amt) | (aHi << (size-Amt))
211409467b48Spatrick // dHi = aHi >> Amt
211509467b48Spatrick
211609467b48Spatrick SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
211709467b48Spatrick DAG.getConstant(VTBits, dl, MVT::i32),
211809467b48Spatrick ShAmt);
211909467b48Spatrick SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
212009467b48Spatrick SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
212109467b48Spatrick DAG.getConstant(VTBits, dl, MVT::i32));
212209467b48Spatrick SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
212309467b48Spatrick SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
212409467b48Spatrick SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
212509467b48Spatrick
212609467b48Spatrick SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
212709467b48Spatrick DAG.getConstant(VTBits, dl, MVT::i32),
212809467b48Spatrick ISD::SETGE);
212909467b48Spatrick SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
213009467b48Spatrick SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
213109467b48Spatrick
213209467b48Spatrick SDValue Ops[2] = { Lo, Hi };
213309467b48Spatrick return DAG.getMergeValues(Ops, dl);
213409467b48Spatrick }
213509467b48Spatrick }
213609467b48Spatrick
213709467b48Spatrick /// LowerShiftLeftParts - Lower SHL_PARTS, which
213809467b48Spatrick /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
213909467b48Spatrick /// amount, or
214009467b48Spatrick /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
214109467b48Spatrick /// amount.
LowerShiftLeftParts(SDValue Op,SelectionDAG & DAG) const214209467b48Spatrick SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
214309467b48Spatrick SelectionDAG &DAG) const {
214409467b48Spatrick assert(Op.getNumOperands() == 3 && "Not a double-shift!");
214509467b48Spatrick assert(Op.getOpcode() == ISD::SHL_PARTS);
214609467b48Spatrick
214709467b48Spatrick EVT VT = Op.getValueType();
214809467b48Spatrick unsigned VTBits = VT.getSizeInBits();
214909467b48Spatrick SDLoc dl(Op);
215009467b48Spatrick SDValue ShOpLo = Op.getOperand(0);
215109467b48Spatrick SDValue ShOpHi = Op.getOperand(1);
215209467b48Spatrick SDValue ShAmt = Op.getOperand(2);
215309467b48Spatrick
215409467b48Spatrick if (VTBits == 32 && STI.getSmVersion() >= 35) {
215509467b48Spatrick // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
215609467b48Spatrick // {dHi, dLo} = {aHi, aLo} << Amt
215709467b48Spatrick // dHi = shf.l.clamp aLo, aHi, Amt
215809467b48Spatrick // dLo = aLo << Amt
215909467b48Spatrick
216009467b48Spatrick SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
216109467b48Spatrick ShAmt);
216209467b48Spatrick SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
216309467b48Spatrick
216409467b48Spatrick SDValue Ops[2] = { Lo, Hi };
216509467b48Spatrick return DAG.getMergeValues(Ops, dl);
216609467b48Spatrick }
216709467b48Spatrick else {
216809467b48Spatrick // {dHi, dLo} = {aHi, aLo} << Amt
216909467b48Spatrick // - if (Amt>=size) then
217009467b48Spatrick // dLo = aLo << Amt (all 0)
217109467b48Spatrick // dLo = aLo << (Amt-size)
217209467b48Spatrick // else
217309467b48Spatrick // dLo = aLo << Amt
217409467b48Spatrick // dHi = (aHi << Amt) | (aLo >> (size-Amt))
217509467b48Spatrick
217609467b48Spatrick SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
217709467b48Spatrick DAG.getConstant(VTBits, dl, MVT::i32),
217809467b48Spatrick ShAmt);
217909467b48Spatrick SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
218009467b48Spatrick SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
218109467b48Spatrick DAG.getConstant(VTBits, dl, MVT::i32));
218209467b48Spatrick SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
218309467b48Spatrick SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
218409467b48Spatrick SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
218509467b48Spatrick
218609467b48Spatrick SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
218709467b48Spatrick DAG.getConstant(VTBits, dl, MVT::i32),
218809467b48Spatrick ISD::SETGE);
218909467b48Spatrick SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
219009467b48Spatrick SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
219109467b48Spatrick
219209467b48Spatrick SDValue Ops[2] = { Lo, Hi };
219309467b48Spatrick return DAG.getMergeValues(Ops, dl);
219409467b48Spatrick }
219509467b48Spatrick }
219609467b48Spatrick
LowerFROUND(SDValue Op,SelectionDAG & DAG) const219709467b48Spatrick SDValue NVPTXTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
219809467b48Spatrick EVT VT = Op.getValueType();
219909467b48Spatrick
220009467b48Spatrick if (VT == MVT::f32)
220109467b48Spatrick return LowerFROUND32(Op, DAG);
220209467b48Spatrick
220309467b48Spatrick if (VT == MVT::f64)
220409467b48Spatrick return LowerFROUND64(Op, DAG);
220509467b48Spatrick
220609467b48Spatrick llvm_unreachable("unhandled type");
220709467b48Spatrick }
220809467b48Spatrick
220909467b48Spatrick // This is the the rounding method used in CUDA libdevice in C like code:
221009467b48Spatrick // float roundf(float A)
221109467b48Spatrick // {
221209467b48Spatrick // float RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f));
221309467b48Spatrick // RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA;
221409467b48Spatrick // return abs(A) < 0.5 ? (float)(int)A : RoundedA;
221509467b48Spatrick // }
LowerFROUND32(SDValue Op,SelectionDAG & DAG) const221609467b48Spatrick SDValue NVPTXTargetLowering::LowerFROUND32(SDValue Op,
221709467b48Spatrick SelectionDAG &DAG) const {
221809467b48Spatrick SDLoc SL(Op);
221909467b48Spatrick SDValue A = Op.getOperand(0);
222009467b48Spatrick EVT VT = Op.getValueType();
222109467b48Spatrick
222209467b48Spatrick SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A);
222309467b48Spatrick
222409467b48Spatrick // RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f))
222509467b48Spatrick SDValue Bitcast = DAG.getNode(ISD::BITCAST, SL, MVT::i32, A);
222609467b48Spatrick const int SignBitMask = 0x80000000;
222709467b48Spatrick SDValue Sign = DAG.getNode(ISD::AND, SL, MVT::i32, Bitcast,
222809467b48Spatrick DAG.getConstant(SignBitMask, SL, MVT::i32));
222909467b48Spatrick const int PointFiveInBits = 0x3F000000;
223009467b48Spatrick SDValue PointFiveWithSignRaw =
223109467b48Spatrick DAG.getNode(ISD::OR, SL, MVT::i32, Sign,
223209467b48Spatrick DAG.getConstant(PointFiveInBits, SL, MVT::i32));
223309467b48Spatrick SDValue PointFiveWithSign =
223409467b48Spatrick DAG.getNode(ISD::BITCAST, SL, VT, PointFiveWithSignRaw);
223509467b48Spatrick SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, A, PointFiveWithSign);
223609467b48Spatrick SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA);
223709467b48Spatrick
223809467b48Spatrick // RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA;
223909467b48Spatrick EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
224009467b48Spatrick SDValue IsLarge =
224109467b48Spatrick DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 23.0), SL, VT),
224209467b48Spatrick ISD::SETOGT);
224309467b48Spatrick RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA);
224409467b48Spatrick
224509467b48Spatrick // return abs(A) < 0.5 ? (float)(int)A : RoundedA;
224609467b48Spatrick SDValue IsSmall =DAG.getSetCC(SL, SetCCVT, AbsA,
224709467b48Spatrick DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT);
224809467b48Spatrick SDValue RoundedAForSmallA = DAG.getNode(ISD::FTRUNC, SL, VT, A);
224909467b48Spatrick return DAG.getNode(ISD::SELECT, SL, VT, IsSmall, RoundedAForSmallA, RoundedA);
225009467b48Spatrick }
225109467b48Spatrick
225209467b48Spatrick // The implementation of round(double) is similar to that of round(float) in
225309467b48Spatrick // that they both separate the value range into three regions and use a method
225409467b48Spatrick // specific to the region to round the values. However, round(double) first
225509467b48Spatrick // calculates the round of the absolute value and then adds the sign back while
225609467b48Spatrick // round(float) directly rounds the value with sign.
LowerFROUND64(SDValue Op,SelectionDAG & DAG) const225709467b48Spatrick SDValue NVPTXTargetLowering::LowerFROUND64(SDValue Op,
225809467b48Spatrick SelectionDAG &DAG) const {
225909467b48Spatrick SDLoc SL(Op);
226009467b48Spatrick SDValue A = Op.getOperand(0);
226109467b48Spatrick EVT VT = Op.getValueType();
226209467b48Spatrick
226309467b48Spatrick SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A);
226409467b48Spatrick
226509467b48Spatrick // double RoundedA = (double) (int) (abs(A) + 0.5f);
226609467b48Spatrick SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, AbsA,
226709467b48Spatrick DAG.getConstantFP(0.5, SL, VT));
226809467b48Spatrick SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA);
226909467b48Spatrick
227009467b48Spatrick // RoundedA = abs(A) < 0.5 ? (double)0 : RoundedA;
227109467b48Spatrick EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
227209467b48Spatrick SDValue IsSmall =DAG.getSetCC(SL, SetCCVT, AbsA,
227309467b48Spatrick DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT);
227409467b48Spatrick RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsSmall,
227509467b48Spatrick DAG.getConstantFP(0, SL, VT),
227609467b48Spatrick RoundedA);
227709467b48Spatrick
227809467b48Spatrick // Add sign to rounded_A
227909467b48Spatrick RoundedA = DAG.getNode(ISD::FCOPYSIGN, SL, VT, RoundedA, A);
228009467b48Spatrick DAG.getNode(ISD::FTRUNC, SL, VT, A);
228109467b48Spatrick
228209467b48Spatrick // RoundedA = abs(A) > 0x1.0p52 ? A : RoundedA;
228309467b48Spatrick SDValue IsLarge =
228409467b48Spatrick DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 52.0), SL, VT),
228509467b48Spatrick ISD::SETOGT);
228609467b48Spatrick return DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA);
228709467b48Spatrick }
228809467b48Spatrick
228909467b48Spatrick
229009467b48Spatrick
229109467b48Spatrick SDValue
LowerOperation(SDValue Op,SelectionDAG & DAG) const229209467b48Spatrick NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
229309467b48Spatrick switch (Op.getOpcode()) {
229409467b48Spatrick case ISD::RETURNADDR:
229509467b48Spatrick return SDValue();
229609467b48Spatrick case ISD::FRAMEADDR:
229709467b48Spatrick return SDValue();
229809467b48Spatrick case ISD::GlobalAddress:
229909467b48Spatrick return LowerGlobalAddress(Op, DAG);
230009467b48Spatrick case ISD::INTRINSIC_W_CHAIN:
230109467b48Spatrick return Op;
230209467b48Spatrick case ISD::BUILD_VECTOR:
230309467b48Spatrick return LowerBUILD_VECTOR(Op, DAG);
230409467b48Spatrick case ISD::EXTRACT_SUBVECTOR:
230509467b48Spatrick return Op;
230609467b48Spatrick case ISD::EXTRACT_VECTOR_ELT:
230709467b48Spatrick return LowerEXTRACT_VECTOR_ELT(Op, DAG);
230809467b48Spatrick case ISD::CONCAT_VECTORS:
230909467b48Spatrick return LowerCONCAT_VECTORS(Op, DAG);
231009467b48Spatrick case ISD::STORE:
231109467b48Spatrick return LowerSTORE(Op, DAG);
231209467b48Spatrick case ISD::LOAD:
231309467b48Spatrick return LowerLOAD(Op, DAG);
231409467b48Spatrick case ISD::SHL_PARTS:
231509467b48Spatrick return LowerShiftLeftParts(Op, DAG);
231609467b48Spatrick case ISD::SRA_PARTS:
231709467b48Spatrick case ISD::SRL_PARTS:
231809467b48Spatrick return LowerShiftRightParts(Op, DAG);
231909467b48Spatrick case ISD::SELECT:
232009467b48Spatrick return LowerSelect(Op, DAG);
232109467b48Spatrick case ISD::FROUND:
232209467b48Spatrick return LowerFROUND(Op, DAG);
2323*d415bd75Srobert case ISD::VAARG:
2324*d415bd75Srobert return LowerVAARG(Op, DAG);
2325*d415bd75Srobert case ISD::VASTART:
2326*d415bd75Srobert return LowerVASTART(Op, DAG);
232709467b48Spatrick default:
232809467b48Spatrick llvm_unreachable("Custom lowering not defined for operation");
232909467b48Spatrick }
233009467b48Spatrick }
233109467b48Spatrick
2332*d415bd75Srobert // This function is almost a copy of SelectionDAG::expandVAArg().
2333*d415bd75Srobert // The only diff is that this one produces loads from local address space.
LowerVAARG(SDValue Op,SelectionDAG & DAG) const2334*d415bd75Srobert SDValue NVPTXTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
2335*d415bd75Srobert const TargetLowering *TLI = STI.getTargetLowering();
2336*d415bd75Srobert SDLoc DL(Op);
2337*d415bd75Srobert
2338*d415bd75Srobert SDNode *Node = Op.getNode();
2339*d415bd75Srobert const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2340*d415bd75Srobert EVT VT = Node->getValueType(0);
2341*d415bd75Srobert auto *Ty = VT.getTypeForEVT(*DAG.getContext());
2342*d415bd75Srobert SDValue Tmp1 = Node->getOperand(0);
2343*d415bd75Srobert SDValue Tmp2 = Node->getOperand(1);
2344*d415bd75Srobert const MaybeAlign MA(Node->getConstantOperandVal(3));
2345*d415bd75Srobert
2346*d415bd75Srobert SDValue VAListLoad = DAG.getLoad(TLI->getPointerTy(DAG.getDataLayout()), DL,
2347*d415bd75Srobert Tmp1, Tmp2, MachinePointerInfo(V));
2348*d415bd75Srobert SDValue VAList = VAListLoad;
2349*d415bd75Srobert
2350*d415bd75Srobert if (MA && *MA > TLI->getMinStackArgumentAlignment()) {
2351*d415bd75Srobert VAList = DAG.getNode(
2352*d415bd75Srobert ISD::ADD, DL, VAList.getValueType(), VAList,
2353*d415bd75Srobert DAG.getConstant(MA->value() - 1, DL, VAList.getValueType()));
2354*d415bd75Srobert
2355*d415bd75Srobert VAList = DAG.getNode(
2356*d415bd75Srobert ISD::AND, DL, VAList.getValueType(), VAList,
2357*d415bd75Srobert DAG.getConstant(-(int64_t)MA->value(), DL, VAList.getValueType()));
2358*d415bd75Srobert }
2359*d415bd75Srobert
2360*d415bd75Srobert // Increment the pointer, VAList, to the next vaarg
2361*d415bd75Srobert Tmp1 = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList,
2362*d415bd75Srobert DAG.getConstant(DAG.getDataLayout().getTypeAllocSize(Ty),
2363*d415bd75Srobert DL, VAList.getValueType()));
2364*d415bd75Srobert
2365*d415bd75Srobert // Store the incremented VAList to the legalized pointer
2366*d415bd75Srobert Tmp1 = DAG.getStore(VAListLoad.getValue(1), DL, Tmp1, Tmp2,
2367*d415bd75Srobert MachinePointerInfo(V));
2368*d415bd75Srobert
2369*d415bd75Srobert const Value *SrcV =
2370*d415bd75Srobert Constant::getNullValue(PointerType::get(Ty, ADDRESS_SPACE_LOCAL));
2371*d415bd75Srobert
2372*d415bd75Srobert // Load the actual argument out of the pointer VAList
2373*d415bd75Srobert return DAG.getLoad(VT, DL, Tmp1, VAList, MachinePointerInfo(SrcV));
2374*d415bd75Srobert }
2375*d415bd75Srobert
LowerVASTART(SDValue Op,SelectionDAG & DAG) const2376*d415bd75Srobert SDValue NVPTXTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
2377*d415bd75Srobert const TargetLowering *TLI = STI.getTargetLowering();
2378*d415bd75Srobert SDLoc DL(Op);
2379*d415bd75Srobert EVT PtrVT = TLI->getPointerTy(DAG.getDataLayout());
2380*d415bd75Srobert
2381*d415bd75Srobert // Store the address of unsized array <function>_vararg[] in the ap object.
2382*d415bd75Srobert SDValue Arg = getParamSymbol(DAG, /* vararg */ -1, PtrVT);
2383*d415bd75Srobert SDValue VAReg = DAG.getNode(NVPTXISD::Wrapper, DL, PtrVT, Arg);
2384*d415bd75Srobert
2385*d415bd75Srobert const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2386*d415bd75Srobert return DAG.getStore(Op.getOperand(0), DL, VAReg, Op.getOperand(1),
2387*d415bd75Srobert MachinePointerInfo(SV));
2388*d415bd75Srobert }
2389*d415bd75Srobert
LowerSelect(SDValue Op,SelectionDAG & DAG) const239009467b48Spatrick SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
239109467b48Spatrick SDValue Op0 = Op->getOperand(0);
239209467b48Spatrick SDValue Op1 = Op->getOperand(1);
239309467b48Spatrick SDValue Op2 = Op->getOperand(2);
239409467b48Spatrick SDLoc DL(Op.getNode());
239509467b48Spatrick
239609467b48Spatrick assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
239709467b48Spatrick
239809467b48Spatrick Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
239909467b48Spatrick Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
240009467b48Spatrick SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
240109467b48Spatrick SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);
240209467b48Spatrick
240309467b48Spatrick return Trunc;
240409467b48Spatrick }
240509467b48Spatrick
LowerLOAD(SDValue Op,SelectionDAG & DAG) const240609467b48Spatrick SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
240709467b48Spatrick if (Op.getValueType() == MVT::i1)
240809467b48Spatrick return LowerLOADi1(Op, DAG);
240909467b48Spatrick
241009467b48Spatrick // v2f16 is legal, so we can't rely on legalizer to handle unaligned
241109467b48Spatrick // loads and have to handle it here.
241209467b48Spatrick if (Op.getValueType() == MVT::v2f16) {
241309467b48Spatrick LoadSDNode *Load = cast<LoadSDNode>(Op);
241409467b48Spatrick EVT MemVT = Load->getMemoryVT();
241509467b48Spatrick if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
241609467b48Spatrick MemVT, *Load->getMemOperand())) {
241709467b48Spatrick SDValue Ops[2];
241809467b48Spatrick std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
241909467b48Spatrick return DAG.getMergeValues(Ops, SDLoc(Op));
242009467b48Spatrick }
242109467b48Spatrick }
242209467b48Spatrick
242309467b48Spatrick return SDValue();
242409467b48Spatrick }
242509467b48Spatrick
242609467b48Spatrick // v = ld i1* addr
242709467b48Spatrick // =>
242809467b48Spatrick // v1 = ld i8* addr (-> i16)
242909467b48Spatrick // v = trunc i16 to i1
LowerLOADi1(SDValue Op,SelectionDAG & DAG) const243009467b48Spatrick SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
243109467b48Spatrick SDNode *Node = Op.getNode();
243209467b48Spatrick LoadSDNode *LD = cast<LoadSDNode>(Node);
243309467b48Spatrick SDLoc dl(Node);
243409467b48Spatrick assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
243509467b48Spatrick assert(Node->getValueType(0) == MVT::i1 &&
243609467b48Spatrick "Custom lowering for i1 load only");
243709467b48Spatrick SDValue newLD = DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
2438*d415bd75Srobert LD->getPointerInfo(), LD->getAlign(),
243909467b48Spatrick LD->getMemOperand()->getFlags());
244009467b48Spatrick SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
244109467b48Spatrick // The legalizer (the caller) is expecting two values from the legalized
244209467b48Spatrick // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
244309467b48Spatrick // in LegalizeDAG.cpp which also uses MergeValues.
244409467b48Spatrick SDValue Ops[] = { result, LD->getChain() };
244509467b48Spatrick return DAG.getMergeValues(Ops, dl);
244609467b48Spatrick }
244709467b48Spatrick
LowerSTORE(SDValue Op,SelectionDAG & DAG) const244809467b48Spatrick SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
244909467b48Spatrick StoreSDNode *Store = cast<StoreSDNode>(Op);
245009467b48Spatrick EVT VT = Store->getMemoryVT();
245109467b48Spatrick
245209467b48Spatrick if (VT == MVT::i1)
245309467b48Spatrick return LowerSTOREi1(Op, DAG);
245409467b48Spatrick
245509467b48Spatrick // v2f16 is legal, so we can't rely on legalizer to handle unaligned
245609467b48Spatrick // stores and have to handle it here.
245709467b48Spatrick if (VT == MVT::v2f16 &&
245809467b48Spatrick !allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
245909467b48Spatrick VT, *Store->getMemOperand()))
246009467b48Spatrick return expandUnalignedStore(Store, DAG);
246109467b48Spatrick
246209467b48Spatrick if (VT.isVector())
246309467b48Spatrick return LowerSTOREVector(Op, DAG);
246409467b48Spatrick
246509467b48Spatrick return SDValue();
246609467b48Spatrick }
246709467b48Spatrick
246809467b48Spatrick SDValue
LowerSTOREVector(SDValue Op,SelectionDAG & DAG) const246909467b48Spatrick NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
247009467b48Spatrick SDNode *N = Op.getNode();
247109467b48Spatrick SDValue Val = N->getOperand(1);
247209467b48Spatrick SDLoc DL(N);
247309467b48Spatrick EVT ValVT = Val.getValueType();
247409467b48Spatrick
247509467b48Spatrick if (ValVT.isVector()) {
247609467b48Spatrick // We only handle "native" vector sizes for now, e.g. <4 x double> is not
247709467b48Spatrick // legal. We can (and should) split that into 2 stores of <2 x double> here
247809467b48Spatrick // but I'm leaving that as a TODO for now.
247909467b48Spatrick if (!ValVT.isSimple())
248009467b48Spatrick return SDValue();
248109467b48Spatrick switch (ValVT.getSimpleVT().SimpleTy) {
248209467b48Spatrick default:
248309467b48Spatrick return SDValue();
248409467b48Spatrick case MVT::v2i8:
248509467b48Spatrick case MVT::v2i16:
248609467b48Spatrick case MVT::v2i32:
248709467b48Spatrick case MVT::v2i64:
248809467b48Spatrick case MVT::v2f16:
2489*d415bd75Srobert case MVT::v2bf16:
249009467b48Spatrick case MVT::v2f32:
249109467b48Spatrick case MVT::v2f64:
249209467b48Spatrick case MVT::v4i8:
249309467b48Spatrick case MVT::v4i16:
249409467b48Spatrick case MVT::v4i32:
249509467b48Spatrick case MVT::v4f16:
2496*d415bd75Srobert case MVT::v4bf16:
249709467b48Spatrick case MVT::v4f32:
249809467b48Spatrick case MVT::v8f16: // <4 x f16x2>
2499*d415bd75Srobert case MVT::v8bf16: // <4 x bf16x2>
250009467b48Spatrick // This is a "native" vector type
250109467b48Spatrick break;
250209467b48Spatrick }
250309467b48Spatrick
250409467b48Spatrick MemSDNode *MemSD = cast<MemSDNode>(N);
250509467b48Spatrick const DataLayout &TD = DAG.getDataLayout();
250609467b48Spatrick
2507097a140dSpatrick Align Alignment = MemSD->getAlign();
2508097a140dSpatrick Align PrefAlign =
2509097a140dSpatrick TD.getPrefTypeAlign(ValVT.getTypeForEVT(*DAG.getContext()));
2510097a140dSpatrick if (Alignment < PrefAlign) {
251109467b48Spatrick // This store is not sufficiently aligned, so bail out and let this vector
251209467b48Spatrick // store be scalarized. Note that we may still be able to emit smaller
251309467b48Spatrick // vector stores. For example, if we are storing a <4 x float> with an
251409467b48Spatrick // alignment of 8, this check will fail but the legalizer will try again
251509467b48Spatrick // with 2 x <2 x float>, which will succeed with an alignment of 8.
251609467b48Spatrick return SDValue();
251709467b48Spatrick }
251809467b48Spatrick
251909467b48Spatrick unsigned Opcode = 0;
252009467b48Spatrick EVT EltVT = ValVT.getVectorElementType();
252109467b48Spatrick unsigned NumElts = ValVT.getVectorNumElements();
252209467b48Spatrick
252309467b48Spatrick // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
252409467b48Spatrick // Therefore, we must ensure the type is legal. For i1 and i8, we set the
252509467b48Spatrick // stored type to i16 and propagate the "real" type as the memory type.
252609467b48Spatrick bool NeedExt = false;
252709467b48Spatrick if (EltVT.getSizeInBits() < 16)
252809467b48Spatrick NeedExt = true;
252909467b48Spatrick
253009467b48Spatrick bool StoreF16x2 = false;
253109467b48Spatrick switch (NumElts) {
253209467b48Spatrick default:
253309467b48Spatrick return SDValue();
253409467b48Spatrick case 2:
253509467b48Spatrick Opcode = NVPTXISD::StoreV2;
253609467b48Spatrick break;
253709467b48Spatrick case 4:
253809467b48Spatrick Opcode = NVPTXISD::StoreV4;
253909467b48Spatrick break;
254009467b48Spatrick case 8:
254109467b48Spatrick // v8f16 is a special case. PTX doesn't have st.v8.f16
254209467b48Spatrick // instruction. Instead, we split the vector into v2f16 chunks and
254309467b48Spatrick // store them with st.v4.b32.
2544*d415bd75Srobert assert((EltVT == MVT::f16 || EltVT == MVT::bf16) &&
2545*d415bd75Srobert "Wrong type for the vector.");
254609467b48Spatrick Opcode = NVPTXISD::StoreV4;
254709467b48Spatrick StoreF16x2 = true;
254809467b48Spatrick break;
254909467b48Spatrick }
255009467b48Spatrick
255109467b48Spatrick SmallVector<SDValue, 8> Ops;
255209467b48Spatrick
255309467b48Spatrick // First is the chain
255409467b48Spatrick Ops.push_back(N->getOperand(0));
255509467b48Spatrick
255609467b48Spatrick if (StoreF16x2) {
255709467b48Spatrick // Combine f16,f16 -> v2f16
255809467b48Spatrick NumElts /= 2;
255909467b48Spatrick for (unsigned i = 0; i < NumElts; ++i) {
256009467b48Spatrick SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val,
256109467b48Spatrick DAG.getIntPtrConstant(i * 2, DL));
256209467b48Spatrick SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val,
256309467b48Spatrick DAG.getIntPtrConstant(i * 2 + 1, DL));
256409467b48Spatrick SDValue V2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f16, E0, E1);
256509467b48Spatrick Ops.push_back(V2);
256609467b48Spatrick }
256709467b48Spatrick } else {
256809467b48Spatrick // Then the split values
256909467b48Spatrick for (unsigned i = 0; i < NumElts; ++i) {
257009467b48Spatrick SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
257109467b48Spatrick DAG.getIntPtrConstant(i, DL));
257209467b48Spatrick if (NeedExt)
257309467b48Spatrick ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
257409467b48Spatrick Ops.push_back(ExtVal);
257509467b48Spatrick }
257609467b48Spatrick }
257709467b48Spatrick
257809467b48Spatrick // Then any remaining arguments
257909467b48Spatrick Ops.append(N->op_begin() + 2, N->op_end());
258009467b48Spatrick
258109467b48Spatrick SDValue NewSt =
258209467b48Spatrick DAG.getMemIntrinsicNode(Opcode, DL, DAG.getVTList(MVT::Other), Ops,
258309467b48Spatrick MemSD->getMemoryVT(), MemSD->getMemOperand());
258409467b48Spatrick
258509467b48Spatrick // return DCI.CombineTo(N, NewSt, true);
258609467b48Spatrick return NewSt;
258709467b48Spatrick }
258809467b48Spatrick
258909467b48Spatrick return SDValue();
259009467b48Spatrick }
259109467b48Spatrick
259209467b48Spatrick // st i1 v, addr
259309467b48Spatrick // =>
259409467b48Spatrick // v1 = zxt v to i16
259509467b48Spatrick // st.u8 i16, addr
LowerSTOREi1(SDValue Op,SelectionDAG & DAG) const259609467b48Spatrick SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
259709467b48Spatrick SDNode *Node = Op.getNode();
259809467b48Spatrick SDLoc dl(Node);
259909467b48Spatrick StoreSDNode *ST = cast<StoreSDNode>(Node);
260009467b48Spatrick SDValue Tmp1 = ST->getChain();
260109467b48Spatrick SDValue Tmp2 = ST->getBasePtr();
260209467b48Spatrick SDValue Tmp3 = ST->getValue();
260309467b48Spatrick assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
260409467b48Spatrick Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3);
260509467b48Spatrick SDValue Result =
260609467b48Spatrick DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8,
2607*d415bd75Srobert ST->getAlign(), ST->getMemOperand()->getFlags());
260809467b48Spatrick return Result;
260909467b48Spatrick }
261009467b48Spatrick
2611*d415bd75Srobert // This creates target external symbol for a function parameter.
2612*d415bd75Srobert // Name of the symbol is composed from its index and the function name.
2613*d415bd75Srobert // Negative index corresponds to special parameter (unsized array) used for
2614*d415bd75Srobert // passing variable arguments.
getParamSymbol(SelectionDAG & DAG,int idx,EVT v) const2615*d415bd75Srobert SDValue NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx,
2616*d415bd75Srobert EVT v) const {
261709467b48Spatrick std::string ParamSym;
261809467b48Spatrick raw_string_ostream ParamStr(ParamSym);
261909467b48Spatrick
2620*d415bd75Srobert ParamStr << DAG.getMachineFunction().getName();
262109467b48Spatrick
2622*d415bd75Srobert if (idx < 0)
2623*d415bd75Srobert ParamStr << "_vararg";
2624*d415bd75Srobert else
2625*d415bd75Srobert ParamStr << "_param_" << idx;
262609467b48Spatrick
2627*d415bd75Srobert StringRef SavedStr =
2628*d415bd75Srobert nvTM->getStrPool().save(ParamSym);
2629*d415bd75Srobert return DAG.getTargetExternalSymbol(SavedStr.data(), v);
263009467b48Spatrick }
263109467b48Spatrick
LowerFormalArguments(SDValue Chain,CallingConv::ID CallConv,bool isVarArg,const SmallVectorImpl<ISD::InputArg> & Ins,const SDLoc & dl,SelectionDAG & DAG,SmallVectorImpl<SDValue> & InVals) const263209467b48Spatrick SDValue NVPTXTargetLowering::LowerFormalArguments(
263309467b48Spatrick SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
263409467b48Spatrick const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
263509467b48Spatrick SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
263609467b48Spatrick MachineFunction &MF = DAG.getMachineFunction();
263709467b48Spatrick const DataLayout &DL = DAG.getDataLayout();
263809467b48Spatrick auto PtrVT = getPointerTy(DAG.getDataLayout());
263909467b48Spatrick
264009467b48Spatrick const Function *F = &MF.getFunction();
264109467b48Spatrick const AttributeList &PAL = F->getAttributes();
264209467b48Spatrick const TargetLowering *TLI = STI.getTargetLowering();
264309467b48Spatrick
264409467b48Spatrick SDValue Root = DAG.getRoot();
264509467b48Spatrick std::vector<SDValue> OutChains;
264609467b48Spatrick
264709467b48Spatrick bool isABI = (STI.getSmVersion() >= 20);
264809467b48Spatrick assert(isABI && "Non-ABI compilation is not supported");
264909467b48Spatrick if (!isABI)
265009467b48Spatrick return Chain;
265109467b48Spatrick
265209467b48Spatrick std::vector<Type *> argTypes;
265309467b48Spatrick std::vector<const Argument *> theArgs;
265409467b48Spatrick for (const Argument &I : F->args()) {
265509467b48Spatrick theArgs.push_back(&I);
265609467b48Spatrick argTypes.push_back(I.getType());
265709467b48Spatrick }
265809467b48Spatrick // argTypes.size() (or theArgs.size()) and Ins.size() need not match.
265909467b48Spatrick // Ins.size() will be larger
266009467b48Spatrick // * if there is an aggregate argument with multiple fields (each field
266109467b48Spatrick // showing up separately in Ins)
266209467b48Spatrick // * if there is a vector argument with more than typical vector-length
266309467b48Spatrick // elements (generally if more than 4) where each vector element is
266409467b48Spatrick // individually present in Ins.
266509467b48Spatrick // So a different index should be used for indexing into Ins.
266609467b48Spatrick // See similar issue in LowerCall.
266709467b48Spatrick unsigned InsIdx = 0;
266809467b48Spatrick
266909467b48Spatrick int idx = 0;
267009467b48Spatrick for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
267109467b48Spatrick Type *Ty = argTypes[i];
267209467b48Spatrick
267309467b48Spatrick if (theArgs[i]->use_empty()) {
267409467b48Spatrick // argument is dead
267509467b48Spatrick if (Ty->isAggregateType() || Ty->isIntegerTy(128)) {
267609467b48Spatrick SmallVector<EVT, 16> vtparts;
267709467b48Spatrick
267809467b48Spatrick ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts);
267909467b48Spatrick assert(vtparts.size() > 0 && "empty aggregate type not expected");
268009467b48Spatrick for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
268109467b48Spatrick ++parti) {
268209467b48Spatrick InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
268309467b48Spatrick ++InsIdx;
268409467b48Spatrick }
268509467b48Spatrick if (vtparts.size() > 0)
268609467b48Spatrick --InsIdx;
268709467b48Spatrick continue;
268809467b48Spatrick }
268909467b48Spatrick if (Ty->isVectorTy()) {
269009467b48Spatrick EVT ObjectVT = getValueType(DL, Ty);
269109467b48Spatrick unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
269209467b48Spatrick for (unsigned parti = 0; parti < NumRegs; ++parti) {
269309467b48Spatrick InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
269409467b48Spatrick ++InsIdx;
269509467b48Spatrick }
269609467b48Spatrick if (NumRegs > 0)
269709467b48Spatrick --InsIdx;
269809467b48Spatrick continue;
269909467b48Spatrick }
270009467b48Spatrick InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
270109467b48Spatrick continue;
270209467b48Spatrick }
270309467b48Spatrick
270409467b48Spatrick // In the following cases, assign a node order of "idx+1"
270509467b48Spatrick // to newly created nodes. The SDNodes for params have to
270609467b48Spatrick // appear in the same order as their order of appearance
270709467b48Spatrick // in the original function. "idx+1" holds that order.
2708*d415bd75Srobert if (!PAL.hasParamAttr(i, Attribute::ByVal)) {
270909467b48Spatrick bool aggregateIsPacked = false;
271009467b48Spatrick if (StructType *STy = dyn_cast<StructType>(Ty))
271109467b48Spatrick aggregateIsPacked = STy->isPacked();
271209467b48Spatrick
271309467b48Spatrick SmallVector<EVT, 16> VTs;
271409467b48Spatrick SmallVector<uint64_t, 16> Offsets;
271509467b48Spatrick ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0);
271609467b48Spatrick assert(VTs.size() > 0 && "Unexpected empty type.");
271709467b48Spatrick auto VectorInfo =
2718097a140dSpatrick VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlign(Ty));
271909467b48Spatrick
272009467b48Spatrick SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
272109467b48Spatrick int VecIdx = -1; // Index of the first element of the current vector.
272209467b48Spatrick for (unsigned parti = 0, parte = VTs.size(); parti != parte; ++parti) {
272309467b48Spatrick if (VectorInfo[parti] & PVF_FIRST) {
272409467b48Spatrick assert(VecIdx == -1 && "Orphaned vector.");
272509467b48Spatrick VecIdx = parti;
272609467b48Spatrick }
272709467b48Spatrick
272809467b48Spatrick // That's the last element of this store op.
272909467b48Spatrick if (VectorInfo[parti] & PVF_LAST) {
273009467b48Spatrick unsigned NumElts = parti - VecIdx + 1;
273109467b48Spatrick EVT EltVT = VTs[parti];
273209467b48Spatrick // i1 is loaded/stored as i8.
273309467b48Spatrick EVT LoadVT = EltVT;
273409467b48Spatrick if (EltVT == MVT::i1)
273509467b48Spatrick LoadVT = MVT::i8;
273609467b48Spatrick else if (EltVT == MVT::v2f16)
273709467b48Spatrick // getLoad needs a vector type, but it can't handle
273809467b48Spatrick // vectors which contain v2f16 elements. So we must load
273909467b48Spatrick // using i32 here and then bitcast back.
274009467b48Spatrick LoadVT = MVT::i32;
274109467b48Spatrick
274209467b48Spatrick EVT VecVT = EVT::getVectorVT(F->getContext(), LoadVT, NumElts);
274309467b48Spatrick SDValue VecAddr =
274409467b48Spatrick DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
274509467b48Spatrick DAG.getConstant(Offsets[VecIdx], dl, PtrVT));
274609467b48Spatrick Value *srcValue = Constant::getNullValue(PointerType::get(
274709467b48Spatrick EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
2748*d415bd75Srobert SDValue P = DAG.getLoad(VecVT, dl, Root, VecAddr,
2749*d415bd75Srobert MachinePointerInfo(srcValue),
2750*d415bd75Srobert MaybeAlign(aggregateIsPacked ? 1 : 0),
275109467b48Spatrick MachineMemOperand::MODereferenceable |
275209467b48Spatrick MachineMemOperand::MOInvariant);
275309467b48Spatrick if (P.getNode())
275409467b48Spatrick P.getNode()->setIROrder(idx + 1);
275509467b48Spatrick for (unsigned j = 0; j < NumElts; ++j) {
275609467b48Spatrick SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LoadVT, P,
275709467b48Spatrick DAG.getIntPtrConstant(j, dl));
275809467b48Spatrick // We've loaded i1 as an i8 and now must truncate it back to i1
275909467b48Spatrick if (EltVT == MVT::i1)
276009467b48Spatrick Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Elt);
276109467b48Spatrick // v2f16 was loaded as an i32. Now we must bitcast it back.
276209467b48Spatrick else if (EltVT == MVT::v2f16)
276309467b48Spatrick Elt = DAG.getNode(ISD::BITCAST, dl, MVT::v2f16, Elt);
2764*d415bd75Srobert
2765*d415bd75Srobert // If a promoted integer type is used, truncate down to the original
2766*d415bd75Srobert MVT PromotedVT;
2767*d415bd75Srobert if (PromoteScalarIntegerPTX(EltVT, &PromotedVT)) {
2768*d415bd75Srobert Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
2769*d415bd75Srobert }
2770*d415bd75Srobert
277109467b48Spatrick // Extend the element if necessary (e.g. an i8 is loaded
277209467b48Spatrick // into an i16 register)
277309467b48Spatrick if (Ins[InsIdx].VT.isInteger() &&
277473471bf0Spatrick Ins[InsIdx].VT.getFixedSizeInBits() >
277573471bf0Spatrick LoadVT.getFixedSizeInBits()) {
277609467b48Spatrick unsigned Extend = Ins[InsIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
277709467b48Spatrick : ISD::ZERO_EXTEND;
277809467b48Spatrick Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt);
277909467b48Spatrick }
278009467b48Spatrick InVals.push_back(Elt);
278109467b48Spatrick }
278209467b48Spatrick
278309467b48Spatrick // Reset vector tracking state.
278409467b48Spatrick VecIdx = -1;
278509467b48Spatrick }
278609467b48Spatrick ++InsIdx;
278709467b48Spatrick }
278809467b48Spatrick if (VTs.size() > 0)
278909467b48Spatrick --InsIdx;
279009467b48Spatrick continue;
279109467b48Spatrick }
279209467b48Spatrick
279309467b48Spatrick // Param has ByVal attribute
279409467b48Spatrick // Return MoveParam(param symbol).
279509467b48Spatrick // Ideally, the param symbol can be returned directly,
279609467b48Spatrick // but when SDNode builder decides to use it in a CopyToReg(),
279709467b48Spatrick // machine instruction fails because TargetExternalSymbol
279809467b48Spatrick // (not lowered) is target dependent, and CopyToReg assumes
279909467b48Spatrick // the source is lowered.
280009467b48Spatrick EVT ObjectVT = getValueType(DL, Ty);
280109467b48Spatrick assert(ObjectVT == Ins[InsIdx].VT &&
280209467b48Spatrick "Ins type did not match function type");
280309467b48Spatrick SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
280409467b48Spatrick SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
280509467b48Spatrick if (p.getNode())
280609467b48Spatrick p.getNode()->setIROrder(idx + 1);
280709467b48Spatrick InVals.push_back(p);
280809467b48Spatrick }
280909467b48Spatrick
281009467b48Spatrick if (!OutChains.empty())
281109467b48Spatrick DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));
281209467b48Spatrick
281309467b48Spatrick return Chain;
281409467b48Spatrick }
281509467b48Spatrick
281609467b48Spatrick SDValue
LowerReturn(SDValue Chain,CallingConv::ID CallConv,bool isVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,const SmallVectorImpl<SDValue> & OutVals,const SDLoc & dl,SelectionDAG & DAG) const281709467b48Spatrick NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
281809467b48Spatrick bool isVarArg,
281909467b48Spatrick const SmallVectorImpl<ISD::OutputArg> &Outs,
282009467b48Spatrick const SmallVectorImpl<SDValue> &OutVals,
282109467b48Spatrick const SDLoc &dl, SelectionDAG &DAG) const {
2822*d415bd75Srobert const MachineFunction &MF = DAG.getMachineFunction();
2823*d415bd75Srobert const Function &F = MF.getFunction();
282409467b48Spatrick Type *RetTy = MF.getFunction().getReturnType();
282509467b48Spatrick
282609467b48Spatrick bool isABI = (STI.getSmVersion() >= 20);
282709467b48Spatrick assert(isABI && "Non-ABI compilation is not supported");
282809467b48Spatrick if (!isABI)
282909467b48Spatrick return Chain;
283009467b48Spatrick
283173471bf0Spatrick const DataLayout &DL = DAG.getDataLayout();
2832*d415bd75Srobert SmallVector<SDValue, 16> PromotedOutVals;
283309467b48Spatrick SmallVector<EVT, 16> VTs;
283409467b48Spatrick SmallVector<uint64_t, 16> Offsets;
283509467b48Spatrick ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets);
283609467b48Spatrick assert(VTs.size() == OutVals.size() && "Bad return value decomposition");
283709467b48Spatrick
2838*d415bd75Srobert for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
2839*d415bd75Srobert SDValue PromotedOutVal = OutVals[i];
2840*d415bd75Srobert MVT PromotedVT;
2841*d415bd75Srobert if (PromoteScalarIntegerPTX(VTs[i], &PromotedVT)) {
2842*d415bd75Srobert VTs[i] = EVT(PromotedVT);
2843*d415bd75Srobert }
2844*d415bd75Srobert if (PromoteScalarIntegerPTX(PromotedOutVal.getValueType(), &PromotedVT)) {
2845*d415bd75Srobert llvm::ISD::NodeType Ext =
2846*d415bd75Srobert Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2847*d415bd75Srobert PromotedOutVal = DAG.getNode(Ext, dl, PromotedVT, PromotedOutVal);
2848*d415bd75Srobert }
2849*d415bd75Srobert PromotedOutVals.push_back(PromotedOutVal);
2850*d415bd75Srobert }
2851*d415bd75Srobert
285209467b48Spatrick auto VectorInfo = VectorizePTXValueVTs(
2853*d415bd75Srobert VTs, Offsets,
2854*d415bd75Srobert RetTy->isSized() ? getFunctionParamOptimizedAlign(&F, RetTy, DL)
2855*d415bd75Srobert : Align(1));
285609467b48Spatrick
285709467b48Spatrick // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
285809467b48Spatrick // 32-bits are sign extended or zero extended, depending on whether
285909467b48Spatrick // they are signed or unsigned types.
286009467b48Spatrick bool ExtendIntegerRetVal =
286109467b48Spatrick RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
286209467b48Spatrick
286309467b48Spatrick SmallVector<SDValue, 6> StoreOperands;
286409467b48Spatrick for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
286509467b48Spatrick // New load/store. Record chain and offset operands.
286609467b48Spatrick if (VectorInfo[i] & PVF_FIRST) {
286709467b48Spatrick assert(StoreOperands.empty() && "Orphaned operand list.");
286809467b48Spatrick StoreOperands.push_back(Chain);
286909467b48Spatrick StoreOperands.push_back(DAG.getConstant(Offsets[i], dl, MVT::i32));
287009467b48Spatrick }
287109467b48Spatrick
2872*d415bd75Srobert SDValue OutVal = OutVals[i];
2873*d415bd75Srobert SDValue RetVal = PromotedOutVals[i];
2874*d415bd75Srobert
287509467b48Spatrick if (ExtendIntegerRetVal) {
287609467b48Spatrick RetVal = DAG.getNode(Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND
287709467b48Spatrick : ISD::ZERO_EXTEND,
287809467b48Spatrick dl, MVT::i32, RetVal);
2879*d415bd75Srobert } else if (OutVal.getValueSizeInBits() < 16) {
288009467b48Spatrick // Use 16-bit registers for small load-stores as it's the
288109467b48Spatrick // smallest general purpose register size supported by NVPTX.
288209467b48Spatrick RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal);
288309467b48Spatrick }
288409467b48Spatrick
288509467b48Spatrick // Record the value to return.
288609467b48Spatrick StoreOperands.push_back(RetVal);
288709467b48Spatrick
288809467b48Spatrick // That's the last element of this store op.
288909467b48Spatrick if (VectorInfo[i] & PVF_LAST) {
289009467b48Spatrick NVPTXISD::NodeType Op;
289109467b48Spatrick unsigned NumElts = StoreOperands.size() - 2;
289209467b48Spatrick switch (NumElts) {
289309467b48Spatrick case 1:
289409467b48Spatrick Op = NVPTXISD::StoreRetval;
289509467b48Spatrick break;
289609467b48Spatrick case 2:
289709467b48Spatrick Op = NVPTXISD::StoreRetvalV2;
289809467b48Spatrick break;
289909467b48Spatrick case 4:
290009467b48Spatrick Op = NVPTXISD::StoreRetvalV4;
290109467b48Spatrick break;
290209467b48Spatrick default:
290309467b48Spatrick llvm_unreachable("Invalid vector info.");
290409467b48Spatrick }
290509467b48Spatrick
290609467b48Spatrick // Adjust type of load/store op if we've extended the scalar
290709467b48Spatrick // return value.
290809467b48Spatrick EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];
2909097a140dSpatrick Chain = DAG.getMemIntrinsicNode(
2910097a140dSpatrick Op, dl, DAG.getVTList(MVT::Other), StoreOperands, TheStoreType,
2911097a140dSpatrick MachinePointerInfo(), Align(1), MachineMemOperand::MOStore);
291209467b48Spatrick // Cleanup vector state.
291309467b48Spatrick StoreOperands.clear();
291409467b48Spatrick }
291509467b48Spatrick }
291609467b48Spatrick
291709467b48Spatrick return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
291809467b48Spatrick }
291909467b48Spatrick
LowerAsmOperandForConstraint(SDValue Op,std::string & Constraint,std::vector<SDValue> & Ops,SelectionDAG & DAG) const292009467b48Spatrick void NVPTXTargetLowering::LowerAsmOperandForConstraint(
292109467b48Spatrick SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
292209467b48Spatrick SelectionDAG &DAG) const {
292309467b48Spatrick if (Constraint.length() > 1)
292409467b48Spatrick return;
292509467b48Spatrick else
292609467b48Spatrick TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
292709467b48Spatrick }
292809467b48Spatrick
getOpcForTextureInstr(unsigned Intrinsic)292909467b48Spatrick static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
293009467b48Spatrick switch (Intrinsic) {
293109467b48Spatrick default:
293209467b48Spatrick return 0;
293309467b48Spatrick
293409467b48Spatrick case Intrinsic::nvvm_tex_1d_v4f32_s32:
293509467b48Spatrick return NVPTXISD::Tex1DFloatS32;
293609467b48Spatrick case Intrinsic::nvvm_tex_1d_v4f32_f32:
293709467b48Spatrick return NVPTXISD::Tex1DFloatFloat;
293809467b48Spatrick case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
293909467b48Spatrick return NVPTXISD::Tex1DFloatFloatLevel;
294009467b48Spatrick case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
294109467b48Spatrick return NVPTXISD::Tex1DFloatFloatGrad;
294209467b48Spatrick case Intrinsic::nvvm_tex_1d_v4s32_s32:
294309467b48Spatrick return NVPTXISD::Tex1DS32S32;
294409467b48Spatrick case Intrinsic::nvvm_tex_1d_v4s32_f32:
294509467b48Spatrick return NVPTXISD::Tex1DS32Float;
294609467b48Spatrick case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
294709467b48Spatrick return NVPTXISD::Tex1DS32FloatLevel;
294809467b48Spatrick case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
294909467b48Spatrick return NVPTXISD::Tex1DS32FloatGrad;
295009467b48Spatrick case Intrinsic::nvvm_tex_1d_v4u32_s32:
295109467b48Spatrick return NVPTXISD::Tex1DU32S32;
295209467b48Spatrick case Intrinsic::nvvm_tex_1d_v4u32_f32:
295309467b48Spatrick return NVPTXISD::Tex1DU32Float;
295409467b48Spatrick case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
295509467b48Spatrick return NVPTXISD::Tex1DU32FloatLevel;
295609467b48Spatrick case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
295709467b48Spatrick return NVPTXISD::Tex1DU32FloatGrad;
295809467b48Spatrick
295909467b48Spatrick case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
296009467b48Spatrick return NVPTXISD::Tex1DArrayFloatS32;
296109467b48Spatrick case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
296209467b48Spatrick return NVPTXISD::Tex1DArrayFloatFloat;
296309467b48Spatrick case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
296409467b48Spatrick return NVPTXISD::Tex1DArrayFloatFloatLevel;
296509467b48Spatrick case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
296609467b48Spatrick return NVPTXISD::Tex1DArrayFloatFloatGrad;
296709467b48Spatrick case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
296809467b48Spatrick return NVPTXISD::Tex1DArrayS32S32;
296909467b48Spatrick case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
297009467b48Spatrick return NVPTXISD::Tex1DArrayS32Float;
297109467b48Spatrick case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
297209467b48Spatrick return NVPTXISD::Tex1DArrayS32FloatLevel;
297309467b48Spatrick case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
297409467b48Spatrick return NVPTXISD::Tex1DArrayS32FloatGrad;
297509467b48Spatrick case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
297609467b48Spatrick return NVPTXISD::Tex1DArrayU32S32;
297709467b48Spatrick case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
297809467b48Spatrick return NVPTXISD::Tex1DArrayU32Float;
297909467b48Spatrick case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
298009467b48Spatrick return NVPTXISD::Tex1DArrayU32FloatLevel;
298109467b48Spatrick case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
298209467b48Spatrick return NVPTXISD::Tex1DArrayU32FloatGrad;
298309467b48Spatrick
298409467b48Spatrick case Intrinsic::nvvm_tex_2d_v4f32_s32:
298509467b48Spatrick return NVPTXISD::Tex2DFloatS32;
298609467b48Spatrick case Intrinsic::nvvm_tex_2d_v4f32_f32:
298709467b48Spatrick return NVPTXISD::Tex2DFloatFloat;
298809467b48Spatrick case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
298909467b48Spatrick return NVPTXISD::Tex2DFloatFloatLevel;
299009467b48Spatrick case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
299109467b48Spatrick return NVPTXISD::Tex2DFloatFloatGrad;
299209467b48Spatrick case Intrinsic::nvvm_tex_2d_v4s32_s32:
299309467b48Spatrick return NVPTXISD::Tex2DS32S32;
299409467b48Spatrick case Intrinsic::nvvm_tex_2d_v4s32_f32:
299509467b48Spatrick return NVPTXISD::Tex2DS32Float;
299609467b48Spatrick case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
299709467b48Spatrick return NVPTXISD::Tex2DS32FloatLevel;
299809467b48Spatrick case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
299909467b48Spatrick return NVPTXISD::Tex2DS32FloatGrad;
300009467b48Spatrick case Intrinsic::nvvm_tex_2d_v4u32_s32:
300109467b48Spatrick return NVPTXISD::Tex2DU32S32;
300209467b48Spatrick case Intrinsic::nvvm_tex_2d_v4u32_f32:
300309467b48Spatrick return NVPTXISD::Tex2DU32Float;
300409467b48Spatrick case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
300509467b48Spatrick return NVPTXISD::Tex2DU32FloatLevel;
300609467b48Spatrick case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
300709467b48Spatrick return NVPTXISD::Tex2DU32FloatGrad;
300809467b48Spatrick
300909467b48Spatrick case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
301009467b48Spatrick return NVPTXISD::Tex2DArrayFloatS32;
301109467b48Spatrick case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
301209467b48Spatrick return NVPTXISD::Tex2DArrayFloatFloat;
301309467b48Spatrick case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
301409467b48Spatrick return NVPTXISD::Tex2DArrayFloatFloatLevel;
301509467b48Spatrick case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
301609467b48Spatrick return NVPTXISD::Tex2DArrayFloatFloatGrad;
301709467b48Spatrick case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
301809467b48Spatrick return NVPTXISD::Tex2DArrayS32S32;
301909467b48Spatrick case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
302009467b48Spatrick return NVPTXISD::Tex2DArrayS32Float;
302109467b48Spatrick case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
302209467b48Spatrick return NVPTXISD::Tex2DArrayS32FloatLevel;
302309467b48Spatrick case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
302409467b48Spatrick return NVPTXISD::Tex2DArrayS32FloatGrad;
302509467b48Spatrick case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
302609467b48Spatrick return NVPTXISD::Tex2DArrayU32S32;
302709467b48Spatrick case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
302809467b48Spatrick return NVPTXISD::Tex2DArrayU32Float;
302909467b48Spatrick case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
303009467b48Spatrick return NVPTXISD::Tex2DArrayU32FloatLevel;
303109467b48Spatrick case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
303209467b48Spatrick return NVPTXISD::Tex2DArrayU32FloatGrad;
303309467b48Spatrick
303409467b48Spatrick case Intrinsic::nvvm_tex_3d_v4f32_s32:
303509467b48Spatrick return NVPTXISD::Tex3DFloatS32;
303609467b48Spatrick case Intrinsic::nvvm_tex_3d_v4f32_f32:
303709467b48Spatrick return NVPTXISD::Tex3DFloatFloat;
303809467b48Spatrick case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
303909467b48Spatrick return NVPTXISD::Tex3DFloatFloatLevel;
304009467b48Spatrick case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
304109467b48Spatrick return NVPTXISD::Tex3DFloatFloatGrad;
304209467b48Spatrick case Intrinsic::nvvm_tex_3d_v4s32_s32:
304309467b48Spatrick return NVPTXISD::Tex3DS32S32;
304409467b48Spatrick case Intrinsic::nvvm_tex_3d_v4s32_f32:
304509467b48Spatrick return NVPTXISD::Tex3DS32Float;
304609467b48Spatrick case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
304709467b48Spatrick return NVPTXISD::Tex3DS32FloatLevel;
304809467b48Spatrick case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
304909467b48Spatrick return NVPTXISD::Tex3DS32FloatGrad;
305009467b48Spatrick case Intrinsic::nvvm_tex_3d_v4u32_s32:
305109467b48Spatrick return NVPTXISD::Tex3DU32S32;
305209467b48Spatrick case Intrinsic::nvvm_tex_3d_v4u32_f32:
305309467b48Spatrick return NVPTXISD::Tex3DU32Float;
305409467b48Spatrick case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
305509467b48Spatrick return NVPTXISD::Tex3DU32FloatLevel;
305609467b48Spatrick case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
305709467b48Spatrick return NVPTXISD::Tex3DU32FloatGrad;
305809467b48Spatrick
305909467b48Spatrick case Intrinsic::nvvm_tex_cube_v4f32_f32:
306009467b48Spatrick return NVPTXISD::TexCubeFloatFloat;
306109467b48Spatrick case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
306209467b48Spatrick return NVPTXISD::TexCubeFloatFloatLevel;
306309467b48Spatrick case Intrinsic::nvvm_tex_cube_v4s32_f32:
306409467b48Spatrick return NVPTXISD::TexCubeS32Float;
306509467b48Spatrick case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
306609467b48Spatrick return NVPTXISD::TexCubeS32FloatLevel;
306709467b48Spatrick case Intrinsic::nvvm_tex_cube_v4u32_f32:
306809467b48Spatrick return NVPTXISD::TexCubeU32Float;
306909467b48Spatrick case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
307009467b48Spatrick return NVPTXISD::TexCubeU32FloatLevel;
307109467b48Spatrick
307209467b48Spatrick case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
307309467b48Spatrick return NVPTXISD::TexCubeArrayFloatFloat;
307409467b48Spatrick case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
307509467b48Spatrick return NVPTXISD::TexCubeArrayFloatFloatLevel;
307609467b48Spatrick case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
307709467b48Spatrick return NVPTXISD::TexCubeArrayS32Float;
307809467b48Spatrick case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
307909467b48Spatrick return NVPTXISD::TexCubeArrayS32FloatLevel;
308009467b48Spatrick case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
308109467b48Spatrick return NVPTXISD::TexCubeArrayU32Float;
308209467b48Spatrick case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
308309467b48Spatrick return NVPTXISD::TexCubeArrayU32FloatLevel;
308409467b48Spatrick
308509467b48Spatrick case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
308609467b48Spatrick return NVPTXISD::Tld4R2DFloatFloat;
308709467b48Spatrick case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
308809467b48Spatrick return NVPTXISD::Tld4G2DFloatFloat;
308909467b48Spatrick case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
309009467b48Spatrick return NVPTXISD::Tld4B2DFloatFloat;
309109467b48Spatrick case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
309209467b48Spatrick return NVPTXISD::Tld4A2DFloatFloat;
309309467b48Spatrick case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
309409467b48Spatrick return NVPTXISD::Tld4R2DS64Float;
309509467b48Spatrick case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
309609467b48Spatrick return NVPTXISD::Tld4G2DS64Float;
309709467b48Spatrick case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
309809467b48Spatrick return NVPTXISD::Tld4B2DS64Float;
309909467b48Spatrick case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
310009467b48Spatrick return NVPTXISD::Tld4A2DS64Float;
310109467b48Spatrick case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
310209467b48Spatrick return NVPTXISD::Tld4R2DU64Float;
310309467b48Spatrick case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
310409467b48Spatrick return NVPTXISD::Tld4G2DU64Float;
310509467b48Spatrick case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
310609467b48Spatrick return NVPTXISD::Tld4B2DU64Float;
310709467b48Spatrick case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
310809467b48Spatrick return NVPTXISD::Tld4A2DU64Float;
310909467b48Spatrick
311009467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
311109467b48Spatrick return NVPTXISD::TexUnified1DFloatS32;
311209467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
311309467b48Spatrick return NVPTXISD::TexUnified1DFloatFloat;
311409467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
311509467b48Spatrick return NVPTXISD::TexUnified1DFloatFloatLevel;
311609467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
311709467b48Spatrick return NVPTXISD::TexUnified1DFloatFloatGrad;
311809467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
311909467b48Spatrick return NVPTXISD::TexUnified1DS32S32;
312009467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
312109467b48Spatrick return NVPTXISD::TexUnified1DS32Float;
312209467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
312309467b48Spatrick return NVPTXISD::TexUnified1DS32FloatLevel;
312409467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
312509467b48Spatrick return NVPTXISD::TexUnified1DS32FloatGrad;
312609467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
312709467b48Spatrick return NVPTXISD::TexUnified1DU32S32;
312809467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
312909467b48Spatrick return NVPTXISD::TexUnified1DU32Float;
313009467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
313109467b48Spatrick return NVPTXISD::TexUnified1DU32FloatLevel;
313209467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
313309467b48Spatrick return NVPTXISD::TexUnified1DU32FloatGrad;
313409467b48Spatrick
313509467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
313609467b48Spatrick return NVPTXISD::TexUnified1DArrayFloatS32;
313709467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
313809467b48Spatrick return NVPTXISD::TexUnified1DArrayFloatFloat;
313909467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
314009467b48Spatrick return NVPTXISD::TexUnified1DArrayFloatFloatLevel;
314109467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
314209467b48Spatrick return NVPTXISD::TexUnified1DArrayFloatFloatGrad;
314309467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
314409467b48Spatrick return NVPTXISD::TexUnified1DArrayS32S32;
314509467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
314609467b48Spatrick return NVPTXISD::TexUnified1DArrayS32Float;
314709467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
314809467b48Spatrick return NVPTXISD::TexUnified1DArrayS32FloatLevel;
314909467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
315009467b48Spatrick return NVPTXISD::TexUnified1DArrayS32FloatGrad;
315109467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
315209467b48Spatrick return NVPTXISD::TexUnified1DArrayU32S32;
315309467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
315409467b48Spatrick return NVPTXISD::TexUnified1DArrayU32Float;
315509467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
315609467b48Spatrick return NVPTXISD::TexUnified1DArrayU32FloatLevel;
315709467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
315809467b48Spatrick return NVPTXISD::TexUnified1DArrayU32FloatGrad;
315909467b48Spatrick
316009467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
316109467b48Spatrick return NVPTXISD::TexUnified2DFloatS32;
316209467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
316309467b48Spatrick return NVPTXISD::TexUnified2DFloatFloat;
316409467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
316509467b48Spatrick return NVPTXISD::TexUnified2DFloatFloatLevel;
316609467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
316709467b48Spatrick return NVPTXISD::TexUnified2DFloatFloatGrad;
316809467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
316909467b48Spatrick return NVPTXISD::TexUnified2DS32S32;
317009467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
317109467b48Spatrick return NVPTXISD::TexUnified2DS32Float;
317209467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
317309467b48Spatrick return NVPTXISD::TexUnified2DS32FloatLevel;
317409467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
317509467b48Spatrick return NVPTXISD::TexUnified2DS32FloatGrad;
317609467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
317709467b48Spatrick return NVPTXISD::TexUnified2DU32S32;
317809467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
317909467b48Spatrick return NVPTXISD::TexUnified2DU32Float;
318009467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
318109467b48Spatrick return NVPTXISD::TexUnified2DU32FloatLevel;
318209467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
318309467b48Spatrick return NVPTXISD::TexUnified2DU32FloatGrad;
318409467b48Spatrick
318509467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
318609467b48Spatrick return NVPTXISD::TexUnified2DArrayFloatS32;
318709467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
318809467b48Spatrick return NVPTXISD::TexUnified2DArrayFloatFloat;
318909467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
319009467b48Spatrick return NVPTXISD::TexUnified2DArrayFloatFloatLevel;
319109467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
319209467b48Spatrick return NVPTXISD::TexUnified2DArrayFloatFloatGrad;
319309467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
319409467b48Spatrick return NVPTXISD::TexUnified2DArrayS32S32;
319509467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
319609467b48Spatrick return NVPTXISD::TexUnified2DArrayS32Float;
319709467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
319809467b48Spatrick return NVPTXISD::TexUnified2DArrayS32FloatLevel;
319909467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
320009467b48Spatrick return NVPTXISD::TexUnified2DArrayS32FloatGrad;
320109467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
320209467b48Spatrick return NVPTXISD::TexUnified2DArrayU32S32;
320309467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
320409467b48Spatrick return NVPTXISD::TexUnified2DArrayU32Float;
320509467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
320609467b48Spatrick return NVPTXISD::TexUnified2DArrayU32FloatLevel;
320709467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
320809467b48Spatrick return NVPTXISD::TexUnified2DArrayU32FloatGrad;
320909467b48Spatrick
321009467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
321109467b48Spatrick return NVPTXISD::TexUnified3DFloatS32;
321209467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
321309467b48Spatrick return NVPTXISD::TexUnified3DFloatFloat;
321409467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
321509467b48Spatrick return NVPTXISD::TexUnified3DFloatFloatLevel;
321609467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
321709467b48Spatrick return NVPTXISD::TexUnified3DFloatFloatGrad;
321809467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
321909467b48Spatrick return NVPTXISD::TexUnified3DS32S32;
322009467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
322109467b48Spatrick return NVPTXISD::TexUnified3DS32Float;
322209467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
322309467b48Spatrick return NVPTXISD::TexUnified3DS32FloatLevel;
322409467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
322509467b48Spatrick return NVPTXISD::TexUnified3DS32FloatGrad;
322609467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
322709467b48Spatrick return NVPTXISD::TexUnified3DU32S32;
322809467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
322909467b48Spatrick return NVPTXISD::TexUnified3DU32Float;
323009467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
323109467b48Spatrick return NVPTXISD::TexUnified3DU32FloatLevel;
323209467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
323309467b48Spatrick return NVPTXISD::TexUnified3DU32FloatGrad;
323409467b48Spatrick
323509467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
323609467b48Spatrick return NVPTXISD::TexUnifiedCubeFloatFloat;
323709467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
323809467b48Spatrick return NVPTXISD::TexUnifiedCubeFloatFloatLevel;
323909467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
324009467b48Spatrick return NVPTXISD::TexUnifiedCubeS32Float;
324109467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
324209467b48Spatrick return NVPTXISD::TexUnifiedCubeS32FloatLevel;
324309467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
324409467b48Spatrick return NVPTXISD::TexUnifiedCubeU32Float;
324509467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
324609467b48Spatrick return NVPTXISD::TexUnifiedCubeU32FloatLevel;
324709467b48Spatrick
324809467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
324909467b48Spatrick return NVPTXISD::TexUnifiedCubeArrayFloatFloat;
325009467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
325109467b48Spatrick return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel;
325209467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
325309467b48Spatrick return NVPTXISD::TexUnifiedCubeArrayS32Float;
325409467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
325509467b48Spatrick return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel;
325609467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
325709467b48Spatrick return NVPTXISD::TexUnifiedCubeArrayU32Float;
325809467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
325909467b48Spatrick return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;
326009467b48Spatrick
326109467b48Spatrick case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
326209467b48Spatrick return NVPTXISD::Tld4UnifiedR2DFloatFloat;
326309467b48Spatrick case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
326409467b48Spatrick return NVPTXISD::Tld4UnifiedG2DFloatFloat;
326509467b48Spatrick case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
326609467b48Spatrick return NVPTXISD::Tld4UnifiedB2DFloatFloat;
326709467b48Spatrick case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
326809467b48Spatrick return NVPTXISD::Tld4UnifiedA2DFloatFloat;
326909467b48Spatrick case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
327009467b48Spatrick return NVPTXISD::Tld4UnifiedR2DS64Float;
327109467b48Spatrick case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
327209467b48Spatrick return NVPTXISD::Tld4UnifiedG2DS64Float;
327309467b48Spatrick case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
327409467b48Spatrick return NVPTXISD::Tld4UnifiedB2DS64Float;
327509467b48Spatrick case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
327609467b48Spatrick return NVPTXISD::Tld4UnifiedA2DS64Float;
327709467b48Spatrick case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
327809467b48Spatrick return NVPTXISD::Tld4UnifiedR2DU64Float;
327909467b48Spatrick case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
328009467b48Spatrick return NVPTXISD::Tld4UnifiedG2DU64Float;
328109467b48Spatrick case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
328209467b48Spatrick return NVPTXISD::Tld4UnifiedB2DU64Float;
328309467b48Spatrick case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
328409467b48Spatrick return NVPTXISD::Tld4UnifiedA2DU64Float;
328509467b48Spatrick }
328609467b48Spatrick }
328709467b48Spatrick
getOpcForSurfaceInstr(unsigned Intrinsic)328809467b48Spatrick static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
328909467b48Spatrick switch (Intrinsic) {
329009467b48Spatrick default:
329109467b48Spatrick return 0;
329209467b48Spatrick case Intrinsic::nvvm_suld_1d_i8_clamp:
329309467b48Spatrick return NVPTXISD::Suld1DI8Clamp;
329409467b48Spatrick case Intrinsic::nvvm_suld_1d_i16_clamp:
329509467b48Spatrick return NVPTXISD::Suld1DI16Clamp;
329609467b48Spatrick case Intrinsic::nvvm_suld_1d_i32_clamp:
329709467b48Spatrick return NVPTXISD::Suld1DI32Clamp;
329809467b48Spatrick case Intrinsic::nvvm_suld_1d_i64_clamp:
329909467b48Spatrick return NVPTXISD::Suld1DI64Clamp;
330009467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i8_clamp:
330109467b48Spatrick return NVPTXISD::Suld1DV2I8Clamp;
330209467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i16_clamp:
330309467b48Spatrick return NVPTXISD::Suld1DV2I16Clamp;
330409467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i32_clamp:
330509467b48Spatrick return NVPTXISD::Suld1DV2I32Clamp;
330609467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i64_clamp:
330709467b48Spatrick return NVPTXISD::Suld1DV2I64Clamp;
330809467b48Spatrick case Intrinsic::nvvm_suld_1d_v4i8_clamp:
330909467b48Spatrick return NVPTXISD::Suld1DV4I8Clamp;
331009467b48Spatrick case Intrinsic::nvvm_suld_1d_v4i16_clamp:
331109467b48Spatrick return NVPTXISD::Suld1DV4I16Clamp;
331209467b48Spatrick case Intrinsic::nvvm_suld_1d_v4i32_clamp:
331309467b48Spatrick return NVPTXISD::Suld1DV4I32Clamp;
331409467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i8_clamp:
331509467b48Spatrick return NVPTXISD::Suld1DArrayI8Clamp;
331609467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i16_clamp:
331709467b48Spatrick return NVPTXISD::Suld1DArrayI16Clamp;
331809467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i32_clamp:
331909467b48Spatrick return NVPTXISD::Suld1DArrayI32Clamp;
332009467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i64_clamp:
332109467b48Spatrick return NVPTXISD::Suld1DArrayI64Clamp;
332209467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
332309467b48Spatrick return NVPTXISD::Suld1DArrayV2I8Clamp;
332409467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
332509467b48Spatrick return NVPTXISD::Suld1DArrayV2I16Clamp;
332609467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
332709467b48Spatrick return NVPTXISD::Suld1DArrayV2I32Clamp;
332809467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
332909467b48Spatrick return NVPTXISD::Suld1DArrayV2I64Clamp;
333009467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
333109467b48Spatrick return NVPTXISD::Suld1DArrayV4I8Clamp;
333209467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
333309467b48Spatrick return NVPTXISD::Suld1DArrayV4I16Clamp;
333409467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
333509467b48Spatrick return NVPTXISD::Suld1DArrayV4I32Clamp;
333609467b48Spatrick case Intrinsic::nvvm_suld_2d_i8_clamp:
333709467b48Spatrick return NVPTXISD::Suld2DI8Clamp;
333809467b48Spatrick case Intrinsic::nvvm_suld_2d_i16_clamp:
333909467b48Spatrick return NVPTXISD::Suld2DI16Clamp;
334009467b48Spatrick case Intrinsic::nvvm_suld_2d_i32_clamp:
334109467b48Spatrick return NVPTXISD::Suld2DI32Clamp;
334209467b48Spatrick case Intrinsic::nvvm_suld_2d_i64_clamp:
334309467b48Spatrick return NVPTXISD::Suld2DI64Clamp;
334409467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i8_clamp:
334509467b48Spatrick return NVPTXISD::Suld2DV2I8Clamp;
334609467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i16_clamp:
334709467b48Spatrick return NVPTXISD::Suld2DV2I16Clamp;
334809467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i32_clamp:
334909467b48Spatrick return NVPTXISD::Suld2DV2I32Clamp;
335009467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i64_clamp:
335109467b48Spatrick return NVPTXISD::Suld2DV2I64Clamp;
335209467b48Spatrick case Intrinsic::nvvm_suld_2d_v4i8_clamp:
335309467b48Spatrick return NVPTXISD::Suld2DV4I8Clamp;
335409467b48Spatrick case Intrinsic::nvvm_suld_2d_v4i16_clamp:
335509467b48Spatrick return NVPTXISD::Suld2DV4I16Clamp;
335609467b48Spatrick case Intrinsic::nvvm_suld_2d_v4i32_clamp:
335709467b48Spatrick return NVPTXISD::Suld2DV4I32Clamp;
335809467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i8_clamp:
335909467b48Spatrick return NVPTXISD::Suld2DArrayI8Clamp;
336009467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i16_clamp:
336109467b48Spatrick return NVPTXISD::Suld2DArrayI16Clamp;
336209467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i32_clamp:
336309467b48Spatrick return NVPTXISD::Suld2DArrayI32Clamp;
336409467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i64_clamp:
336509467b48Spatrick return NVPTXISD::Suld2DArrayI64Clamp;
336609467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
336709467b48Spatrick return NVPTXISD::Suld2DArrayV2I8Clamp;
336809467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
336909467b48Spatrick return NVPTXISD::Suld2DArrayV2I16Clamp;
337009467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
337109467b48Spatrick return NVPTXISD::Suld2DArrayV2I32Clamp;
337209467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
337309467b48Spatrick return NVPTXISD::Suld2DArrayV2I64Clamp;
337409467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
337509467b48Spatrick return NVPTXISD::Suld2DArrayV4I8Clamp;
337609467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
337709467b48Spatrick return NVPTXISD::Suld2DArrayV4I16Clamp;
337809467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
337909467b48Spatrick return NVPTXISD::Suld2DArrayV4I32Clamp;
338009467b48Spatrick case Intrinsic::nvvm_suld_3d_i8_clamp:
338109467b48Spatrick return NVPTXISD::Suld3DI8Clamp;
338209467b48Spatrick case Intrinsic::nvvm_suld_3d_i16_clamp:
338309467b48Spatrick return NVPTXISD::Suld3DI16Clamp;
338409467b48Spatrick case Intrinsic::nvvm_suld_3d_i32_clamp:
338509467b48Spatrick return NVPTXISD::Suld3DI32Clamp;
338609467b48Spatrick case Intrinsic::nvvm_suld_3d_i64_clamp:
338709467b48Spatrick return NVPTXISD::Suld3DI64Clamp;
338809467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i8_clamp:
338909467b48Spatrick return NVPTXISD::Suld3DV2I8Clamp;
339009467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i16_clamp:
339109467b48Spatrick return NVPTXISD::Suld3DV2I16Clamp;
339209467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i32_clamp:
339309467b48Spatrick return NVPTXISD::Suld3DV2I32Clamp;
339409467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i64_clamp:
339509467b48Spatrick return NVPTXISD::Suld3DV2I64Clamp;
339609467b48Spatrick case Intrinsic::nvvm_suld_3d_v4i8_clamp:
339709467b48Spatrick return NVPTXISD::Suld3DV4I8Clamp;
339809467b48Spatrick case Intrinsic::nvvm_suld_3d_v4i16_clamp:
339909467b48Spatrick return NVPTXISD::Suld3DV4I16Clamp;
340009467b48Spatrick case Intrinsic::nvvm_suld_3d_v4i32_clamp:
340109467b48Spatrick return NVPTXISD::Suld3DV4I32Clamp;
340209467b48Spatrick case Intrinsic::nvvm_suld_1d_i8_trap:
340309467b48Spatrick return NVPTXISD::Suld1DI8Trap;
340409467b48Spatrick case Intrinsic::nvvm_suld_1d_i16_trap:
340509467b48Spatrick return NVPTXISD::Suld1DI16Trap;
340609467b48Spatrick case Intrinsic::nvvm_suld_1d_i32_trap:
340709467b48Spatrick return NVPTXISD::Suld1DI32Trap;
340809467b48Spatrick case Intrinsic::nvvm_suld_1d_i64_trap:
340909467b48Spatrick return NVPTXISD::Suld1DI64Trap;
341009467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i8_trap:
341109467b48Spatrick return NVPTXISD::Suld1DV2I8Trap;
341209467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i16_trap:
341309467b48Spatrick return NVPTXISD::Suld1DV2I16Trap;
341409467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i32_trap:
341509467b48Spatrick return NVPTXISD::Suld1DV2I32Trap;
341609467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i64_trap:
341709467b48Spatrick return NVPTXISD::Suld1DV2I64Trap;
341809467b48Spatrick case Intrinsic::nvvm_suld_1d_v4i8_trap:
341909467b48Spatrick return NVPTXISD::Suld1DV4I8Trap;
342009467b48Spatrick case Intrinsic::nvvm_suld_1d_v4i16_trap:
342109467b48Spatrick return NVPTXISD::Suld1DV4I16Trap;
342209467b48Spatrick case Intrinsic::nvvm_suld_1d_v4i32_trap:
342309467b48Spatrick return NVPTXISD::Suld1DV4I32Trap;
342409467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i8_trap:
342509467b48Spatrick return NVPTXISD::Suld1DArrayI8Trap;
342609467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i16_trap:
342709467b48Spatrick return NVPTXISD::Suld1DArrayI16Trap;
342809467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i32_trap:
342909467b48Spatrick return NVPTXISD::Suld1DArrayI32Trap;
343009467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i64_trap:
343109467b48Spatrick return NVPTXISD::Suld1DArrayI64Trap;
343209467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
343309467b48Spatrick return NVPTXISD::Suld1DArrayV2I8Trap;
343409467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
343509467b48Spatrick return NVPTXISD::Suld1DArrayV2I16Trap;
343609467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
343709467b48Spatrick return NVPTXISD::Suld1DArrayV2I32Trap;
343809467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
343909467b48Spatrick return NVPTXISD::Suld1DArrayV2I64Trap;
344009467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
344109467b48Spatrick return NVPTXISD::Suld1DArrayV4I8Trap;
344209467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
344309467b48Spatrick return NVPTXISD::Suld1DArrayV4I16Trap;
344409467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
344509467b48Spatrick return NVPTXISD::Suld1DArrayV4I32Trap;
344609467b48Spatrick case Intrinsic::nvvm_suld_2d_i8_trap:
344709467b48Spatrick return NVPTXISD::Suld2DI8Trap;
344809467b48Spatrick case Intrinsic::nvvm_suld_2d_i16_trap:
344909467b48Spatrick return NVPTXISD::Suld2DI16Trap;
345009467b48Spatrick case Intrinsic::nvvm_suld_2d_i32_trap:
345109467b48Spatrick return NVPTXISD::Suld2DI32Trap;
345209467b48Spatrick case Intrinsic::nvvm_suld_2d_i64_trap:
345309467b48Spatrick return NVPTXISD::Suld2DI64Trap;
345409467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i8_trap:
345509467b48Spatrick return NVPTXISD::Suld2DV2I8Trap;
345609467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i16_trap:
345709467b48Spatrick return NVPTXISD::Suld2DV2I16Trap;
345809467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i32_trap:
345909467b48Spatrick return NVPTXISD::Suld2DV2I32Trap;
346009467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i64_trap:
346109467b48Spatrick return NVPTXISD::Suld2DV2I64Trap;
346209467b48Spatrick case Intrinsic::nvvm_suld_2d_v4i8_trap:
346309467b48Spatrick return NVPTXISD::Suld2DV4I8Trap;
346409467b48Spatrick case Intrinsic::nvvm_suld_2d_v4i16_trap:
346509467b48Spatrick return NVPTXISD::Suld2DV4I16Trap;
346609467b48Spatrick case Intrinsic::nvvm_suld_2d_v4i32_trap:
346709467b48Spatrick return NVPTXISD::Suld2DV4I32Trap;
346809467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i8_trap:
346909467b48Spatrick return NVPTXISD::Suld2DArrayI8Trap;
347009467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i16_trap:
347109467b48Spatrick return NVPTXISD::Suld2DArrayI16Trap;
347209467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i32_trap:
347309467b48Spatrick return NVPTXISD::Suld2DArrayI32Trap;
347409467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i64_trap:
347509467b48Spatrick return NVPTXISD::Suld2DArrayI64Trap;
347609467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
347709467b48Spatrick return NVPTXISD::Suld2DArrayV2I8Trap;
347809467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
347909467b48Spatrick return NVPTXISD::Suld2DArrayV2I16Trap;
348009467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
348109467b48Spatrick return NVPTXISD::Suld2DArrayV2I32Trap;
348209467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
348309467b48Spatrick return NVPTXISD::Suld2DArrayV2I64Trap;
348409467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
348509467b48Spatrick return NVPTXISD::Suld2DArrayV4I8Trap;
348609467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
348709467b48Spatrick return NVPTXISD::Suld2DArrayV4I16Trap;
348809467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
348909467b48Spatrick return NVPTXISD::Suld2DArrayV4I32Trap;
349009467b48Spatrick case Intrinsic::nvvm_suld_3d_i8_trap:
349109467b48Spatrick return NVPTXISD::Suld3DI8Trap;
349209467b48Spatrick case Intrinsic::nvvm_suld_3d_i16_trap:
349309467b48Spatrick return NVPTXISD::Suld3DI16Trap;
349409467b48Spatrick case Intrinsic::nvvm_suld_3d_i32_trap:
349509467b48Spatrick return NVPTXISD::Suld3DI32Trap;
349609467b48Spatrick case Intrinsic::nvvm_suld_3d_i64_trap:
349709467b48Spatrick return NVPTXISD::Suld3DI64Trap;
349809467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i8_trap:
349909467b48Spatrick return NVPTXISD::Suld3DV2I8Trap;
350009467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i16_trap:
350109467b48Spatrick return NVPTXISD::Suld3DV2I16Trap;
350209467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i32_trap:
350309467b48Spatrick return NVPTXISD::Suld3DV2I32Trap;
350409467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i64_trap:
350509467b48Spatrick return NVPTXISD::Suld3DV2I64Trap;
350609467b48Spatrick case Intrinsic::nvvm_suld_3d_v4i8_trap:
350709467b48Spatrick return NVPTXISD::Suld3DV4I8Trap;
350809467b48Spatrick case Intrinsic::nvvm_suld_3d_v4i16_trap:
350909467b48Spatrick return NVPTXISD::Suld3DV4I16Trap;
351009467b48Spatrick case Intrinsic::nvvm_suld_3d_v4i32_trap:
351109467b48Spatrick return NVPTXISD::Suld3DV4I32Trap;
351209467b48Spatrick case Intrinsic::nvvm_suld_1d_i8_zero:
351309467b48Spatrick return NVPTXISD::Suld1DI8Zero;
351409467b48Spatrick case Intrinsic::nvvm_suld_1d_i16_zero:
351509467b48Spatrick return NVPTXISD::Suld1DI16Zero;
351609467b48Spatrick case Intrinsic::nvvm_suld_1d_i32_zero:
351709467b48Spatrick return NVPTXISD::Suld1DI32Zero;
351809467b48Spatrick case Intrinsic::nvvm_suld_1d_i64_zero:
351909467b48Spatrick return NVPTXISD::Suld1DI64Zero;
352009467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i8_zero:
352109467b48Spatrick return NVPTXISD::Suld1DV2I8Zero;
352209467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i16_zero:
352309467b48Spatrick return NVPTXISD::Suld1DV2I16Zero;
352409467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i32_zero:
352509467b48Spatrick return NVPTXISD::Suld1DV2I32Zero;
352609467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i64_zero:
352709467b48Spatrick return NVPTXISD::Suld1DV2I64Zero;
352809467b48Spatrick case Intrinsic::nvvm_suld_1d_v4i8_zero:
352909467b48Spatrick return NVPTXISD::Suld1DV4I8Zero;
353009467b48Spatrick case Intrinsic::nvvm_suld_1d_v4i16_zero:
353109467b48Spatrick return NVPTXISD::Suld1DV4I16Zero;
353209467b48Spatrick case Intrinsic::nvvm_suld_1d_v4i32_zero:
353309467b48Spatrick return NVPTXISD::Suld1DV4I32Zero;
353409467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i8_zero:
353509467b48Spatrick return NVPTXISD::Suld1DArrayI8Zero;
353609467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i16_zero:
353709467b48Spatrick return NVPTXISD::Suld1DArrayI16Zero;
353809467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i32_zero:
353909467b48Spatrick return NVPTXISD::Suld1DArrayI32Zero;
354009467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i64_zero:
354109467b48Spatrick return NVPTXISD::Suld1DArrayI64Zero;
354209467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
354309467b48Spatrick return NVPTXISD::Suld1DArrayV2I8Zero;
354409467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
354509467b48Spatrick return NVPTXISD::Suld1DArrayV2I16Zero;
354609467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
354709467b48Spatrick return NVPTXISD::Suld1DArrayV2I32Zero;
354809467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
354909467b48Spatrick return NVPTXISD::Suld1DArrayV2I64Zero;
355009467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
355109467b48Spatrick return NVPTXISD::Suld1DArrayV4I8Zero;
355209467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
355309467b48Spatrick return NVPTXISD::Suld1DArrayV4I16Zero;
355409467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
355509467b48Spatrick return NVPTXISD::Suld1DArrayV4I32Zero;
355609467b48Spatrick case Intrinsic::nvvm_suld_2d_i8_zero:
355709467b48Spatrick return NVPTXISD::Suld2DI8Zero;
355809467b48Spatrick case Intrinsic::nvvm_suld_2d_i16_zero:
355909467b48Spatrick return NVPTXISD::Suld2DI16Zero;
356009467b48Spatrick case Intrinsic::nvvm_suld_2d_i32_zero:
356109467b48Spatrick return NVPTXISD::Suld2DI32Zero;
356209467b48Spatrick case Intrinsic::nvvm_suld_2d_i64_zero:
356309467b48Spatrick return NVPTXISD::Suld2DI64Zero;
356409467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i8_zero:
356509467b48Spatrick return NVPTXISD::Suld2DV2I8Zero;
356609467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i16_zero:
356709467b48Spatrick return NVPTXISD::Suld2DV2I16Zero;
356809467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i32_zero:
356909467b48Spatrick return NVPTXISD::Suld2DV2I32Zero;
357009467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i64_zero:
357109467b48Spatrick return NVPTXISD::Suld2DV2I64Zero;
357209467b48Spatrick case Intrinsic::nvvm_suld_2d_v4i8_zero:
357309467b48Spatrick return NVPTXISD::Suld2DV4I8Zero;
357409467b48Spatrick case Intrinsic::nvvm_suld_2d_v4i16_zero:
357509467b48Spatrick return NVPTXISD::Suld2DV4I16Zero;
357609467b48Spatrick case Intrinsic::nvvm_suld_2d_v4i32_zero:
357709467b48Spatrick return NVPTXISD::Suld2DV4I32Zero;
357809467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i8_zero:
357909467b48Spatrick return NVPTXISD::Suld2DArrayI8Zero;
358009467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i16_zero:
358109467b48Spatrick return NVPTXISD::Suld2DArrayI16Zero;
358209467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i32_zero:
358309467b48Spatrick return NVPTXISD::Suld2DArrayI32Zero;
358409467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i64_zero:
358509467b48Spatrick return NVPTXISD::Suld2DArrayI64Zero;
358609467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
358709467b48Spatrick return NVPTXISD::Suld2DArrayV2I8Zero;
358809467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
358909467b48Spatrick return NVPTXISD::Suld2DArrayV2I16Zero;
359009467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
359109467b48Spatrick return NVPTXISD::Suld2DArrayV2I32Zero;
359209467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
359309467b48Spatrick return NVPTXISD::Suld2DArrayV2I64Zero;
359409467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
359509467b48Spatrick return NVPTXISD::Suld2DArrayV4I8Zero;
359609467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
359709467b48Spatrick return NVPTXISD::Suld2DArrayV4I16Zero;
359809467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
359909467b48Spatrick return NVPTXISD::Suld2DArrayV4I32Zero;
360009467b48Spatrick case Intrinsic::nvvm_suld_3d_i8_zero:
360109467b48Spatrick return NVPTXISD::Suld3DI8Zero;
360209467b48Spatrick case Intrinsic::nvvm_suld_3d_i16_zero:
360309467b48Spatrick return NVPTXISD::Suld3DI16Zero;
360409467b48Spatrick case Intrinsic::nvvm_suld_3d_i32_zero:
360509467b48Spatrick return NVPTXISD::Suld3DI32Zero;
360609467b48Spatrick case Intrinsic::nvvm_suld_3d_i64_zero:
360709467b48Spatrick return NVPTXISD::Suld3DI64Zero;
360809467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i8_zero:
360909467b48Spatrick return NVPTXISD::Suld3DV2I8Zero;
361009467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i16_zero:
361109467b48Spatrick return NVPTXISD::Suld3DV2I16Zero;
361209467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i32_zero:
361309467b48Spatrick return NVPTXISD::Suld3DV2I32Zero;
361409467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i64_zero:
361509467b48Spatrick return NVPTXISD::Suld3DV2I64Zero;
361609467b48Spatrick case Intrinsic::nvvm_suld_3d_v4i8_zero:
361709467b48Spatrick return NVPTXISD::Suld3DV4I8Zero;
361809467b48Spatrick case Intrinsic::nvvm_suld_3d_v4i16_zero:
361909467b48Spatrick return NVPTXISD::Suld3DV4I16Zero;
362009467b48Spatrick case Intrinsic::nvvm_suld_3d_v4i32_zero:
362109467b48Spatrick return NVPTXISD::Suld3DV4I32Zero;
362209467b48Spatrick }
362309467b48Spatrick }
362409467b48Spatrick
// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
// TgtMemIntrinsic because we need information that is only available in the
// "Value" type of the destination pointer -- in particular, the address space
// information.
getTgtMemIntrinsic(IntrinsicInfo & Info,const CallInst & I,MachineFunction & MF,unsigned Intrinsic) const363009467b48Spatrick bool NVPTXTargetLowering::getTgtMemIntrinsic(
363109467b48Spatrick IntrinsicInfo &Info, const CallInst &I,
363209467b48Spatrick MachineFunction &MF, unsigned Intrinsic) const {
363309467b48Spatrick switch (Intrinsic) {
363409467b48Spatrick default:
363509467b48Spatrick return false;
363609467b48Spatrick case Intrinsic::nvvm_match_all_sync_i32p:
363709467b48Spatrick case Intrinsic::nvvm_match_all_sync_i64p:
363809467b48Spatrick Info.opc = ISD::INTRINSIC_W_CHAIN;
363909467b48Spatrick // memVT is bogus. These intrinsics have IntrInaccessibleMemOnly attribute
364009467b48Spatrick // in order to model data exchange with other threads, but perform no real
364109467b48Spatrick // memory accesses.
364209467b48Spatrick Info.memVT = MVT::i1;
364309467b48Spatrick
364409467b48Spatrick // Our result depends on both our and other thread's arguments.
364509467b48Spatrick Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
364609467b48Spatrick return true;
364709467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col:
364809467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row:
364909467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride:
365009467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride:
365109467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col:
365209467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row:
365309467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride:
365409467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride:
365509467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col:
365609467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row:
365709467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride:
365809467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride:
365909467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col:
366009467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row:
366109467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride:
366209467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride:
366309467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col:
366409467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row:
366509467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride:
366609467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride:
366709467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col:
366809467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row:
366909467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride:
367009467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride: {
367109467b48Spatrick Info.opc = ISD::INTRINSIC_W_CHAIN;
367209467b48Spatrick Info.memVT = MVT::v8f16;
367309467b48Spatrick Info.ptrVal = I.getArgOperand(0);
367409467b48Spatrick Info.offset = 0;
367509467b48Spatrick Info.flags = MachineMemOperand::MOLoad;
367609467b48Spatrick Info.align = Align(16);
367709467b48Spatrick return true;
367809467b48Spatrick }
367909467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col:
368009467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col_stride:
368109467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col_stride:
368209467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col:
368309467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row:
368409467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row_stride:
368509467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row_stride:
368609467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row:
368773471bf0Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col:
368873471bf0Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col_stride:
368973471bf0Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row:
369073471bf0Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row_stride:
369109467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col:
369209467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col_stride:
369309467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col_stride:
369409467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col:
369509467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row:
369609467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row_stride:
369709467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row_stride:
369873471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row:
369973471bf0Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col:
370073471bf0Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col_stride:
370173471bf0Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row:
370273471bf0Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row_stride: {
370309467b48Spatrick Info.opc = ISD::INTRINSIC_W_CHAIN;
370409467b48Spatrick Info.memVT = MVT::v2i32;
370509467b48Spatrick Info.ptrVal = I.getArgOperand(0);
370609467b48Spatrick Info.offset = 0;
370709467b48Spatrick Info.flags = MachineMemOperand::MOLoad;
370809467b48Spatrick Info.align = Align(8);
370909467b48Spatrick return true;
371009467b48Spatrick }
371109467b48Spatrick
371209467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col:
371309467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col_stride:
371409467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col_stride:
371509467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col:
371609467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row:
371709467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row_stride:
371809467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row_stride:
371909467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row:
372073471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col:
372173471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col_stride:
372273471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row:
372373471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row_stride:
372473471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col:
372573471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col_stride:
372673471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row:
372773471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row_stride:
372809467b48Spatrick
372909467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col:
373009467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col_stride:
373109467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col_stride:
373209467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col:
373309467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row:
373409467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row_stride:
373509467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row_stride:
373673471bf0Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row:
373773471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col:
373873471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col_stride:
373973471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row:
374073471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row_stride:
374173471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col:
374273471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col_stride:
374373471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row:
3744*d415bd75Srobert case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row_stride:
3745*d415bd75Srobert case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_b16:
3746*d415bd75Srobert case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_trans_b16: {
374709467b48Spatrick Info.opc = ISD::INTRINSIC_W_CHAIN;
374809467b48Spatrick Info.memVT = MVT::v4i32;
374909467b48Spatrick Info.ptrVal = I.getArgOperand(0);
375009467b48Spatrick Info.offset = 0;
375109467b48Spatrick Info.flags = MachineMemOperand::MOLoad;
375209467b48Spatrick Info.align = Align(16);
375309467b48Spatrick return true;
375409467b48Spatrick }
375509467b48Spatrick
375609467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col:
375709467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col_stride:
375809467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col_stride:
375909467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col:
376009467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row:
376109467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row_stride:
376209467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row_stride:
376309467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row:
376409467b48Spatrick
376509467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col:
376609467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col_stride:
376709467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col_stride:
376809467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col:
376909467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row:
377009467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row_stride:
377109467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row_stride:
377209467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row:
377309467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row:
377409467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row_stride:
377509467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col:
377609467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col_stride:
377709467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row:
377809467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row_stride:
377909467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row_stride:
378009467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row:
378109467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col:
378209467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col_stride:
378309467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col_stride:
3784*d415bd75Srobert case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col:
3785*d415bd75Srobert case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_b16:
3786*d415bd75Srobert case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_trans_b16: {
378709467b48Spatrick Info.opc = ISD::INTRINSIC_W_CHAIN;
378809467b48Spatrick Info.memVT = MVT::i32;
378909467b48Spatrick Info.ptrVal = I.getArgOperand(0);
379009467b48Spatrick Info.offset = 0;
379109467b48Spatrick Info.flags = MachineMemOperand::MOLoad;
379209467b48Spatrick Info.align = Align(4);
379309467b48Spatrick return true;
379409467b48Spatrick }
379509467b48Spatrick
379609467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col:
379709467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row:
379809467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride:
379909467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride:
380009467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col:
380109467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row:
380209467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride:
380309467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride:
380409467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col:
380509467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row:
380609467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride:
380709467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride: {
380809467b48Spatrick Info.opc = ISD::INTRINSIC_W_CHAIN;
380909467b48Spatrick Info.memVT = MVT::v4f16;
381009467b48Spatrick Info.ptrVal = I.getArgOperand(0);
381109467b48Spatrick Info.offset = 0;
381209467b48Spatrick Info.flags = MachineMemOperand::MOLoad;
381309467b48Spatrick Info.align = Align(16);
381409467b48Spatrick return true;
381509467b48Spatrick }
381609467b48Spatrick
381709467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col:
381809467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row:
381909467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride:
382009467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride:
382109467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col:
382209467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row:
382309467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride:
382409467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride:
382509467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col:
382609467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row:
382709467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride:
382873471bf0Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride:
382973471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col:
383073471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row:
383173471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col_stride:
383273471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row_stride: {
383309467b48Spatrick Info.opc = ISD::INTRINSIC_W_CHAIN;
383409467b48Spatrick Info.memVT = MVT::v8f32;
383509467b48Spatrick Info.ptrVal = I.getArgOperand(0);
383609467b48Spatrick Info.offset = 0;
383709467b48Spatrick Info.flags = MachineMemOperand::MOLoad;
383809467b48Spatrick Info.align = Align(16);
383909467b48Spatrick return true;
384009467b48Spatrick }
384109467b48Spatrick
384273471bf0Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col:
384373471bf0Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col_stride:
384473471bf0Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row:
384573471bf0Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row_stride:
384673471bf0Spatrick
384773471bf0Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col:
384873471bf0Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col_stride:
384973471bf0Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row:
385073471bf0Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row_stride:
385173471bf0Spatrick
385209467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col:
385309467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col_stride:
385409467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row:
385509467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row_stride:
385609467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col:
385709467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col_stride:
385809467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row:
385909467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row_stride:
386009467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col:
386109467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col_stride:
386209467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row:
386309467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row_stride: {
386409467b48Spatrick Info.opc = ISD::INTRINSIC_W_CHAIN;
386509467b48Spatrick Info.memVT = MVT::v8i32;
386609467b48Spatrick Info.ptrVal = I.getArgOperand(0);
386709467b48Spatrick Info.offset = 0;
386809467b48Spatrick Info.flags = MachineMemOperand::MOLoad;
386909467b48Spatrick Info.align = Align(16);
387009467b48Spatrick return true;
387109467b48Spatrick }
387209467b48Spatrick
387309467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col:
387409467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col_stride:
387509467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row:
387609467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row_stride:
387709467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col:
387809467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col_stride:
387909467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row:
3880*d415bd75Srobert case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row_stride:
3881*d415bd75Srobert case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_b16:
3882*d415bd75Srobert case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_trans_b16: {
388309467b48Spatrick Info.opc = ISD::INTRINSIC_W_CHAIN;
388409467b48Spatrick Info.memVT = MVT::v2i32;
388509467b48Spatrick Info.ptrVal = I.getArgOperand(0);
388609467b48Spatrick Info.offset = 0;
388709467b48Spatrick Info.flags = MachineMemOperand::MOLoad;
388809467b48Spatrick Info.align = Align(8);
388909467b48Spatrick return true;
389009467b48Spatrick }
389109467b48Spatrick
389273471bf0Spatrick case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col:
389373471bf0Spatrick case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col_stride:
389473471bf0Spatrick case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row:
389573471bf0Spatrick case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row_stride:
389673471bf0Spatrick
389773471bf0Spatrick case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col:
389873471bf0Spatrick case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col_stride:
389973471bf0Spatrick case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row:
390073471bf0Spatrick case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row_stride: {
390173471bf0Spatrick Info.opc = ISD::INTRINSIC_W_CHAIN;
390273471bf0Spatrick Info.memVT = MVT::f64;
390373471bf0Spatrick Info.ptrVal = I.getArgOperand(0);
390473471bf0Spatrick Info.offset = 0;
390573471bf0Spatrick Info.flags = MachineMemOperand::MOLoad;
390673471bf0Spatrick Info.align = Align(8);
390773471bf0Spatrick return true;
390873471bf0Spatrick }
390973471bf0Spatrick
391073471bf0Spatrick case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col:
391173471bf0Spatrick case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col_stride:
391273471bf0Spatrick case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row:
391373471bf0Spatrick case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row_stride: {
391473471bf0Spatrick Info.opc = ISD::INTRINSIC_W_CHAIN;
391573471bf0Spatrick Info.memVT = MVT::v2f64;
391673471bf0Spatrick Info.ptrVal = I.getArgOperand(0);
391773471bf0Spatrick Info.offset = 0;
391873471bf0Spatrick Info.flags = MachineMemOperand::MOLoad;
391973471bf0Spatrick Info.align = Align(16);
392073471bf0Spatrick return true;
392173471bf0Spatrick }
392273471bf0Spatrick
392309467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col:
392409467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row:
392509467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride:
392609467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride:
392709467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col:
392809467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row:
392909467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride:
393009467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride:
393109467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col:
393209467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row:
393309467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride:
393409467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride: {
393509467b48Spatrick Info.opc = ISD::INTRINSIC_VOID;
393609467b48Spatrick Info.memVT = MVT::v4f16;
393709467b48Spatrick Info.ptrVal = I.getArgOperand(0);
393809467b48Spatrick Info.offset = 0;
393909467b48Spatrick Info.flags = MachineMemOperand::MOStore;
394009467b48Spatrick Info.align = Align(16);
394109467b48Spatrick return true;
394209467b48Spatrick }
394309467b48Spatrick
394409467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col:
394509467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row:
394609467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride:
394709467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride:
394809467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col:
394909467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row:
395009467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride:
395109467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride:
395209467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col:
395309467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row:
395409467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride:
395573471bf0Spatrick case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride:
395673471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col:
395773471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row:
395873471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col_stride:
395973471bf0Spatrick case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row_stride: {
396009467b48Spatrick Info.opc = ISD::INTRINSIC_VOID;
396109467b48Spatrick Info.memVT = MVT::v8f32;
396209467b48Spatrick Info.ptrVal = I.getArgOperand(0);
396309467b48Spatrick Info.offset = 0;
396409467b48Spatrick Info.flags = MachineMemOperand::MOStore;
396509467b48Spatrick Info.align = Align(16);
396609467b48Spatrick return true;
396709467b48Spatrick }
396809467b48Spatrick
396909467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col:
397009467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col_stride:
397109467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row:
397209467b48Spatrick case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row_stride:
397309467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col:
397409467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col_stride:
397509467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row:
397609467b48Spatrick case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row_stride:
397709467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col:
397809467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col_stride:
397909467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row:
398009467b48Spatrick case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row_stride: {
398109467b48Spatrick Info.opc = ISD::INTRINSIC_VOID;
398209467b48Spatrick Info.memVT = MVT::v8i32;
398309467b48Spatrick Info.ptrVal = I.getArgOperand(0);
398409467b48Spatrick Info.offset = 0;
398509467b48Spatrick Info.flags = MachineMemOperand::MOStore;
398609467b48Spatrick Info.align = Align(16);
398709467b48Spatrick return true;
398809467b48Spatrick }
398909467b48Spatrick
399009467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col:
399109467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col_stride:
399209467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row:
399309467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row_stride:
399409467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col:
399509467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col_stride:
399609467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row:
399709467b48Spatrick case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row_stride: {
399809467b48Spatrick Info.opc = ISD::INTRINSIC_VOID;
399909467b48Spatrick Info.memVT = MVT::v2i32;
400009467b48Spatrick Info.ptrVal = I.getArgOperand(0);
400109467b48Spatrick Info.offset = 0;
400209467b48Spatrick Info.flags = MachineMemOperand::MOStore;
400309467b48Spatrick Info.align = Align(8);
400409467b48Spatrick return true;
400509467b48Spatrick }
400609467b48Spatrick
400773471bf0Spatrick case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col:
400873471bf0Spatrick case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col_stride:
400973471bf0Spatrick case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row:
401073471bf0Spatrick case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row_stride: {
401173471bf0Spatrick Info.opc = ISD::INTRINSIC_VOID;
401273471bf0Spatrick Info.memVT = MVT::v2f64;
401373471bf0Spatrick Info.ptrVal = I.getArgOperand(0);
401473471bf0Spatrick Info.offset = 0;
401573471bf0Spatrick Info.flags = MachineMemOperand::MOStore;
401673471bf0Spatrick Info.align = Align(16);
401773471bf0Spatrick return true;
401873471bf0Spatrick }
401973471bf0Spatrick
402009467b48Spatrick case Intrinsic::nvvm_atomic_load_inc_32:
402109467b48Spatrick case Intrinsic::nvvm_atomic_load_dec_32:
402209467b48Spatrick
402309467b48Spatrick case Intrinsic::nvvm_atomic_add_gen_f_cta:
402409467b48Spatrick case Intrinsic::nvvm_atomic_add_gen_f_sys:
402509467b48Spatrick case Intrinsic::nvvm_atomic_add_gen_i_cta:
402609467b48Spatrick case Intrinsic::nvvm_atomic_add_gen_i_sys:
402709467b48Spatrick case Intrinsic::nvvm_atomic_and_gen_i_cta:
402809467b48Spatrick case Intrinsic::nvvm_atomic_and_gen_i_sys:
402909467b48Spatrick case Intrinsic::nvvm_atomic_cas_gen_i_cta:
403009467b48Spatrick case Intrinsic::nvvm_atomic_cas_gen_i_sys:
403109467b48Spatrick case Intrinsic::nvvm_atomic_dec_gen_i_cta:
403209467b48Spatrick case Intrinsic::nvvm_atomic_dec_gen_i_sys:
403309467b48Spatrick case Intrinsic::nvvm_atomic_inc_gen_i_cta:
403409467b48Spatrick case Intrinsic::nvvm_atomic_inc_gen_i_sys:
403509467b48Spatrick case Intrinsic::nvvm_atomic_max_gen_i_cta:
403609467b48Spatrick case Intrinsic::nvvm_atomic_max_gen_i_sys:
403709467b48Spatrick case Intrinsic::nvvm_atomic_min_gen_i_cta:
403809467b48Spatrick case Intrinsic::nvvm_atomic_min_gen_i_sys:
403909467b48Spatrick case Intrinsic::nvvm_atomic_or_gen_i_cta:
404009467b48Spatrick case Intrinsic::nvvm_atomic_or_gen_i_sys:
404109467b48Spatrick case Intrinsic::nvvm_atomic_exch_gen_i_cta:
404209467b48Spatrick case Intrinsic::nvvm_atomic_exch_gen_i_sys:
404309467b48Spatrick case Intrinsic::nvvm_atomic_xor_gen_i_cta:
404409467b48Spatrick case Intrinsic::nvvm_atomic_xor_gen_i_sys: {
404509467b48Spatrick auto &DL = I.getModule()->getDataLayout();
404609467b48Spatrick Info.opc = ISD::INTRINSIC_W_CHAIN;
404709467b48Spatrick Info.memVT = getValueType(DL, I.getType());
404809467b48Spatrick Info.ptrVal = I.getArgOperand(0);
404909467b48Spatrick Info.offset = 0;
405009467b48Spatrick Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
405109467b48Spatrick Info.align.reset();
405209467b48Spatrick return true;
405309467b48Spatrick }
405409467b48Spatrick
405509467b48Spatrick case Intrinsic::nvvm_ldu_global_i:
405609467b48Spatrick case Intrinsic::nvvm_ldu_global_f:
405709467b48Spatrick case Intrinsic::nvvm_ldu_global_p: {
405809467b48Spatrick auto &DL = I.getModule()->getDataLayout();
405909467b48Spatrick Info.opc = ISD::INTRINSIC_W_CHAIN;
406009467b48Spatrick if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
406109467b48Spatrick Info.memVT = getValueType(DL, I.getType());
406209467b48Spatrick else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
406309467b48Spatrick Info.memVT = getPointerTy(DL);
406409467b48Spatrick else
406509467b48Spatrick Info.memVT = getValueType(DL, I.getType());
406609467b48Spatrick Info.ptrVal = I.getArgOperand(0);
406709467b48Spatrick Info.offset = 0;
406809467b48Spatrick Info.flags = MachineMemOperand::MOLoad;
4069097a140dSpatrick Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
407009467b48Spatrick
407109467b48Spatrick return true;
407209467b48Spatrick }
407309467b48Spatrick case Intrinsic::nvvm_ldg_global_i:
407409467b48Spatrick case Intrinsic::nvvm_ldg_global_f:
407509467b48Spatrick case Intrinsic::nvvm_ldg_global_p: {
407609467b48Spatrick auto &DL = I.getModule()->getDataLayout();
407709467b48Spatrick
407809467b48Spatrick Info.opc = ISD::INTRINSIC_W_CHAIN;
407909467b48Spatrick if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
408009467b48Spatrick Info.memVT = getValueType(DL, I.getType());
408109467b48Spatrick else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
408209467b48Spatrick Info.memVT = getPointerTy(DL);
408309467b48Spatrick else
408409467b48Spatrick Info.memVT = getValueType(DL, I.getType());
408509467b48Spatrick Info.ptrVal = I.getArgOperand(0);
408609467b48Spatrick Info.offset = 0;
408709467b48Spatrick Info.flags = MachineMemOperand::MOLoad;
4088097a140dSpatrick Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
408909467b48Spatrick
409009467b48Spatrick return true;
409109467b48Spatrick }
409209467b48Spatrick
409309467b48Spatrick case Intrinsic::nvvm_tex_1d_v4f32_s32:
409409467b48Spatrick case Intrinsic::nvvm_tex_1d_v4f32_f32:
409509467b48Spatrick case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
409609467b48Spatrick case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
409709467b48Spatrick case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
409809467b48Spatrick case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
409909467b48Spatrick case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
410009467b48Spatrick case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
410109467b48Spatrick case Intrinsic::nvvm_tex_2d_v4f32_s32:
410209467b48Spatrick case Intrinsic::nvvm_tex_2d_v4f32_f32:
410309467b48Spatrick case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
410409467b48Spatrick case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
410509467b48Spatrick case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
410609467b48Spatrick case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
410709467b48Spatrick case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
410809467b48Spatrick case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
410909467b48Spatrick case Intrinsic::nvvm_tex_3d_v4f32_s32:
411009467b48Spatrick case Intrinsic::nvvm_tex_3d_v4f32_f32:
411109467b48Spatrick case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
411209467b48Spatrick case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
411309467b48Spatrick case Intrinsic::nvvm_tex_cube_v4f32_f32:
411409467b48Spatrick case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
411509467b48Spatrick case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
411609467b48Spatrick case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
411709467b48Spatrick case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
411809467b48Spatrick case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
411909467b48Spatrick case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
412009467b48Spatrick case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
412109467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
412209467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
412309467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
412409467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
412509467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
412609467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
412709467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
412809467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
412909467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
413009467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
413109467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
413209467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
413309467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
413409467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
413509467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
413609467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
413709467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
413809467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
413909467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
414009467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
414109467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
414209467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
414309467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
414409467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
414509467b48Spatrick case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
414609467b48Spatrick case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
414709467b48Spatrick case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
414809467b48Spatrick case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
414909467b48Spatrick Info.opc = getOpcForTextureInstr(Intrinsic);
415009467b48Spatrick Info.memVT = MVT::v4f32;
415109467b48Spatrick Info.ptrVal = nullptr;
415209467b48Spatrick Info.offset = 0;
415309467b48Spatrick Info.flags = MachineMemOperand::MOLoad;
415409467b48Spatrick Info.align = Align(16);
415509467b48Spatrick return true;
415609467b48Spatrick
415709467b48Spatrick case Intrinsic::nvvm_tex_1d_v4s32_s32:
415809467b48Spatrick case Intrinsic::nvvm_tex_1d_v4s32_f32:
415909467b48Spatrick case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
416009467b48Spatrick case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
416109467b48Spatrick case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
416209467b48Spatrick case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
416309467b48Spatrick case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
416409467b48Spatrick case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
416509467b48Spatrick case Intrinsic::nvvm_tex_2d_v4s32_s32:
416609467b48Spatrick case Intrinsic::nvvm_tex_2d_v4s32_f32:
416709467b48Spatrick case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
416809467b48Spatrick case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
416909467b48Spatrick case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
417009467b48Spatrick case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
417109467b48Spatrick case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
417209467b48Spatrick case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
417309467b48Spatrick case Intrinsic::nvvm_tex_3d_v4s32_s32:
417409467b48Spatrick case Intrinsic::nvvm_tex_3d_v4s32_f32:
417509467b48Spatrick case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
417609467b48Spatrick case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
417709467b48Spatrick case Intrinsic::nvvm_tex_cube_v4s32_f32:
417809467b48Spatrick case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
417909467b48Spatrick case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
418009467b48Spatrick case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
418109467b48Spatrick case Intrinsic::nvvm_tex_cube_v4u32_f32:
418209467b48Spatrick case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
418309467b48Spatrick case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
418409467b48Spatrick case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
418509467b48Spatrick case Intrinsic::nvvm_tex_1d_v4u32_s32:
418609467b48Spatrick case Intrinsic::nvvm_tex_1d_v4u32_f32:
418709467b48Spatrick case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
418809467b48Spatrick case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
418909467b48Spatrick case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
419009467b48Spatrick case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
419109467b48Spatrick case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
419209467b48Spatrick case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
419309467b48Spatrick case Intrinsic::nvvm_tex_2d_v4u32_s32:
419409467b48Spatrick case Intrinsic::nvvm_tex_2d_v4u32_f32:
419509467b48Spatrick case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
419609467b48Spatrick case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
419709467b48Spatrick case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
419809467b48Spatrick case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
419909467b48Spatrick case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
420009467b48Spatrick case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
420109467b48Spatrick case Intrinsic::nvvm_tex_3d_v4u32_s32:
420209467b48Spatrick case Intrinsic::nvvm_tex_3d_v4u32_f32:
420309467b48Spatrick case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
420409467b48Spatrick case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
420509467b48Spatrick case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
420609467b48Spatrick case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
420709467b48Spatrick case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
420809467b48Spatrick case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
420909467b48Spatrick case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
421009467b48Spatrick case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
421109467b48Spatrick case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
421209467b48Spatrick case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
421309467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
421409467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
421509467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
421609467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
421709467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
421809467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
421909467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
422009467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
422109467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
422209467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
422309467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
422409467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
422509467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
422609467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
422709467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
422809467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
422909467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
423009467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
423109467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
423209467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
423309467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
423409467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
423509467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
423609467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
423709467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
423809467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
423909467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
424009467b48Spatrick case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
424109467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
424209467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
424309467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
424409467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
424509467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
424609467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
424709467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
424809467b48Spatrick case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
424909467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
425009467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
425109467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
425209467b48Spatrick case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
425309467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
425409467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
425509467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
425609467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
425709467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
425809467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
425909467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
426009467b48Spatrick case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
426109467b48Spatrick case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
426209467b48Spatrick case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
426309467b48Spatrick case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
426409467b48Spatrick case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
426509467b48Spatrick case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
426609467b48Spatrick case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
426709467b48Spatrick case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
426809467b48Spatrick case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
426909467b48Spatrick Info.opc = getOpcForTextureInstr(Intrinsic);
427009467b48Spatrick Info.memVT = MVT::v4i32;
427109467b48Spatrick Info.ptrVal = nullptr;
427209467b48Spatrick Info.offset = 0;
427309467b48Spatrick Info.flags = MachineMemOperand::MOLoad;
427409467b48Spatrick Info.align = Align(16);
427509467b48Spatrick return true;
427609467b48Spatrick
427709467b48Spatrick case Intrinsic::nvvm_suld_1d_i8_clamp:
427809467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i8_clamp:
427909467b48Spatrick case Intrinsic::nvvm_suld_1d_v4i8_clamp:
428009467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i8_clamp:
428109467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
428209467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
428309467b48Spatrick case Intrinsic::nvvm_suld_2d_i8_clamp:
428409467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i8_clamp:
428509467b48Spatrick case Intrinsic::nvvm_suld_2d_v4i8_clamp:
428609467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i8_clamp:
428709467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
428809467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
428909467b48Spatrick case Intrinsic::nvvm_suld_3d_i8_clamp:
429009467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i8_clamp:
429109467b48Spatrick case Intrinsic::nvvm_suld_3d_v4i8_clamp:
429209467b48Spatrick case Intrinsic::nvvm_suld_1d_i8_trap:
429309467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i8_trap:
429409467b48Spatrick case Intrinsic::nvvm_suld_1d_v4i8_trap:
429509467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i8_trap:
429609467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
429709467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
429809467b48Spatrick case Intrinsic::nvvm_suld_2d_i8_trap:
429909467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i8_trap:
430009467b48Spatrick case Intrinsic::nvvm_suld_2d_v4i8_trap:
430109467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i8_trap:
430209467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
430309467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
430409467b48Spatrick case Intrinsic::nvvm_suld_3d_i8_trap:
430509467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i8_trap:
430609467b48Spatrick case Intrinsic::nvvm_suld_3d_v4i8_trap:
430709467b48Spatrick case Intrinsic::nvvm_suld_1d_i8_zero:
430809467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i8_zero:
430909467b48Spatrick case Intrinsic::nvvm_suld_1d_v4i8_zero:
431009467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i8_zero:
431109467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
431209467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
431309467b48Spatrick case Intrinsic::nvvm_suld_2d_i8_zero:
431409467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i8_zero:
431509467b48Spatrick case Intrinsic::nvvm_suld_2d_v4i8_zero:
431609467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i8_zero:
431709467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
431809467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
431909467b48Spatrick case Intrinsic::nvvm_suld_3d_i8_zero:
432009467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i8_zero:
432109467b48Spatrick case Intrinsic::nvvm_suld_3d_v4i8_zero:
432209467b48Spatrick Info.opc = getOpcForSurfaceInstr(Intrinsic);
432309467b48Spatrick Info.memVT = MVT::i8;
432409467b48Spatrick Info.ptrVal = nullptr;
432509467b48Spatrick Info.offset = 0;
432609467b48Spatrick Info.flags = MachineMemOperand::MOLoad;
432709467b48Spatrick Info.align = Align(16);
432809467b48Spatrick return true;
432909467b48Spatrick
433009467b48Spatrick case Intrinsic::nvvm_suld_1d_i16_clamp:
433109467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i16_clamp:
433209467b48Spatrick case Intrinsic::nvvm_suld_1d_v4i16_clamp:
433309467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i16_clamp:
433409467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
433509467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
433609467b48Spatrick case Intrinsic::nvvm_suld_2d_i16_clamp:
433709467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i16_clamp:
433809467b48Spatrick case Intrinsic::nvvm_suld_2d_v4i16_clamp:
433909467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i16_clamp:
434009467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
434109467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
434209467b48Spatrick case Intrinsic::nvvm_suld_3d_i16_clamp:
434309467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i16_clamp:
434409467b48Spatrick case Intrinsic::nvvm_suld_3d_v4i16_clamp:
434509467b48Spatrick case Intrinsic::nvvm_suld_1d_i16_trap:
434609467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i16_trap:
434709467b48Spatrick case Intrinsic::nvvm_suld_1d_v4i16_trap:
434809467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i16_trap:
434909467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
435009467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
435109467b48Spatrick case Intrinsic::nvvm_suld_2d_i16_trap:
435209467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i16_trap:
435309467b48Spatrick case Intrinsic::nvvm_suld_2d_v4i16_trap:
435409467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i16_trap:
435509467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
435609467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
435709467b48Spatrick case Intrinsic::nvvm_suld_3d_i16_trap:
435809467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i16_trap:
435909467b48Spatrick case Intrinsic::nvvm_suld_3d_v4i16_trap:
436009467b48Spatrick case Intrinsic::nvvm_suld_1d_i16_zero:
436109467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i16_zero:
436209467b48Spatrick case Intrinsic::nvvm_suld_1d_v4i16_zero:
436309467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i16_zero:
436409467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
436509467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
436609467b48Spatrick case Intrinsic::nvvm_suld_2d_i16_zero:
436709467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i16_zero:
436809467b48Spatrick case Intrinsic::nvvm_suld_2d_v4i16_zero:
436909467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i16_zero:
437009467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
437109467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
437209467b48Spatrick case Intrinsic::nvvm_suld_3d_i16_zero:
437309467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i16_zero:
437409467b48Spatrick case Intrinsic::nvvm_suld_3d_v4i16_zero:
437509467b48Spatrick Info.opc = getOpcForSurfaceInstr(Intrinsic);
437609467b48Spatrick Info.memVT = MVT::i16;
437709467b48Spatrick Info.ptrVal = nullptr;
437809467b48Spatrick Info.offset = 0;
437909467b48Spatrick Info.flags = MachineMemOperand::MOLoad;
438009467b48Spatrick Info.align = Align(16);
438109467b48Spatrick return true;
438209467b48Spatrick
438309467b48Spatrick case Intrinsic::nvvm_suld_1d_i32_clamp:
438409467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i32_clamp:
438509467b48Spatrick case Intrinsic::nvvm_suld_1d_v4i32_clamp:
438609467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i32_clamp:
438709467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
438809467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
438909467b48Spatrick case Intrinsic::nvvm_suld_2d_i32_clamp:
439009467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i32_clamp:
439109467b48Spatrick case Intrinsic::nvvm_suld_2d_v4i32_clamp:
439209467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i32_clamp:
439309467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
439409467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
439509467b48Spatrick case Intrinsic::nvvm_suld_3d_i32_clamp:
439609467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i32_clamp:
439709467b48Spatrick case Intrinsic::nvvm_suld_3d_v4i32_clamp:
439809467b48Spatrick case Intrinsic::nvvm_suld_1d_i32_trap:
439909467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i32_trap:
440009467b48Spatrick case Intrinsic::nvvm_suld_1d_v4i32_trap:
440109467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i32_trap:
440209467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
440309467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
440409467b48Spatrick case Intrinsic::nvvm_suld_2d_i32_trap:
440509467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i32_trap:
440609467b48Spatrick case Intrinsic::nvvm_suld_2d_v4i32_trap:
440709467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i32_trap:
440809467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
440909467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
441009467b48Spatrick case Intrinsic::nvvm_suld_3d_i32_trap:
441109467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i32_trap:
441209467b48Spatrick case Intrinsic::nvvm_suld_3d_v4i32_trap:
441309467b48Spatrick case Intrinsic::nvvm_suld_1d_i32_zero:
441409467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i32_zero:
441509467b48Spatrick case Intrinsic::nvvm_suld_1d_v4i32_zero:
441609467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i32_zero:
441709467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
441809467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
441909467b48Spatrick case Intrinsic::nvvm_suld_2d_i32_zero:
442009467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i32_zero:
442109467b48Spatrick case Intrinsic::nvvm_suld_2d_v4i32_zero:
442209467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i32_zero:
442309467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
442409467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
442509467b48Spatrick case Intrinsic::nvvm_suld_3d_i32_zero:
442609467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i32_zero:
442709467b48Spatrick case Intrinsic::nvvm_suld_3d_v4i32_zero:
442809467b48Spatrick Info.opc = getOpcForSurfaceInstr(Intrinsic);
442909467b48Spatrick Info.memVT = MVT::i32;
443009467b48Spatrick Info.ptrVal = nullptr;
443109467b48Spatrick Info.offset = 0;
443209467b48Spatrick Info.flags = MachineMemOperand::MOLoad;
443309467b48Spatrick Info.align = Align(16);
443409467b48Spatrick return true;
443509467b48Spatrick
443609467b48Spatrick case Intrinsic::nvvm_suld_1d_i64_clamp:
443709467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i64_clamp:
443809467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i64_clamp:
443909467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
444009467b48Spatrick case Intrinsic::nvvm_suld_2d_i64_clamp:
444109467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i64_clamp:
444209467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i64_clamp:
444309467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
444409467b48Spatrick case Intrinsic::nvvm_suld_3d_i64_clamp:
444509467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i64_clamp:
444609467b48Spatrick case Intrinsic::nvvm_suld_1d_i64_trap:
444709467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i64_trap:
444809467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i64_trap:
444909467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
445009467b48Spatrick case Intrinsic::nvvm_suld_2d_i64_trap:
445109467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i64_trap:
445209467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i64_trap:
445309467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
445409467b48Spatrick case Intrinsic::nvvm_suld_3d_i64_trap:
445509467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i64_trap:
445609467b48Spatrick case Intrinsic::nvvm_suld_1d_i64_zero:
445709467b48Spatrick case Intrinsic::nvvm_suld_1d_v2i64_zero:
445809467b48Spatrick case Intrinsic::nvvm_suld_1d_array_i64_zero:
445909467b48Spatrick case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
446009467b48Spatrick case Intrinsic::nvvm_suld_2d_i64_zero:
446109467b48Spatrick case Intrinsic::nvvm_suld_2d_v2i64_zero:
446209467b48Spatrick case Intrinsic::nvvm_suld_2d_array_i64_zero:
446309467b48Spatrick case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
446409467b48Spatrick case Intrinsic::nvvm_suld_3d_i64_zero:
446509467b48Spatrick case Intrinsic::nvvm_suld_3d_v2i64_zero:
446609467b48Spatrick Info.opc = getOpcForSurfaceInstr(Intrinsic);
446709467b48Spatrick Info.memVT = MVT::i64;
446809467b48Spatrick Info.ptrVal = nullptr;
446909467b48Spatrick Info.offset = 0;
447009467b48Spatrick Info.flags = MachineMemOperand::MOLoad;
447109467b48Spatrick Info.align = Align(16);
447209467b48Spatrick return true;
447309467b48Spatrick }
447409467b48Spatrick return false;
447509467b48Spatrick }
447609467b48Spatrick
4477*d415bd75Srobert /// getFunctionParamOptimizedAlign - since function arguments are passed via
4478*d415bd75Srobert /// .param space, we may want to increase their alignment in a way that
4479*d415bd75Srobert /// ensures that we can effectively vectorize their loads & stores. We can
4480*d415bd75Srobert /// increase alignment only if the function has internal or has private
4481*d415bd75Srobert /// linkage as for other linkage types callers may already rely on default
4482*d415bd75Srobert /// alignment. To allow using 128-bit vectorized loads/stores, this function
4483*d415bd75Srobert /// ensures that alignment is 16 or greater.
getFunctionParamOptimizedAlign(const Function * F,Type * ArgTy,const DataLayout & DL) const4484*d415bd75Srobert Align NVPTXTargetLowering::getFunctionParamOptimizedAlign(
4485*d415bd75Srobert const Function *F, Type *ArgTy, const DataLayout &DL) const {
4486*d415bd75Srobert const uint64_t ABITypeAlign = DL.getABITypeAlign(ArgTy).value();
4487*d415bd75Srobert
4488*d415bd75Srobert // If a function has linkage different from internal or private, we
4489*d415bd75Srobert // must use default ABI alignment as external users rely on it. Same
4490*d415bd75Srobert // for a function that may be called from a function pointer.
4491*d415bd75Srobert if (!F || !F->hasLocalLinkage() ||
4492*d415bd75Srobert F->hasAddressTaken(/*Users=*/nullptr,
4493*d415bd75Srobert /*IgnoreCallbackUses=*/false,
4494*d415bd75Srobert /*IgnoreAssumeLikeCalls=*/true,
4495*d415bd75Srobert /*IgnoreLLVMUsed=*/true))
4496*d415bd75Srobert return Align(ABITypeAlign);
4497*d415bd75Srobert
4498*d415bd75Srobert assert(!isKernelFunction(*F) && "Expect kernels to have non-local linkage");
4499*d415bd75Srobert return Align(std::max(uint64_t(16), ABITypeAlign));
4500*d415bd75Srobert }
4501*d415bd75Srobert
4502*d415bd75Srobert /// Helper for computing alignment of a device function byval parameter.
getFunctionByValParamAlign(const Function * F,Type * ArgTy,Align InitialAlign,const DataLayout & DL) const4503*d415bd75Srobert Align NVPTXTargetLowering::getFunctionByValParamAlign(
4504*d415bd75Srobert const Function *F, Type *ArgTy, Align InitialAlign,
4505*d415bd75Srobert const DataLayout &DL) const {
4506*d415bd75Srobert Align ArgAlign = InitialAlign;
4507*d415bd75Srobert // Try to increase alignment to enhance vectorization options.
4508*d415bd75Srobert if (F)
4509*d415bd75Srobert ArgAlign = std::max(ArgAlign, getFunctionParamOptimizedAlign(F, ArgTy, DL));
4510*d415bd75Srobert
4511*d415bd75Srobert // Work around a bug in ptxas. When PTX code takes address of
4512*d415bd75Srobert // byval parameter with alignment < 4, ptxas generates code to
4513*d415bd75Srobert // spill argument into memory. Alas on sm_50+ ptxas generates
4514*d415bd75Srobert // SASS code that fails with misaligned access. To work around
4515*d415bd75Srobert // the problem, make sure that we align byval parameters by at
4516*d415bd75Srobert // least 4.
4517*d415bd75Srobert // TODO: this will need to be undone when we get to support multi-TU
4518*d415bd75Srobert // device-side compilation as it breaks ABI compatibility with nvcc.
4519*d415bd75Srobert // Hopefully ptxas bug is fixed by then.
4520*d415bd75Srobert ArgAlign = std::max(ArgAlign, Align(4));
4521*d415bd75Srobert
4522*d415bd75Srobert return ArgAlign;
4523*d415bd75Srobert }
4524*d415bd75Srobert
452509467b48Spatrick /// isLegalAddressingMode - Return true if the addressing mode represented
452609467b48Spatrick /// by AM is legal for this target, for a load/store of the specified type.
452709467b48Spatrick /// Used to guide target specific optimizations, like loop strength reduction
452809467b48Spatrick /// (LoopStrengthReduce.cpp) and memory optimization for address mode
452909467b48Spatrick /// (CodeGenPrepare.cpp)
isLegalAddressingMode(const DataLayout & DL,const AddrMode & AM,Type * Ty,unsigned AS,Instruction * I) const453009467b48Spatrick bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
453109467b48Spatrick const AddrMode &AM, Type *Ty,
453209467b48Spatrick unsigned AS, Instruction *I) const {
453309467b48Spatrick // AddrMode - This represents an addressing mode of:
453409467b48Spatrick // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
453509467b48Spatrick //
453609467b48Spatrick // The legal address modes are
453709467b48Spatrick // - [avar]
453809467b48Spatrick // - [areg]
453909467b48Spatrick // - [areg+immoff]
454009467b48Spatrick // - [immAddr]
454109467b48Spatrick
454209467b48Spatrick if (AM.BaseGV) {
454309467b48Spatrick return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale;
454409467b48Spatrick }
454509467b48Spatrick
454609467b48Spatrick switch (AM.Scale) {
454709467b48Spatrick case 0: // "r", "r+i" or "i" is allowed
454809467b48Spatrick break;
454909467b48Spatrick case 1:
455009467b48Spatrick if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
455109467b48Spatrick return false;
455209467b48Spatrick // Otherwise we have r+i.
455309467b48Spatrick break;
455409467b48Spatrick default:
455509467b48Spatrick // No scale > 1 is allowed
455609467b48Spatrick return false;
455709467b48Spatrick }
455809467b48Spatrick return true;
455909467b48Spatrick }
456009467b48Spatrick
456109467b48Spatrick //===----------------------------------------------------------------------===//
456209467b48Spatrick // NVPTX Inline Assembly Support
456309467b48Spatrick //===----------------------------------------------------------------------===//
456409467b48Spatrick
456509467b48Spatrick /// getConstraintType - Given a constraint letter, return the type of
456609467b48Spatrick /// constraint it is for this target.
456709467b48Spatrick NVPTXTargetLowering::ConstraintType
getConstraintType(StringRef Constraint) const456809467b48Spatrick NVPTXTargetLowering::getConstraintType(StringRef Constraint) const {
456909467b48Spatrick if (Constraint.size() == 1) {
457009467b48Spatrick switch (Constraint[0]) {
457109467b48Spatrick default:
457209467b48Spatrick break;
457309467b48Spatrick case 'b':
457409467b48Spatrick case 'r':
457509467b48Spatrick case 'h':
457609467b48Spatrick case 'c':
457709467b48Spatrick case 'l':
457809467b48Spatrick case 'f':
457909467b48Spatrick case 'd':
458009467b48Spatrick case '0':
458109467b48Spatrick case 'N':
458209467b48Spatrick return C_RegisterClass;
458309467b48Spatrick }
458409467b48Spatrick }
458509467b48Spatrick return TargetLowering::getConstraintType(Constraint);
458609467b48Spatrick }
458709467b48Spatrick
458809467b48Spatrick std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo * TRI,StringRef Constraint,MVT VT) const458909467b48Spatrick NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
459009467b48Spatrick StringRef Constraint,
459109467b48Spatrick MVT VT) const {
459209467b48Spatrick if (Constraint.size() == 1) {
459309467b48Spatrick switch (Constraint[0]) {
459409467b48Spatrick case 'b':
459509467b48Spatrick return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
459609467b48Spatrick case 'c':
459709467b48Spatrick return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
459809467b48Spatrick case 'h':
459909467b48Spatrick return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
460009467b48Spatrick case 'r':
460109467b48Spatrick return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
460209467b48Spatrick case 'l':
460309467b48Spatrick case 'N':
460409467b48Spatrick return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
460509467b48Spatrick case 'f':
460609467b48Spatrick return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
460709467b48Spatrick case 'd':
460809467b48Spatrick return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
460909467b48Spatrick }
461009467b48Spatrick }
461109467b48Spatrick return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
461209467b48Spatrick }
461309467b48Spatrick
461409467b48Spatrick //===----------------------------------------------------------------------===//
461509467b48Spatrick // NVPTX DAG Combining
461609467b48Spatrick //===----------------------------------------------------------------------===//
461709467b48Spatrick
allowFMA(MachineFunction & MF,CodeGenOpt::Level OptLevel) const461809467b48Spatrick bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
461909467b48Spatrick CodeGenOpt::Level OptLevel) const {
462009467b48Spatrick // Always honor command-line argument
462109467b48Spatrick if (FMAContractLevelOpt.getNumOccurrences() > 0)
462209467b48Spatrick return FMAContractLevelOpt > 0;
462309467b48Spatrick
462409467b48Spatrick // Do not contract if we're not optimizing the code.
462509467b48Spatrick if (OptLevel == 0)
462609467b48Spatrick return false;
462709467b48Spatrick
462809467b48Spatrick // Honor TargetOptions flags that explicitly say fusion is okay.
462909467b48Spatrick if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast)
463009467b48Spatrick return true;
463109467b48Spatrick
463209467b48Spatrick return allowUnsafeFPMath(MF);
463309467b48Spatrick }
463409467b48Spatrick
allowUnsafeFPMath(MachineFunction & MF) const463509467b48Spatrick bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const {
463609467b48Spatrick // Honor TargetOptions flags that explicitly say unsafe math is okay.
463709467b48Spatrick if (MF.getTarget().Options.UnsafeFPMath)
463809467b48Spatrick return true;
463909467b48Spatrick
464009467b48Spatrick // Allow unsafe math if unsafe-fp-math attribute explicitly says so.
464109467b48Spatrick const Function &F = MF.getFunction();
464273471bf0Spatrick return F.getFnAttribute("unsafe-fp-math").getValueAsBool();
464309467b48Spatrick }
464409467b48Spatrick
464509467b48Spatrick /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
464609467b48Spatrick /// operands N0 and N1. This is a helper for PerformADDCombine that is
464709467b48Spatrick /// called with the default operands, and if that fails, with commuted
464809467b48Spatrick /// operands.
PerformADDCombineWithOperands(SDNode * N,SDValue N0,SDValue N1,TargetLowering::DAGCombinerInfo & DCI,const NVPTXSubtarget & Subtarget,CodeGenOpt::Level OptLevel)464909467b48Spatrick static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
465009467b48Spatrick TargetLowering::DAGCombinerInfo &DCI,
465109467b48Spatrick const NVPTXSubtarget &Subtarget,
465209467b48Spatrick CodeGenOpt::Level OptLevel) {
465309467b48Spatrick SelectionDAG &DAG = DCI.DAG;
465409467b48Spatrick // Skip non-integer, non-scalar case
465509467b48Spatrick EVT VT=N0.getValueType();
465609467b48Spatrick if (VT.isVector())
465709467b48Spatrick return SDValue();
465809467b48Spatrick
465909467b48Spatrick // fold (add (mul a, b), c) -> (mad a, b, c)
466009467b48Spatrick //
466109467b48Spatrick if (N0.getOpcode() == ISD::MUL) {
466209467b48Spatrick assert (VT.isInteger());
466309467b48Spatrick // For integer:
466409467b48Spatrick // Since integer multiply-add costs the same as integer multiply
466509467b48Spatrick // but is more costly than integer add, do the fusion only when
466609467b48Spatrick // the mul is only used in the add.
466709467b48Spatrick if (OptLevel==CodeGenOpt::None || VT != MVT::i32 ||
466809467b48Spatrick !N0.getNode()->hasOneUse())
466909467b48Spatrick return SDValue();
467009467b48Spatrick
467109467b48Spatrick // Do the folding
467209467b48Spatrick return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
467309467b48Spatrick N0.getOperand(0), N0.getOperand(1), N1);
467409467b48Spatrick }
467509467b48Spatrick else if (N0.getOpcode() == ISD::FMUL) {
467609467b48Spatrick if (VT == MVT::f32 || VT == MVT::f64) {
467709467b48Spatrick const auto *TLI = static_cast<const NVPTXTargetLowering *>(
467809467b48Spatrick &DAG.getTargetLoweringInfo());
467909467b48Spatrick if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
468009467b48Spatrick return SDValue();
468109467b48Spatrick
468209467b48Spatrick // For floating point:
468309467b48Spatrick // Do the fusion only when the mul has less than 5 uses and all
468409467b48Spatrick // are add.
468509467b48Spatrick // The heuristic is that if a use is not an add, then that use
468609467b48Spatrick // cannot be fused into fma, therefore mul is still needed anyway.
468709467b48Spatrick // If there are more than 4 uses, even if they are all add, fusing
468809467b48Spatrick // them will increase register pressue.
468909467b48Spatrick //
469009467b48Spatrick int numUses = 0;
469109467b48Spatrick int nonAddCount = 0;
4692*d415bd75Srobert for (const SDNode *User : N0.getNode()->uses()) {
469309467b48Spatrick numUses++;
469409467b48Spatrick if (User->getOpcode() != ISD::FADD)
469509467b48Spatrick ++nonAddCount;
469609467b48Spatrick }
469709467b48Spatrick if (numUses >= 5)
469809467b48Spatrick return SDValue();
469909467b48Spatrick if (nonAddCount) {
470009467b48Spatrick int orderNo = N->getIROrder();
470109467b48Spatrick int orderNo2 = N0.getNode()->getIROrder();
470209467b48Spatrick // simple heuristics here for considering potential register
470309467b48Spatrick // pressure, the logics here is that the differnce are used
470409467b48Spatrick // to measure the distance between def and use, the longer distance
470509467b48Spatrick // more likely cause register pressure.
470609467b48Spatrick if (orderNo - orderNo2 < 500)
470709467b48Spatrick return SDValue();
470809467b48Spatrick
470909467b48Spatrick // Now, check if at least one of the FMUL's operands is live beyond the node N,
471009467b48Spatrick // which guarantees that the FMA will not increase register pressure at node N.
471109467b48Spatrick bool opIsLive = false;
471209467b48Spatrick const SDNode *left = N0.getOperand(0).getNode();
471309467b48Spatrick const SDNode *right = N0.getOperand(1).getNode();
471409467b48Spatrick
471509467b48Spatrick if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
471609467b48Spatrick opIsLive = true;
471709467b48Spatrick
471809467b48Spatrick if (!opIsLive)
4719*d415bd75Srobert for (const SDNode *User : left->uses()) {
472009467b48Spatrick int orderNo3 = User->getIROrder();
472109467b48Spatrick if (orderNo3 > orderNo) {
472209467b48Spatrick opIsLive = true;
472309467b48Spatrick break;
472409467b48Spatrick }
472509467b48Spatrick }
472609467b48Spatrick
472709467b48Spatrick if (!opIsLive)
4728*d415bd75Srobert for (const SDNode *User : right->uses()) {
472909467b48Spatrick int orderNo3 = User->getIROrder();
473009467b48Spatrick if (orderNo3 > orderNo) {
473109467b48Spatrick opIsLive = true;
473209467b48Spatrick break;
473309467b48Spatrick }
473409467b48Spatrick }
473509467b48Spatrick
473609467b48Spatrick if (!opIsLive)
473709467b48Spatrick return SDValue();
473809467b48Spatrick }
473909467b48Spatrick
474009467b48Spatrick return DAG.getNode(ISD::FMA, SDLoc(N), VT,
474109467b48Spatrick N0.getOperand(0), N0.getOperand(1), N1);
474209467b48Spatrick }
474309467b48Spatrick }
474409467b48Spatrick
474509467b48Spatrick return SDValue();
474609467b48Spatrick }
474709467b48Spatrick
PerformStoreRetvalCombine(SDNode * N)4748*d415bd75Srobert static SDValue PerformStoreRetvalCombine(SDNode *N) {
4749*d415bd75Srobert // Operands from the 2nd to the last one are the values to be stored
4750*d415bd75Srobert for (std::size_t I = 2, OpsCount = N->ops().size(); I != OpsCount; ++I)
4751*d415bd75Srobert if (!N->getOperand(I).isUndef())
4752*d415bd75Srobert return SDValue();
4753*d415bd75Srobert
4754*d415bd75Srobert // Operand 0 is the previous value in the chain. Cannot return EntryToken
4755*d415bd75Srobert // as the previous value will become unused and eliminated later.
4756*d415bd75Srobert return N->getOperand(0);
4757*d415bd75Srobert }
4758*d415bd75Srobert
475909467b48Spatrick /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
476009467b48Spatrick ///
PerformADDCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,const NVPTXSubtarget & Subtarget,CodeGenOpt::Level OptLevel)476109467b48Spatrick static SDValue PerformADDCombine(SDNode *N,
476209467b48Spatrick TargetLowering::DAGCombinerInfo &DCI,
476309467b48Spatrick const NVPTXSubtarget &Subtarget,
476409467b48Spatrick CodeGenOpt::Level OptLevel) {
476509467b48Spatrick SDValue N0 = N->getOperand(0);
476609467b48Spatrick SDValue N1 = N->getOperand(1);
476709467b48Spatrick
476809467b48Spatrick // First try with the default operand order.
476909467b48Spatrick if (SDValue Result =
477009467b48Spatrick PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget, OptLevel))
477109467b48Spatrick return Result;
477209467b48Spatrick
477309467b48Spatrick // If that didn't work, try again with the operands commuted.
477409467b48Spatrick return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel);
477509467b48Spatrick }
477609467b48Spatrick
PerformANDCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI)477709467b48Spatrick static SDValue PerformANDCombine(SDNode *N,
477809467b48Spatrick TargetLowering::DAGCombinerInfo &DCI) {
477909467b48Spatrick // The type legalizer turns a vector load of i8 values into a zextload to i16
478009467b48Spatrick // registers, optionally ANY_EXTENDs it (if target type is integer),
478109467b48Spatrick // and ANDs off the high 8 bits. Since we turn this load into a
478209467b48Spatrick // target-specific DAG node, the DAG combiner fails to eliminate these AND
478309467b48Spatrick // nodes. Do that here.
478409467b48Spatrick SDValue Val = N->getOperand(0);
478509467b48Spatrick SDValue Mask = N->getOperand(1);
478609467b48Spatrick
478709467b48Spatrick if (isa<ConstantSDNode>(Val)) {
478809467b48Spatrick std::swap(Val, Mask);
478909467b48Spatrick }
479009467b48Spatrick
479109467b48Spatrick SDValue AExt;
479209467b48Spatrick // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
479309467b48Spatrick if (Val.getOpcode() == ISD::ANY_EXTEND) {
479409467b48Spatrick AExt = Val;
479509467b48Spatrick Val = Val->getOperand(0);
479609467b48Spatrick }
479709467b48Spatrick
479809467b48Spatrick if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) {
479909467b48Spatrick Val = Val->getOperand(0);
480009467b48Spatrick }
480109467b48Spatrick
480209467b48Spatrick if (Val->getOpcode() == NVPTXISD::LoadV2 ||
480309467b48Spatrick Val->getOpcode() == NVPTXISD::LoadV4) {
480409467b48Spatrick ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
480509467b48Spatrick if (!MaskCnst) {
480609467b48Spatrick // Not an AND with a constant
480709467b48Spatrick return SDValue();
480809467b48Spatrick }
480909467b48Spatrick
481009467b48Spatrick uint64_t MaskVal = MaskCnst->getZExtValue();
481109467b48Spatrick if (MaskVal != 0xff) {
481209467b48Spatrick // Not an AND that chops off top 8 bits
481309467b48Spatrick return SDValue();
481409467b48Spatrick }
481509467b48Spatrick
481609467b48Spatrick MemSDNode *Mem = dyn_cast<MemSDNode>(Val);
481709467b48Spatrick if (!Mem) {
481809467b48Spatrick // Not a MemSDNode?!?
481909467b48Spatrick return SDValue();
482009467b48Spatrick }
482109467b48Spatrick
482209467b48Spatrick EVT MemVT = Mem->getMemoryVT();
482309467b48Spatrick if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) {
482409467b48Spatrick // We only handle the i8 case
482509467b48Spatrick return SDValue();
482609467b48Spatrick }
482709467b48Spatrick
482809467b48Spatrick unsigned ExtType =
482909467b48Spatrick cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))->
483009467b48Spatrick getZExtValue();
483109467b48Spatrick if (ExtType == ISD::SEXTLOAD) {
483209467b48Spatrick // If for some reason the load is a sextload, the and is needed to zero
483309467b48Spatrick // out the high 8 bits
483409467b48Spatrick return SDValue();
483509467b48Spatrick }
483609467b48Spatrick
483709467b48Spatrick bool AddTo = false;
483809467b48Spatrick if (AExt.getNode() != nullptr) {
483909467b48Spatrick // Re-insert the ext as a zext.
484009467b48Spatrick Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
484109467b48Spatrick AExt.getValueType(), Val);
484209467b48Spatrick AddTo = true;
484309467b48Spatrick }
484409467b48Spatrick
484509467b48Spatrick // If we get here, the AND is unnecessary. Just replace it with the load
484609467b48Spatrick DCI.CombineTo(N, Val, AddTo);
484709467b48Spatrick }
484809467b48Spatrick
484909467b48Spatrick return SDValue();
485009467b48Spatrick }
485109467b48Spatrick
PerformREMCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,CodeGenOpt::Level OptLevel)485209467b48Spatrick static SDValue PerformREMCombine(SDNode *N,
485309467b48Spatrick TargetLowering::DAGCombinerInfo &DCI,
485409467b48Spatrick CodeGenOpt::Level OptLevel) {
485509467b48Spatrick assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM);
485609467b48Spatrick
485709467b48Spatrick // Don't do anything at less than -O2.
485809467b48Spatrick if (OptLevel < CodeGenOpt::Default)
485909467b48Spatrick return SDValue();
486009467b48Spatrick
486109467b48Spatrick SelectionDAG &DAG = DCI.DAG;
486209467b48Spatrick SDLoc DL(N);
486309467b48Spatrick EVT VT = N->getValueType(0);
486409467b48Spatrick bool IsSigned = N->getOpcode() == ISD::SREM;
486509467b48Spatrick unsigned DivOpc = IsSigned ? ISD::SDIV : ISD::UDIV;
486609467b48Spatrick
486709467b48Spatrick const SDValue &Num = N->getOperand(0);
486809467b48Spatrick const SDValue &Den = N->getOperand(1);
486909467b48Spatrick
487009467b48Spatrick for (const SDNode *U : Num->uses()) {
487109467b48Spatrick if (U->getOpcode() == DivOpc && U->getOperand(0) == Num &&
487209467b48Spatrick U->getOperand(1) == Den) {
487309467b48Spatrick // Num % Den -> Num - (Num / Den) * Den
487409467b48Spatrick return DAG.getNode(ISD::SUB, DL, VT, Num,
487509467b48Spatrick DAG.getNode(ISD::MUL, DL, VT,
487609467b48Spatrick DAG.getNode(DivOpc, DL, VT, Num, Den),
487709467b48Spatrick Den));
487809467b48Spatrick }
487909467b48Spatrick }
488009467b48Spatrick return SDValue();
488109467b48Spatrick }
488209467b48Spatrick
/// Signedness of a multiply operand, as reported by the mul.wide helpers
/// that follow.
enum OperandSignedness {
  Signed = 0,   ///< Operand is known to be signed.
  Unsigned = 1, ///< Operand is known to be unsigned.
  Unknown = 2   ///< Signedness could not be determined.
};
488809467b48Spatrick
488909467b48Spatrick /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
489009467b48Spatrick /// that can be demoted to \p OptSize bits without loss of information. The
489109467b48Spatrick /// signedness of the operand, if determinable, is placed in \p S.
IsMulWideOperandDemotable(SDValue Op,unsigned OptSize,OperandSignedness & S)489209467b48Spatrick static bool IsMulWideOperandDemotable(SDValue Op,
489309467b48Spatrick unsigned OptSize,
489409467b48Spatrick OperandSignedness &S) {
489509467b48Spatrick S = Unknown;
489609467b48Spatrick
489709467b48Spatrick if (Op.getOpcode() == ISD::SIGN_EXTEND ||
489809467b48Spatrick Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
489909467b48Spatrick EVT OrigVT = Op.getOperand(0).getValueType();
490073471bf0Spatrick if (OrigVT.getFixedSizeInBits() <= OptSize) {
490109467b48Spatrick S = Signed;
490209467b48Spatrick return true;
490309467b48Spatrick }
490409467b48Spatrick } else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
490509467b48Spatrick EVT OrigVT = Op.getOperand(0).getValueType();
490673471bf0Spatrick if (OrigVT.getFixedSizeInBits() <= OptSize) {
490709467b48Spatrick S = Unsigned;
490809467b48Spatrick return true;
490909467b48Spatrick }
491009467b48Spatrick }
491109467b48Spatrick
491209467b48Spatrick return false;
491309467b48Spatrick }
491409467b48Spatrick
491509467b48Spatrick /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
491609467b48Spatrick /// be demoted to \p OptSize bits without loss of information. If the operands
491709467b48Spatrick /// contain a constant, it should appear as the RHS operand. The signedness of
491809467b48Spatrick /// the operands is placed in \p IsSigned.
AreMulWideOperandsDemotable(SDValue LHS,SDValue RHS,unsigned OptSize,bool & IsSigned)491909467b48Spatrick static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
492009467b48Spatrick unsigned OptSize,
492109467b48Spatrick bool &IsSigned) {
492209467b48Spatrick OperandSignedness LHSSign;
492309467b48Spatrick
492409467b48Spatrick // The LHS operand must be a demotable op
492509467b48Spatrick if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign))
492609467b48Spatrick return false;
492709467b48Spatrick
492809467b48Spatrick // We should have been able to determine the signedness from the LHS
492909467b48Spatrick if (LHSSign == Unknown)
493009467b48Spatrick return false;
493109467b48Spatrick
493209467b48Spatrick IsSigned = (LHSSign == Signed);
493309467b48Spatrick
493409467b48Spatrick // The RHS can be a demotable op or a constant
493509467b48Spatrick if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
493609467b48Spatrick const APInt &Val = CI->getAPIntValue();
493709467b48Spatrick if (LHSSign == Unsigned) {
493809467b48Spatrick return Val.isIntN(OptSize);
493909467b48Spatrick } else {
494009467b48Spatrick return Val.isSignedIntN(OptSize);
494109467b48Spatrick }
494209467b48Spatrick } else {
494309467b48Spatrick OperandSignedness RHSSign;
494409467b48Spatrick if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
494509467b48Spatrick return false;
494609467b48Spatrick
494709467b48Spatrick return LHSSign == RHSSign;
494809467b48Spatrick }
494909467b48Spatrick }
495009467b48Spatrick
495109467b48Spatrick /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
495209467b48Spatrick /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
495309467b48Spatrick /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift
495409467b48Spatrick /// amount.
TryMULWIDECombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI)495509467b48Spatrick static SDValue TryMULWIDECombine(SDNode *N,
495609467b48Spatrick TargetLowering::DAGCombinerInfo &DCI) {
495709467b48Spatrick EVT MulType = N->getValueType(0);
495809467b48Spatrick if (MulType != MVT::i32 && MulType != MVT::i64) {
495909467b48Spatrick return SDValue();
496009467b48Spatrick }
496109467b48Spatrick
496209467b48Spatrick SDLoc DL(N);
496309467b48Spatrick unsigned OptSize = MulType.getSizeInBits() >> 1;
496409467b48Spatrick SDValue LHS = N->getOperand(0);
496509467b48Spatrick SDValue RHS = N->getOperand(1);
496609467b48Spatrick
496709467b48Spatrick // Canonicalize the multiply so the constant (if any) is on the right
496809467b48Spatrick if (N->getOpcode() == ISD::MUL) {
496909467b48Spatrick if (isa<ConstantSDNode>(LHS)) {
497009467b48Spatrick std::swap(LHS, RHS);
497109467b48Spatrick }
497209467b48Spatrick }
497309467b48Spatrick
497409467b48Spatrick // If we have a SHL, determine the actual multiply amount
497509467b48Spatrick if (N->getOpcode() == ISD::SHL) {
497609467b48Spatrick ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS);
497709467b48Spatrick if (!ShlRHS) {
497809467b48Spatrick return SDValue();
497909467b48Spatrick }
498009467b48Spatrick
498109467b48Spatrick APInt ShiftAmt = ShlRHS->getAPIntValue();
498209467b48Spatrick unsigned BitWidth = MulType.getSizeInBits();
498309467b48Spatrick if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) {
498409467b48Spatrick APInt MulVal = APInt(BitWidth, 1) << ShiftAmt;
498509467b48Spatrick RHS = DCI.DAG.getConstant(MulVal, DL, MulType);
498609467b48Spatrick } else {
498709467b48Spatrick return SDValue();
498809467b48Spatrick }
498909467b48Spatrick }
499009467b48Spatrick
499109467b48Spatrick bool Signed;
499209467b48Spatrick // Verify that our operands are demotable
499309467b48Spatrick if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) {
499409467b48Spatrick return SDValue();
499509467b48Spatrick }
499609467b48Spatrick
499709467b48Spatrick EVT DemotedVT;
499809467b48Spatrick if (MulType == MVT::i32) {
499909467b48Spatrick DemotedVT = MVT::i16;
500009467b48Spatrick } else {
500109467b48Spatrick DemotedVT = MVT::i32;
500209467b48Spatrick }
500309467b48Spatrick
500409467b48Spatrick // Truncate the operands to the correct size. Note that these are just for
500509467b48Spatrick // type consistency and will (likely) be eliminated in later phases.
500609467b48Spatrick SDValue TruncLHS =
500709467b48Spatrick DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS);
500809467b48Spatrick SDValue TruncRHS =
500909467b48Spatrick DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS);
501009467b48Spatrick
501109467b48Spatrick unsigned Opc;
501209467b48Spatrick if (Signed) {
501309467b48Spatrick Opc = NVPTXISD::MUL_WIDE_SIGNED;
501409467b48Spatrick } else {
501509467b48Spatrick Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
501609467b48Spatrick }
501709467b48Spatrick
501809467b48Spatrick return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS);
501909467b48Spatrick }
502009467b48Spatrick
502109467b48Spatrick /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
PerformMULCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,CodeGenOpt::Level OptLevel)502209467b48Spatrick static SDValue PerformMULCombine(SDNode *N,
502309467b48Spatrick TargetLowering::DAGCombinerInfo &DCI,
502409467b48Spatrick CodeGenOpt::Level OptLevel) {
502509467b48Spatrick if (OptLevel > 0) {
502609467b48Spatrick // Try mul.wide combining at OptLevel > 0
502709467b48Spatrick if (SDValue Ret = TryMULWIDECombine(N, DCI))
502809467b48Spatrick return Ret;
502909467b48Spatrick }
503009467b48Spatrick
503109467b48Spatrick return SDValue();
503209467b48Spatrick }
503309467b48Spatrick
503409467b48Spatrick /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
PerformSHLCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI,CodeGenOpt::Level OptLevel)503509467b48Spatrick static SDValue PerformSHLCombine(SDNode *N,
503609467b48Spatrick TargetLowering::DAGCombinerInfo &DCI,
503709467b48Spatrick CodeGenOpt::Level OptLevel) {
503809467b48Spatrick if (OptLevel > 0) {
503909467b48Spatrick // Try mul.wide combining at OptLevel > 0
504009467b48Spatrick if (SDValue Ret = TryMULWIDECombine(N, DCI))
504109467b48Spatrick return Ret;
504209467b48Spatrick }
504309467b48Spatrick
504409467b48Spatrick return SDValue();
504509467b48Spatrick }
504609467b48Spatrick
PerformSETCCCombine(SDNode * N,TargetLowering::DAGCombinerInfo & DCI)504709467b48Spatrick static SDValue PerformSETCCCombine(SDNode *N,
504809467b48Spatrick TargetLowering::DAGCombinerInfo &DCI) {
504909467b48Spatrick EVT CCType = N->getValueType(0);
505009467b48Spatrick SDValue A = N->getOperand(0);
505109467b48Spatrick SDValue B = N->getOperand(1);
505209467b48Spatrick
505309467b48Spatrick if (CCType != MVT::v2i1 || A.getValueType() != MVT::v2f16)
505409467b48Spatrick return SDValue();
505509467b48Spatrick
505609467b48Spatrick SDLoc DL(N);
505709467b48Spatrick // setp.f16x2 returns two scalar predicates, which we need to
505809467b48Spatrick // convert back to v2i1. The returned result will be scalarized by
505909467b48Spatrick // the legalizer, but the comparison will remain a single vector
506009467b48Spatrick // instruction.
506109467b48Spatrick SDValue CCNode = DCI.DAG.getNode(NVPTXISD::SETP_F16X2, DL,
506209467b48Spatrick DCI.DAG.getVTList(MVT::i1, MVT::i1),
506309467b48Spatrick {A, B, N->getOperand(2)});
506409467b48Spatrick return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, CCType, CCNode.getValue(0),
506509467b48Spatrick CCNode.getValue(1));
506609467b48Spatrick }
506709467b48Spatrick
PerformDAGCombine(SDNode * N,DAGCombinerInfo & DCI) const506809467b48Spatrick SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
506909467b48Spatrick DAGCombinerInfo &DCI) const {
507009467b48Spatrick CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
507109467b48Spatrick switch (N->getOpcode()) {
507209467b48Spatrick default: break;
507309467b48Spatrick case ISD::ADD:
507409467b48Spatrick case ISD::FADD:
507509467b48Spatrick return PerformADDCombine(N, DCI, STI, OptLevel);
507609467b48Spatrick case ISD::MUL:
507709467b48Spatrick return PerformMULCombine(N, DCI, OptLevel);
507809467b48Spatrick case ISD::SHL:
507909467b48Spatrick return PerformSHLCombine(N, DCI, OptLevel);
508009467b48Spatrick case ISD::AND:
508109467b48Spatrick return PerformANDCombine(N, DCI);
508209467b48Spatrick case ISD::UREM:
508309467b48Spatrick case ISD::SREM:
508409467b48Spatrick return PerformREMCombine(N, DCI, OptLevel);
508509467b48Spatrick case ISD::SETCC:
508609467b48Spatrick return PerformSETCCCombine(N, DCI);
5087*d415bd75Srobert case NVPTXISD::StoreRetval:
5088*d415bd75Srobert case NVPTXISD::StoreRetvalV2:
5089*d415bd75Srobert case NVPTXISD::StoreRetvalV4:
5090*d415bd75Srobert return PerformStoreRetvalCombine(N);
509109467b48Spatrick }
509209467b48Spatrick return SDValue();
509309467b48Spatrick }
509409467b48Spatrick
509509467b48Spatrick /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
ReplaceLoadVector(SDNode * N,SelectionDAG & DAG,SmallVectorImpl<SDValue> & Results)509609467b48Spatrick static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
509709467b48Spatrick SmallVectorImpl<SDValue> &Results) {
509809467b48Spatrick EVT ResVT = N->getValueType(0);
509909467b48Spatrick SDLoc DL(N);
510009467b48Spatrick
510109467b48Spatrick assert(ResVT.isVector() && "Vector load must have vector type");
510209467b48Spatrick
510309467b48Spatrick // We only handle "native" vector sizes for now, e.g. <4 x double> is not
510409467b48Spatrick // legal. We can (and should) split that into 2 loads of <2 x double> here
510509467b48Spatrick // but I'm leaving that as a TODO for now.
510609467b48Spatrick assert(ResVT.isSimple() && "Can only handle simple types");
510709467b48Spatrick switch (ResVT.getSimpleVT().SimpleTy) {
510809467b48Spatrick default:
510909467b48Spatrick return;
511009467b48Spatrick case MVT::v2i8:
511109467b48Spatrick case MVT::v2i16:
511209467b48Spatrick case MVT::v2i32:
511309467b48Spatrick case MVT::v2i64:
511409467b48Spatrick case MVT::v2f16:
511509467b48Spatrick case MVT::v2f32:
511609467b48Spatrick case MVT::v2f64:
511709467b48Spatrick case MVT::v4i8:
511809467b48Spatrick case MVT::v4i16:
511909467b48Spatrick case MVT::v4i32:
512009467b48Spatrick case MVT::v4f16:
512109467b48Spatrick case MVT::v4f32:
512209467b48Spatrick case MVT::v8f16: // <4 x f16x2>
512309467b48Spatrick // This is a "native" vector type
512409467b48Spatrick break;
512509467b48Spatrick }
512609467b48Spatrick
512709467b48Spatrick LoadSDNode *LD = cast<LoadSDNode>(N);
512809467b48Spatrick
5129097a140dSpatrick Align Alignment = LD->getAlign();
513009467b48Spatrick auto &TD = DAG.getDataLayout();
5131097a140dSpatrick Align PrefAlign = TD.getPrefTypeAlign(ResVT.getTypeForEVT(*DAG.getContext()));
5132097a140dSpatrick if (Alignment < PrefAlign) {
513309467b48Spatrick // This load is not sufficiently aligned, so bail out and let this vector
513409467b48Spatrick // load be scalarized. Note that we may still be able to emit smaller
513509467b48Spatrick // vector loads. For example, if we are loading a <4 x float> with an
513609467b48Spatrick // alignment of 8, this check will fail but the legalizer will try again
513709467b48Spatrick // with 2 x <2 x float>, which will succeed with an alignment of 8.
513809467b48Spatrick return;
513909467b48Spatrick }
514009467b48Spatrick
514109467b48Spatrick EVT EltVT = ResVT.getVectorElementType();
514209467b48Spatrick unsigned NumElts = ResVT.getVectorNumElements();
514309467b48Spatrick
514409467b48Spatrick // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
514509467b48Spatrick // Therefore, we must ensure the type is legal. For i1 and i8, we set the
514609467b48Spatrick // loaded type to i16 and propagate the "real" type as the memory type.
514709467b48Spatrick bool NeedTrunc = false;
514809467b48Spatrick if (EltVT.getSizeInBits() < 16) {
514909467b48Spatrick EltVT = MVT::i16;
515009467b48Spatrick NeedTrunc = true;
515109467b48Spatrick }
515209467b48Spatrick
515309467b48Spatrick unsigned Opcode = 0;
515409467b48Spatrick SDVTList LdResVTs;
515509467b48Spatrick bool LoadF16x2 = false;
515609467b48Spatrick
515709467b48Spatrick switch (NumElts) {
515809467b48Spatrick default:
515909467b48Spatrick return;
516009467b48Spatrick case 2:
516109467b48Spatrick Opcode = NVPTXISD::LoadV2;
516209467b48Spatrick LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
516309467b48Spatrick break;
516409467b48Spatrick case 4: {
516509467b48Spatrick Opcode = NVPTXISD::LoadV4;
516609467b48Spatrick EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
516709467b48Spatrick LdResVTs = DAG.getVTList(ListVTs);
516809467b48Spatrick break;
516909467b48Spatrick }
517009467b48Spatrick case 8: {
517109467b48Spatrick // v8f16 is a special case. PTX doesn't have ld.v8.f16
517209467b48Spatrick // instruction. Instead, we split the vector into v2f16 chunks and
517309467b48Spatrick // load them with ld.v4.b32.
5174*d415bd75Srobert assert((EltVT == MVT::f16 || EltVT == MVT::bf16) &&
5175*d415bd75Srobert "Unsupported v8 vector type.");
517609467b48Spatrick LoadF16x2 = true;
517709467b48Spatrick Opcode = NVPTXISD::LoadV4;
5178*d415bd75Srobert EVT VVT = (EltVT == MVT::f16) ? MVT::v2f16 : MVT::v2bf16;
5179*d415bd75Srobert EVT ListVTs[] = {VVT, VVT, VVT, VVT, MVT::Other};
518009467b48Spatrick LdResVTs = DAG.getVTList(ListVTs);
518109467b48Spatrick break;
518209467b48Spatrick }
518309467b48Spatrick }
518409467b48Spatrick
518509467b48Spatrick // Copy regular operands
518609467b48Spatrick SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end());
518709467b48Spatrick
518809467b48Spatrick // The select routine does not have access to the LoadSDNode instance, so
518909467b48Spatrick // pass along the extension information
519009467b48Spatrick OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL));
519109467b48Spatrick
519209467b48Spatrick SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
519309467b48Spatrick LD->getMemoryVT(),
519409467b48Spatrick LD->getMemOperand());
519509467b48Spatrick
519609467b48Spatrick SmallVector<SDValue, 8> ScalarRes;
519709467b48Spatrick if (LoadF16x2) {
519809467b48Spatrick // Split v2f16 subvectors back into individual elements.
519909467b48Spatrick NumElts /= 2;
520009467b48Spatrick for (unsigned i = 0; i < NumElts; ++i) {
520109467b48Spatrick SDValue SubVector = NewLD.getValue(i);
520209467b48Spatrick SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector,
520309467b48Spatrick DAG.getIntPtrConstant(0, DL));
520409467b48Spatrick SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector,
520509467b48Spatrick DAG.getIntPtrConstant(1, DL));
520609467b48Spatrick ScalarRes.push_back(E0);
520709467b48Spatrick ScalarRes.push_back(E1);
520809467b48Spatrick }
520909467b48Spatrick } else {
521009467b48Spatrick for (unsigned i = 0; i < NumElts; ++i) {
521109467b48Spatrick SDValue Res = NewLD.getValue(i);
521209467b48Spatrick if (NeedTrunc)
521309467b48Spatrick Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
521409467b48Spatrick ScalarRes.push_back(Res);
521509467b48Spatrick }
521609467b48Spatrick }
521709467b48Spatrick
521809467b48Spatrick SDValue LoadChain = NewLD.getValue(NumElts);
521909467b48Spatrick
522009467b48Spatrick SDValue BuildVec = DAG.getBuildVector(ResVT, DL, ScalarRes);
522109467b48Spatrick
522209467b48Spatrick Results.push_back(BuildVec);
522309467b48Spatrick Results.push_back(LoadChain);
522409467b48Spatrick }
522509467b48Spatrick
ReplaceINTRINSIC_W_CHAIN(SDNode * N,SelectionDAG & DAG,SmallVectorImpl<SDValue> & Results)522609467b48Spatrick static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
522709467b48Spatrick SmallVectorImpl<SDValue> &Results) {
522809467b48Spatrick SDValue Chain = N->getOperand(0);
522909467b48Spatrick SDValue Intrin = N->getOperand(1);
523009467b48Spatrick SDLoc DL(N);
523109467b48Spatrick
523209467b48Spatrick // Get the intrinsic ID
523309467b48Spatrick unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
523409467b48Spatrick switch (IntrinNo) {
523509467b48Spatrick default:
523609467b48Spatrick return;
523709467b48Spatrick case Intrinsic::nvvm_ldg_global_i:
523809467b48Spatrick case Intrinsic::nvvm_ldg_global_f:
523909467b48Spatrick case Intrinsic::nvvm_ldg_global_p:
524009467b48Spatrick case Intrinsic::nvvm_ldu_global_i:
524109467b48Spatrick case Intrinsic::nvvm_ldu_global_f:
524209467b48Spatrick case Intrinsic::nvvm_ldu_global_p: {
524309467b48Spatrick EVT ResVT = N->getValueType(0);
524409467b48Spatrick
524509467b48Spatrick if (ResVT.isVector()) {
524609467b48Spatrick // Vector LDG/LDU
524709467b48Spatrick
524809467b48Spatrick unsigned NumElts = ResVT.getVectorNumElements();
524909467b48Spatrick EVT EltVT = ResVT.getVectorElementType();
525009467b48Spatrick
525109467b48Spatrick // Since LDU/LDG are target nodes, we cannot rely on DAG type
525209467b48Spatrick // legalization.
525309467b48Spatrick // Therefore, we must ensure the type is legal. For i1 and i8, we set the
525409467b48Spatrick // loaded type to i16 and propagate the "real" type as the memory type.
525509467b48Spatrick bool NeedTrunc = false;
525609467b48Spatrick if (EltVT.getSizeInBits() < 16) {
525709467b48Spatrick EltVT = MVT::i16;
525809467b48Spatrick NeedTrunc = true;
525909467b48Spatrick }
526009467b48Spatrick
526109467b48Spatrick unsigned Opcode = 0;
526209467b48Spatrick SDVTList LdResVTs;
526309467b48Spatrick
526409467b48Spatrick switch (NumElts) {
526509467b48Spatrick default:
526609467b48Spatrick return;
526709467b48Spatrick case 2:
526809467b48Spatrick switch (IntrinNo) {
526909467b48Spatrick default:
527009467b48Spatrick return;
527109467b48Spatrick case Intrinsic::nvvm_ldg_global_i:
527209467b48Spatrick case Intrinsic::nvvm_ldg_global_f:
527309467b48Spatrick case Intrinsic::nvvm_ldg_global_p:
527409467b48Spatrick Opcode = NVPTXISD::LDGV2;
527509467b48Spatrick break;
527609467b48Spatrick case Intrinsic::nvvm_ldu_global_i:
527709467b48Spatrick case Intrinsic::nvvm_ldu_global_f:
527809467b48Spatrick case Intrinsic::nvvm_ldu_global_p:
527909467b48Spatrick Opcode = NVPTXISD::LDUV2;
528009467b48Spatrick break;
528109467b48Spatrick }
528209467b48Spatrick LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
528309467b48Spatrick break;
528409467b48Spatrick case 4: {
528509467b48Spatrick switch (IntrinNo) {
528609467b48Spatrick default:
528709467b48Spatrick return;
528809467b48Spatrick case Intrinsic::nvvm_ldg_global_i:
528909467b48Spatrick case Intrinsic::nvvm_ldg_global_f:
529009467b48Spatrick case Intrinsic::nvvm_ldg_global_p:
529109467b48Spatrick Opcode = NVPTXISD::LDGV4;
529209467b48Spatrick break;
529309467b48Spatrick case Intrinsic::nvvm_ldu_global_i:
529409467b48Spatrick case Intrinsic::nvvm_ldu_global_f:
529509467b48Spatrick case Intrinsic::nvvm_ldu_global_p:
529609467b48Spatrick Opcode = NVPTXISD::LDUV4;
529709467b48Spatrick break;
529809467b48Spatrick }
529909467b48Spatrick EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
530009467b48Spatrick LdResVTs = DAG.getVTList(ListVTs);
530109467b48Spatrick break;
530209467b48Spatrick }
530309467b48Spatrick }
530409467b48Spatrick
530509467b48Spatrick SmallVector<SDValue, 8> OtherOps;
530609467b48Spatrick
530709467b48Spatrick // Copy regular operands
530809467b48Spatrick
530909467b48Spatrick OtherOps.push_back(Chain); // Chain
531009467b48Spatrick // Skip operand 1 (intrinsic ID)
531109467b48Spatrick // Others
531209467b48Spatrick OtherOps.append(N->op_begin() + 2, N->op_end());
531309467b48Spatrick
531409467b48Spatrick MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
531509467b48Spatrick
531609467b48Spatrick SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
531709467b48Spatrick MemSD->getMemoryVT(),
531809467b48Spatrick MemSD->getMemOperand());
531909467b48Spatrick
532009467b48Spatrick SmallVector<SDValue, 4> ScalarRes;
532109467b48Spatrick
532209467b48Spatrick for (unsigned i = 0; i < NumElts; ++i) {
532309467b48Spatrick SDValue Res = NewLD.getValue(i);
532409467b48Spatrick if (NeedTrunc)
532509467b48Spatrick Res =
532609467b48Spatrick DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
532709467b48Spatrick ScalarRes.push_back(Res);
532809467b48Spatrick }
532909467b48Spatrick
533009467b48Spatrick SDValue LoadChain = NewLD.getValue(NumElts);
533109467b48Spatrick
533209467b48Spatrick SDValue BuildVec =
533309467b48Spatrick DAG.getBuildVector(ResVT, DL, ScalarRes);
533409467b48Spatrick
533509467b48Spatrick Results.push_back(BuildVec);
533609467b48Spatrick Results.push_back(LoadChain);
533709467b48Spatrick } else {
533809467b48Spatrick // i8 LDG/LDU
533909467b48Spatrick assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
534009467b48Spatrick "Custom handling of non-i8 ldu/ldg?");
534109467b48Spatrick
534209467b48Spatrick // Just copy all operands as-is
534309467b48Spatrick SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
534409467b48Spatrick
534509467b48Spatrick // Force output to i16
534609467b48Spatrick SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
534709467b48Spatrick
534809467b48Spatrick MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
534909467b48Spatrick
535009467b48Spatrick // We make sure the memory type is i8, which will be used during isel
535109467b48Spatrick // to select the proper instruction.
535209467b48Spatrick SDValue NewLD =
535309467b48Spatrick DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
535409467b48Spatrick MVT::i8, MemSD->getMemOperand());
535509467b48Spatrick
535609467b48Spatrick Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
535709467b48Spatrick NewLD.getValue(0)));
535809467b48Spatrick Results.push_back(NewLD.getValue(1));
535909467b48Spatrick }
536009467b48Spatrick }
536109467b48Spatrick }
536209467b48Spatrick }
536309467b48Spatrick
ReplaceNodeResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const536409467b48Spatrick void NVPTXTargetLowering::ReplaceNodeResults(
536509467b48Spatrick SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
536609467b48Spatrick switch (N->getOpcode()) {
536709467b48Spatrick default:
536809467b48Spatrick report_fatal_error("Unhandled custom legalization");
536909467b48Spatrick case ISD::LOAD:
537009467b48Spatrick ReplaceLoadVector(N, DAG, Results);
537109467b48Spatrick return;
537209467b48Spatrick case ISD::INTRINSIC_W_CHAIN:
537309467b48Spatrick ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
537409467b48Spatrick return;
537509467b48Spatrick }
537609467b48Spatrick }
537709467b48Spatrick
5378*d415bd75Srobert NVPTXTargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst * AI) const5379*d415bd75Srobert NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
5380*d415bd75Srobert Type *Ty = AI->getValOperand()->getType();
5381*d415bd75Srobert
5382*d415bd75Srobert if (AI->isFloatingPointOperation()) {
5383*d415bd75Srobert if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) {
5384*d415bd75Srobert if (Ty->isFloatTy())
5385*d415bd75Srobert return AtomicExpansionKind::None;
5386*d415bd75Srobert if (Ty->isDoubleTy() && STI.hasAtomAddF64())
5387*d415bd75Srobert return AtomicExpansionKind::None;
5388*d415bd75Srobert }
5389*d415bd75Srobert return AtomicExpansionKind::CmpXChg;
5390*d415bd75Srobert }
5391*d415bd75Srobert
5392*d415bd75Srobert assert(Ty->isIntegerTy() && "Ty should be integer at this point");
5393*d415bd75Srobert auto ITy = cast<llvm::IntegerType>(Ty);
5394*d415bd75Srobert
5395*d415bd75Srobert switch (AI->getOperation()) {
5396*d415bd75Srobert default:
5397*d415bd75Srobert return AtomicExpansionKind::CmpXChg;
5398*d415bd75Srobert case AtomicRMWInst::BinOp::And:
5399*d415bd75Srobert case AtomicRMWInst::BinOp::Or:
5400*d415bd75Srobert case AtomicRMWInst::BinOp::Xor:
5401*d415bd75Srobert case AtomicRMWInst::BinOp::Xchg:
5402*d415bd75Srobert switch (ITy->getBitWidth()) {
5403*d415bd75Srobert case 8:
5404*d415bd75Srobert case 16:
5405*d415bd75Srobert return AtomicExpansionKind::CmpXChg;
5406*d415bd75Srobert case 32:
5407*d415bd75Srobert return AtomicExpansionKind::None;
5408*d415bd75Srobert case 64:
5409*d415bd75Srobert if (STI.hasAtomBitwise64())
5410*d415bd75Srobert return AtomicExpansionKind::None;
5411*d415bd75Srobert return AtomicExpansionKind::CmpXChg;
5412*d415bd75Srobert default:
5413*d415bd75Srobert llvm_unreachable("unsupported width encountered");
5414*d415bd75Srobert }
5415*d415bd75Srobert case AtomicRMWInst::BinOp::Add:
5416*d415bd75Srobert case AtomicRMWInst::BinOp::Sub:
5417*d415bd75Srobert case AtomicRMWInst::BinOp::Max:
5418*d415bd75Srobert case AtomicRMWInst::BinOp::Min:
5419*d415bd75Srobert case AtomicRMWInst::BinOp::UMax:
5420*d415bd75Srobert case AtomicRMWInst::BinOp::UMin:
5421*d415bd75Srobert switch (ITy->getBitWidth()) {
5422*d415bd75Srobert case 8:
5423*d415bd75Srobert case 16:
5424*d415bd75Srobert return AtomicExpansionKind::CmpXChg;
5425*d415bd75Srobert case 32:
5426*d415bd75Srobert return AtomicExpansionKind::None;
5427*d415bd75Srobert case 64:
5428*d415bd75Srobert if (STI.hasAtomMinMax64())
5429*d415bd75Srobert return AtomicExpansionKind::None;
5430*d415bd75Srobert return AtomicExpansionKind::CmpXChg;
5431*d415bd75Srobert default:
5432*d415bd75Srobert llvm_unreachable("unsupported width encountered");
5433*d415bd75Srobert }
5434*d415bd75Srobert }
5435*d415bd75Srobert
5436*d415bd75Srobert return AtomicExpansionKind::CmpXChg;
5437*d415bd75Srobert }
5438*d415bd75Srobert
543909467b48Spatrick // Pin NVPTXTargetObjectFile's vtables to this file.
5440*d415bd75Srobert NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default;
544109467b48Spatrick
SelectSectionForGlobal(const GlobalObject * GO,SectionKind Kind,const TargetMachine & TM) const544209467b48Spatrick MCSection *NVPTXTargetObjectFile::SelectSectionForGlobal(
544309467b48Spatrick const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
544409467b48Spatrick return getDataSection();
544509467b48Spatrick }
5446