1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the NVPTX target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "NVPTXISelDAGToDAG.h"
14 #include "MCTargetDesc/NVPTXBaseInfo.h"
15 #include "NVPTXUtilities.h"
16 #include "llvm/Analysis/ValueTracking.h"
17 #include "llvm/CodeGen/ISDOpcodes.h"
18 #include "llvm/IR/GlobalValue.h"
19 #include "llvm/IR/Instructions.h"
20 #include "llvm/IR/IntrinsicsNVPTX.h"
21 #include "llvm/Support/AtomicOrdering.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include "llvm/Target/TargetIntrinsicInfo.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "nvptx-isel"
31 #define PASS_NAME "NVPTX DAG->DAG Pattern Instruction Selection"
32 
33 /// createNVPTXISelDag - This pass converts a legalized DAG into a
34 /// NVPTX-specific DAG, ready for instruction scheduling.
createNVPTXISelDag(NVPTXTargetMachine & TM,llvm::CodeGenOptLevel OptLevel)35 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
36                                        llvm::CodeGenOptLevel OptLevel) {
37   return new NVPTXDAGToDAGISel(TM, OptLevel);
38 }
39 
// Pass identification: the *address* of ID (not its value) uniquely
// identifies this pass to the pass registry.
char NVPTXDAGToDAGISel::ID = 0;

// Register the pass with the legacy pass manager (name, DEBUG_TYPE tag,
// is-CFG-only = false, is-analysis = false).
INITIALIZE_PASS(NVPTXDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
43 
44 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
45                                      CodeGenOptLevel OptLevel)
46     : SelectionDAGISel(ID, tm, OptLevel), TM(tm) {
47   doMulWide = (OptLevel > CodeGenOptLevel::None);
48 }
49 
// Cache the per-function subtarget before delegating to the common
// SelectionDAG instruction-selection driver.
bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<NVPTXSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}
54 
getDivF32Level() const55 int NVPTXDAGToDAGISel::getDivF32Level() const {
56   return Subtarget->getTargetLowering()->getDivF32Level();
57 }
58 
usePrecSqrtF32() const59 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
60   return Subtarget->getTargetLowering()->usePrecSqrtF32();
61 }
62 
useF32FTZ() const63 bool NVPTXDAGToDAGISel::useF32FTZ() const {
64   return Subtarget->getTargetLowering()->useF32FTZ(*MF);
65 }
66 
allowFMA() const67 bool NVPTXDAGToDAGISel::allowFMA() const {
68   const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
69   return TL->allowFMA(*MF, OptLevel);
70 }
71 
allowUnsafeFPMath() const72 bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
73   const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
74   return TL->allowUnsafeFPMath(*MF);
75 }
76 
// Forward the target machine's short-pointer setting (32-bit pointers for
// some non-generic address spaces — see the cvta_*_6432/_3264 opcode
// selection below).
bool NVPTXDAGToDAGISel::useShortPointers() const {
  return TM.useShortPointers();
}
80 
/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
///
/// Dispatches on the node opcode to a try* helper; if the helper declines
/// (returns false), falls through to the tablegen-generated matcher.
void NVPTXDAGToDAGISel::Select(SDNode *N) {

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  switch (N->getOpcode()) {
  case ISD::LOAD:
  case ISD::ATOMIC_LOAD:
    if (tryLoad(N))
      return;
    break;
  case ISD::STORE:
  case ISD::ATOMIC_STORE:
    if (tryStore(N))
      return;
    break;
  case ISD::EXTRACT_VECTOR_ELT:
    if (tryEXTRACT_VECTOR_ELEMENT(N))
      return;
    break;
  // Packed-compare nodes are always custom-selected (no fallthrough).
  case NVPTXISD::SETP_F16X2:
    SelectSETP_F16X2(N);
    return;
  case NVPTXISD::SETP_BF16X2:
    SelectSETP_BF16X2(N);
    return;
  case NVPTXISD::LoadV2:
  case NVPTXISD::LoadV4:
    if (tryLoadVector(N))
      return;
    break;
  case NVPTXISD::LDGV2:
  case NVPTXISD::LDGV4:
  case NVPTXISD::LDUV2:
  case NVPTXISD::LDUV4:
    if (tryLDGLDU(N))
      return;
    break;
  case NVPTXISD::StoreV2:
  case NVPTXISD::StoreV4:
    if (tryStoreVector(N))
      return;
    break;
  case NVPTXISD::LoadParam:
  case NVPTXISD::LoadParamV2:
  case NVPTXISD::LoadParamV4:
    if (tryLoadParam(N))
      return;
    break;
  case NVPTXISD::StoreRetval:
  case NVPTXISD::StoreRetvalV2:
  case NVPTXISD::StoreRetvalV4:
    if (tryStoreRetval(N))
      return;
    break;
  case NVPTXISD::StoreParam:
  case NVPTXISD::StoreParamV2:
  case NVPTXISD::StoreParamV4:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParamU32:
    if (tryStoreParam(N))
      return;
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    if (tryIntrinsicNoChain(N))
      return;
    break;
  case ISD::INTRINSIC_W_CHAIN:
    if (tryIntrinsicChain(N))
      return;
    break;
  // Every texture fetch flavor (tex/tld4, standalone and unified mode)
  // funnels into tryTextureIntrinsic.
  case NVPTXISD::Tex1DFloatS32:
  case NVPTXISD::Tex1DFloatFloat:
  case NVPTXISD::Tex1DFloatFloatLevel:
  case NVPTXISD::Tex1DFloatFloatGrad:
  case NVPTXISD::Tex1DS32S32:
  case NVPTXISD::Tex1DS32Float:
  case NVPTXISD::Tex1DS32FloatLevel:
  case NVPTXISD::Tex1DS32FloatGrad:
  case NVPTXISD::Tex1DU32S32:
  case NVPTXISD::Tex1DU32Float:
  case NVPTXISD::Tex1DU32FloatLevel:
  case NVPTXISD::Tex1DU32FloatGrad:
  case NVPTXISD::Tex1DArrayFloatS32:
  case NVPTXISD::Tex1DArrayFloatFloat:
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
  case NVPTXISD::Tex1DArrayS32S32:
  case NVPTXISD::Tex1DArrayS32Float:
  case NVPTXISD::Tex1DArrayS32FloatLevel:
  case NVPTXISD::Tex1DArrayS32FloatGrad:
  case NVPTXISD::Tex1DArrayU32S32:
  case NVPTXISD::Tex1DArrayU32Float:
  case NVPTXISD::Tex1DArrayU32FloatLevel:
  case NVPTXISD::Tex1DArrayU32FloatGrad:
  case NVPTXISD::Tex2DFloatS32:
  case NVPTXISD::Tex2DFloatFloat:
  case NVPTXISD::Tex2DFloatFloatLevel:
  case NVPTXISD::Tex2DFloatFloatGrad:
  case NVPTXISD::Tex2DS32S32:
  case NVPTXISD::Tex2DS32Float:
  case NVPTXISD::Tex2DS32FloatLevel:
  case NVPTXISD::Tex2DS32FloatGrad:
  case NVPTXISD::Tex2DU32S32:
  case NVPTXISD::Tex2DU32Float:
  case NVPTXISD::Tex2DU32FloatLevel:
  case NVPTXISD::Tex2DU32FloatGrad:
  case NVPTXISD::Tex2DArrayFloatS32:
  case NVPTXISD::Tex2DArrayFloatFloat:
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
  case NVPTXISD::Tex2DArrayS32S32:
  case NVPTXISD::Tex2DArrayS32Float:
  case NVPTXISD::Tex2DArrayS32FloatLevel:
  case NVPTXISD::Tex2DArrayS32FloatGrad:
  case NVPTXISD::Tex2DArrayU32S32:
  case NVPTXISD::Tex2DArrayU32Float:
  case NVPTXISD::Tex2DArrayU32FloatLevel:
  case NVPTXISD::Tex2DArrayU32FloatGrad:
  case NVPTXISD::Tex3DFloatS32:
  case NVPTXISD::Tex3DFloatFloat:
  case NVPTXISD::Tex3DFloatFloatLevel:
  case NVPTXISD::Tex3DFloatFloatGrad:
  case NVPTXISD::Tex3DS32S32:
  case NVPTXISD::Tex3DS32Float:
  case NVPTXISD::Tex3DS32FloatLevel:
  case NVPTXISD::Tex3DS32FloatGrad:
  case NVPTXISD::Tex3DU32S32:
  case NVPTXISD::Tex3DU32Float:
  case NVPTXISD::Tex3DU32FloatLevel:
  case NVPTXISD::Tex3DU32FloatGrad:
  case NVPTXISD::TexCubeFloatFloat:
  case NVPTXISD::TexCubeFloatFloatLevel:
  case NVPTXISD::TexCubeS32Float:
  case NVPTXISD::TexCubeS32FloatLevel:
  case NVPTXISD::TexCubeU32Float:
  case NVPTXISD::TexCubeU32FloatLevel:
  case NVPTXISD::TexCubeArrayFloatFloat:
  case NVPTXISD::TexCubeArrayFloatFloatLevel:
  case NVPTXISD::TexCubeArrayS32Float:
  case NVPTXISD::TexCubeArrayS32FloatLevel:
  case NVPTXISD::TexCubeArrayU32Float:
  case NVPTXISD::TexCubeArrayU32FloatLevel:
  case NVPTXISD::Tld4R2DFloatFloat:
  case NVPTXISD::Tld4G2DFloatFloat:
  case NVPTXISD::Tld4B2DFloatFloat:
  case NVPTXISD::Tld4A2DFloatFloat:
  case NVPTXISD::Tld4R2DS64Float:
  case NVPTXISD::Tld4G2DS64Float:
  case NVPTXISD::Tld4B2DS64Float:
  case NVPTXISD::Tld4A2DS64Float:
  case NVPTXISD::Tld4R2DU64Float:
  case NVPTXISD::Tld4G2DU64Float:
  case NVPTXISD::Tld4B2DU64Float:
  case NVPTXISD::Tld4A2DU64Float:
  case NVPTXISD::TexUnified1DFloatS32:
  case NVPTXISD::TexUnified1DFloatFloat:
  case NVPTXISD::TexUnified1DFloatFloatLevel:
  case NVPTXISD::TexUnified1DFloatFloatGrad:
  case NVPTXISD::TexUnified1DS32S32:
  case NVPTXISD::TexUnified1DS32Float:
  case NVPTXISD::TexUnified1DS32FloatLevel:
  case NVPTXISD::TexUnified1DS32FloatGrad:
  case NVPTXISD::TexUnified1DU32S32:
  case NVPTXISD::TexUnified1DU32Float:
  case NVPTXISD::TexUnified1DU32FloatLevel:
  case NVPTXISD::TexUnified1DU32FloatGrad:
  case NVPTXISD::TexUnified1DArrayFloatS32:
  case NVPTXISD::TexUnified1DArrayFloatFloat:
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
  case NVPTXISD::TexUnified1DArrayS32S32:
  case NVPTXISD::TexUnified1DArrayS32Float:
  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
  case NVPTXISD::TexUnified1DArrayU32S32:
  case NVPTXISD::TexUnified1DArrayU32Float:
  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
  case NVPTXISD::TexUnified2DFloatS32:
  case NVPTXISD::TexUnified2DFloatFloat:
  case NVPTXISD::TexUnified2DFloatFloatLevel:
  case NVPTXISD::TexUnified2DFloatFloatGrad:
  case NVPTXISD::TexUnified2DS32S32:
  case NVPTXISD::TexUnified2DS32Float:
  case NVPTXISD::TexUnified2DS32FloatLevel:
  case NVPTXISD::TexUnified2DS32FloatGrad:
  case NVPTXISD::TexUnified2DU32S32:
  case NVPTXISD::TexUnified2DU32Float:
  case NVPTXISD::TexUnified2DU32FloatLevel:
  case NVPTXISD::TexUnified2DU32FloatGrad:
  case NVPTXISD::TexUnified2DArrayFloatS32:
  case NVPTXISD::TexUnified2DArrayFloatFloat:
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
  case NVPTXISD::TexUnified2DArrayS32S32:
  case NVPTXISD::TexUnified2DArrayS32Float:
  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
  case NVPTXISD::TexUnified2DArrayU32S32:
  case NVPTXISD::TexUnified2DArrayU32Float:
  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
  case NVPTXISD::TexUnified3DFloatS32:
  case NVPTXISD::TexUnified3DFloatFloat:
  case NVPTXISD::TexUnified3DFloatFloatLevel:
  case NVPTXISD::TexUnified3DFloatFloatGrad:
  case NVPTXISD::TexUnified3DS32S32:
  case NVPTXISD::TexUnified3DS32Float:
  case NVPTXISD::TexUnified3DS32FloatLevel:
  case NVPTXISD::TexUnified3DS32FloatGrad:
  case NVPTXISD::TexUnified3DU32S32:
  case NVPTXISD::TexUnified3DU32Float:
  case NVPTXISD::TexUnified3DU32FloatLevel:
  case NVPTXISD::TexUnified3DU32FloatGrad:
  case NVPTXISD::TexUnifiedCubeFloatFloat:
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
  case NVPTXISD::TexUnifiedCubeS32Float:
  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
  case NVPTXISD::TexUnifiedCubeU32Float:
  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayS32Float:
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayU32Float:
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
  case NVPTXISD::TexUnifiedCubeFloatFloatGrad:
  case NVPTXISD::TexUnifiedCubeS32FloatGrad:
  case NVPTXISD::TexUnifiedCubeU32FloatGrad:
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad:
  case NVPTXISD::TexUnifiedCubeArrayS32FloatGrad:
  case NVPTXISD::TexUnifiedCubeArrayU32FloatGrad:
  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
  case NVPTXISD::Tld4UnifiedR2DS64Float:
  case NVPTXISD::Tld4UnifiedG2DS64Float:
  case NVPTXISD::Tld4UnifiedB2DS64Float:
  case NVPTXISD::Tld4UnifiedA2DS64Float:
  case NVPTXISD::Tld4UnifiedR2DU64Float:
  case NVPTXISD::Tld4UnifiedG2DU64Float:
  case NVPTXISD::Tld4UnifiedB2DU64Float:
  case NVPTXISD::Tld4UnifiedA2DU64Float:
    if (tryTextureIntrinsic(N))
      return;
    break;
  // Every surface-load (suld) flavor — one group per out-of-bounds
  // handling mode (Clamp / Trap / Zero) — goes to trySurfaceIntrinsic.
  case NVPTXISD::Suld1DI8Clamp:
  case NVPTXISD::Suld1DI16Clamp:
  case NVPTXISD::Suld1DI32Clamp:
  case NVPTXISD::Suld1DI64Clamp:
  case NVPTXISD::Suld1DV2I8Clamp:
  case NVPTXISD::Suld1DV2I16Clamp:
  case NVPTXISD::Suld1DV2I32Clamp:
  case NVPTXISD::Suld1DV2I64Clamp:
  case NVPTXISD::Suld1DV4I8Clamp:
  case NVPTXISD::Suld1DV4I16Clamp:
  case NVPTXISD::Suld1DV4I32Clamp:
  case NVPTXISD::Suld1DArrayI8Clamp:
  case NVPTXISD::Suld1DArrayI16Clamp:
  case NVPTXISD::Suld1DArrayI32Clamp:
  case NVPTXISD::Suld1DArrayI64Clamp:
  case NVPTXISD::Suld1DArrayV2I8Clamp:
  case NVPTXISD::Suld1DArrayV2I16Clamp:
  case NVPTXISD::Suld1DArrayV2I32Clamp:
  case NVPTXISD::Suld1DArrayV2I64Clamp:
  case NVPTXISD::Suld1DArrayV4I8Clamp:
  case NVPTXISD::Suld1DArrayV4I16Clamp:
  case NVPTXISD::Suld1DArrayV4I32Clamp:
  case NVPTXISD::Suld2DI8Clamp:
  case NVPTXISD::Suld2DI16Clamp:
  case NVPTXISD::Suld2DI32Clamp:
  case NVPTXISD::Suld2DI64Clamp:
  case NVPTXISD::Suld2DV2I8Clamp:
  case NVPTXISD::Suld2DV2I16Clamp:
  case NVPTXISD::Suld2DV2I32Clamp:
  case NVPTXISD::Suld2DV2I64Clamp:
  case NVPTXISD::Suld2DV4I8Clamp:
  case NVPTXISD::Suld2DV4I16Clamp:
  case NVPTXISD::Suld2DV4I32Clamp:
  case NVPTXISD::Suld2DArrayI8Clamp:
  case NVPTXISD::Suld2DArrayI16Clamp:
  case NVPTXISD::Suld2DArrayI32Clamp:
  case NVPTXISD::Suld2DArrayI64Clamp:
  case NVPTXISD::Suld2DArrayV2I8Clamp:
  case NVPTXISD::Suld2DArrayV2I16Clamp:
  case NVPTXISD::Suld2DArrayV2I32Clamp:
  case NVPTXISD::Suld2DArrayV2I64Clamp:
  case NVPTXISD::Suld2DArrayV4I8Clamp:
  case NVPTXISD::Suld2DArrayV4I16Clamp:
  case NVPTXISD::Suld2DArrayV4I32Clamp:
  case NVPTXISD::Suld3DI8Clamp:
  case NVPTXISD::Suld3DI16Clamp:
  case NVPTXISD::Suld3DI32Clamp:
  case NVPTXISD::Suld3DI64Clamp:
  case NVPTXISD::Suld3DV2I8Clamp:
  case NVPTXISD::Suld3DV2I16Clamp:
  case NVPTXISD::Suld3DV2I32Clamp:
  case NVPTXISD::Suld3DV2I64Clamp:
  case NVPTXISD::Suld3DV4I8Clamp:
  case NVPTXISD::Suld3DV4I16Clamp:
  case NVPTXISD::Suld3DV4I32Clamp:
  case NVPTXISD::Suld1DI8Trap:
  case NVPTXISD::Suld1DI16Trap:
  case NVPTXISD::Suld1DI32Trap:
  case NVPTXISD::Suld1DI64Trap:
  case NVPTXISD::Suld1DV2I8Trap:
  case NVPTXISD::Suld1DV2I16Trap:
  case NVPTXISD::Suld1DV2I32Trap:
  case NVPTXISD::Suld1DV2I64Trap:
  case NVPTXISD::Suld1DV4I8Trap:
  case NVPTXISD::Suld1DV4I16Trap:
  case NVPTXISD::Suld1DV4I32Trap:
  case NVPTXISD::Suld1DArrayI8Trap:
  case NVPTXISD::Suld1DArrayI16Trap:
  case NVPTXISD::Suld1DArrayI32Trap:
  case NVPTXISD::Suld1DArrayI64Trap:
  case NVPTXISD::Suld1DArrayV2I8Trap:
  case NVPTXISD::Suld1DArrayV2I16Trap:
  case NVPTXISD::Suld1DArrayV2I32Trap:
  case NVPTXISD::Suld1DArrayV2I64Trap:
  case NVPTXISD::Suld1DArrayV4I8Trap:
  case NVPTXISD::Suld1DArrayV4I16Trap:
  case NVPTXISD::Suld1DArrayV4I32Trap:
  case NVPTXISD::Suld2DI8Trap:
  case NVPTXISD::Suld2DI16Trap:
  case NVPTXISD::Suld2DI32Trap:
  case NVPTXISD::Suld2DI64Trap:
  case NVPTXISD::Suld2DV2I8Trap:
  case NVPTXISD::Suld2DV2I16Trap:
  case NVPTXISD::Suld2DV2I32Trap:
  case NVPTXISD::Suld2DV2I64Trap:
  case NVPTXISD::Suld2DV4I8Trap:
  case NVPTXISD::Suld2DV4I16Trap:
  case NVPTXISD::Suld2DV4I32Trap:
  case NVPTXISD::Suld2DArrayI8Trap:
  case NVPTXISD::Suld2DArrayI16Trap:
  case NVPTXISD::Suld2DArrayI32Trap:
  case NVPTXISD::Suld2DArrayI64Trap:
  case NVPTXISD::Suld2DArrayV2I8Trap:
  case NVPTXISD::Suld2DArrayV2I16Trap:
  case NVPTXISD::Suld2DArrayV2I32Trap:
  case NVPTXISD::Suld2DArrayV2I64Trap:
  case NVPTXISD::Suld2DArrayV4I8Trap:
  case NVPTXISD::Suld2DArrayV4I16Trap:
  case NVPTXISD::Suld2DArrayV4I32Trap:
  case NVPTXISD::Suld3DI8Trap:
  case NVPTXISD::Suld3DI16Trap:
  case NVPTXISD::Suld3DI32Trap:
  case NVPTXISD::Suld3DI64Trap:
  case NVPTXISD::Suld3DV2I8Trap:
  case NVPTXISD::Suld3DV2I16Trap:
  case NVPTXISD::Suld3DV2I32Trap:
  case NVPTXISD::Suld3DV2I64Trap:
  case NVPTXISD::Suld3DV4I8Trap:
  case NVPTXISD::Suld3DV4I16Trap:
  case NVPTXISD::Suld3DV4I32Trap:
  case NVPTXISD::Suld1DI8Zero:
  case NVPTXISD::Suld1DI16Zero:
  case NVPTXISD::Suld1DI32Zero:
  case NVPTXISD::Suld1DI64Zero:
  case NVPTXISD::Suld1DV2I8Zero:
  case NVPTXISD::Suld1DV2I16Zero:
  case NVPTXISD::Suld1DV2I32Zero:
  case NVPTXISD::Suld1DV2I64Zero:
  case NVPTXISD::Suld1DV4I8Zero:
  case NVPTXISD::Suld1DV4I16Zero:
  case NVPTXISD::Suld1DV4I32Zero:
  case NVPTXISD::Suld1DArrayI8Zero:
  case NVPTXISD::Suld1DArrayI16Zero:
  case NVPTXISD::Suld1DArrayI32Zero:
  case NVPTXISD::Suld1DArrayI64Zero:
  case NVPTXISD::Suld1DArrayV2I8Zero:
  case NVPTXISD::Suld1DArrayV2I16Zero:
  case NVPTXISD::Suld1DArrayV2I32Zero:
  case NVPTXISD::Suld1DArrayV2I64Zero:
  case NVPTXISD::Suld1DArrayV4I8Zero:
  case NVPTXISD::Suld1DArrayV4I16Zero:
  case NVPTXISD::Suld1DArrayV4I32Zero:
  case NVPTXISD::Suld2DI8Zero:
  case NVPTXISD::Suld2DI16Zero:
  case NVPTXISD::Suld2DI32Zero:
  case NVPTXISD::Suld2DI64Zero:
  case NVPTXISD::Suld2DV2I8Zero:
  case NVPTXISD::Suld2DV2I16Zero:
  case NVPTXISD::Suld2DV2I32Zero:
  case NVPTXISD::Suld2DV2I64Zero:
  case NVPTXISD::Suld2DV4I8Zero:
  case NVPTXISD::Suld2DV4I16Zero:
  case NVPTXISD::Suld2DV4I32Zero:
  case NVPTXISD::Suld2DArrayI8Zero:
  case NVPTXISD::Suld2DArrayI16Zero:
  case NVPTXISD::Suld2DArrayI32Zero:
  case NVPTXISD::Suld2DArrayI64Zero:
  case NVPTXISD::Suld2DArrayV2I8Zero:
  case NVPTXISD::Suld2DArrayV2I16Zero:
  case NVPTXISD::Suld2DArrayV2I32Zero:
  case NVPTXISD::Suld2DArrayV2I64Zero:
  case NVPTXISD::Suld2DArrayV4I8Zero:
  case NVPTXISD::Suld2DArrayV4I16Zero:
  case NVPTXISD::Suld2DArrayV4I32Zero:
  case NVPTXISD::Suld3DI8Zero:
  case NVPTXISD::Suld3DI16Zero:
  case NVPTXISD::Suld3DI32Zero:
  case NVPTXISD::Suld3DI64Zero:
  case NVPTXISD::Suld3DV2I8Zero:
  case NVPTXISD::Suld3DV2I16Zero:
  case NVPTXISD::Suld3DV2I32Zero:
  case NVPTXISD::Suld3DV2I64Zero:
  case NVPTXISD::Suld3DV4I8Zero:
  case NVPTXISD::Suld3DV4I16Zero:
  case NVPTXISD::Suld3DV4I32Zero:
    if (trySurfaceIntrinsic(N))
      return;
    break;
  case ISD::AND:
  case ISD::SRA:
  case ISD::SRL:
    // Try to select BFE
    if (tryBFE(N))
      return;
    break;
  case ISD::ADDRSPACECAST:
    SelectAddrSpaceCast(N);
    return;
  case ISD::ConstantFP:
    if (tryConstantFP(N))
      return;
    break;
  default:
    break;
  }
  // No custom selection matched: fall back to the tablegen-generated matcher.
  SelectCode(N);
}
520 
tryIntrinsicChain(SDNode * N)521 bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
522   unsigned IID = N->getConstantOperandVal(1);
523   switch (IID) {
524   default:
525     return false;
526   case Intrinsic::nvvm_ldg_global_f:
527   case Intrinsic::nvvm_ldg_global_i:
528   case Intrinsic::nvvm_ldg_global_p:
529   case Intrinsic::nvvm_ldu_global_f:
530   case Intrinsic::nvvm_ldu_global_i:
531   case Intrinsic::nvvm_ldu_global_p:
532     return tryLDGLDU(N);
533   }
534 }
535 
536 // There's no way to specify FP16 and BF16 immediates in .(b)f16 ops, so we
537 // have to load them into an .(b)f16 register first.
tryConstantFP(SDNode * N)538 bool NVPTXDAGToDAGISel::tryConstantFP(SDNode *N) {
539   if (N->getValueType(0) != MVT::f16 && N->getValueType(0) != MVT::bf16)
540     return false;
541   SDValue Val = CurDAG->getTargetConstantFP(
542       cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), N->getValueType(0));
543   SDNode *LoadConstF16 = CurDAG->getMachineNode(
544       (N->getValueType(0) == MVT::f16 ? NVPTX::LOAD_CONST_F16
545                                       : NVPTX::LOAD_CONST_BF16),
546       SDLoc(N), N->getValueType(0), Val);
547   ReplaceNode(N, LoadConstF16);
548   return true;
549 }
550 
551 // Map ISD:CONDCODE value to appropriate CmpMode expected by
552 // NVPTXInstPrinter::printCmpMode()
getPTXCmpMode(const CondCodeSDNode & CondCode,bool FTZ)553 static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
554   using NVPTX::PTXCmpMode::CmpMode;
555   unsigned PTXCmpMode = [](ISD::CondCode CC) {
556     switch (CC) {
557     default:
558       llvm_unreachable("Unexpected condition code.");
559     case ISD::SETOEQ:
560       return CmpMode::EQ;
561     case ISD::SETOGT:
562       return CmpMode::GT;
563     case ISD::SETOGE:
564       return CmpMode::GE;
565     case ISD::SETOLT:
566       return CmpMode::LT;
567     case ISD::SETOLE:
568       return CmpMode::LE;
569     case ISD::SETONE:
570       return CmpMode::NE;
571     case ISD::SETO:
572       return CmpMode::NUM;
573     case ISD::SETUO:
574       return CmpMode::NotANumber;
575     case ISD::SETUEQ:
576       return CmpMode::EQU;
577     case ISD::SETUGT:
578       return CmpMode::GTU;
579     case ISD::SETUGE:
580       return CmpMode::GEU;
581     case ISD::SETULT:
582       return CmpMode::LTU;
583     case ISD::SETULE:
584       return CmpMode::LEU;
585     case ISD::SETUNE:
586       return CmpMode::NEU;
587     case ISD::SETEQ:
588       return CmpMode::EQ;
589     case ISD::SETGT:
590       return CmpMode::GT;
591     case ISD::SETGE:
592       return CmpMode::GE;
593     case ISD::SETLT:
594       return CmpMode::LT;
595     case ISD::SETLE:
596       return CmpMode::LE;
597     case ISD::SETNE:
598       return CmpMode::NE;
599     }
600   }(CondCode.get());
601 
602   if (FTZ)
603     PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
604 
605   return PTXCmpMode;
606 }
607 
SelectSETP_F16X2(SDNode * N)608 bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
609   unsigned PTXCmpMode =
610       getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
611   SDLoc DL(N);
612   SDNode *SetP = CurDAG->getMachineNode(
613       NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
614       N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
615   ReplaceNode(N, SetP);
616   return true;
617 }
618 
SelectSETP_BF16X2(SDNode * N)619 bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(SDNode *N) {
620   unsigned PTXCmpMode =
621       getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
622   SDLoc DL(N);
623   SDNode *SetP = CurDAG->getMachineNode(
624       NVPTX::SETP_bf16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
625       N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
626   ReplaceNode(N, SetP);
627   return true;
628 }
629 
630 // Find all instances of extract_vector_elt that use this v2f16 vector
631 // and coalesce them into a scattering move instruction.
tryEXTRACT_VECTOR_ELEMENT(SDNode * N)632 bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
633   SDValue Vector = N->getOperand(0);
634 
635   // We only care about 16x2 as it's the only real vector type we
636   // need to deal with.
637   MVT VT = Vector.getSimpleValueType();
638   if (!Isv2x16VT(VT))
639     return false;
640   // Find and record all uses of this vector that extract element 0 or 1.
641   SmallVector<SDNode *, 4> E0, E1;
642   for (auto *U : Vector.getNode()->uses()) {
643     if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
644       continue;
645     if (U->getOperand(0) != Vector)
646       continue;
647     if (const ConstantSDNode *IdxConst =
648             dyn_cast<ConstantSDNode>(U->getOperand(1))) {
649       if (IdxConst->getZExtValue() == 0)
650         E0.push_back(U);
651       else if (IdxConst->getZExtValue() == 1)
652         E1.push_back(U);
653       else
654         llvm_unreachable("Invalid vector index.");
655     }
656   }
657 
658   // There's no point scattering f16x2 if we only ever access one
659   // element of it.
660   if (E0.empty() || E1.empty())
661     return false;
662 
663   // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
664   // into f16,f16 SplitF16x2(V)
665   MVT EltVT = VT.getVectorElementType();
666   SDNode *ScatterOp =
667       CurDAG->getMachineNode(NVPTX::I32toV2I16, SDLoc(N), EltVT, EltVT, Vector);
668   for (auto *Node : E0)
669     ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
670   for (auto *Node : E1)
671     ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));
672 
673   return true;
674 }
675 
getCodeAddrSpace(MemSDNode * N)676 static unsigned int getCodeAddrSpace(MemSDNode *N) {
677   const Value *Src = N->getMemOperand()->getValue();
678 
679   if (!Src)
680     return NVPTX::PTXLdStInstCode::GENERIC;
681 
682   if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
683     switch (PT->getAddressSpace()) {
684     case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
685     case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
686     case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
687     case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
688     case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
689     case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
690     default: break;
691     }
692   }
693   return NVPTX::PTXLdStInstCode::GENERIC;
694 }
695 
canLowerToLDG(MemSDNode * N,const NVPTXSubtarget & Subtarget,unsigned CodeAddrSpace,MachineFunction * F)696 static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
697                           unsigned CodeAddrSpace, MachineFunction *F) {
698   // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
699   // space.
700   //
701   // We have two ways of identifying invariant loads: Loads may be explicitly
702   // marked as invariant, or we may infer them to be invariant.
703   //
704   // We currently infer invariance for loads from
705   //  - constant global variables, and
706   //  - kernel function pointer params that are noalias (i.e. __restrict) and
707   //    never written to.
708   //
709   // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
710   // not during the SelectionDAG phase).
711   //
712   // TODO: Infer invariance only at -O2.  We still want to use ldg at -O0 for
713   // explicitly invariant loads because these are how clang tells us to use ldg
714   // when the user uses a builtin.
715   if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
716     return false;
717 
718   if (N->isInvariant())
719     return true;
720 
721   bool IsKernelFn = isKernelFunction(F->getFunction());
722 
723   // We use getUnderlyingObjects() here instead of getUnderlyingObject() mainly
724   // because the former looks through phi nodes while the latter does not. We
725   // need to look through phi nodes to handle pointer induction variables.
726   SmallVector<const Value *, 8> Objs;
727   getUnderlyingObjects(N->getMemOperand()->getValue(), Objs);
728 
729   return all_of(Objs, [&](const Value *V) {
730     if (auto *A = dyn_cast<const Argument>(V))
731       return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
732     if (auto *GV = dyn_cast<const GlobalVariable>(V))
733       return GV->isConstant();
734     return false;
735   });
736 }
737 
tryIntrinsicNoChain(SDNode * N)738 bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
739   unsigned IID = N->getConstantOperandVal(0);
740   switch (IID) {
741   default:
742     return false;
743   case Intrinsic::nvvm_texsurf_handle_internal:
744     SelectTexSurfHandle(N);
745     return true;
746   }
747 }
748 
SelectTexSurfHandle(SDNode * N)749 void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
750   // Op 0 is the intrinsic ID
751   SDValue Wrapper = N->getOperand(1);
752   SDValue GlobalVal = Wrapper.getOperand(0);
753   ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
754                                         MVT::i64, GlobalVal));
755 }
756 
// Select a cvta / cvta.to instruction for an addrspacecast node.  The opcode
// table depends on the direction (to-generic vs from-generic), the pointer
// width, and the short-pointer setting (the _6432 / _3264 suffixed variants
// appear to mix 32- and 64-bit pointer representations — verify against the
// NVPTX instruction definitions).
void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  SDValue Src = N->getOperand(0);
  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
  unsigned DstAddrSpace = CastN->getDestAddressSpace();
  assert(SrcAddrSpace != DstAddrSpace &&
         "addrspacecast must be between different address spaces");

  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
    // Specific to generic
    unsigned Opc;
    switch (SrcAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
                                               : NVPTX::cvta_shared_yes_64)
                         : NVPTX::cvta_shared_yes;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
                                               : NVPTX::cvta_const_yes_64)
                         : NVPTX::cvta_const_yes;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
                                               : NVPTX::cvta_local_yes_64)
                         : NVPTX::cvta_local_yes;
      break;
    }
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                                          Src));
    return;
  } else {
    // Generic to specific
    if (SrcAddrSpace != 0)
      report_fatal_error("Cannot cast between two non-generic address spaces");
    unsigned Opc;
    switch (DstAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
                         : NVPTX::cvta_to_global_yes;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
                                                : NVPTX::cvta_to_shared_yes_64)
                         : NVPTX::cvta_to_shared_yes;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
                                             : NVPTX::cvta_to_const_yes_64)
                         : NVPTX::cvta_to_const_yes;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
                                               : NVPTX::cvta_to_local_yes_64)
                         : NVPTX::cvta_to_local_yes;
      break;
    case ADDRESS_SPACE_PARAM:
      // Param space has no cvta.to form; a dedicated pseudo is used instead.
      Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
                         : NVPTX::nvvm_ptr_gen_to_param;
      break;
    }
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                                          Src));
    return;
  }
}
828 
829 // Helper function template to reduce amount of boilerplate code for
830 // opcode selection.
831 static std::optional<unsigned>
pickOpcodeForVT(MVT::SimpleValueType VT,unsigned Opcode_i8,unsigned Opcode_i16,unsigned Opcode_i32,std::optional<unsigned> Opcode_i64,unsigned Opcode_f32,std::optional<unsigned> Opcode_f64)832 pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8,
833                 unsigned Opcode_i16, unsigned Opcode_i32,
834                 std::optional<unsigned> Opcode_i64, unsigned Opcode_f32,
835                 std::optional<unsigned> Opcode_f64) {
836   switch (VT) {
837   case MVT::i1:
838   case MVT::i8:
839     return Opcode_i8;
840   case MVT::i16:
841     return Opcode_i16;
842   case MVT::i32:
843     return Opcode_i32;
844   case MVT::i64:
845     return Opcode_i64;
846   case MVT::f16:
847   case MVT::bf16:
848     return Opcode_i16;
849   case MVT::v2f16:
850   case MVT::v2bf16:
851   case MVT::v2i16:
852   case MVT::v4i8:
853     return Opcode_i32;
854   case MVT::f32:
855     return Opcode_f32;
856   case MVT::f64:
857     return Opcode_f64;
858   default:
859     return std::nullopt;
860   }
861 }
862 
getLdStRegType(EVT VT)863 static int getLdStRegType(EVT VT) {
864   if (VT.isFloatingPoint())
865     switch (VT.getSimpleVT().SimpleTy) {
866     case MVT::f16:
867     case MVT::bf16:
868     case MVT::v2f16:
869     case MVT::v2bf16:
870       return NVPTX::PTXLdStInstCode::Untyped;
871     default:
872       return NVPTX::PTXLdStInstCode::Float;
873     }
874   else
875     return NVPTX::PTXLdStInstCode::Unsigned;
876 }
877 
tryLoad(SDNode * N)878 bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
879   SDLoc dl(N);
880   MemSDNode *LD = cast<MemSDNode>(N);
881   assert(LD->readMem() && "Expected load");
882   LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
883   EVT LoadedVT = LD->getMemoryVT();
884   SDNode *NVPTXLD = nullptr;
885 
886   // do not support pre/post inc/dec
887   if (PlainLoad && PlainLoad->isIndexed())
888     return false;
889 
890   if (!LoadedVT.isSimple())
891     return false;
892 
893   AtomicOrdering Ordering = LD->getSuccessOrdering();
894   // In order to lower atomic loads with stronger guarantees we would need to
895   // use load.acquire or insert fences. However these features were only added
896   // with PTX ISA 6.0 / sm_70.
897   // TODO: Check if we can actually use the new instructions and implement them.
898   if (isStrongerThanMonotonic(Ordering))
899     return false;
900 
901   // Address Space Setting
902   unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
903   if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
904     return tryLDGLDU(N);
905   }
906 
907   unsigned int PointerSize =
908       CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());
909 
910   // Volatile Setting
911   // - .volatile is only available for .global and .shared
912   // - .volatile has the same memory synchronization semantics as .relaxed.sys
913   bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic;
914   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
915       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
916       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
917     isVolatile = false;
918 
919   // Type Setting: fromType + fromTypeWidth
920   //
921   // Sign   : ISD::SEXTLOAD
922   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
923   //          type is integer
924   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
925   MVT SimpleVT = LoadedVT.getSimpleVT();
926   MVT ScalarVT = SimpleVT.getScalarType();
927   // Read at least 8 bits (predicates are stored as 8-bit values)
928   unsigned fromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
929   unsigned int fromType;
930 
931   // Vector Setting
932   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
933   if (SimpleVT.isVector()) {
934     assert((Isv2x16VT(LoadedVT) || LoadedVT == MVT::v4i8) &&
935            "Unexpected vector type");
936     // v2f16/v2bf16/v2i16 is loaded using ld.b32
937     fromTypeWidth = 32;
938   }
939 
940   if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD))
941     fromType = NVPTX::PTXLdStInstCode::Signed;
942   else
943     fromType = getLdStRegType(ScalarVT);
944 
945   // Create the machine instruction DAG
946   SDValue Chain = N->getOperand(0);
947   SDValue N1 = N->getOperand(1);
948   SDValue Addr;
949   SDValue Offset, Base;
950   std::optional<unsigned> Opcode;
951   MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
952 
953   if (SelectDirectAddr(N1, Addr)) {
954     Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar,
955                              NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
956                              NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
957     if (!Opcode)
958       return false;
959     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
960                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
961                       getI32Imm(fromTypeWidth, dl), Addr, Chain };
962     NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
963   } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
964                                : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
965     Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
966                              NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
967                              NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
968     if (!Opcode)
969       return false;
970     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
971                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
972                       getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
973     NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
974   } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
975                                : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
976     if (PointerSize == 64)
977       Opcode =
978           pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
979                           NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
980                           NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
981     else
982       Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari,
983                                NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
984                                NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
985     if (!Opcode)
986       return false;
987     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
988                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
989                       getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
990     NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
991   } else {
992     if (PointerSize == 64)
993       Opcode =
994           pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
995                           NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64,
996                           NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64);
997     else
998       Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg,
999                                NVPTX::LD_i32_areg, NVPTX::LD_i64_areg,
1000                                NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
1001     if (!Opcode)
1002       return false;
1003     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
1004                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
1005                       getI32Imm(fromTypeWidth, dl), N1, Chain };
1006     NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
1007   }
1008 
1009   if (!NVPTXLD)
1010     return false;
1011 
1012   MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
1013   CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXLD), {MemRef});
1014 
1015   ReplaceNode(N, NVPTXLD);
1016   return true;
1017 }
1018 
tryLoadVector(SDNode * N)1019 bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
1020 
1021   SDValue Chain = N->getOperand(0);
1022   SDValue Op1 = N->getOperand(1);
1023   SDValue Addr, Offset, Base;
1024   std::optional<unsigned> Opcode;
1025   SDLoc DL(N);
1026   SDNode *LD;
1027   MemSDNode *MemSD = cast<MemSDNode>(N);
1028   EVT LoadedVT = MemSD->getMemoryVT();
1029 
1030   if (!LoadedVT.isSimple())
1031     return false;
1032 
1033   // Address Space Setting
1034   unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
1035   if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
1036     return tryLDGLDU(N);
1037   }
1038 
1039   unsigned int PointerSize =
1040       CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
1041 
1042   // Volatile Setting
1043   // - .volatile is only availalble for .global and .shared
1044   bool IsVolatile = MemSD->isVolatile();
1045   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1046       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1047       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1048     IsVolatile = false;
1049 
1050   // Vector Setting
1051   MVT SimpleVT = LoadedVT.getSimpleVT();
1052 
1053   // Type Setting: fromType + fromTypeWidth
1054   //
1055   // Sign   : ISD::SEXTLOAD
1056   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
1057   //          type is integer
1058   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
1059   MVT ScalarVT = SimpleVT.getScalarType();
1060   // Read at least 8 bits (predicates are stored as 8-bit values)
1061   unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
1062   unsigned int FromType;
1063   // The last operand holds the original LoadSDNode::getExtensionType() value
1064   unsigned ExtensionType = cast<ConstantSDNode>(
1065       N->getOperand(N->getNumOperands() - 1))->getZExtValue();
1066   if (ExtensionType == ISD::SEXTLOAD)
1067     FromType = NVPTX::PTXLdStInstCode::Signed;
1068   else
1069     FromType = getLdStRegType(ScalarVT);
1070 
1071   unsigned VecType;
1072 
1073   switch (N->getOpcode()) {
1074   case NVPTXISD::LoadV2:
1075     VecType = NVPTX::PTXLdStInstCode::V2;
1076     break;
1077   case NVPTXISD::LoadV4:
1078     VecType = NVPTX::PTXLdStInstCode::V4;
1079     break;
1080   default:
1081     return false;
1082   }
1083 
1084   EVT EltVT = N->getValueType(0);
1085 
1086   // v8x16 is a special case. PTX doesn't have ld.v8.16
1087   // instruction. Instead, we split the vector into v2x16 chunks and
1088   // load them with ld.v4.b32.
1089   if (Isv2x16VT(EltVT)) {
1090     assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
1091     EltVT = MVT::i32;
1092     FromType = NVPTX::PTXLdStInstCode::Untyped;
1093     FromTypeWidth = 32;
1094   }
1095 
1096   if (SelectDirectAddr(Op1, Addr)) {
1097     switch (N->getOpcode()) {
1098     default:
1099       return false;
1100     case NVPTXISD::LoadV2:
1101       Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1102                                NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1103                                NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1104                                NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
1105       break;
1106     case NVPTXISD::LoadV4:
1107       Opcode =
1108           pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
1109                           NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
1110                           std::nullopt, NVPTX::LDV_f32_v4_avar, std::nullopt);
1111       break;
1112     }
1113     if (!Opcode)
1114       return false;
1115     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1116                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1117                       getI32Imm(FromTypeWidth, DL), Addr, Chain };
1118     LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
1119   } else if (PointerSize == 64
1120                  ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1121                  : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
1122     switch (N->getOpcode()) {
1123     default:
1124       return false;
1125     case NVPTXISD::LoadV2:
1126       Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1127                                NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1128                                NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1129                                NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
1130       break;
1131     case NVPTXISD::LoadV4:
1132       Opcode =
1133           pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
1134                           NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
1135                           std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt);
1136       break;
1137     }
1138     if (!Opcode)
1139       return false;
1140     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1141                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1142                       getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1143     LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
1144   } else if (PointerSize == 64
1145                  ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1146                  : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1147     if (PointerSize == 64) {
1148       switch (N->getOpcode()) {
1149       default:
1150         return false;
1151       case NVPTXISD::LoadV2:
1152         Opcode =
1153             pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1154                             NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64,
1155                             NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64,
1156                             NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64);
1157         break;
1158       case NVPTXISD::LoadV4:
1159         Opcode = pickOpcodeForVT(
1160             EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
1161             NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
1162             NVPTX::LDV_f32_v4_ari_64, std::nullopt);
1163         break;
1164       }
1165     } else {
1166       switch (N->getOpcode()) {
1167       default:
1168         return false;
1169       case NVPTXISD::LoadV2:
1170         Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1171                                  NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1172                                  NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1173                                  NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
1174         break;
1175       case NVPTXISD::LoadV4:
1176         Opcode =
1177             pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
1178                             NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
1179                             std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt);
1180         break;
1181       }
1182     }
1183     if (!Opcode)
1184       return false;
1185     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1186                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1187                       getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1188 
1189     LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
1190   } else {
1191     if (PointerSize == 64) {
1192       switch (N->getOpcode()) {
1193       default:
1194         return false;
1195       case NVPTXISD::LoadV2:
1196         Opcode = pickOpcodeForVT(
1197             EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
1198             NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1199             NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1200             NVPTX::LDV_f64_v2_areg_64);
1201         break;
1202       case NVPTXISD::LoadV4:
1203         Opcode = pickOpcodeForVT(
1204             EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
1205             NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
1206             NVPTX::LDV_f32_v4_areg_64, std::nullopt);
1207         break;
1208       }
1209     } else {
1210       switch (N->getOpcode()) {
1211       default:
1212         return false;
1213       case NVPTXISD::LoadV2:
1214         Opcode =
1215             pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
1216                             NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1217                             NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f32_v2_areg,
1218                             NVPTX::LDV_f64_v2_areg);
1219         break;
1220       case NVPTXISD::LoadV4:
1221         Opcode =
1222             pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
1223                             NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
1224                             std::nullopt, NVPTX::LDV_f32_v4_areg, std::nullopt);
1225         break;
1226       }
1227     }
1228     if (!Opcode)
1229       return false;
1230     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1231                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1232                       getI32Imm(FromTypeWidth, DL), Op1, Chain };
1233     LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
1234   }
1235 
1236   MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
1237   CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
1238 
1239   ReplaceNode(N, LD);
1240   return true;
1241 }
1242 
tryLDGLDU(SDNode * N)1243 bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1244 
1245   SDValue Chain = N->getOperand(0);
1246   SDValue Op1;
1247   MemSDNode *Mem;
1248   bool IsLDG = true;
1249 
1250   // If this is an LDG intrinsic, the address is the third operand. If its an
1251   // LDG/LDU SD node (from custom vector handling), then its the second operand
1252   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1253     Op1 = N->getOperand(2);
1254     Mem = cast<MemIntrinsicSDNode>(N);
1255     unsigned IID = N->getConstantOperandVal(1);
1256     switch (IID) {
1257     default:
1258       return false;
1259     case Intrinsic::nvvm_ldg_global_f:
1260     case Intrinsic::nvvm_ldg_global_i:
1261     case Intrinsic::nvvm_ldg_global_p:
1262       IsLDG = true;
1263       break;
1264     case Intrinsic::nvvm_ldu_global_f:
1265     case Intrinsic::nvvm_ldu_global_i:
1266     case Intrinsic::nvvm_ldu_global_p:
1267       IsLDG = false;
1268       break;
1269     }
1270   } else {
1271     Op1 = N->getOperand(1);
1272     Mem = cast<MemSDNode>(N);
1273   }
1274 
1275   std::optional<unsigned> Opcode;
1276   SDLoc DL(N);
1277   SDNode *LD;
1278   SDValue Base, Offset, Addr;
1279   EVT OrigType = N->getValueType(0);
1280 
1281   EVT EltVT = Mem->getMemoryVT();
1282   unsigned NumElts = 1;
1283   if (EltVT.isVector()) {
1284     NumElts = EltVT.getVectorNumElements();
1285     EltVT = EltVT.getVectorElementType();
1286     // vectors of 16bits type are loaded/stored as multiples of v2x16 elements.
1287     if ((EltVT == MVT::f16 && OrigType == MVT::v2f16) ||
1288         (EltVT == MVT::bf16 && OrigType == MVT::v2bf16) ||
1289         (EltVT == MVT::i16 && OrigType == MVT::v2i16)) {
1290       assert(NumElts % 2 == 0 && "Vector must have even number of elements");
1291       EltVT = OrigType;
1292       NumElts /= 2;
1293     } else if (OrigType == MVT::v4i8) {
1294       EltVT = OrigType;
1295       NumElts = 1;
1296     }
1297   }
1298 
1299   // Build the "promoted" result VTList for the load. If we are really loading
1300   // i8s, then the return type will be promoted to i16 since we do not expose
1301   // 8-bit registers in NVPTX.
1302   EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1303   SmallVector<EVT, 5> InstVTs;
1304   for (unsigned i = 0; i != NumElts; ++i) {
1305     InstVTs.push_back(NodeVT);
1306   }
1307   InstVTs.push_back(MVT::Other);
1308   SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1309 
1310   if (SelectDirectAddr(Op1, Addr)) {
1311     switch (N->getOpcode()) {
1312     default:
1313       return false;
1314     case ISD::LOAD:
1315     case ISD::INTRINSIC_W_CHAIN:
1316       if (IsLDG)
1317         Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1318                                  NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1319                                  NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1320                                  NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1321                                  NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1322                                  NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1323                                  NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1324       else
1325         Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1326                                  NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1327                                  NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1328                                  NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1329                                  NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1330                                  NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1331                                  NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1332       break;
1333     case NVPTXISD::LoadV2:
1334     case NVPTXISD::LDGV2:
1335       Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1336                                NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1337                                NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1338                                NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1339                                NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1340                                NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1341                                NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
1342       break;
1343     case NVPTXISD::LDUV2:
1344       Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1345                                NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1346                                NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1347                                NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1348                                NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1349                                NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1350                                NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1351       break;
1352     case NVPTXISD::LoadV4:
1353     case NVPTXISD::LDGV4:
1354       Opcode = pickOpcodeForVT(
1355           EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
1356           NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1357           NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
1358           NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
1359       break;
1360     case NVPTXISD::LDUV4:
1361       Opcode = pickOpcodeForVT(
1362           EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
1363           NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1364           NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
1365           NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
1366       break;
1367     }
1368     if (!Opcode)
1369       return false;
1370     SDValue Ops[] = { Addr, Chain };
1371     LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
1372   } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1373                           : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1374     if (TM.is64Bit()) {
1375       switch (N->getOpcode()) {
1376       default:
1377         return false;
1378       case ISD::LOAD:
1379       case ISD::INTRINSIC_W_CHAIN:
1380         if (IsLDG)
1381           Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1382                                        NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1383                                        NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1384                                        NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1385                                        NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1386                                        NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1387                                        NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1388         else
1389           Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1390                                        NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1391                                        NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1392                                        NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1393                                        NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1394                                        NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1395                                        NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
1396         break;
1397       case NVPTXISD::LoadV2:
1398       case NVPTXISD::LDGV2:
1399         Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1400                                      NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1401                                      NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1402                                      NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1403                                      NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1404                                      NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1405                                      NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
1406         break;
1407       case NVPTXISD::LDUV2:
1408         Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1409                                      NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1410                                      NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1411                                      NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1412                                      NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1413                                      NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1414                                      NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
1415         break;
1416       case NVPTXISD::LoadV4:
1417       case NVPTXISD::LDGV4:
1418         Opcode = pickOpcodeForVT(
1419             EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
1420             NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1421             NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
1422             NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
1423         break;
1424       case NVPTXISD::LDUV4:
1425         Opcode = pickOpcodeForVT(
1426             EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
1427             NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1428             NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
1429             NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
1430         break;
1431       }
1432     } else {
1433       switch (N->getOpcode()) {
1434       default:
1435         return false;
1436       case ISD::LOAD:
1437       case ISD::INTRINSIC_W_CHAIN:
1438         if (IsLDG)
1439           Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1440                                    NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1441                                    NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1442                                    NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1443                                    NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1444                                    NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1445                                    NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1446         else
1447           Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1448                                    NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1449                                    NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1450                                    NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1451                                    NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1452                                    NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1453                                    NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
1454         break;
1455       case NVPTXISD::LoadV2:
1456       case NVPTXISD::LDGV2:
1457         Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1458                                  NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1459                                  NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1460                                  NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1461                                  NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1462                                  NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1463                                  NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
1464         break;
1465       case NVPTXISD::LDUV2:
1466         Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1467                                  NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1468                                  NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1469                                  NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1470                                  NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1471                                  NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1472                                  NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
1473         break;
1474       case NVPTXISD::LoadV4:
1475       case NVPTXISD::LDGV4:
1476         Opcode = pickOpcodeForVT(
1477             EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
1478             NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1479             NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
1480             NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
1481         break;
1482       case NVPTXISD::LDUV4:
1483         Opcode = pickOpcodeForVT(
1484             EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
1485             NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1486             NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
1487             NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
1488         break;
1489       }
1490     }
1491     if (!Opcode)
1492       return false;
1493     SDValue Ops[] = {Base, Offset, Chain};
1494     LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
1495   } else {
1496     if (TM.is64Bit()) {
1497       switch (N->getOpcode()) {
1498       default:
1499         return false;
1500       case ISD::LOAD:
1501       case ISD::INTRINSIC_W_CHAIN:
1502         if (IsLDG)
1503           Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1504                                        NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1505                                        NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1506                                        NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1507                                        NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1508                                        NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1509                                        NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1510         else
1511           Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1512                                        NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1513                                        NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1514                                        NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1515                                        NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1516                                        NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1517                                        NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
1518         break;
1519       case NVPTXISD::LoadV2:
1520       case NVPTXISD::LDGV2:
1521         Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1522                                      NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1523                                      NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1524                                      NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1525                                      NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1526                                      NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1527                                      NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
1528         break;
1529       case NVPTXISD::LDUV2:
1530         Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1531                                      NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1532                                      NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1533                                      NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1534                                      NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1535                                      NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1536                                      NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
1537         break;
1538       case NVPTXISD::LoadV4:
1539       case NVPTXISD::LDGV4:
1540         Opcode = pickOpcodeForVT(
1541             EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
1542             NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1543             NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
1544             NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
1545         break;
1546       case NVPTXISD::LDUV4:
1547         Opcode = pickOpcodeForVT(
1548             EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
1549             NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1550             NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
1551             NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
1552         break;
1553       }
1554     } else {
1555       switch (N->getOpcode()) {
1556       default:
1557         return false;
1558       case ISD::LOAD:
1559       case ISD::INTRINSIC_W_CHAIN:
1560         if (IsLDG)
1561           Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1562                                    NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1563                                    NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1564                                    NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1565                                    NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1566                                    NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1567                                    NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1568         else
1569           Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1570                                    NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1571                                    NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1572                                    NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1573                                    NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1574                                    NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1575                                    NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
1576         break;
1577       case NVPTXISD::LoadV2:
1578       case NVPTXISD::LDGV2:
1579         Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1580                                  NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1581                                  NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1582                                  NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1583                                  NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1584                                  NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1585                                  NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
1586         break;
1587       case NVPTXISD::LDUV2:
1588         Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1589                                  NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1590                                  NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1591                                  NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1592                                  NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1593                                  NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1594                                  NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
1595         break;
1596       case NVPTXISD::LoadV4:
1597       case NVPTXISD::LDGV4:
1598         Opcode = pickOpcodeForVT(
1599             EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
1600             NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1601             NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
1602             NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
1603         break;
1604       case NVPTXISD::LDUV4:
1605         Opcode = pickOpcodeForVT(
1606             EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
1607             NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1608             NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
1609             NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);
1610         break;
1611       }
1612     }
1613     if (!Opcode)
1614       return false;
1615     SDValue Ops[] = { Op1, Chain };
1616     LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
1617   }
1618 
1619   // For automatic generation of LDG (through SelectLoad[Vector], not the
1620   // intrinsics), we may have an extending load like:
1621   //
1622   //   i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
1623   //
1624   // In this case, the matching logic above will select a load for the original
1625   // memory type (in this case, i8) and our types will not match (the node needs
1626   // to return an i32 in this case). Our LDG/LDU nodes do not support the
1627   // concept of sign-/zero-extension, so emulate it here by adding an explicit
1628   // CVT instruction. Ptxas should clean up any redundancies here.
1629 
1630   LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
1631 
1632   if (OrigType != EltVT &&
1633       (LdNode || (OrigType.isFloatingPoint() && EltVT.isFloatingPoint()))) {
1634     // We have an extending-load. The instruction we selected operates on the
1635     // smaller type, but the SDNode we are replacing has the larger type. We
1636     // need to emit a CVT to make the types match.
1637     unsigned CvtOpc =
1638         GetConvertOpcode(OrigType.getSimpleVT(), EltVT.getSimpleVT(), LdNode);
1639 
1640     // For each output value, apply the manual sign/zero-extension and make sure
1641     // all users of the load go through that CVT.
1642     for (unsigned i = 0; i != NumElts; ++i) {
1643       SDValue Res(LD, i);
1644       SDValue OrigVal(N, i);
1645 
1646       SDNode *CvtNode =
1647         CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
1648                                CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
1649                                                          DL, MVT::i32));
1650       ReplaceUses(OrigVal, SDValue(CvtNode, 0));
1651     }
1652   }
1653 
1654   ReplaceNode(N, LD);
1655   return true;
1656 }
1657 
// Select a plain or monotonic-atomic scalar store into the matching NVPTX ST_*
// machine instruction. Tries the addressing modes in decreasing order of
// specificity (direct address, symbol+imm, reg+imm, plain register) and emits
// the first one that matches. Returns false (leaving N unselected) when the
// store cannot be handled here, e.g. indexed stores or orderings stronger
// than monotonic.
bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
  SDLoc dl(N);
  MemSDNode *ST = cast<MemSDNode>(N);
  assert(ST->writeMem() && "Expected store");
  StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N);
  AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N);
  assert((PlainStore || AtomicStore) && "Expected store");
  EVT StoreVT = ST->getMemoryVT();
  SDNode *NVPTXST = nullptr;

  // do not support pre/post inc/dec
  if (PlainStore && PlainStore->isIndexed())
    return false;

  if (!StoreVT.isSimple())
    return false;

  AtomicOrdering Ordering = ST->getSuccessOrdering();
  // In order to lower atomic stores with stronger guarantees we would need to
  // use store.release or insert fences. However these features were only added
  // with PTX ISA 6.0 / sm_70.
  // TODO: Check if we can actually use the new instructions and implement them.
  if (isStrongerThanMonotonic(Ordering))
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  // - .volatile has the same memory synchronization semantics as .relaxed.sys
  bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic;
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  //
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
  if (SimpleVT.isVector()) {
    assert((Isv2x16VT(StoreVT) || StoreVT == MVT::v4i8) &&
           "Unexpected vector type");
    // v2x16 is stored using st.b32
    toTypeWidth = 32;
  }

  unsigned int toType = getLdStRegType(ScalarVT);

  // Create the machine instruction DAG
  SDValue Chain = ST->getChain();
  SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Addr;
  SDValue Offset, Base;
  std::optional<unsigned> Opcode;
  MVT::SimpleValueType SourceVT =
      Value.getNode()->getSimpleValueType(0).SimpleTy;

  // Addressing mode 1: direct address (global symbol / immediate).
  if (SelectDirectAddr(BasePtr, Addr)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
                             NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
                             NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Addr,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  // Addressing mode 2: symbol + immediate offset.
  } else if (PointerSize == 64
                 ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)
                 : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
                             NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
                             NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Base,
                     Offset,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  // Addressing mode 3: register + immediate offset (32- or 64-bit pointer).
  } else if (PointerSize == 64
                 ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
                 : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
                          NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
                          NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
                               NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
                               NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
    if (!Opcode)
      return false;

    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Base,
                     Offset,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  // Addressing mode 4: plain register (fallback).
  } else {
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
                          NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
                          NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
                               NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
                               NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     BasePtr,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  }

  if (!NVPTXST)
    return false;

  // Carry the original memory operand over so later passes see the right
  // aliasing/ordering information.
  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXST), {MemRef});
  ReplaceNode(N, NVPTXST);
  return true;
}
1814 
tryStoreVector(SDNode * N)1815 bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
1816   SDValue Chain = N->getOperand(0);
1817   SDValue Op1 = N->getOperand(1);
1818   SDValue Addr, Offset, Base;
1819   std::optional<unsigned> Opcode;
1820   SDLoc DL(N);
1821   SDNode *ST;
1822   EVT EltVT = Op1.getValueType();
1823   MemSDNode *MemSD = cast<MemSDNode>(N);
1824   EVT StoreVT = MemSD->getMemoryVT();
1825 
1826   // Address Space Setting
1827   unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
1828   if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1829     report_fatal_error("Cannot store to pointer that points to constant "
1830                        "memory space");
1831   }
1832   unsigned int PointerSize =
1833       CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
1834 
1835   // Volatile Setting
1836   // - .volatile is only availalble for .global and .shared
1837   bool IsVolatile = MemSD->isVolatile();
1838   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1839       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1840       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1841     IsVolatile = false;
1842 
1843   // Type Setting: toType + toTypeWidth
1844   // - for integer type, always use 'u'
1845   assert(StoreVT.isSimple() && "Store value is not simple");
1846   MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1847   unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1848   unsigned ToType = getLdStRegType(ScalarVT);
1849 
1850   SmallVector<SDValue, 12> StOps;
1851   SDValue N2;
1852   unsigned VecType;
1853 
1854   switch (N->getOpcode()) {
1855   case NVPTXISD::StoreV2:
1856     VecType = NVPTX::PTXLdStInstCode::V2;
1857     StOps.push_back(N->getOperand(1));
1858     StOps.push_back(N->getOperand(2));
1859     N2 = N->getOperand(3);
1860     break;
1861   case NVPTXISD::StoreV4:
1862     VecType = NVPTX::PTXLdStInstCode::V4;
1863     StOps.push_back(N->getOperand(1));
1864     StOps.push_back(N->getOperand(2));
1865     StOps.push_back(N->getOperand(3));
1866     StOps.push_back(N->getOperand(4));
1867     N2 = N->getOperand(5);
1868     break;
1869   default:
1870     return false;
1871   }
1872 
1873   // v8x16 is a special case. PTX doesn't have st.v8.x16
1874   // instruction. Instead, we split the vector into v2x16 chunks and
1875   // store them with st.v4.b32.
1876   if (Isv2x16VT(EltVT)) {
1877     assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode.");
1878     EltVT = MVT::i32;
1879     ToType = NVPTX::PTXLdStInstCode::Untyped;
1880     ToTypeWidth = 32;
1881   }
1882 
1883   StOps.push_back(getI32Imm(IsVolatile, DL));
1884   StOps.push_back(getI32Imm(CodeAddrSpace, DL));
1885   StOps.push_back(getI32Imm(VecType, DL));
1886   StOps.push_back(getI32Imm(ToType, DL));
1887   StOps.push_back(getI32Imm(ToTypeWidth, DL));
1888 
1889   if (SelectDirectAddr(N2, Addr)) {
1890     switch (N->getOpcode()) {
1891     default:
1892       return false;
1893     case NVPTXISD::StoreV2:
1894       Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1895                                NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1896                                NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1897                                NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1898       break;
1899     case NVPTXISD::StoreV4:
1900       Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1901                                NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
1902                                NVPTX::STV_i32_v4_avar, std::nullopt,
1903                                NVPTX::STV_f32_v4_avar, std::nullopt);
1904       break;
1905     }
1906     StOps.push_back(Addr);
1907   } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1908                                : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1909     switch (N->getOpcode()) {
1910     default:
1911       return false;
1912     case NVPTXISD::StoreV2:
1913       Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1914                                NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1915                                NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1916                                NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1917       break;
1918     case NVPTXISD::StoreV4:
1919       Opcode =
1920           pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
1921                           NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
1922                           std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
1923       break;
1924     }
1925     StOps.push_back(Base);
1926     StOps.push_back(Offset);
1927   } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1928                                : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1929     if (PointerSize == 64) {
1930       switch (N->getOpcode()) {
1931       default:
1932         return false;
1933       case NVPTXISD::StoreV2:
1934         Opcode =
1935             pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1936                             NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64,
1937                             NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64,
1938                             NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64);
1939         break;
1940       case NVPTXISD::StoreV4:
1941         Opcode = pickOpcodeForVT(
1942             EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
1943             NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
1944             NVPTX::STV_f32_v4_ari_64, std::nullopt);
1945         break;
1946       }
1947     } else {
1948       switch (N->getOpcode()) {
1949       default:
1950         return false;
1951       case NVPTXISD::StoreV2:
1952         Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1953                                  NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
1954                                  NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
1955                                  NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
1956         break;
1957       case NVPTXISD::StoreV4:
1958         Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1959                                  NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
1960                                  NVPTX::STV_i32_v4_ari, std::nullopt,
1961                                  NVPTX::STV_f32_v4_ari, std::nullopt);
1962         break;
1963       }
1964     }
1965     StOps.push_back(Base);
1966     StOps.push_back(Offset);
1967   } else {
1968     if (PointerSize == 64) {
1969       switch (N->getOpcode()) {
1970       default:
1971         return false;
1972       case NVPTXISD::StoreV2:
1973         Opcode = pickOpcodeForVT(
1974             EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
1975             NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
1976             NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
1977             NVPTX::STV_f64_v2_areg_64);
1978         break;
1979       case NVPTXISD::StoreV4:
1980         Opcode = pickOpcodeForVT(
1981             EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
1982             NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt,
1983             NVPTX::STV_f32_v4_areg_64, std::nullopt);
1984         break;
1985       }
1986     } else {
1987       switch (N->getOpcode()) {
1988       default:
1989         return false;
1990       case NVPTXISD::StoreV2:
1991         Opcode =
1992             pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
1993                             NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
1994                             NVPTX::STV_i64_v2_areg, NVPTX::STV_f32_v2_areg,
1995                             NVPTX::STV_f64_v2_areg);
1996         break;
1997       case NVPTXISD::StoreV4:
1998         Opcode =
1999             pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
2000                             NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
2001                             std::nullopt, NVPTX::STV_f32_v4_areg, std::nullopt);
2002         break;
2003       }
2004     }
2005     StOps.push_back(N2);
2006   }
2007 
2008   if (!Opcode)
2009     return false;
2010 
2011   StOps.push_back(Chain);
2012 
2013   ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, StOps);
2014 
2015   MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2016   CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});
2017 
2018   ReplaceNode(N, ST);
2019   return true;
2020 }
2021 
tryLoadParam(SDNode * Node)2022 bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
2023   SDValue Chain = Node->getOperand(0);
2024   SDValue Offset = Node->getOperand(2);
2025   SDValue Glue = Node->getOperand(3);
2026   SDLoc DL(Node);
2027   MemSDNode *Mem = cast<MemSDNode>(Node);
2028 
2029   unsigned VecSize;
2030   switch (Node->getOpcode()) {
2031   default:
2032     return false;
2033   case NVPTXISD::LoadParam:
2034     VecSize = 1;
2035     break;
2036   case NVPTXISD::LoadParamV2:
2037     VecSize = 2;
2038     break;
2039   case NVPTXISD::LoadParamV4:
2040     VecSize = 4;
2041     break;
2042   }
2043 
2044   EVT EltVT = Node->getValueType(0);
2045   EVT MemVT = Mem->getMemoryVT();
2046 
2047   std::optional<unsigned> Opcode;
2048 
2049   switch (VecSize) {
2050   default:
2051     return false;
2052   case 1:
2053     Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
2054                              NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2055                              NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2056                              NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2057     break;
2058   case 2:
2059     Opcode =
2060         pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
2061                         NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2062                         NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32,
2063                         NVPTX::LoadParamMemV2F64);
2064     break;
2065   case 4:
2066     Opcode =
2067         pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
2068                         NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32,
2069                         std::nullopt, NVPTX::LoadParamMemV4F32, std::nullopt);
2070     break;
2071   }
2072   if (!Opcode)
2073     return false;
2074 
2075   SDVTList VTs;
2076   if (VecSize == 1) {
2077     VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2078   } else if (VecSize == 2) {
2079     VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2080   } else {
2081     EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2082     VTs = CurDAG->getVTList(EVTs);
2083   }
2084 
2085   unsigned OffsetVal = Offset->getAsZExtVal();
2086 
2087   SmallVector<SDValue, 2> Ops;
2088   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2089   Ops.push_back(Chain);
2090   Ops.push_back(Glue);
2091 
2092   ReplaceNode(Node, CurDAG->getMachineNode(*Opcode, DL, VTs, Ops));
2093   return true;
2094 }
2095 
tryStoreRetval(SDNode * N)2096 bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
2097   SDLoc DL(N);
2098   SDValue Chain = N->getOperand(0);
2099   SDValue Offset = N->getOperand(1);
2100   unsigned OffsetVal = Offset->getAsZExtVal();
2101   MemSDNode *Mem = cast<MemSDNode>(N);
2102 
2103   // How many elements do we have?
2104   unsigned NumElts = 1;
2105   switch (N->getOpcode()) {
2106   default:
2107     return false;
2108   case NVPTXISD::StoreRetval:
2109     NumElts = 1;
2110     break;
2111   case NVPTXISD::StoreRetvalV2:
2112     NumElts = 2;
2113     break;
2114   case NVPTXISD::StoreRetvalV4:
2115     NumElts = 4;
2116     break;
2117   }
2118 
2119   // Build vector of operands
2120   SmallVector<SDValue, 6> Ops;
2121   for (unsigned i = 0; i < NumElts; ++i)
2122     Ops.push_back(N->getOperand(i + 2));
2123   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2124   Ops.push_back(Chain);
2125 
2126   // Determine target opcode
2127   // If we have an i1, use an 8-bit store. The lowering code in
2128   // NVPTXISelLowering will have already emitted an upcast.
2129   std::optional<unsigned> Opcode = 0;
2130   switch (NumElts) {
2131   default:
2132     return false;
2133   case 1:
2134     Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2135                              NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2136                              NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2137                              NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2138     break;
2139   case 2:
2140     Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2141                              NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2142                              NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2143                              NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2144     break;
2145   case 4:
2146     Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2147                              NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2148                              NVPTX::StoreRetvalV4I32, std::nullopt,
2149                              NVPTX::StoreRetvalV4F32, std::nullopt);
2150     break;
2151   }
2152   if (!Opcode)
2153     return false;
2154 
2155   SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
2156   MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2157   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
2158 
2159   ReplaceNode(N, Ret);
2160   return true;
2161 }
2162 
tryStoreParam(SDNode * N)2163 bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2164   SDLoc DL(N);
2165   SDValue Chain = N->getOperand(0);
2166   SDValue Param = N->getOperand(1);
2167   unsigned ParamVal = Param->getAsZExtVal();
2168   SDValue Offset = N->getOperand(2);
2169   unsigned OffsetVal = Offset->getAsZExtVal();
2170   MemSDNode *Mem = cast<MemSDNode>(N);
2171   SDValue Glue = N->getOperand(N->getNumOperands() - 1);
2172 
2173   // How many elements do we have?
2174   unsigned NumElts = 1;
2175   switch (N->getOpcode()) {
2176   default:
2177     return false;
2178   case NVPTXISD::StoreParamU32:
2179   case NVPTXISD::StoreParamS32:
2180   case NVPTXISD::StoreParam:
2181     NumElts = 1;
2182     break;
2183   case NVPTXISD::StoreParamV2:
2184     NumElts = 2;
2185     break;
2186   case NVPTXISD::StoreParamV4:
2187     NumElts = 4;
2188     break;
2189   }
2190 
2191   // Build vector of operands
2192   SmallVector<SDValue, 8> Ops;
2193   for (unsigned i = 0; i < NumElts; ++i)
2194     Ops.push_back(N->getOperand(i + 3));
2195   Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2196   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2197   Ops.push_back(Chain);
2198   Ops.push_back(Glue);
2199 
2200   // Determine target opcode
2201   // If we have an i1, use an 8-bit store. The lowering code in
2202   // NVPTXISelLowering will have already emitted an upcast.
2203   std::optional<unsigned> Opcode = 0;
2204   switch (N->getOpcode()) {
2205   default:
2206     switch (NumElts) {
2207     default:
2208       return false;
2209     case 1:
2210       Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2211                                NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2212                                NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2213                                NVPTX::StoreParamF32, NVPTX::StoreParamF64);
2214       break;
2215     case 2:
2216       Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2217                                NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2218                                NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2219                                NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
2220       break;
2221     case 4:
2222       Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2223                                NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2224                                NVPTX::StoreParamV4I32, std::nullopt,
2225                                NVPTX::StoreParamV4F32, std::nullopt);
2226       break;
2227     }
2228     if (!Opcode)
2229       return false;
2230     break;
2231   // Special case: if we have a sign-extend/zero-extend node, insert the
2232   // conversion instruction first, and use that as the value operand to
2233   // the selected StoreParam node.
2234   case NVPTXISD::StoreParamU32: {
2235     Opcode = NVPTX::StoreParamI32;
2236     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
2237                                                 MVT::i32);
2238     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2239                                          MVT::i32, Ops[0], CvtNone);
2240     Ops[0] = SDValue(Cvt, 0);
2241     break;
2242   }
2243   case NVPTXISD::StoreParamS32: {
2244     Opcode = NVPTX::StoreParamI32;
2245     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
2246                                                 MVT::i32);
2247     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2248                                          MVT::i32, Ops[0], CvtNone);
2249     Ops[0] = SDValue(Cvt, 0);
2250     break;
2251   }
2252   }
2253 
2254   SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2255   SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, RetVTs, Ops);
2256   MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2257   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
2258 
2259   ReplaceNode(N, Ret);
2260   return true;
2261 }
2262 
// Select a texture fetch / tld4 gather intrinsic node. Maps the NVPTXISD
// texture opcode on N to the matching NVPTX machine instruction and replaces
// N with a MachineSDNode carrying the same value types and (reordered)
// operands. Returns true on success, false if N is not a texture opcode so
// that generic selection can handle it.
tryTextureIntrinsic(SDNode * N)2263 bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
2264   unsigned Opc = 0;
2265 
2266   switch (N->getOpcode()) {
2267   default: return false;
  // Non-unified texturing mode: the *_RR instruction variants take both the
  // texture handle and the sampler handle as register operands.
2268   case NVPTXISD::Tex1DFloatS32:
2269     Opc = NVPTX::TEX_1D_F32_S32_RR;
2270     break;
2271   case NVPTXISD::Tex1DFloatFloat:
2272     Opc = NVPTX::TEX_1D_F32_F32_RR;
2273     break;
2274   case NVPTXISD::Tex1DFloatFloatLevel:
2275     Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
2276     break;
2277   case NVPTXISD::Tex1DFloatFloatGrad:
2278     Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
2279     break;
2280   case NVPTXISD::Tex1DS32S32:
2281     Opc = NVPTX::TEX_1D_S32_S32_RR;
2282     break;
2283   case NVPTXISD::Tex1DS32Float:
2284     Opc = NVPTX::TEX_1D_S32_F32_RR;
2285     break;
2286   case NVPTXISD::Tex1DS32FloatLevel:
2287     Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
2288     break;
2289   case NVPTXISD::Tex1DS32FloatGrad:
2290     Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
2291     break;
2292   case NVPTXISD::Tex1DU32S32:
2293     Opc = NVPTX::TEX_1D_U32_S32_RR;
2294     break;
2295   case NVPTXISD::Tex1DU32Float:
2296     Opc = NVPTX::TEX_1D_U32_F32_RR;
2297     break;
2298   case NVPTXISD::Tex1DU32FloatLevel:
2299     Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
2300     break;
2301   case NVPTXISD::Tex1DU32FloatGrad:
2302     Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
2303     break;
2304   case NVPTXISD::Tex1DArrayFloatS32:
2305     Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
2306     break;
2307   case NVPTXISD::Tex1DArrayFloatFloat:
2308     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
2309     break;
2310   case NVPTXISD::Tex1DArrayFloatFloatLevel:
2311     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
2312     break;
2313   case NVPTXISD::Tex1DArrayFloatFloatGrad:
2314     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
2315     break;
2316   case NVPTXISD::Tex1DArrayS32S32:
2317     Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
2318     break;
2319   case NVPTXISD::Tex1DArrayS32Float:
2320     Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
2321     break;
2322   case NVPTXISD::Tex1DArrayS32FloatLevel:
2323     Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
2324     break;
2325   case NVPTXISD::Tex1DArrayS32FloatGrad:
2326     Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
2327     break;
2328   case NVPTXISD::Tex1DArrayU32S32:
2329     Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
2330     break;
2331   case NVPTXISD::Tex1DArrayU32Float:
2332     Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
2333     break;
2334   case NVPTXISD::Tex1DArrayU32FloatLevel:
2335     Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
2336     break;
2337   case NVPTXISD::Tex1DArrayU32FloatGrad:
2338     Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
2339     break;
2340   case NVPTXISD::Tex2DFloatS32:
2341     Opc = NVPTX::TEX_2D_F32_S32_RR;
2342     break;
2343   case NVPTXISD::Tex2DFloatFloat:
2344     Opc = NVPTX::TEX_2D_F32_F32_RR;
2345     break;
2346   case NVPTXISD::Tex2DFloatFloatLevel:
2347     Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
2348     break;
2349   case NVPTXISD::Tex2DFloatFloatGrad:
2350     Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
2351     break;
2352   case NVPTXISD::Tex2DS32S32:
2353     Opc = NVPTX::TEX_2D_S32_S32_RR;
2354     break;
2355   case NVPTXISD::Tex2DS32Float:
2356     Opc = NVPTX::TEX_2D_S32_F32_RR;
2357     break;
2358   case NVPTXISD::Tex2DS32FloatLevel:
2359     Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
2360     break;
2361   case NVPTXISD::Tex2DS32FloatGrad:
2362     Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
2363     break;
2364   case NVPTXISD::Tex2DU32S32:
2365     Opc = NVPTX::TEX_2D_U32_S32_RR;
2366     break;
2367   case NVPTXISD::Tex2DU32Float:
2368     Opc = NVPTX::TEX_2D_U32_F32_RR;
2369     break;
2370   case NVPTXISD::Tex2DU32FloatLevel:
2371     Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
2372     break;
2373   case NVPTXISD::Tex2DU32FloatGrad:
2374     Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
2375     break;
2376   case NVPTXISD::Tex2DArrayFloatS32:
2377     Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
2378     break;
2379   case NVPTXISD::Tex2DArrayFloatFloat:
2380     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
2381     break;
2382   case NVPTXISD::Tex2DArrayFloatFloatLevel:
2383     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
2384     break;
2385   case NVPTXISD::Tex2DArrayFloatFloatGrad:
2386     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
2387     break;
2388   case NVPTXISD::Tex2DArrayS32S32:
2389     Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
2390     break;
2391   case NVPTXISD::Tex2DArrayS32Float:
2392     Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
2393     break;
2394   case NVPTXISD::Tex2DArrayS32FloatLevel:
2395     Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
2396     break;
2397   case NVPTXISD::Tex2DArrayS32FloatGrad:
2398     Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
2399     break;
2400   case NVPTXISD::Tex2DArrayU32S32:
2401     Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
2402     break;
2403   case NVPTXISD::Tex2DArrayU32Float:
2404     Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
2405     break;
2406   case NVPTXISD::Tex2DArrayU32FloatLevel:
2407     Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
2408     break;
2409   case NVPTXISD::Tex2DArrayU32FloatGrad:
2410     Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
2411     break;
2412   case NVPTXISD::Tex3DFloatS32:
2413     Opc = NVPTX::TEX_3D_F32_S32_RR;
2414     break;
2415   case NVPTXISD::Tex3DFloatFloat:
2416     Opc = NVPTX::TEX_3D_F32_F32_RR;
2417     break;
2418   case NVPTXISD::Tex3DFloatFloatLevel:
2419     Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
2420     break;
2421   case NVPTXISD::Tex3DFloatFloatGrad:
2422     Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
2423     break;
2424   case NVPTXISD::Tex3DS32S32:
2425     Opc = NVPTX::TEX_3D_S32_S32_RR;
2426     break;
2427   case NVPTXISD::Tex3DS32Float:
2428     Opc = NVPTX::TEX_3D_S32_F32_RR;
2429     break;
2430   case NVPTXISD::Tex3DS32FloatLevel:
2431     Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
2432     break;
2433   case NVPTXISD::Tex3DS32FloatGrad:
2434     Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
2435     break;
2436   case NVPTXISD::Tex3DU32S32:
2437     Opc = NVPTX::TEX_3D_U32_S32_RR;
2438     break;
2439   case NVPTXISD::Tex3DU32Float:
2440     Opc = NVPTX::TEX_3D_U32_F32_RR;
2441     break;
2442   case NVPTXISD::Tex3DU32FloatLevel:
2443     Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
2444     break;
2445   case NVPTXISD::Tex3DU32FloatGrad:
2446     Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
2447     break;
2448   case NVPTXISD::TexCubeFloatFloat:
2449     Opc = NVPTX::TEX_CUBE_F32_F32_RR;
2450     break;
2451   case NVPTXISD::TexCubeFloatFloatLevel:
2452     Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
2453     break;
2454   case NVPTXISD::TexCubeS32Float:
2455     Opc = NVPTX::TEX_CUBE_S32_F32_RR;
2456     break;
2457   case NVPTXISD::TexCubeS32FloatLevel:
2458     Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
2459     break;
2460   case NVPTXISD::TexCubeU32Float:
2461     Opc = NVPTX::TEX_CUBE_U32_F32_RR;
2462     break;
2463   case NVPTXISD::TexCubeU32FloatLevel:
2464     Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
2465     break;
2466   case NVPTXISD::TexCubeArrayFloatFloat:
2467     Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
2468     break;
2469   case NVPTXISD::TexCubeArrayFloatFloatLevel:
2470     Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
2471     break;
2472   case NVPTXISD::TexCubeArrayS32Float:
2473     Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
2474     break;
2475   case NVPTXISD::TexCubeArrayS32FloatLevel:
2476     Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
2477     break;
2478   case NVPTXISD::TexCubeArrayU32Float:
2479     Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
2480     break;
2481   case NVPTXISD::TexCubeArrayU32FloatLevel:
2482     Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
2483     break;
  // tld4 four-texel gather, non-unified mode; R/G/B/A selects the component
  // gathered from each of the four texels.
2484   case NVPTXISD::Tld4R2DFloatFloat:
2485     Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
2486     break;
2487   case NVPTXISD::Tld4G2DFloatFloat:
2488     Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
2489     break;
2490   case NVPTXISD::Tld4B2DFloatFloat:
2491     Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
2492     break;
2493   case NVPTXISD::Tld4A2DFloatFloat:
2494     Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
2495     break;
2496   case NVPTXISD::Tld4R2DS64Float:
2497     Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
2498     break;
2499   case NVPTXISD::Tld4G2DS64Float:
2500     Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
2501     break;
2502   case NVPTXISD::Tld4B2DS64Float:
2503     Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
2504     break;
2505   case NVPTXISD::Tld4A2DS64Float:
2506     Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
2507     break;
2508   case NVPTXISD::Tld4R2DU64Float:
2509     Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
2510     break;
2511   case NVPTXISD::Tld4G2DU64Float:
2512     Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
2513     break;
2514   case NVPTXISD::Tld4B2DU64Float:
2515     Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
2516     break;
2517   case NVPTXISD::Tld4A2DU64Float:
2518     Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
2519     break;
  // Unified texturing mode: the *_R instruction variants take a single
  // texture handle register (no separate sampler operand).
2520   case NVPTXISD::TexUnified1DFloatS32:
2521     Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
2522     break;
2523   case NVPTXISD::TexUnified1DFloatFloat:
2524     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
2525     break;
2526   case NVPTXISD::TexUnified1DFloatFloatLevel:
2527     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
2528     break;
2529   case NVPTXISD::TexUnified1DFloatFloatGrad:
2530     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
2531     break;
2532   case NVPTXISD::TexUnified1DS32S32:
2533     Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
2534     break;
2535   case NVPTXISD::TexUnified1DS32Float:
2536     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
2537     break;
2538   case NVPTXISD::TexUnified1DS32FloatLevel:
2539     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
2540     break;
2541   case NVPTXISD::TexUnified1DS32FloatGrad:
2542     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
2543     break;
2544   case NVPTXISD::TexUnified1DU32S32:
2545     Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
2546     break;
2547   case NVPTXISD::TexUnified1DU32Float:
2548     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
2549     break;
2550   case NVPTXISD::TexUnified1DU32FloatLevel:
2551     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
2552     break;
2553   case NVPTXISD::TexUnified1DU32FloatGrad:
2554     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
2555     break;
2556   case NVPTXISD::TexUnified1DArrayFloatS32:
2557     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
2558     break;
2559   case NVPTXISD::TexUnified1DArrayFloatFloat:
2560     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
2561     break;
2562   case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
2563     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
2564     break;
2565   case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
2566     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
2567     break;
2568   case NVPTXISD::TexUnified1DArrayS32S32:
2569     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
2570     break;
2571   case NVPTXISD::TexUnified1DArrayS32Float:
2572     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
2573     break;
2574   case NVPTXISD::TexUnified1DArrayS32FloatLevel:
2575     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
2576     break;
2577   case NVPTXISD::TexUnified1DArrayS32FloatGrad:
2578     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
2579     break;
2580   case NVPTXISD::TexUnified1DArrayU32S32:
2581     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
2582     break;
2583   case NVPTXISD::TexUnified1DArrayU32Float:
2584     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
2585     break;
2586   case NVPTXISD::TexUnified1DArrayU32FloatLevel:
2587     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
2588     break;
2589   case NVPTXISD::TexUnified1DArrayU32FloatGrad:
2590     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
2591     break;
2592   case NVPTXISD::TexUnified2DFloatS32:
2593     Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
2594     break;
2595   case NVPTXISD::TexUnified2DFloatFloat:
2596     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
2597     break;
2598   case NVPTXISD::TexUnified2DFloatFloatLevel:
2599     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
2600     break;
2601   case NVPTXISD::TexUnified2DFloatFloatGrad:
2602     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
2603     break;
2604   case NVPTXISD::TexUnified2DS32S32:
2605     Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
2606     break;
2607   case NVPTXISD::TexUnified2DS32Float:
2608     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
2609     break;
2610   case NVPTXISD::TexUnified2DS32FloatLevel:
2611     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
2612     break;
2613   case NVPTXISD::TexUnified2DS32FloatGrad:
2614     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
2615     break;
2616   case NVPTXISD::TexUnified2DU32S32:
2617     Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
2618     break;
2619   case NVPTXISD::TexUnified2DU32Float:
2620     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
2621     break;
2622   case NVPTXISD::TexUnified2DU32FloatLevel:
2623     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
2624     break;
2625   case NVPTXISD::TexUnified2DU32FloatGrad:
2626     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
2627     break;
2628   case NVPTXISD::TexUnified2DArrayFloatS32:
2629     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
2630     break;
2631   case NVPTXISD::TexUnified2DArrayFloatFloat:
2632     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
2633     break;
2634   case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
2635     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
2636     break;
2637   case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
2638     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
2639     break;
2640   case NVPTXISD::TexUnified2DArrayS32S32:
2641     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
2642     break;
2643   case NVPTXISD::TexUnified2DArrayS32Float:
2644     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
2645     break;
2646   case NVPTXISD::TexUnified2DArrayS32FloatLevel:
2647     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
2648     break;
2649   case NVPTXISD::TexUnified2DArrayS32FloatGrad:
2650     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
2651     break;
2652   case NVPTXISD::TexUnified2DArrayU32S32:
2653     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
2654     break;
2655   case NVPTXISD::TexUnified2DArrayU32Float:
2656     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
2657     break;
2658   case NVPTXISD::TexUnified2DArrayU32FloatLevel:
2659     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
2660     break;
2661   case NVPTXISD::TexUnified2DArrayU32FloatGrad:
2662     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
2663     break;
2664   case NVPTXISD::TexUnified3DFloatS32:
2665     Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
2666     break;
2667   case NVPTXISD::TexUnified3DFloatFloat:
2668     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
2669     break;
2670   case NVPTXISD::TexUnified3DFloatFloatLevel:
2671     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
2672     break;
2673   case NVPTXISD::TexUnified3DFloatFloatGrad:
2674     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
2675     break;
2676   case NVPTXISD::TexUnified3DS32S32:
2677     Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
2678     break;
2679   case NVPTXISD::TexUnified3DS32Float:
2680     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
2681     break;
2682   case NVPTXISD::TexUnified3DS32FloatLevel:
2683     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
2684     break;
2685   case NVPTXISD::TexUnified3DS32FloatGrad:
2686     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
2687     break;
2688   case NVPTXISD::TexUnified3DU32S32:
2689     Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
2690     break;
2691   case NVPTXISD::TexUnified3DU32Float:
2692     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
2693     break;
2694   case NVPTXISD::TexUnified3DU32FloatLevel:
2695     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
2696     break;
2697   case NVPTXISD::TexUnified3DU32FloatGrad:
2698     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
2699     break;
2700   case NVPTXISD::TexUnifiedCubeFloatFloat:
2701     Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
2702     break;
2703   case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
2704     Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
2705     break;
2706   case NVPTXISD::TexUnifiedCubeS32Float:
2707     Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
2708     break;
2709   case NVPTXISD::TexUnifiedCubeS32FloatLevel:
2710     Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
2711     break;
2712   case NVPTXISD::TexUnifiedCubeU32Float:
2713     Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
2714     break;
2715   case NVPTXISD::TexUnifiedCubeU32FloatLevel:
2716     Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
2717     break;
2718   case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
2719     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
2720     break;
2721   case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
2722     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
2723     break;
2724   case NVPTXISD::TexUnifiedCubeArrayS32Float:
2725     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
2726     break;
2727   case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
2728     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
2729     break;
2730   case NVPTXISD::TexUnifiedCubeArrayU32Float:
2731     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
2732     break;
2733   case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
2734     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
2735     break;
  // tld4 gather, unified mode.
2736   case NVPTXISD::Tld4UnifiedR2DFloatFloat:
2737     Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
2738     break;
2739   case NVPTXISD::Tld4UnifiedG2DFloatFloat:
2740     Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
2741     break;
2742   case NVPTXISD::Tld4UnifiedB2DFloatFloat:
2743     Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
2744     break;
2745   case NVPTXISD::Tld4UnifiedA2DFloatFloat:
2746     Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
2747     break;
2748   case NVPTXISD::Tld4UnifiedR2DS64Float:
2749     Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
2750     break;
2751   case NVPTXISD::Tld4UnifiedG2DS64Float:
2752     Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
2753     break;
2754   case NVPTXISD::Tld4UnifiedB2DS64Float:
2755     Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
2756     break;
2757   case NVPTXISD::Tld4UnifiedA2DS64Float:
2758     Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
2759     break;
2760   case NVPTXISD::Tld4UnifiedR2DU64Float:
2761     Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
2762     break;
2763   case NVPTXISD::Tld4UnifiedG2DU64Float:
2764     Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
2765     break;
2766   case NVPTXISD::Tld4UnifiedB2DU64Float:
2767     Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
2768     break;
2769   case NVPTXISD::Tld4UnifiedA2DU64Float:
2770     Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
2771     break;
  // Unified-mode cube/cube-array fetches with explicit gradients.
2772   case NVPTXISD::TexUnifiedCubeFloatFloatGrad:
2773     Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_R;
2774     break;
2775   case NVPTXISD::TexUnifiedCubeS32FloatGrad:
2776     Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_R;
2777     break;
2778   case NVPTXISD::TexUnifiedCubeU32FloatGrad:
2779     Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_R;
2780     break;
2781   case NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad:
2782     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_R;
2783     break;
2784   case NVPTXISD::TexUnifiedCubeArrayS32FloatGrad:
2785     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_R;
2786     break;
2787   case NVPTXISD::TexUnifiedCubeArrayU32FloatGrad:
2788     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_R;
2789     break;
2790   }
2791 
  // Copy over operands: N's operand list is (chain, args...); the machine
  // instruction expects the arguments first, so drop the chain from the
  // front and re-append it at the end.
2792   // Copy over operands
2793   SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
2794   Ops.push_back(N->getOperand(0)); // Move chain to the back.
2795 
2796   ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
2797   return true;
2798 }
2799 
trySurfaceIntrinsic(SDNode * N)2800 bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
2801   unsigned Opc = 0;
2802   switch (N->getOpcode()) {
2803   default: return false;
2804   case NVPTXISD::Suld1DI8Clamp:
2805     Opc = NVPTX::SULD_1D_I8_CLAMP_R;
2806     break;
2807   case NVPTXISD::Suld1DI16Clamp:
2808     Opc = NVPTX::SULD_1D_I16_CLAMP_R;
2809     break;
2810   case NVPTXISD::Suld1DI32Clamp:
2811     Opc = NVPTX::SULD_1D_I32_CLAMP_R;
2812     break;
2813   case NVPTXISD::Suld1DI64Clamp:
2814     Opc = NVPTX::SULD_1D_I64_CLAMP_R;
2815     break;
2816   case NVPTXISD::Suld1DV2I8Clamp:
2817     Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
2818     break;
2819   case NVPTXISD::Suld1DV2I16Clamp:
2820     Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
2821     break;
2822   case NVPTXISD::Suld1DV2I32Clamp:
2823     Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
2824     break;
2825   case NVPTXISD::Suld1DV2I64Clamp:
2826     Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
2827     break;
2828   case NVPTXISD::Suld1DV4I8Clamp:
2829     Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
2830     break;
2831   case NVPTXISD::Suld1DV4I16Clamp:
2832     Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
2833     break;
2834   case NVPTXISD::Suld1DV4I32Clamp:
2835     Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
2836     break;
2837   case NVPTXISD::Suld1DArrayI8Clamp:
2838     Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
2839     break;
2840   case NVPTXISD::Suld1DArrayI16Clamp:
2841     Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
2842     break;
2843   case NVPTXISD::Suld1DArrayI32Clamp:
2844     Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
2845     break;
2846   case NVPTXISD::Suld1DArrayI64Clamp:
2847     Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
2848     break;
2849   case NVPTXISD::Suld1DArrayV2I8Clamp:
2850     Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
2851     break;
2852   case NVPTXISD::Suld1DArrayV2I16Clamp:
2853     Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
2854     break;
2855   case NVPTXISD::Suld1DArrayV2I32Clamp:
2856     Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
2857     break;
2858   case NVPTXISD::Suld1DArrayV2I64Clamp:
2859     Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
2860     break;
2861   case NVPTXISD::Suld1DArrayV4I8Clamp:
2862     Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
2863     break;
2864   case NVPTXISD::Suld1DArrayV4I16Clamp:
2865     Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
2866     break;
2867   case NVPTXISD::Suld1DArrayV4I32Clamp:
2868     Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
2869     break;
2870   case NVPTXISD::Suld2DI8Clamp:
2871     Opc = NVPTX::SULD_2D_I8_CLAMP_R;
2872     break;
2873   case NVPTXISD::Suld2DI16Clamp:
2874     Opc = NVPTX::SULD_2D_I16_CLAMP_R;
2875     break;
2876   case NVPTXISD::Suld2DI32Clamp:
2877     Opc = NVPTX::SULD_2D_I32_CLAMP_R;
2878     break;
2879   case NVPTXISD::Suld2DI64Clamp:
2880     Opc = NVPTX::SULD_2D_I64_CLAMP_R;
2881     break;
2882   case NVPTXISD::Suld2DV2I8Clamp:
2883     Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
2884     break;
2885   case NVPTXISD::Suld2DV2I16Clamp:
2886     Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
2887     break;
2888   case NVPTXISD::Suld2DV2I32Clamp:
2889     Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
2890     break;
2891   case NVPTXISD::Suld2DV2I64Clamp:
2892     Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
2893     break;
2894   case NVPTXISD::Suld2DV4I8Clamp:
2895     Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
2896     break;
2897   case NVPTXISD::Suld2DV4I16Clamp:
2898     Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
2899     break;
2900   case NVPTXISD::Suld2DV4I32Clamp:
2901     Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
2902     break;
2903   case NVPTXISD::Suld2DArrayI8Clamp:
2904     Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
2905     break;
2906   case NVPTXISD::Suld2DArrayI16Clamp:
2907     Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
2908     break;
2909   case NVPTXISD::Suld2DArrayI32Clamp:
2910     Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
2911     break;
2912   case NVPTXISD::Suld2DArrayI64Clamp:
2913     Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
2914     break;
2915   case NVPTXISD::Suld2DArrayV2I8Clamp:
2916     Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
2917     break;
2918   case NVPTXISD::Suld2DArrayV2I16Clamp:
2919     Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
2920     break;
2921   case NVPTXISD::Suld2DArrayV2I32Clamp:
2922     Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
2923     break;
2924   case NVPTXISD::Suld2DArrayV2I64Clamp:
2925     Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
2926     break;
2927   case NVPTXISD::Suld2DArrayV4I8Clamp:
2928     Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
2929     break;
2930   case NVPTXISD::Suld2DArrayV4I16Clamp:
2931     Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
2932     break;
2933   case NVPTXISD::Suld2DArrayV4I32Clamp:
2934     Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
2935     break;
2936   case NVPTXISD::Suld3DI8Clamp:
2937     Opc = NVPTX::SULD_3D_I8_CLAMP_R;
2938     break;
2939   case NVPTXISD::Suld3DI16Clamp:
2940     Opc = NVPTX::SULD_3D_I16_CLAMP_R;
2941     break;
2942   case NVPTXISD::Suld3DI32Clamp:
2943     Opc = NVPTX::SULD_3D_I32_CLAMP_R;
2944     break;
2945   case NVPTXISD::Suld3DI64Clamp:
2946     Opc = NVPTX::SULD_3D_I64_CLAMP_R;
2947     break;
2948   case NVPTXISD::Suld3DV2I8Clamp:
2949     Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
2950     break;
2951   case NVPTXISD::Suld3DV2I16Clamp:
2952     Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
2953     break;
2954   case NVPTXISD::Suld3DV2I32Clamp:
2955     Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
2956     break;
2957   case NVPTXISD::Suld3DV2I64Clamp:
2958     Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
2959     break;
2960   case NVPTXISD::Suld3DV4I8Clamp:
2961     Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
2962     break;
2963   case NVPTXISD::Suld3DV4I16Clamp:
2964     Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
2965     break;
2966   case NVPTXISD::Suld3DV4I32Clamp:
2967     Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
2968     break;
2969   case NVPTXISD::Suld1DI8Trap:
2970     Opc = NVPTX::SULD_1D_I8_TRAP_R;
2971     break;
2972   case NVPTXISD::Suld1DI16Trap:
2973     Opc = NVPTX::SULD_1D_I16_TRAP_R;
2974     break;
2975   case NVPTXISD::Suld1DI32Trap:
2976     Opc = NVPTX::SULD_1D_I32_TRAP_R;
2977     break;
2978   case NVPTXISD::Suld1DI64Trap:
2979     Opc = NVPTX::SULD_1D_I64_TRAP_R;
2980     break;
2981   case NVPTXISD::Suld1DV2I8Trap:
2982     Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
2983     break;
2984   case NVPTXISD::Suld1DV2I16Trap:
2985     Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
2986     break;
2987   case NVPTXISD::Suld1DV2I32Trap:
2988     Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
2989     break;
2990   case NVPTXISD::Suld1DV2I64Trap:
2991     Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
2992     break;
2993   case NVPTXISD::Suld1DV4I8Trap:
2994     Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
2995     break;
2996   case NVPTXISD::Suld1DV4I16Trap:
2997     Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
2998     break;
2999   case NVPTXISD::Suld1DV4I32Trap:
3000     Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
3001     break;
3002   case NVPTXISD::Suld1DArrayI8Trap:
3003     Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
3004     break;
3005   case NVPTXISD::Suld1DArrayI16Trap:
3006     Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
3007     break;
3008   case NVPTXISD::Suld1DArrayI32Trap:
3009     Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
3010     break;
3011   case NVPTXISD::Suld1DArrayI64Trap:
3012     Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
3013     break;
3014   case NVPTXISD::Suld1DArrayV2I8Trap:
3015     Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
3016     break;
3017   case NVPTXISD::Suld1DArrayV2I16Trap:
3018     Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
3019     break;
3020   case NVPTXISD::Suld1DArrayV2I32Trap:
3021     Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
3022     break;
3023   case NVPTXISD::Suld1DArrayV2I64Trap:
3024     Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
3025     break;
3026   case NVPTXISD::Suld1DArrayV4I8Trap:
3027     Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
3028     break;
3029   case NVPTXISD::Suld1DArrayV4I16Trap:
3030     Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
3031     break;
3032   case NVPTXISD::Suld1DArrayV4I32Trap:
3033     Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
3034     break;
3035   case NVPTXISD::Suld2DI8Trap:
3036     Opc = NVPTX::SULD_2D_I8_TRAP_R;
3037     break;
3038   case NVPTXISD::Suld2DI16Trap:
3039     Opc = NVPTX::SULD_2D_I16_TRAP_R;
3040     break;
3041   case NVPTXISD::Suld2DI32Trap:
3042     Opc = NVPTX::SULD_2D_I32_TRAP_R;
3043     break;
3044   case NVPTXISD::Suld2DI64Trap:
3045     Opc = NVPTX::SULD_2D_I64_TRAP_R;
3046     break;
3047   case NVPTXISD::Suld2DV2I8Trap:
3048     Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
3049     break;
3050   case NVPTXISD::Suld2DV2I16Trap:
3051     Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
3052     break;
3053   case NVPTXISD::Suld2DV2I32Trap:
3054     Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
3055     break;
3056   case NVPTXISD::Suld2DV2I64Trap:
3057     Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
3058     break;
3059   case NVPTXISD::Suld2DV4I8Trap:
3060     Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
3061     break;
3062   case NVPTXISD::Suld2DV4I16Trap:
3063     Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
3064     break;
3065   case NVPTXISD::Suld2DV4I32Trap:
3066     Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
3067     break;
3068   case NVPTXISD::Suld2DArrayI8Trap:
3069     Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
3070     break;
3071   case NVPTXISD::Suld2DArrayI16Trap:
3072     Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
3073     break;
3074   case NVPTXISD::Suld2DArrayI32Trap:
3075     Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
3076     break;
3077   case NVPTXISD::Suld2DArrayI64Trap:
3078     Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
3079     break;
3080   case NVPTXISD::Suld2DArrayV2I8Trap:
3081     Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
3082     break;
3083   case NVPTXISD::Suld2DArrayV2I16Trap:
3084     Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
3085     break;
3086   case NVPTXISD::Suld2DArrayV2I32Trap:
3087     Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
3088     break;
3089   case NVPTXISD::Suld2DArrayV2I64Trap:
3090     Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
3091     break;
3092   case NVPTXISD::Suld2DArrayV4I8Trap:
3093     Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
3094     break;
3095   case NVPTXISD::Suld2DArrayV4I16Trap:
3096     Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
3097     break;
3098   case NVPTXISD::Suld2DArrayV4I32Trap:
3099     Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
3100     break;
3101   case NVPTXISD::Suld3DI8Trap:
3102     Opc = NVPTX::SULD_3D_I8_TRAP_R;
3103     break;
3104   case NVPTXISD::Suld3DI16Trap:
3105     Opc = NVPTX::SULD_3D_I16_TRAP_R;
3106     break;
3107   case NVPTXISD::Suld3DI32Trap:
3108     Opc = NVPTX::SULD_3D_I32_TRAP_R;
3109     break;
3110   case NVPTXISD::Suld3DI64Trap:
3111     Opc = NVPTX::SULD_3D_I64_TRAP_R;
3112     break;
3113   case NVPTXISD::Suld3DV2I8Trap:
3114     Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
3115     break;
3116   case NVPTXISD::Suld3DV2I16Trap:
3117     Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
3118     break;
3119   case NVPTXISD::Suld3DV2I32Trap:
3120     Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
3121     break;
3122   case NVPTXISD::Suld3DV2I64Trap:
3123     Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
3124     break;
3125   case NVPTXISD::Suld3DV4I8Trap:
3126     Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
3127     break;
3128   case NVPTXISD::Suld3DV4I16Trap:
3129     Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
3130     break;
3131   case NVPTXISD::Suld3DV4I32Trap:
3132     Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
3133     break;
3134   case NVPTXISD::Suld1DI8Zero:
3135     Opc = NVPTX::SULD_1D_I8_ZERO_R;
3136     break;
3137   case NVPTXISD::Suld1DI16Zero:
3138     Opc = NVPTX::SULD_1D_I16_ZERO_R;
3139     break;
3140   case NVPTXISD::Suld1DI32Zero:
3141     Opc = NVPTX::SULD_1D_I32_ZERO_R;
3142     break;
3143   case NVPTXISD::Suld1DI64Zero:
3144     Opc = NVPTX::SULD_1D_I64_ZERO_R;
3145     break;
3146   case NVPTXISD::Suld1DV2I8Zero:
3147     Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
3148     break;
3149   case NVPTXISD::Suld1DV2I16Zero:
3150     Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
3151     break;
3152   case NVPTXISD::Suld1DV2I32Zero:
3153     Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
3154     break;
3155   case NVPTXISD::Suld1DV2I64Zero:
3156     Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
3157     break;
3158   case NVPTXISD::Suld1DV4I8Zero:
3159     Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
3160     break;
3161   case NVPTXISD::Suld1DV4I16Zero:
3162     Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
3163     break;
3164   case NVPTXISD::Suld1DV4I32Zero:
3165     Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
3166     break;
3167   case NVPTXISD::Suld1DArrayI8Zero:
3168     Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
3169     break;
3170   case NVPTXISD::Suld1DArrayI16Zero:
3171     Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
3172     break;
3173   case NVPTXISD::Suld1DArrayI32Zero:
3174     Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
3175     break;
3176   case NVPTXISD::Suld1DArrayI64Zero:
3177     Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
3178     break;
3179   case NVPTXISD::Suld1DArrayV2I8Zero:
3180     Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
3181     break;
3182   case NVPTXISD::Suld1DArrayV2I16Zero:
3183     Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
3184     break;
3185   case NVPTXISD::Suld1DArrayV2I32Zero:
3186     Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
3187     break;
3188   case NVPTXISD::Suld1DArrayV2I64Zero:
3189     Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
3190     break;
3191   case NVPTXISD::Suld1DArrayV4I8Zero:
3192     Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
3193     break;
3194   case NVPTXISD::Suld1DArrayV4I16Zero:
3195     Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
3196     break;
3197   case NVPTXISD::Suld1DArrayV4I32Zero:
3198     Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
3199     break;
3200   case NVPTXISD::Suld2DI8Zero:
3201     Opc = NVPTX::SULD_2D_I8_ZERO_R;
3202     break;
3203   case NVPTXISD::Suld2DI16Zero:
3204     Opc = NVPTX::SULD_2D_I16_ZERO_R;
3205     break;
3206   case NVPTXISD::Suld2DI32Zero:
3207     Opc = NVPTX::SULD_2D_I32_ZERO_R;
3208     break;
3209   case NVPTXISD::Suld2DI64Zero:
3210     Opc = NVPTX::SULD_2D_I64_ZERO_R;
3211     break;
3212   case NVPTXISD::Suld2DV2I8Zero:
3213     Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
3214     break;
3215   case NVPTXISD::Suld2DV2I16Zero:
3216     Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
3217     break;
3218   case NVPTXISD::Suld2DV2I32Zero:
3219     Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
3220     break;
3221   case NVPTXISD::Suld2DV2I64Zero:
3222     Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
3223     break;
3224   case NVPTXISD::Suld2DV4I8Zero:
3225     Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
3226     break;
3227   case NVPTXISD::Suld2DV4I16Zero:
3228     Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
3229     break;
3230   case NVPTXISD::Suld2DV4I32Zero:
3231     Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
3232     break;
3233   case NVPTXISD::Suld2DArrayI8Zero:
3234     Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
3235     break;
3236   case NVPTXISD::Suld2DArrayI16Zero:
3237     Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
3238     break;
3239   case NVPTXISD::Suld2DArrayI32Zero:
3240     Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
3241     break;
3242   case NVPTXISD::Suld2DArrayI64Zero:
3243     Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
3244     break;
3245   case NVPTXISD::Suld2DArrayV2I8Zero:
3246     Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
3247     break;
3248   case NVPTXISD::Suld2DArrayV2I16Zero:
3249     Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
3250     break;
3251   case NVPTXISD::Suld2DArrayV2I32Zero:
3252     Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
3253     break;
3254   case NVPTXISD::Suld2DArrayV2I64Zero:
3255     Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
3256     break;
3257   case NVPTXISD::Suld2DArrayV4I8Zero:
3258     Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
3259     break;
3260   case NVPTXISD::Suld2DArrayV4I16Zero:
3261     Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
3262     break;
3263   case NVPTXISD::Suld2DArrayV4I32Zero:
3264     Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
3265     break;
3266   case NVPTXISD::Suld3DI8Zero:
3267     Opc = NVPTX::SULD_3D_I8_ZERO_R;
3268     break;
3269   case NVPTXISD::Suld3DI16Zero:
3270     Opc = NVPTX::SULD_3D_I16_ZERO_R;
3271     break;
3272   case NVPTXISD::Suld3DI32Zero:
3273     Opc = NVPTX::SULD_3D_I32_ZERO_R;
3274     break;
3275   case NVPTXISD::Suld3DI64Zero:
3276     Opc = NVPTX::SULD_3D_I64_ZERO_R;
3277     break;
3278   case NVPTXISD::Suld3DV2I8Zero:
3279     Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
3280     break;
3281   case NVPTXISD::Suld3DV2I16Zero:
3282     Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
3283     break;
3284   case NVPTXISD::Suld3DV2I32Zero:
3285     Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
3286     break;
3287   case NVPTXISD::Suld3DV2I64Zero:
3288     Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
3289     break;
3290   case NVPTXISD::Suld3DV4I8Zero:
3291     Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
3292     break;
3293   case NVPTXISD::Suld3DV4I16Zero:
3294     Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
3295     break;
3296   case NVPTXISD::Suld3DV4I32Zero:
3297     Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
3298     break;
3299   }
3300 
3301   // Copy over operands
3302   SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
3303   Ops.push_back(N->getOperand(0)); // Move chain to the back.
3304 
3305   ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3306   return true;
3307 }
3308 
3309 
3310 /// SelectBFE - Look for instruction sequences that can be made more efficient
3311 /// by using the 'bfe' (bit-field extract) PTX instruction
tryBFE(SDNode * N)3312 bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
3313   SDLoc DL(N);
3314   SDValue LHS = N->getOperand(0);
3315   SDValue RHS = N->getOperand(1);
3316   SDValue Len;
3317   SDValue Start;
3318   SDValue Val;
3319   bool IsSigned = false;
3320 
3321   if (N->getOpcode() == ISD::AND) {
3322     // Canonicalize the operands
3323     // We want 'and %val, %mask'
3324     if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3325       std::swap(LHS, RHS);
3326     }
3327 
3328     ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3329     if (!Mask) {
3330       // We need a constant mask on the RHS of the AND
3331       return false;
3332     }
3333 
3334     // Extract the mask bits
3335     uint64_t MaskVal = Mask->getZExtValue();
3336     if (!isMask_64(MaskVal)) {
3337       // We *could* handle shifted masks here, but doing so would require an
3338       // 'and' operation to fix up the low-order bits so we would trade
3339       // shr+and for bfe+and, which has the same throughput
3340       return false;
3341     }
3342 
3343     // How many bits are in our mask?
3344     int64_t NumBits = countr_one(MaskVal);
3345     Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3346 
3347     if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3348       // We have a 'srl/and' pair, extract the effective start bit and length
3349       Val = LHS.getNode()->getOperand(0);
3350       Start = LHS.getNode()->getOperand(1);
3351       ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3352       if (StartConst) {
3353         uint64_t StartVal = StartConst->getZExtValue();
3354         // How many "good" bits do we have left?  "good" is defined here as bits
3355         // that exist in the original value, not shifted in.
3356         int64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3357         if (NumBits > GoodBits) {
3358           // Do not handle the case where bits have been shifted in. In theory
3359           // we could handle this, but the cost is likely higher than just
3360           // emitting the srl/and pair.
3361           return false;
3362         }
3363         Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
3364       } else {
3365         // Do not handle the case where the shift amount (can be zero if no srl
3366         // was found) is not constant. We could handle this case, but it would
3367         // require run-time logic that would be more expensive than just
3368         // emitting the srl/and pair.
3369         return false;
3370       }
3371     } else {
3372       // Do not handle the case where the LHS of the and is not a shift. While
3373       // it would be trivial to handle this case, it would just transform
3374       // 'and' -> 'bfe', but 'and' has higher-throughput.
3375       return false;
3376     }
3377   } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3378     if (LHS->getOpcode() == ISD::AND) {
3379       ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3380       if (!ShiftCnst) {
3381         // Shift amount must be constant
3382         return false;
3383       }
3384 
3385       uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3386 
3387       SDValue AndLHS = LHS->getOperand(0);
3388       SDValue AndRHS = LHS->getOperand(1);
3389 
3390       // Canonicalize the AND to have the mask on the RHS
3391       if (isa<ConstantSDNode>(AndLHS)) {
3392         std::swap(AndLHS, AndRHS);
3393       }
3394 
3395       ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3396       if (!MaskCnst) {
3397         // Mask must be constant
3398         return false;
3399       }
3400 
3401       uint64_t MaskVal = MaskCnst->getZExtValue();
3402       uint64_t NumZeros;
3403       uint64_t NumBits;
3404       if (isMask_64(MaskVal)) {
3405         NumZeros = 0;
3406         // The number of bits in the result bitfield will be the number of
3407         // trailing ones (the AND) minus the number of bits we shift off
3408         NumBits = llvm::countr_one(MaskVal) - ShiftAmt;
3409       } else if (isShiftedMask_64(MaskVal)) {
3410         NumZeros = llvm::countr_zero(MaskVal);
3411         unsigned NumOnes = llvm::countr_one(MaskVal >> NumZeros);
3412         // The number of bits in the result bitfield will be the number of
3413         // trailing zeros plus the number of set bits in the mask minus the
3414         // number of bits we shift off
3415         NumBits = NumZeros + NumOnes - ShiftAmt;
3416       } else {
3417         // This is not a mask we can handle
3418         return false;
3419       }
3420 
3421       if (ShiftAmt < NumZeros) {
3422         // Handling this case would require extra logic that would make this
3423         // transformation non-profitable
3424         return false;
3425       }
3426 
3427       Val = AndLHS;
3428       Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
3429       Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3430     } else if (LHS->getOpcode() == ISD::SHL) {
3431       // Here, we have a pattern like:
3432       //
3433       // (sra (shl val, NN), MM)
3434       // or
3435       // (srl (shl val, NN), MM)
3436       //
3437       // If MM >= NN, we can efficiently optimize this with bfe
3438       Val = LHS->getOperand(0);
3439 
3440       SDValue ShlRHS = LHS->getOperand(1);
3441       ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3442       if (!ShlCnst) {
3443         // Shift amount must be constant
3444         return false;
3445       }
3446       uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3447 
3448       SDValue ShrRHS = RHS;
3449       ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3450       if (!ShrCnst) {
3451         // Shift amount must be constant
3452         return false;
3453       }
3454       uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3455 
3456       // To avoid extra codegen and be profitable, we need Outer >= Inner
3457       if (OuterShiftAmt < InnerShiftAmt) {
3458         return false;
3459       }
3460 
3461       // If the outer shift is more than the type size, we have no bitfield to
3462       // extract (since we also check that the inner shift is <= the outer shift
3463       // then this also implies that the inner shift is < the type size)
3464       if (OuterShiftAmt >= Val.getValueSizeInBits()) {
3465         return false;
3466       }
3467 
3468       Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
3469                                         MVT::i32);
3470       Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
3471                                       DL, MVT::i32);
3472 
3473       if (N->getOpcode() == ISD::SRA) {
3474         // If we have a arithmetic right shift, we need to use the signed bfe
3475         // variant
3476         IsSigned = true;
3477       }
3478     } else {
3479       // No can do...
3480       return false;
3481     }
3482   } else {
3483     // No can do...
3484     return false;
3485   }
3486 
3487 
3488   unsigned Opc;
3489   // For the BFE operations we form here from "and" and "srl", always use the
3490   // unsigned variants.
3491   if (Val.getValueType() == MVT::i32) {
3492     if (IsSigned) {
3493       Opc = NVPTX::BFE_S32rii;
3494     } else {
3495       Opc = NVPTX::BFE_U32rii;
3496     }
3497   } else if (Val.getValueType() == MVT::i64) {
3498     if (IsSigned) {
3499       Opc = NVPTX::BFE_S64rii;
3500     } else {
3501       Opc = NVPTX::BFE_U64rii;
3502     }
3503   } else {
3504     // We cannot handle this type
3505     return false;
3506   }
3507 
3508   SDValue Ops[] = {
3509     Val, Start, Len
3510   };
3511 
3512   ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
3513   return true;
3514 }
3515 
3516 // SelectDirectAddr - Match a direct address for DAG.
3517 // A direct address could be a globaladdress or externalsymbol.
SelectDirectAddr(SDValue N,SDValue & Address)3518 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3519   // Return true if TGA or ES.
3520   if (N.getOpcode() == ISD::TargetGlobalAddress ||
3521       N.getOpcode() == ISD::TargetExternalSymbol) {
3522     Address = N;
3523     return true;
3524   }
3525   if (N.getOpcode() == NVPTXISD::Wrapper) {
3526     Address = N.getOperand(0);
3527     return true;
3528   }
3529   // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
3530   if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
3531     if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
3532         CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
3533         CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
3534       return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
3535   }
3536   return false;
3537 }
3538 
3539 // symbol+offset
SelectADDRsi_imp(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset,MVT mvt)3540 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3541     SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3542   if (Addr.getOpcode() == ISD::ADD) {
3543     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3544       SDValue base = Addr.getOperand(0);
3545       if (SelectDirectAddr(base, Base)) {
3546         Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3547                                            mvt);
3548         return true;
3549       }
3550     }
3551   }
3552   return false;
3553 }
3554 
3555 // symbol+offset
bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
                                     SDValue &Base, SDValue &Offset) {
  // Symbol+constant-offset matcher with a 32-bit offset operand.
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
}
3560 
3561 // symbol+offset
bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
                                       SDValue &Base, SDValue &Offset) {
  // Symbol+constant-offset matcher with a 64-bit offset operand.
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
}
3566 
3567 // register+offset
SelectADDRri_imp(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset,MVT mvt)3568 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3569     SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3570   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3571     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3572     Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
3573     return true;
3574   }
3575   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3576       Addr.getOpcode() == ISD::TargetGlobalAddress)
3577     return false; // direct calls.
3578 
3579   if (Addr.getOpcode() == ISD::ADD) {
3580     if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3581       return false;
3582     }
3583     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3584       if (FrameIndexSDNode *FIN =
3585               dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
3586         // Constant offset from frame ref.
3587         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3588       else
3589         Base = Addr.getOperand(0);
3590       Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3591                                          mvt);
3592       return true;
3593     }
3594   }
3595   return false;
3596 }
3597 
3598 // register+offset
bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
                                     SDValue &Base, SDValue &Offset) {
  // Register+constant-offset matcher with a 32-bit offset operand.
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
}
3603 
3604 // register+offset
bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
                                       SDValue &Base, SDValue &Offset) {
  // Register+constant-offset matcher with a 64-bit offset operand.
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
}
3609 
ChkMemSDNodeAddressSpace(SDNode * N,unsigned int spN) const3610 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3611                                                  unsigned int spN) const {
3612   const Value *Src = nullptr;
3613   if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
3614     if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3615       return true;
3616     Src = mN->getMemOperand()->getValue();
3617   }
3618   if (!Src)
3619     return false;
3620   if (auto *PT = dyn_cast<PointerType>(Src->getType()))
3621     return (PT->getAddressSpace() == spN);
3622   return false;
3623 }
3624 
3625 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3626 /// inline asm expressions.
SelectInlineAsmMemoryOperand(const SDValue & Op,InlineAsm::ConstraintCode ConstraintID,std::vector<SDValue> & OutOps)3627 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
3628     const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
3629     std::vector<SDValue> &OutOps) {
3630   SDValue Op0, Op1;
3631   switch (ConstraintID) {
3632   default:
3633     return true;
3634   case InlineAsm::ConstraintCode::m: // memory
3635     if (SelectDirectAddr(Op, Op0)) {
3636       OutOps.push_back(Op0);
3637       OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
3638       return false;
3639     }
3640     if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3641       OutOps.push_back(Op0);
3642       OutOps.push_back(Op1);
3643       return false;
3644     }
3645     break;
3646   }
3647   return true;
3648 }
3649 
3650 /// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
3651 /// conversion from \p SrcTy to \p DestTy.
unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
                                             LoadSDNode *LdNode) {
  // Use the signed CVT variants only when the value comes from a
  // sign-extending load; a null LdNode (or any other extension kind)
  // selects the unsigned/zero-extending forms.
  bool IsSigned = LdNode && LdNode->getExtensionType() == ISD::SEXTLOAD;
  switch (SrcTy.SimpleTy) {
  default:
    llvm_unreachable("Unhandled source type");
  case MVT::i8:
    // i8 widening conversions.
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i16:
      return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
    case MVT::i32:
      return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
    case MVT::i64:
      return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
    }
  case MVT::i16:
    // i16 narrowing (to i8) and widening conversions.
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i8:
      return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
    case MVT::i32:
      return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
    case MVT::i64:
      return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
    }
  case MVT::i32:
    // i32 narrowing and widening conversions.
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i8:
      return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
    case MVT::i16:
      return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
    case MVT::i64:
      return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
    }
  case MVT::i64:
    // i64 narrowing conversions.
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i8:
      return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
    case MVT::i16:
      return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
    case MVT::i32:
      return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
    }
  case MVT::f16:
    // Floating-point widening; signedness does not apply here.
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::f32:
      return NVPTX::CVT_f32_f16;
    case MVT::f64:
      return NVPTX::CVT_f64_f16;
    }
  }
}
3713