1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the NVPTX target.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "NVPTXISelDAGToDAG.h"
14 #include "MCTargetDesc/NVPTXBaseInfo.h"
15 #include "NVPTXUtilities.h"
16 #include "llvm/Analysis/ValueTracking.h"
17 #include "llvm/CodeGen/ISDOpcodes.h"
18 #include "llvm/IR/GlobalValue.h"
19 #include "llvm/IR/Instructions.h"
20 #include "llvm/IR/IntrinsicsNVPTX.h"
21 #include "llvm/Support/AtomicOrdering.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include "llvm/Target/TargetIntrinsicInfo.h"
27
28 using namespace llvm;
29
30 #define DEBUG_TYPE "nvptx-isel"
31 #define PASS_NAME "NVPTX DAG->DAG Pattern Instruction Selection"
32
33 /// createNVPTXISelDag - This pass converts a legalized DAG into a
34 /// NVPTX-specific DAG, ready for instruction scheduling.
/// Factory entry point: builds the NVPTX instruction-selection pass.
/// \param TM        the NVPTX target machine the selector is bound to.
/// \param OptLevel  codegen optimization level; forwarded to the selector,
///                  which uses it e.g. to decide whether to emit mul.wide.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
                                       llvm::CodeGenOptLevel OptLevel) {
  return new NVPTXDAGToDAGISel(TM, OptLevel);
}
39
// Static pass identifier; its address is used by the pass infrastructure.
char NVPTXDAGToDAGISel::ID = 0;

// Register the pass with the legacy pass manager (not CFG-only, not analysis).
INITIALIZE_PASS(NVPTXDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
43
NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
                                     CodeGenOptLevel OptLevel)
    : SelectionDAGISel(ID, tm, OptLevel), TM(tm) {
  // Widening multiplies (mul.wide) are only worthwhile when optimizing.
  doMulWide = (OptLevel > CodeGenOptLevel::None);
}
49
bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  // Cache the per-function subtarget before delegating to the common
  // SelectionDAG instruction-selection driver.
  Subtarget = &MF.getSubtarget<NVPTXSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}
54
// Forwards the f32 division precision policy chosen by target lowering.
int NVPTXDAGToDAGISel::getDivF32Level() const {
  return Subtarget->getTargetLowering()->getDivF32Level();
}
58
// Whether precise (IEEE) f32 square root should be used, per target lowering.
bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
  return Subtarget->getTargetLowering()->usePrecSqrtF32();
}
62
// Whether f32 denormals are flushed to zero for the current function.
bool NVPTXDAGToDAGISel::useF32FTZ() const {
  return Subtarget->getTargetLowering()->useF32FTZ(*MF);
}
66
allowFMA() const67 bool NVPTXDAGToDAGISel::allowFMA() const {
68 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
69 return TL->allowFMA(*MF, OptLevel);
70 }
71
allowUnsafeFPMath() const72 bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
73 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
74 return TL->allowUnsafeFPMath(*MF);
75 }
76
// Whether 32-bit addresses are used for the non-generic address spaces
// (the "short pointer" mode configured on the target machine).
bool NVPTXDAGToDAGISel::useShortPointers() const {
  return TM.useShortPointers();
}
80
81 /// Select - Select instructions not customized! Used for
82 /// expanded, promoted and normal instructions.
void NVPTXDAGToDAGISel::Select(SDNode *N) {

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  // Dispatch on opcode; each try* helper returns true when it fully selected
  // the node. On false (or for unhandled opcodes) fall through to the
  // TableGen-generated matcher via SelectCode().
  switch (N->getOpcode()) {
  case ISD::LOAD:
  case ISD::ATOMIC_LOAD:
    if (tryLoad(N))
      return;
    break;
  case ISD::STORE:
  case ISD::ATOMIC_STORE:
    if (tryStore(N))
      return;
    break;
  case ISD::EXTRACT_VECTOR_ELT:
    if (tryEXTRACT_VECTOR_ELEMENT(N))
      return;
    break;
  case NVPTXISD::SETP_F16X2:
    SelectSETP_F16X2(N);
    return;
  case NVPTXISD::SETP_BF16X2:
    SelectSETP_BF16X2(N);
    return;
  case NVPTXISD::LoadV2:
  case NVPTXISD::LoadV4:
    if (tryLoadVector(N))
      return;
    break;
  case NVPTXISD::LDGV2:
  case NVPTXISD::LDGV4:
  case NVPTXISD::LDUV2:
  case NVPTXISD::LDUV4:
    if (tryLDGLDU(N))
      return;
    break;
  case NVPTXISD::StoreV2:
  case NVPTXISD::StoreV4:
    if (tryStoreVector(N))
      return;
    break;
  case NVPTXISD::LoadParam:
  case NVPTXISD::LoadParamV2:
  case NVPTXISD::LoadParamV4:
    if (tryLoadParam(N))
      return;
    break;
  case NVPTXISD::StoreRetval:
  case NVPTXISD::StoreRetvalV2:
  case NVPTXISD::StoreRetvalV4:
    if (tryStoreRetval(N))
      return;
    break;
  case NVPTXISD::StoreParam:
  case NVPTXISD::StoreParamV2:
  case NVPTXISD::StoreParamV4:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParamU32:
    if (tryStoreParam(N))
      return;
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    if (tryIntrinsicNoChain(N))
      return;
    break;
  case ISD::INTRINSIC_W_CHAIN:
    if (tryIntrinsicChain(N))
      return;
    break;
  // All texture-read variants funnel into tryTextureIntrinsic().
  case NVPTXISD::Tex1DFloatS32:
  case NVPTXISD::Tex1DFloatFloat:
  case NVPTXISD::Tex1DFloatFloatLevel:
  case NVPTXISD::Tex1DFloatFloatGrad:
  case NVPTXISD::Tex1DS32S32:
  case NVPTXISD::Tex1DS32Float:
  case NVPTXISD::Tex1DS32FloatLevel:
  case NVPTXISD::Tex1DS32FloatGrad:
  case NVPTXISD::Tex1DU32S32:
  case NVPTXISD::Tex1DU32Float:
  case NVPTXISD::Tex1DU32FloatLevel:
  case NVPTXISD::Tex1DU32FloatGrad:
  case NVPTXISD::Tex1DArrayFloatS32:
  case NVPTXISD::Tex1DArrayFloatFloat:
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
  case NVPTXISD::Tex1DArrayS32S32:
  case NVPTXISD::Tex1DArrayS32Float:
  case NVPTXISD::Tex1DArrayS32FloatLevel:
  case NVPTXISD::Tex1DArrayS32FloatGrad:
  case NVPTXISD::Tex1DArrayU32S32:
  case NVPTXISD::Tex1DArrayU32Float:
  case NVPTXISD::Tex1DArrayU32FloatLevel:
  case NVPTXISD::Tex1DArrayU32FloatGrad:
  case NVPTXISD::Tex2DFloatS32:
  case NVPTXISD::Tex2DFloatFloat:
  case NVPTXISD::Tex2DFloatFloatLevel:
  case NVPTXISD::Tex2DFloatFloatGrad:
  case NVPTXISD::Tex2DS32S32:
  case NVPTXISD::Tex2DS32Float:
  case NVPTXISD::Tex2DS32FloatLevel:
  case NVPTXISD::Tex2DS32FloatGrad:
  case NVPTXISD::Tex2DU32S32:
  case NVPTXISD::Tex2DU32Float:
  case NVPTXISD::Tex2DU32FloatLevel:
  case NVPTXISD::Tex2DU32FloatGrad:
  case NVPTXISD::Tex2DArrayFloatS32:
  case NVPTXISD::Tex2DArrayFloatFloat:
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
  case NVPTXISD::Tex2DArrayS32S32:
  case NVPTXISD::Tex2DArrayS32Float:
  case NVPTXISD::Tex2DArrayS32FloatLevel:
  case NVPTXISD::Tex2DArrayS32FloatGrad:
  case NVPTXISD::Tex2DArrayU32S32:
  case NVPTXISD::Tex2DArrayU32Float:
  case NVPTXISD::Tex2DArrayU32FloatLevel:
  case NVPTXISD::Tex2DArrayU32FloatGrad:
  case NVPTXISD::Tex3DFloatS32:
  case NVPTXISD::Tex3DFloatFloat:
  case NVPTXISD::Tex3DFloatFloatLevel:
  case NVPTXISD::Tex3DFloatFloatGrad:
  case NVPTXISD::Tex3DS32S32:
  case NVPTXISD::Tex3DS32Float:
  case NVPTXISD::Tex3DS32FloatLevel:
  case NVPTXISD::Tex3DS32FloatGrad:
  case NVPTXISD::Tex3DU32S32:
  case NVPTXISD::Tex3DU32Float:
  case NVPTXISD::Tex3DU32FloatLevel:
  case NVPTXISD::Tex3DU32FloatGrad:
  case NVPTXISD::TexCubeFloatFloat:
  case NVPTXISD::TexCubeFloatFloatLevel:
  case NVPTXISD::TexCubeS32Float:
  case NVPTXISD::TexCubeS32FloatLevel:
  case NVPTXISD::TexCubeU32Float:
  case NVPTXISD::TexCubeU32FloatLevel:
  case NVPTXISD::TexCubeArrayFloatFloat:
  case NVPTXISD::TexCubeArrayFloatFloatLevel:
  case NVPTXISD::TexCubeArrayS32Float:
  case NVPTXISD::TexCubeArrayS32FloatLevel:
  case NVPTXISD::TexCubeArrayU32Float:
  case NVPTXISD::TexCubeArrayU32FloatLevel:
  case NVPTXISD::Tld4R2DFloatFloat:
  case NVPTXISD::Tld4G2DFloatFloat:
  case NVPTXISD::Tld4B2DFloatFloat:
  case NVPTXISD::Tld4A2DFloatFloat:
  case NVPTXISD::Tld4R2DS64Float:
  case NVPTXISD::Tld4G2DS64Float:
  case NVPTXISD::Tld4B2DS64Float:
  case NVPTXISD::Tld4A2DS64Float:
  case NVPTXISD::Tld4R2DU64Float:
  case NVPTXISD::Tld4G2DU64Float:
  case NVPTXISD::Tld4B2DU64Float:
  case NVPTXISD::Tld4A2DU64Float:
  case NVPTXISD::TexUnified1DFloatS32:
  case NVPTXISD::TexUnified1DFloatFloat:
  case NVPTXISD::TexUnified1DFloatFloatLevel:
  case NVPTXISD::TexUnified1DFloatFloatGrad:
  case NVPTXISD::TexUnified1DS32S32:
  case NVPTXISD::TexUnified1DS32Float:
  case NVPTXISD::TexUnified1DS32FloatLevel:
  case NVPTXISD::TexUnified1DS32FloatGrad:
  case NVPTXISD::TexUnified1DU32S32:
  case NVPTXISD::TexUnified1DU32Float:
  case NVPTXISD::TexUnified1DU32FloatLevel:
  case NVPTXISD::TexUnified1DU32FloatGrad:
  case NVPTXISD::TexUnified1DArrayFloatS32:
  case NVPTXISD::TexUnified1DArrayFloatFloat:
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
  case NVPTXISD::TexUnified1DArrayS32S32:
  case NVPTXISD::TexUnified1DArrayS32Float:
  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
  case NVPTXISD::TexUnified1DArrayU32S32:
  case NVPTXISD::TexUnified1DArrayU32Float:
  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
  case NVPTXISD::TexUnified2DFloatS32:
  case NVPTXISD::TexUnified2DFloatFloat:
  case NVPTXISD::TexUnified2DFloatFloatLevel:
  case NVPTXISD::TexUnified2DFloatFloatGrad:
  case NVPTXISD::TexUnified2DS32S32:
  case NVPTXISD::TexUnified2DS32Float:
  case NVPTXISD::TexUnified2DS32FloatLevel:
  case NVPTXISD::TexUnified2DS32FloatGrad:
  case NVPTXISD::TexUnified2DU32S32:
  case NVPTXISD::TexUnified2DU32Float:
  case NVPTXISD::TexUnified2DU32FloatLevel:
  case NVPTXISD::TexUnified2DU32FloatGrad:
  case NVPTXISD::TexUnified2DArrayFloatS32:
  case NVPTXISD::TexUnified2DArrayFloatFloat:
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
  case NVPTXISD::TexUnified2DArrayS32S32:
  case NVPTXISD::TexUnified2DArrayS32Float:
  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
  case NVPTXISD::TexUnified2DArrayU32S32:
  case NVPTXISD::TexUnified2DArrayU32Float:
  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
  case NVPTXISD::TexUnified3DFloatS32:
  case NVPTXISD::TexUnified3DFloatFloat:
  case NVPTXISD::TexUnified3DFloatFloatLevel:
  case NVPTXISD::TexUnified3DFloatFloatGrad:
  case NVPTXISD::TexUnified3DS32S32:
  case NVPTXISD::TexUnified3DS32Float:
  case NVPTXISD::TexUnified3DS32FloatLevel:
  case NVPTXISD::TexUnified3DS32FloatGrad:
  case NVPTXISD::TexUnified3DU32S32:
  case NVPTXISD::TexUnified3DU32Float:
  case NVPTXISD::TexUnified3DU32FloatLevel:
  case NVPTXISD::TexUnified3DU32FloatGrad:
  case NVPTXISD::TexUnifiedCubeFloatFloat:
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
  case NVPTXISD::TexUnifiedCubeS32Float:
  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
  case NVPTXISD::TexUnifiedCubeU32Float:
  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayS32Float:
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayU32Float:
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
  case NVPTXISD::TexUnifiedCubeFloatFloatGrad:
  case NVPTXISD::TexUnifiedCubeS32FloatGrad:
  case NVPTXISD::TexUnifiedCubeU32FloatGrad:
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad:
  case NVPTXISD::TexUnifiedCubeArrayS32FloatGrad:
  case NVPTXISD::TexUnifiedCubeArrayU32FloatGrad:
  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
  case NVPTXISD::Tld4UnifiedR2DS64Float:
  case NVPTXISD::Tld4UnifiedG2DS64Float:
  case NVPTXISD::Tld4UnifiedB2DS64Float:
  case NVPTXISD::Tld4UnifiedA2DS64Float:
  case NVPTXISD::Tld4UnifiedR2DU64Float:
  case NVPTXISD::Tld4UnifiedG2DU64Float:
  case NVPTXISD::Tld4UnifiedB2DU64Float:
  case NVPTXISD::Tld4UnifiedA2DU64Float:
    if (tryTextureIntrinsic(N))
      return;
    break;
  // All surface-load variants funnel into trySurfaceIntrinsic().
  case NVPTXISD::Suld1DI8Clamp:
  case NVPTXISD::Suld1DI16Clamp:
  case NVPTXISD::Suld1DI32Clamp:
  case NVPTXISD::Suld1DI64Clamp:
  case NVPTXISD::Suld1DV2I8Clamp:
  case NVPTXISD::Suld1DV2I16Clamp:
  case NVPTXISD::Suld1DV2I32Clamp:
  case NVPTXISD::Suld1DV2I64Clamp:
  case NVPTXISD::Suld1DV4I8Clamp:
  case NVPTXISD::Suld1DV4I16Clamp:
  case NVPTXISD::Suld1DV4I32Clamp:
  case NVPTXISD::Suld1DArrayI8Clamp:
  case NVPTXISD::Suld1DArrayI16Clamp:
  case NVPTXISD::Suld1DArrayI32Clamp:
  case NVPTXISD::Suld1DArrayI64Clamp:
  case NVPTXISD::Suld1DArrayV2I8Clamp:
  case NVPTXISD::Suld1DArrayV2I16Clamp:
  case NVPTXISD::Suld1DArrayV2I32Clamp:
  case NVPTXISD::Suld1DArrayV2I64Clamp:
  case NVPTXISD::Suld1DArrayV4I8Clamp:
  case NVPTXISD::Suld1DArrayV4I16Clamp:
  case NVPTXISD::Suld1DArrayV4I32Clamp:
  case NVPTXISD::Suld2DI8Clamp:
  case NVPTXISD::Suld2DI16Clamp:
  case NVPTXISD::Suld2DI32Clamp:
  case NVPTXISD::Suld2DI64Clamp:
  case NVPTXISD::Suld2DV2I8Clamp:
  case NVPTXISD::Suld2DV2I16Clamp:
  case NVPTXISD::Suld2DV2I32Clamp:
  case NVPTXISD::Suld2DV2I64Clamp:
  case NVPTXISD::Suld2DV4I8Clamp:
  case NVPTXISD::Suld2DV4I16Clamp:
  case NVPTXISD::Suld2DV4I32Clamp:
  case NVPTXISD::Suld2DArrayI8Clamp:
  case NVPTXISD::Suld2DArrayI16Clamp:
  case NVPTXISD::Suld2DArrayI32Clamp:
  case NVPTXISD::Suld2DArrayI64Clamp:
  case NVPTXISD::Suld2DArrayV2I8Clamp:
  case NVPTXISD::Suld2DArrayV2I16Clamp:
  case NVPTXISD::Suld2DArrayV2I32Clamp:
  case NVPTXISD::Suld2DArrayV2I64Clamp:
  case NVPTXISD::Suld2DArrayV4I8Clamp:
  case NVPTXISD::Suld2DArrayV4I16Clamp:
  case NVPTXISD::Suld2DArrayV4I32Clamp:
  case NVPTXISD::Suld3DI8Clamp:
  case NVPTXISD::Suld3DI16Clamp:
  case NVPTXISD::Suld3DI32Clamp:
  case NVPTXISD::Suld3DI64Clamp:
  case NVPTXISD::Suld3DV2I8Clamp:
  case NVPTXISD::Suld3DV2I16Clamp:
  case NVPTXISD::Suld3DV2I32Clamp:
  case NVPTXISD::Suld3DV2I64Clamp:
  case NVPTXISD::Suld3DV4I8Clamp:
  case NVPTXISD::Suld3DV4I16Clamp:
  case NVPTXISD::Suld3DV4I32Clamp:
  case NVPTXISD::Suld1DI8Trap:
  case NVPTXISD::Suld1DI16Trap:
  case NVPTXISD::Suld1DI32Trap:
  case NVPTXISD::Suld1DI64Trap:
  case NVPTXISD::Suld1DV2I8Trap:
  case NVPTXISD::Suld1DV2I16Trap:
  case NVPTXISD::Suld1DV2I32Trap:
  case NVPTXISD::Suld1DV2I64Trap:
  case NVPTXISD::Suld1DV4I8Trap:
  case NVPTXISD::Suld1DV4I16Trap:
  case NVPTXISD::Suld1DV4I32Trap:
  case NVPTXISD::Suld1DArrayI8Trap:
  case NVPTXISD::Suld1DArrayI16Trap:
  case NVPTXISD::Suld1DArrayI32Trap:
  case NVPTXISD::Suld1DArrayI64Trap:
  case NVPTXISD::Suld1DArrayV2I8Trap:
  case NVPTXISD::Suld1DArrayV2I16Trap:
  case NVPTXISD::Suld1DArrayV2I32Trap:
  case NVPTXISD::Suld1DArrayV2I64Trap:
  case NVPTXISD::Suld1DArrayV4I8Trap:
  case NVPTXISD::Suld1DArrayV4I16Trap:
  case NVPTXISD::Suld1DArrayV4I32Trap:
  case NVPTXISD::Suld2DI8Trap:
  case NVPTXISD::Suld2DI16Trap:
  case NVPTXISD::Suld2DI32Trap:
  case NVPTXISD::Suld2DI64Trap:
  case NVPTXISD::Suld2DV2I8Trap:
  case NVPTXISD::Suld2DV2I16Trap:
  case NVPTXISD::Suld2DV2I32Trap:
  case NVPTXISD::Suld2DV2I64Trap:
  case NVPTXISD::Suld2DV4I8Trap:
  case NVPTXISD::Suld2DV4I16Trap:
  case NVPTXISD::Suld2DV4I32Trap:
  case NVPTXISD::Suld2DArrayI8Trap:
  case NVPTXISD::Suld2DArrayI16Trap:
  case NVPTXISD::Suld2DArrayI32Trap:
  case NVPTXISD::Suld2DArrayI64Trap:
  case NVPTXISD::Suld2DArrayV2I8Trap:
  case NVPTXISD::Suld2DArrayV2I16Trap:
  case NVPTXISD::Suld2DArrayV2I32Trap:
  case NVPTXISD::Suld2DArrayV2I64Trap:
  case NVPTXISD::Suld2DArrayV4I8Trap:
  case NVPTXISD::Suld2DArrayV4I16Trap:
  case NVPTXISD::Suld2DArrayV4I32Trap:
  case NVPTXISD::Suld3DI8Trap:
  case NVPTXISD::Suld3DI16Trap:
  case NVPTXISD::Suld3DI32Trap:
  case NVPTXISD::Suld3DI64Trap:
  case NVPTXISD::Suld3DV2I8Trap:
  case NVPTXISD::Suld3DV2I16Trap:
  case NVPTXISD::Suld3DV2I32Trap:
  case NVPTXISD::Suld3DV2I64Trap:
  case NVPTXISD::Suld3DV4I8Trap:
  case NVPTXISD::Suld3DV4I16Trap:
  case NVPTXISD::Suld3DV4I32Trap:
  case NVPTXISD::Suld1DI8Zero:
  case NVPTXISD::Suld1DI16Zero:
  case NVPTXISD::Suld1DI32Zero:
  case NVPTXISD::Suld1DI64Zero:
  case NVPTXISD::Suld1DV2I8Zero:
  case NVPTXISD::Suld1DV2I16Zero:
  case NVPTXISD::Suld1DV2I32Zero:
  case NVPTXISD::Suld1DV2I64Zero:
  case NVPTXISD::Suld1DV4I8Zero:
  case NVPTXISD::Suld1DV4I16Zero:
  case NVPTXISD::Suld1DV4I32Zero:
  case NVPTXISD::Suld1DArrayI8Zero:
  case NVPTXISD::Suld1DArrayI16Zero:
  case NVPTXISD::Suld1DArrayI32Zero:
  case NVPTXISD::Suld1DArrayI64Zero:
  case NVPTXISD::Suld1DArrayV2I8Zero:
  case NVPTXISD::Suld1DArrayV2I16Zero:
  case NVPTXISD::Suld1DArrayV2I32Zero:
  case NVPTXISD::Suld1DArrayV2I64Zero:
  case NVPTXISD::Suld1DArrayV4I8Zero:
  case NVPTXISD::Suld1DArrayV4I16Zero:
  case NVPTXISD::Suld1DArrayV4I32Zero:
  case NVPTXISD::Suld2DI8Zero:
  case NVPTXISD::Suld2DI16Zero:
  case NVPTXISD::Suld2DI32Zero:
  case NVPTXISD::Suld2DI64Zero:
  case NVPTXISD::Suld2DV2I8Zero:
  case NVPTXISD::Suld2DV2I16Zero:
  case NVPTXISD::Suld2DV2I32Zero:
  case NVPTXISD::Suld2DV2I64Zero:
  case NVPTXISD::Suld2DV4I8Zero:
  case NVPTXISD::Suld2DV4I16Zero:
  case NVPTXISD::Suld2DV4I32Zero:
  case NVPTXISD::Suld2DArrayI8Zero:
  case NVPTXISD::Suld2DArrayI16Zero:
  case NVPTXISD::Suld2DArrayI32Zero:
  case NVPTXISD::Suld2DArrayI64Zero:
  case NVPTXISD::Suld2DArrayV2I8Zero:
  case NVPTXISD::Suld2DArrayV2I16Zero:
  case NVPTXISD::Suld2DArrayV2I32Zero:
  case NVPTXISD::Suld2DArrayV2I64Zero:
  case NVPTXISD::Suld2DArrayV4I8Zero:
  case NVPTXISD::Suld2DArrayV4I16Zero:
  case NVPTXISD::Suld2DArrayV4I32Zero:
  case NVPTXISD::Suld3DI8Zero:
  case NVPTXISD::Suld3DI16Zero:
  case NVPTXISD::Suld3DI32Zero:
  case NVPTXISD::Suld3DI64Zero:
  case NVPTXISD::Suld3DV2I8Zero:
  case NVPTXISD::Suld3DV2I16Zero:
  case NVPTXISD::Suld3DV2I32Zero:
  case NVPTXISD::Suld3DV2I64Zero:
  case NVPTXISD::Suld3DV4I8Zero:
  case NVPTXISD::Suld3DV4I16Zero:
  case NVPTXISD::Suld3DV4I32Zero:
    if (trySurfaceIntrinsic(N))
      return;
    break;
  case ISD::AND:
  case ISD::SRA:
  case ISD::SRL:
    // Try to select BFE
    if (tryBFE(N))
      return;
    break;
  case ISD::ADDRSPACECAST:
    SelectAddrSpaceCast(N);
    return;
  case ISD::ConstantFP:
    if (tryConstantFP(N))
      return;
    break;
  default:
    break;
  }
  // Fall back to the auto-generated instruction matcher.
  SelectCode(N);
}
520
tryIntrinsicChain(SDNode * N)521 bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
522 unsigned IID = N->getConstantOperandVal(1);
523 switch (IID) {
524 default:
525 return false;
526 case Intrinsic::nvvm_ldg_global_f:
527 case Intrinsic::nvvm_ldg_global_i:
528 case Intrinsic::nvvm_ldg_global_p:
529 case Intrinsic::nvvm_ldu_global_f:
530 case Intrinsic::nvvm_ldu_global_i:
531 case Intrinsic::nvvm_ldu_global_p:
532 return tryLDGLDU(N);
533 }
534 }
535
536 // There's no way to specify FP16 and BF16 immediates in .(b)f16 ops, so we
537 // have to load them into an .(b)f16 register first.
tryConstantFP(SDNode * N)538 bool NVPTXDAGToDAGISel::tryConstantFP(SDNode *N) {
539 if (N->getValueType(0) != MVT::f16 && N->getValueType(0) != MVT::bf16)
540 return false;
541 SDValue Val = CurDAG->getTargetConstantFP(
542 cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), N->getValueType(0));
543 SDNode *LoadConstF16 = CurDAG->getMachineNode(
544 (N->getValueType(0) == MVT::f16 ? NVPTX::LOAD_CONST_F16
545 : NVPTX::LOAD_CONST_BF16),
546 SDLoc(N), N->getValueType(0), Val);
547 ReplaceNode(N, LoadConstF16);
548 return true;
549 }
550
551 // Map ISD:CONDCODE value to appropriate CmpMode expected by
552 // NVPTXInstPrinter::printCmpMode()
getPTXCmpMode(const CondCodeSDNode & CondCode,bool FTZ)553 static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
554 using NVPTX::PTXCmpMode::CmpMode;
555 unsigned PTXCmpMode = [](ISD::CondCode CC) {
556 switch (CC) {
557 default:
558 llvm_unreachable("Unexpected condition code.");
559 case ISD::SETOEQ:
560 return CmpMode::EQ;
561 case ISD::SETOGT:
562 return CmpMode::GT;
563 case ISD::SETOGE:
564 return CmpMode::GE;
565 case ISD::SETOLT:
566 return CmpMode::LT;
567 case ISD::SETOLE:
568 return CmpMode::LE;
569 case ISD::SETONE:
570 return CmpMode::NE;
571 case ISD::SETO:
572 return CmpMode::NUM;
573 case ISD::SETUO:
574 return CmpMode::NotANumber;
575 case ISD::SETUEQ:
576 return CmpMode::EQU;
577 case ISD::SETUGT:
578 return CmpMode::GTU;
579 case ISD::SETUGE:
580 return CmpMode::GEU;
581 case ISD::SETULT:
582 return CmpMode::LTU;
583 case ISD::SETULE:
584 return CmpMode::LEU;
585 case ISD::SETUNE:
586 return CmpMode::NEU;
587 case ISD::SETEQ:
588 return CmpMode::EQ;
589 case ISD::SETGT:
590 return CmpMode::GT;
591 case ISD::SETGE:
592 return CmpMode::GE;
593 case ISD::SETLT:
594 return CmpMode::LT;
595 case ISD::SETLE:
596 return CmpMode::LE;
597 case ISD::SETNE:
598 return CmpMode::NE;
599 }
600 }(CondCode.get());
601
602 if (FTZ)
603 PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
604
605 return PTXCmpMode;
606 }
607
SelectSETP_F16X2(SDNode * N)608 bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
609 unsigned PTXCmpMode =
610 getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
611 SDLoc DL(N);
612 SDNode *SetP = CurDAG->getMachineNode(
613 NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
614 N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
615 ReplaceNode(N, SetP);
616 return true;
617 }
618
SelectSETP_BF16X2(SDNode * N)619 bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(SDNode *N) {
620 unsigned PTXCmpMode =
621 getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
622 SDLoc DL(N);
623 SDNode *SetP = CurDAG->getMachineNode(
624 NVPTX::SETP_bf16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
625 N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
626 ReplaceNode(N, SetP);
627 return true;
628 }
629
630 // Find all instances of extract_vector_elt that use this v2f16 vector
631 // and coalesce them into a scattering move instruction.
tryEXTRACT_VECTOR_ELEMENT(SDNode * N)632 bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
633 SDValue Vector = N->getOperand(0);
634
635 // We only care about 16x2 as it's the only real vector type we
636 // need to deal with.
637 MVT VT = Vector.getSimpleValueType();
638 if (!Isv2x16VT(VT))
639 return false;
640 // Find and record all uses of this vector that extract element 0 or 1.
641 SmallVector<SDNode *, 4> E0, E1;
642 for (auto *U : Vector.getNode()->uses()) {
643 if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
644 continue;
645 if (U->getOperand(0) != Vector)
646 continue;
647 if (const ConstantSDNode *IdxConst =
648 dyn_cast<ConstantSDNode>(U->getOperand(1))) {
649 if (IdxConst->getZExtValue() == 0)
650 E0.push_back(U);
651 else if (IdxConst->getZExtValue() == 1)
652 E1.push_back(U);
653 else
654 llvm_unreachable("Invalid vector index.");
655 }
656 }
657
658 // There's no point scattering f16x2 if we only ever access one
659 // element of it.
660 if (E0.empty() || E1.empty())
661 return false;
662
663 // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
664 // into f16,f16 SplitF16x2(V)
665 MVT EltVT = VT.getVectorElementType();
666 SDNode *ScatterOp =
667 CurDAG->getMachineNode(NVPTX::I32toV2I16, SDLoc(N), EltVT, EltVT, Vector);
668 for (auto *Node : E0)
669 ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
670 for (auto *Node : E1)
671 ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));
672
673 return true;
674 }
675
getCodeAddrSpace(MemSDNode * N)676 static unsigned int getCodeAddrSpace(MemSDNode *N) {
677 const Value *Src = N->getMemOperand()->getValue();
678
679 if (!Src)
680 return NVPTX::PTXLdStInstCode::GENERIC;
681
682 if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
683 switch (PT->getAddressSpace()) {
684 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
685 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
686 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
687 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
688 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
689 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
690 default: break;
691 }
692 }
693 return NVPTX::PTXLdStInstCode::GENERIC;
694 }
695
canLowerToLDG(MemSDNode * N,const NVPTXSubtarget & Subtarget,unsigned CodeAddrSpace,MachineFunction * F)696 static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
697 unsigned CodeAddrSpace, MachineFunction *F) {
698 // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
699 // space.
700 //
701 // We have two ways of identifying invariant loads: Loads may be explicitly
702 // marked as invariant, or we may infer them to be invariant.
703 //
704 // We currently infer invariance for loads from
705 // - constant global variables, and
706 // - kernel function pointer params that are noalias (i.e. __restrict) and
707 // never written to.
708 //
709 // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
710 // not during the SelectionDAG phase).
711 //
712 // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
713 // explicitly invariant loads because these are how clang tells us to use ldg
714 // when the user uses a builtin.
715 if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
716 return false;
717
718 if (N->isInvariant())
719 return true;
720
721 bool IsKernelFn = isKernelFunction(F->getFunction());
722
723 // We use getUnderlyingObjects() here instead of getUnderlyingObject() mainly
724 // because the former looks through phi nodes while the latter does not. We
725 // need to look through phi nodes to handle pointer induction variables.
726 SmallVector<const Value *, 8> Objs;
727 getUnderlyingObjects(N->getMemOperand()->getValue(), Objs);
728
729 return all_of(Objs, [&](const Value *V) {
730 if (auto *A = dyn_cast<const Argument>(V))
731 return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
732 if (auto *GV = dyn_cast<const GlobalVariable>(V))
733 return GV->isConstant();
734 return false;
735 });
736 }
737
tryIntrinsicNoChain(SDNode * N)738 bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
739 unsigned IID = N->getConstantOperandVal(0);
740 switch (IID) {
741 default:
742 return false;
743 case Intrinsic::nvvm_texsurf_handle_internal:
744 SelectTexSurfHandle(N);
745 return true;
746 }
747 }
748
SelectTexSurfHandle(SDNode * N)749 void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
750 // Op 0 is the intrinsic ID
751 SDValue Wrapper = N->getOperand(1);
752 SDValue GlobalVal = Wrapper.getOperand(0);
753 ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
754 MVT::i64, GlobalVal));
755 }
756
// Lower an addrspacecast to the matching cvta / cvta.to machine instruction.
// Casts go either specific->generic (cvta) or generic->specific (cvta.to);
// direct specific->specific casts are rejected.
void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  SDValue Src = N->getOperand(0);
  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
  unsigned DstAddrSpace = CastN->getDestAddressSpace();
  assert(SrcAddrSpace != DstAddrSpace &&
         "addrspacecast must be between different address spaces");

  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
    // Specific to generic
    // NOTE(review): the _6432 variants appear to handle short-pointer mode,
    // where shared/const/local use 32-bit addresses widened to a 64-bit
    // generic pointer — confirm against the instruction definitions.
    unsigned Opc;
    switch (SrcAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
                                               : NVPTX::cvta_shared_yes_64)
                         : NVPTX::cvta_shared_yes;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
                                               : NVPTX::cvta_const_yes_64)
                         : NVPTX::cvta_const_yes;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
                                               : NVPTX::cvta_local_yes_64)
                         : NVPTX::cvta_local_yes;
      break;
    }
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                                          Src));
    return;
  } else {
    // Generic to specific
    if (SrcAddrSpace != 0)
      report_fatal_error("Cannot cast between two non-generic address spaces");
    unsigned Opc;
    switch (DstAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
                         : NVPTX::cvta_to_global_yes;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
                                               : NVPTX::cvta_to_shared_yes_64)
                         : NVPTX::cvta_to_shared_yes;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
                                               : NVPTX::cvta_to_const_yes_64)
                         : NVPTX::cvta_to_const_yes;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
                                               : NVPTX::cvta_to_local_yes_64)
                         : NVPTX::cvta_to_local_yes;
      break;
    case ADDRESS_SPACE_PARAM:
      // Param space has no cvta.to form; a dedicated pseudo is used instead.
      Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
                         : NVPTX::nvvm_ptr_gen_to_param;
      break;
    }
    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                                          Src));
    return;
  }
}
828
829 // Helper function template to reduce amount of boilerplate code for
830 // opcode selection.
831 static std::optional<unsigned>
pickOpcodeForVT(MVT::SimpleValueType VT,unsigned Opcode_i8,unsigned Opcode_i16,unsigned Opcode_i32,std::optional<unsigned> Opcode_i64,unsigned Opcode_f32,std::optional<unsigned> Opcode_f64)832 pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8,
833 unsigned Opcode_i16, unsigned Opcode_i32,
834 std::optional<unsigned> Opcode_i64, unsigned Opcode_f32,
835 std::optional<unsigned> Opcode_f64) {
836 switch (VT) {
837 case MVT::i1:
838 case MVT::i8:
839 return Opcode_i8;
840 case MVT::i16:
841 return Opcode_i16;
842 case MVT::i32:
843 return Opcode_i32;
844 case MVT::i64:
845 return Opcode_i64;
846 case MVT::f16:
847 case MVT::bf16:
848 return Opcode_i16;
849 case MVT::v2f16:
850 case MVT::v2bf16:
851 case MVT::v2i16:
852 case MVT::v4i8:
853 return Opcode_i32;
854 case MVT::f32:
855 return Opcode_f32;
856 case MVT::f64:
857 return Opcode_f64;
858 default:
859 return std::nullopt;
860 }
861 }
862
getLdStRegType(EVT VT)863 static int getLdStRegType(EVT VT) {
864 if (VT.isFloatingPoint())
865 switch (VT.getSimpleVT().SimpleTy) {
866 case MVT::f16:
867 case MVT::bf16:
868 case MVT::v2f16:
869 case MVT::v2bf16:
870 return NVPTX::PTXLdStInstCode::Untyped;
871 default:
872 return NVPTX::PTXLdStInstCode::Float;
873 }
874 else
875 return NVPTX::PTXLdStInstCode::Unsigned;
876 }
877
// Select a machine load for a scalar load node (plain or atomic). Bails out
// (returning false, leaving the node to other selection paths) for indexed
// loads, non-simple memory types, atomic orderings stronger than monotonic,
// or when no opcode variant matches the result type.
bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
  SDLoc dl(N);
  MemSDNode *LD = cast<MemSDNode>(N);
  assert(LD->readMem() && "Expected load");
  // Null for atomic loads; non-null for ordinary LoadSDNodes.
  LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  SDNode *NVPTXLD = nullptr;

  // do not support pre/post inc/dec
  if (PlainLoad && PlainLoad->isIndexed())
    return false;

  if (!LoadedVT.isSimple())
    return false;

  AtomicOrdering Ordering = LD->getSuccessOrdering();
  // In order to lower atomic loads with stronger guarantees we would need to
  // use load.acquire or insert fences. However these features were only added
  // with PTX ISA 6.0 / sm_70.
  // TODO: Check if we can actually use the new instructions and implement them.
  if (isStrongerThanMonotonic(Ordering))
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
  // Loads from the global space may be selected as ld.global.nc (LDG)
  // instead; delegate entirely in that case.
  if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
    return tryLDGLDU(N);
  }

  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  // - .volatile has the same memory synchronization semantics as .relaxed.sys
  bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic;
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Type Setting: fromType + fromTypeWidth
  //
  // Sign   : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //          type is integer
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT SimpleVT = LoadedVT.getSimpleVT();
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned fromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
  unsigned int fromType;

  // Vector Setting
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    assert((Isv2x16VT(LoadedVT) || LoadedVT == MVT::v4i8) &&
           "Unexpected vector type");
    // v2f16/v2bf16/v2i16 is loaded using ld.b32
    fromTypeWidth = 32;
  }

  if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD))
    fromType = NVPTX::PTXLdStInstCode::Signed;
  else
    fromType = getLdStRegType(ScalarVT);

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue Addr;
  SDValue Offset, Base;
  std::optional<unsigned> Opcode;
  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;

  // Try the four addressing forms in order of preference: direct symbolic
  // address (avar), symbol+immediate (asi), register+immediate (ari), and
  // finally plain register (areg).
  if (SelectDirectAddr(N1, Addr)) {
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar,
                             NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
                             NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Addr, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
                               : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
                             NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
                             NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
                               : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    // Register+immediate: opcode variant depends on pointer width.
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
                          NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
                          NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari,
                               NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
                               NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  } else {
    // Fallback: the pointer operand itself is used as the address register.
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
                          NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64,
                          NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg,
                               NVPTX::LD_i32_areg, NVPTX::LD_i64_areg,
                               NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
                      getI32Imm(fromTypeWidth, dl), N1, Chain };
    NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
  }

  if (!NVPTXLD)
    return false;

  // Carry the memory operand over so later passes see the original access.
  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXLD), {MemRef});

  ReplaceNode(N, NVPTXLD);
  return true;
}
1018
// Select a machine instruction for a NVPTXISD::LoadV2/LoadV4 vector load.
// Mirrors tryLoad but picks the v2/v4 opcode tables; returns false when the
// node is not a vector load this routine handles or no opcode matches.
bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  std::optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *LD;
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT LoadedVT = MemSD->getMemoryVT();

  if (!LoadedVT.isSimple())
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
  // Prefer ld.global.nc (LDG) when the access qualifies.
  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
    return tryLDGLDU(N);
  }

  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Vector Setting
  MVT SimpleVT = LoadedVT.getSimpleVT();

  // Type Setting: fromType + fromTypeWidth
  //
  // Sign   : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //          type is integer
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
  unsigned int FromType;
  // The last operand holds the original LoadSDNode::getExtensionType() value
  unsigned ExtensionType = cast<ConstantSDNode>(
      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
  if (ExtensionType == ISD::SEXTLOAD)
    FromType = NVPTX::PTXLdStInstCode::Signed;
  else
    FromType = getLdStRegType(ScalarVT);

  unsigned VecType;

  switch (N->getOpcode()) {
  case NVPTXISD::LoadV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
    break;
  case NVPTXISD::LoadV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
    break;
  default:
    return false;
  }

  EVT EltVT = N->getValueType(0);

  // v8x16 is a special case. PTX doesn't have ld.v8.16
  // instruction. Instead, we split the vector into v2x16 chunks and
  // load them with ld.v4.b32.
  if (Isv2x16VT(EltVT)) {
    assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
    EltVT = MVT::i32;
    FromType = NVPTX::PTXLdStInstCode::Untyped;
    FromTypeWidth = 32;
  }

  // Addressing forms, tried in order: direct symbolic address (avar),
  // symbol+immediate (asi), register+immediate (ari), plain register (areg).
  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::LoadV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
                               NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
                               NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
      break;
    case NVPTXISD::LoadV4:
      // No 64-bit element form exists for v4 loads.
      Opcode =
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
                          NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
                          std::nullopt, NVPTX::LDV_f32_v4_avar, std::nullopt);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Addr, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  } else if (PointerSize == 64
                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case NVPTXISD::LoadV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
                               NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
                               NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
      break;
    case NVPTXISD::LoadV4:
      Opcode =
          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
                          NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
                          std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  } else if (PointerSize == 64
                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    // Register+immediate: opcode variant depends on pointer width.
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                            NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64,
                            NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64,
                            NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
            NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
            NVPTX::LDV_f32_v4_ari_64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
                                 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
                                 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
        break;
      case NVPTXISD::LoadV4:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
                            NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
                            std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };

    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  } else {
    // Fallback: the pointer operand itself is the address register.
    if (PointerSize == 64) {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
            NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
            NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
            NVPTX::LDV_f64_v2_areg_64);
        break;
      case NVPTXISD::LoadV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
            NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
            NVPTX::LDV_f32_v4_areg_64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case NVPTXISD::LoadV2:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
                            NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
                            NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f32_v2_areg,
                            NVPTX::LDV_f64_v2_areg);
        break;
      case NVPTXISD::LoadV4:
        Opcode =
            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
                            NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
                            std::nullopt, NVPTX::LDV_f32_v4_areg, std::nullopt);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
                      getI32Imm(FromTypeWidth, DL), Op1, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
  }

  // Carry the memory operand over so later passes see the original access.
  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});

  ReplaceNode(N, LD);
  return true;
}
1242
// Select an ld.global.nc (LDG) or ldu.global (LDU) machine instruction.
// Handles three node shapes: the llvm.nvvm.ldg/ldu intrinsics
// (INTRINSIC_W_CHAIN), plain loads rerouted here by tryLoad/tryLoadVector,
// and the NVPTXISD LDG/LDU vector nodes. Returns false when the node/opcode
// combination has no matching instruction.
bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1;
  MemSDNode *Mem;
  bool IsLDG = true;

  // If this is an LDG intrinsic, the address is the third operand. If its an
  // LDG/LDU SD node (from custom vector handling), then its the second operand
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    Op1 = N->getOperand(2);
    Mem = cast<MemIntrinsicSDNode>(N);
    unsigned IID = N->getConstantOperandVal(1);
    switch (IID) {
    default:
      return false;
    case Intrinsic::nvvm_ldg_global_f:
    case Intrinsic::nvvm_ldg_global_i:
    case Intrinsic::nvvm_ldg_global_p:
      IsLDG = true;
      break;
    case Intrinsic::nvvm_ldu_global_f:
    case Intrinsic::nvvm_ldu_global_i:
    case Intrinsic::nvvm_ldu_global_p:
      IsLDG = false;
      break;
    }
  } else {
    Op1 = N->getOperand(1);
    Mem = cast<MemSDNode>(N);
  }

  std::optional<unsigned> Opcode;
  SDLoc DL(N);
  SDNode *LD;
  SDValue Base, Offset, Addr;
  EVT OrigType = N->getValueType(0);

  // Compute the per-element type and element count the instruction operates
  // on; packed 16-bit vectors are re-chunked below.
  EVT EltVT = Mem->getMemoryVT();
  unsigned NumElts = 1;
  if (EltVT.isVector()) {
    NumElts = EltVT.getVectorNumElements();
    EltVT = EltVT.getVectorElementType();
    // vectors of 16bits type are loaded/stored as multiples of v2x16 elements.
    if ((EltVT == MVT::f16 && OrigType == MVT::v2f16) ||
        (EltVT == MVT::bf16 && OrigType == MVT::v2bf16) ||
        (EltVT == MVT::i16 && OrigType == MVT::v2i16)) {
      assert(NumElts % 2 == 0 && "Vector must have even number of elements");
      EltVT = OrigType;
      NumElts /= 2;
    } else if (OrigType == MVT::v4i8) {
      EltVT = OrigType;
      NumElts = 1;
    }
  }

  // Build the "promoted" result VTList for the load. If we are really loading
  // i8s, then the return type will be promoted to i16 since we do not expose
  // 8-bit registers in NVPTX.
  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
  SmallVector<EVT, 5> InstVTs;
  for (unsigned i = 0; i != NumElts; ++i) {
    InstVTs.push_back(NodeVT);
  }
  InstVTs.push_back(MVT::Other);
  SDVTList InstVTList = CurDAG->getVTList(InstVTs);

  // Addressing forms, tried in order: direct symbolic address (avar),
  // register+immediate (ari/ari64), plain register (areg/areg64).
  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    default:
      return false;
    case ISD::LOAD:
    case ISD::INTRINSIC_W_CHAIN:
      if (IsLDG)
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
                                 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
      else
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
                                 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
      break;
    case NVPTXISD::LoadV2:
    case NVPTXISD::LDGV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
                               NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
      break;
    case NVPTXISD::LDUV2:
      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                               NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
                               NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
      break;
    case NVPTXISD::LoadV4:
    case NVPTXISD::LDGV4:
      Opcode = pickOpcodeForVT(
          EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
          NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
          NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
      break;
    case NVPTXISD::LDUV4:
      Opcode = pickOpcodeForVT(
          EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
          NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
          NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
      break;
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { Addr, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                          : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (TM.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
            NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
            NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
            NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
            NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
            NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
            NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
            NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
            NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
            NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
            NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
            NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
            NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = {Base, Offset, Chain};
    LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
  } else {
    if (TM.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
            NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
            NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
            NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
            NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
            NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
            NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return false;
      case ISD::LOAD:
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG)
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
                                   NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
        else
          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
                                   NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
        break;
      case NVPTXISD::LoadV2:
      case NVPTXISD::LDGV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
        break;
      case NVPTXISD::LDUV2:
        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
        break;
      case NVPTXISD::LoadV4:
      case NVPTXISD::LDGV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
            NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
            NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
            NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
        break;
      case NVPTXISD::LDUV4:
        Opcode = pickOpcodeForVT(
            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
            NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
            NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
            NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);
        break;
      }
    }
    if (!Opcode)
      return false;
    SDValue Ops[] = { Op1, Chain };
    LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
  }

  // For automatic generation of LDG (through SelectLoad[Vector], not the
  // intrinsics), we may have an extending load like:
  //
  //   i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
  //
  // In this case, the matching logic above will select a load for the original
  // memory type (in this case, i8) and our types will not match (the node needs
  // to return an i32 in this case). Our LDG/LDU nodes do not support the
  // concept of sign-/zero-extension, so emulate it here by adding an explicit
  // CVT instruction. Ptxas should clean up any redundancies here.

  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);

  if (OrigType != EltVT &&
      (LdNode || (OrigType.isFloatingPoint() && EltVT.isFloatingPoint()))) {
    // We have an extending-load. The instruction we selected operates on the
    // smaller type, but the SDNode we are replacing has the larger type. We
    // need to emit a CVT to make the types match.
    unsigned CvtOpc =
        GetConvertOpcode(OrigType.getSimpleVT(), EltVT.getSimpleVT(), LdNode);

    // For each output value, apply the manual sign/zero-extension and make sure
    // all users of the load go through that CVT.
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue Res(LD, i);
      SDValue OrigVal(N, i);

      SDNode *CvtNode =
          CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
                                 CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
                                                           DL, MVT::i32));
      ReplaceUses(OrigVal, SDValue(CvtNode, 0));
    }
  }

  ReplaceNode(N, LD);
  return true;
}
1657
// Select a machine store for a scalar store node (plain or atomic). Mirrors
// tryLoad: bails out (returning false) for indexed stores, non-simple memory
// types, atomic orderings stronger than monotonic, or when no opcode variant
// matches the stored value's type.
bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
  SDLoc dl(N);
  MemSDNode *ST = cast<MemSDNode>(N);
  assert(ST->writeMem() && "Expected store");
  // Exactly one of these casts succeeds: plain StoreSDNode or atomic store.
  StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N);
  AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N);
  assert((PlainStore || AtomicStore) && "Expected store");
  EVT StoreVT = ST->getMemoryVT();
  SDNode *NVPTXST = nullptr;

  // do not support pre/post inc/dec
  if (PlainStore && PlainStore->isIndexed())
    return false;

  if (!StoreVT.isSimple())
    return false;

  AtomicOrdering Ordering = ST->getSuccessOrdering();
  // In order to lower atomic stores with stronger guarantees we would need to
  // use store.release or insert fences. However these features were only added
  // with PTX ISA 6.0 / sm_70.
  // TODO: Check if we can actually use the new instructions and implement them.
  if (isStrongerThanMonotonic(Ordering))
    return false;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
  unsigned int PointerSize =
      CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  // - .volatile has the same memory synchronization semantics as .relaxed.sys
  bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic;
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  //
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
  if (SimpleVT.isVector()) {
    assert((Isv2x16VT(StoreVT) || StoreVT == MVT::v4i8) &&
           "Unexpected vector type");
    // v2x16 is stored using st.b32
    toTypeWidth = 32;
  }

  unsigned int toType = getLdStRegType(ScalarVT);

  // Create the machine instruction DAG
  SDValue Chain = ST->getChain();
  SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Addr;
  SDValue Offset, Base;
  std::optional<unsigned> Opcode;
  MVT::SimpleValueType SourceVT =
      Value.getNode()->getSimpleValueType(0).SimpleTy;

  // Addressing forms, tried in order: direct symbolic address (avar),
  // symbol+immediate (asi), register+immediate (ari), plain register (areg).
  if (SelectDirectAddr(BasePtr, Addr)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
                             NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
                             NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Addr,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  } else if (PointerSize == 64
                 ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)
                 : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
                             NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
                             NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Base,
                     Offset,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  } else if (PointerSize == 64
                 ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
                 : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
    // Register+immediate: opcode variant depends on pointer width.
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
                          NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
                          NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
                               NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
                               NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
    if (!Opcode)
      return false;

    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     Base,
                     Offset,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  } else {
    // Fallback: the pointer operand itself is the address register.
    if (PointerSize == 64)
      Opcode =
          pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
                          NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
                          NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
    else
      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
                               NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
                               NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
    if (!Opcode)
      return false;
    SDValue Ops[] = {Value,
                     getI32Imm(isVolatile, dl),
                     getI32Imm(CodeAddrSpace, dl),
                     getI32Imm(vecType, dl),
                     getI32Imm(toType, dl),
                     getI32Imm(toTypeWidth, dl),
                     BasePtr,
                     Chain};
    NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
  }

  if (!NVPTXST)
    return false;

  // Carry the memory operand over so later passes see the original access.
  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXST), {MemRef});
  ReplaceNode(N, NVPTXST);
  return true;
}
1814
tryStoreVector(SDNode * N)1815 bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
1816 SDValue Chain = N->getOperand(0);
1817 SDValue Op1 = N->getOperand(1);
1818 SDValue Addr, Offset, Base;
1819 std::optional<unsigned> Opcode;
1820 SDLoc DL(N);
1821 SDNode *ST;
1822 EVT EltVT = Op1.getValueType();
1823 MemSDNode *MemSD = cast<MemSDNode>(N);
1824 EVT StoreVT = MemSD->getMemoryVT();
1825
1826 // Address Space Setting
1827 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
1828 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1829 report_fatal_error("Cannot store to pointer that points to constant "
1830 "memory space");
1831 }
1832 unsigned int PointerSize =
1833 CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
1834
1835 // Volatile Setting
1836 // - .volatile is only availalble for .global and .shared
1837 bool IsVolatile = MemSD->isVolatile();
1838 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1839 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1840 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1841 IsVolatile = false;
1842
1843 // Type Setting: toType + toTypeWidth
1844 // - for integer type, always use 'u'
1845 assert(StoreVT.isSimple() && "Store value is not simple");
1846 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1847 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1848 unsigned ToType = getLdStRegType(ScalarVT);
1849
1850 SmallVector<SDValue, 12> StOps;
1851 SDValue N2;
1852 unsigned VecType;
1853
1854 switch (N->getOpcode()) {
1855 case NVPTXISD::StoreV2:
1856 VecType = NVPTX::PTXLdStInstCode::V2;
1857 StOps.push_back(N->getOperand(1));
1858 StOps.push_back(N->getOperand(2));
1859 N2 = N->getOperand(3);
1860 break;
1861 case NVPTXISD::StoreV4:
1862 VecType = NVPTX::PTXLdStInstCode::V4;
1863 StOps.push_back(N->getOperand(1));
1864 StOps.push_back(N->getOperand(2));
1865 StOps.push_back(N->getOperand(3));
1866 StOps.push_back(N->getOperand(4));
1867 N2 = N->getOperand(5);
1868 break;
1869 default:
1870 return false;
1871 }
1872
1873 // v8x16 is a special case. PTX doesn't have st.v8.x16
1874 // instruction. Instead, we split the vector into v2x16 chunks and
1875 // store them with st.v4.b32.
1876 if (Isv2x16VT(EltVT)) {
1877 assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode.");
1878 EltVT = MVT::i32;
1879 ToType = NVPTX::PTXLdStInstCode::Untyped;
1880 ToTypeWidth = 32;
1881 }
1882
1883 StOps.push_back(getI32Imm(IsVolatile, DL));
1884 StOps.push_back(getI32Imm(CodeAddrSpace, DL));
1885 StOps.push_back(getI32Imm(VecType, DL));
1886 StOps.push_back(getI32Imm(ToType, DL));
1887 StOps.push_back(getI32Imm(ToTypeWidth, DL));
1888
1889 if (SelectDirectAddr(N2, Addr)) {
1890 switch (N->getOpcode()) {
1891 default:
1892 return false;
1893 case NVPTXISD::StoreV2:
1894 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1895 NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1896 NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1897 NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1898 break;
1899 case NVPTXISD::StoreV4:
1900 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1901 NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
1902 NVPTX::STV_i32_v4_avar, std::nullopt,
1903 NVPTX::STV_f32_v4_avar, std::nullopt);
1904 break;
1905 }
1906 StOps.push_back(Addr);
1907 } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1908 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1909 switch (N->getOpcode()) {
1910 default:
1911 return false;
1912 case NVPTXISD::StoreV2:
1913 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1914 NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1915 NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1916 NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1917 break;
1918 case NVPTXISD::StoreV4:
1919 Opcode =
1920 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
1921 NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
1922 std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
1923 break;
1924 }
1925 StOps.push_back(Base);
1926 StOps.push_back(Offset);
1927 } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1928 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1929 if (PointerSize == 64) {
1930 switch (N->getOpcode()) {
1931 default:
1932 return false;
1933 case NVPTXISD::StoreV2:
1934 Opcode =
1935 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1936 NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64,
1937 NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64,
1938 NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64);
1939 break;
1940 case NVPTXISD::StoreV4:
1941 Opcode = pickOpcodeForVT(
1942 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
1943 NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
1944 NVPTX::STV_f32_v4_ari_64, std::nullopt);
1945 break;
1946 }
1947 } else {
1948 switch (N->getOpcode()) {
1949 default:
1950 return false;
1951 case NVPTXISD::StoreV2:
1952 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1953 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
1954 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
1955 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
1956 break;
1957 case NVPTXISD::StoreV4:
1958 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1959 NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
1960 NVPTX::STV_i32_v4_ari, std::nullopt,
1961 NVPTX::STV_f32_v4_ari, std::nullopt);
1962 break;
1963 }
1964 }
1965 StOps.push_back(Base);
1966 StOps.push_back(Offset);
1967 } else {
1968 if (PointerSize == 64) {
1969 switch (N->getOpcode()) {
1970 default:
1971 return false;
1972 case NVPTXISD::StoreV2:
1973 Opcode = pickOpcodeForVT(
1974 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
1975 NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
1976 NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
1977 NVPTX::STV_f64_v2_areg_64);
1978 break;
1979 case NVPTXISD::StoreV4:
1980 Opcode = pickOpcodeForVT(
1981 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
1982 NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt,
1983 NVPTX::STV_f32_v4_areg_64, std::nullopt);
1984 break;
1985 }
1986 } else {
1987 switch (N->getOpcode()) {
1988 default:
1989 return false;
1990 case NVPTXISD::StoreV2:
1991 Opcode =
1992 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
1993 NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
1994 NVPTX::STV_i64_v2_areg, NVPTX::STV_f32_v2_areg,
1995 NVPTX::STV_f64_v2_areg);
1996 break;
1997 case NVPTXISD::StoreV4:
1998 Opcode =
1999 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
2000 NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
2001 std::nullopt, NVPTX::STV_f32_v4_areg, std::nullopt);
2002 break;
2003 }
2004 }
2005 StOps.push_back(N2);
2006 }
2007
2008 if (!Opcode)
2009 return false;
2010
2011 StOps.push_back(Chain);
2012
2013 ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, StOps);
2014
2015 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2016 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});
2017
2018 ReplaceNode(N, ST);
2019 return true;
2020 }
2021
tryLoadParam(SDNode * Node)2022 bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
2023 SDValue Chain = Node->getOperand(0);
2024 SDValue Offset = Node->getOperand(2);
2025 SDValue Glue = Node->getOperand(3);
2026 SDLoc DL(Node);
2027 MemSDNode *Mem = cast<MemSDNode>(Node);
2028
2029 unsigned VecSize;
2030 switch (Node->getOpcode()) {
2031 default:
2032 return false;
2033 case NVPTXISD::LoadParam:
2034 VecSize = 1;
2035 break;
2036 case NVPTXISD::LoadParamV2:
2037 VecSize = 2;
2038 break;
2039 case NVPTXISD::LoadParamV4:
2040 VecSize = 4;
2041 break;
2042 }
2043
2044 EVT EltVT = Node->getValueType(0);
2045 EVT MemVT = Mem->getMemoryVT();
2046
2047 std::optional<unsigned> Opcode;
2048
2049 switch (VecSize) {
2050 default:
2051 return false;
2052 case 1:
2053 Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
2054 NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2055 NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2056 NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2057 break;
2058 case 2:
2059 Opcode =
2060 pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
2061 NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2062 NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32,
2063 NVPTX::LoadParamMemV2F64);
2064 break;
2065 case 4:
2066 Opcode =
2067 pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
2068 NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32,
2069 std::nullopt, NVPTX::LoadParamMemV4F32, std::nullopt);
2070 break;
2071 }
2072 if (!Opcode)
2073 return false;
2074
2075 SDVTList VTs;
2076 if (VecSize == 1) {
2077 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2078 } else if (VecSize == 2) {
2079 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2080 } else {
2081 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2082 VTs = CurDAG->getVTList(EVTs);
2083 }
2084
2085 unsigned OffsetVal = Offset->getAsZExtVal();
2086
2087 SmallVector<SDValue, 2> Ops;
2088 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2089 Ops.push_back(Chain);
2090 Ops.push_back(Glue);
2091
2092 ReplaceNode(Node, CurDAG->getMachineNode(*Opcode, DL, VTs, Ops));
2093 return true;
2094 }
2095
tryStoreRetval(SDNode * N)2096 bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
2097 SDLoc DL(N);
2098 SDValue Chain = N->getOperand(0);
2099 SDValue Offset = N->getOperand(1);
2100 unsigned OffsetVal = Offset->getAsZExtVal();
2101 MemSDNode *Mem = cast<MemSDNode>(N);
2102
2103 // How many elements do we have?
2104 unsigned NumElts = 1;
2105 switch (N->getOpcode()) {
2106 default:
2107 return false;
2108 case NVPTXISD::StoreRetval:
2109 NumElts = 1;
2110 break;
2111 case NVPTXISD::StoreRetvalV2:
2112 NumElts = 2;
2113 break;
2114 case NVPTXISD::StoreRetvalV4:
2115 NumElts = 4;
2116 break;
2117 }
2118
2119 // Build vector of operands
2120 SmallVector<SDValue, 6> Ops;
2121 for (unsigned i = 0; i < NumElts; ++i)
2122 Ops.push_back(N->getOperand(i + 2));
2123 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2124 Ops.push_back(Chain);
2125
2126 // Determine target opcode
2127 // If we have an i1, use an 8-bit store. The lowering code in
2128 // NVPTXISelLowering will have already emitted an upcast.
2129 std::optional<unsigned> Opcode = 0;
2130 switch (NumElts) {
2131 default:
2132 return false;
2133 case 1:
2134 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2135 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2136 NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2137 NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2138 break;
2139 case 2:
2140 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2141 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2142 NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2143 NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2144 break;
2145 case 4:
2146 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2147 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2148 NVPTX::StoreRetvalV4I32, std::nullopt,
2149 NVPTX::StoreRetvalV4F32, std::nullopt);
2150 break;
2151 }
2152 if (!Opcode)
2153 return false;
2154
2155 SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
2156 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2157 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
2158
2159 ReplaceNode(N, Ret);
2160 return true;
2161 }
2162
tryStoreParam(SDNode * N)2163 bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2164 SDLoc DL(N);
2165 SDValue Chain = N->getOperand(0);
2166 SDValue Param = N->getOperand(1);
2167 unsigned ParamVal = Param->getAsZExtVal();
2168 SDValue Offset = N->getOperand(2);
2169 unsigned OffsetVal = Offset->getAsZExtVal();
2170 MemSDNode *Mem = cast<MemSDNode>(N);
2171 SDValue Glue = N->getOperand(N->getNumOperands() - 1);
2172
2173 // How many elements do we have?
2174 unsigned NumElts = 1;
2175 switch (N->getOpcode()) {
2176 default:
2177 return false;
2178 case NVPTXISD::StoreParamU32:
2179 case NVPTXISD::StoreParamS32:
2180 case NVPTXISD::StoreParam:
2181 NumElts = 1;
2182 break;
2183 case NVPTXISD::StoreParamV2:
2184 NumElts = 2;
2185 break;
2186 case NVPTXISD::StoreParamV4:
2187 NumElts = 4;
2188 break;
2189 }
2190
2191 // Build vector of operands
2192 SmallVector<SDValue, 8> Ops;
2193 for (unsigned i = 0; i < NumElts; ++i)
2194 Ops.push_back(N->getOperand(i + 3));
2195 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2196 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2197 Ops.push_back(Chain);
2198 Ops.push_back(Glue);
2199
2200 // Determine target opcode
2201 // If we have an i1, use an 8-bit store. The lowering code in
2202 // NVPTXISelLowering will have already emitted an upcast.
2203 std::optional<unsigned> Opcode = 0;
2204 switch (N->getOpcode()) {
2205 default:
2206 switch (NumElts) {
2207 default:
2208 return false;
2209 case 1:
2210 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2211 NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2212 NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2213 NVPTX::StoreParamF32, NVPTX::StoreParamF64);
2214 break;
2215 case 2:
2216 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2217 NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2218 NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2219 NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
2220 break;
2221 case 4:
2222 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2223 NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2224 NVPTX::StoreParamV4I32, std::nullopt,
2225 NVPTX::StoreParamV4F32, std::nullopt);
2226 break;
2227 }
2228 if (!Opcode)
2229 return false;
2230 break;
2231 // Special case: if we have a sign-extend/zero-extend node, insert the
2232 // conversion instruction first, and use that as the value operand to
2233 // the selected StoreParam node.
2234 case NVPTXISD::StoreParamU32: {
2235 Opcode = NVPTX::StoreParamI32;
2236 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
2237 MVT::i32);
2238 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2239 MVT::i32, Ops[0], CvtNone);
2240 Ops[0] = SDValue(Cvt, 0);
2241 break;
2242 }
2243 case NVPTXISD::StoreParamS32: {
2244 Opcode = NVPTX::StoreParamI32;
2245 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
2246 MVT::i32);
2247 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2248 MVT::i32, Ops[0], CvtNone);
2249 Ops[0] = SDValue(Cvt, 0);
2250 break;
2251 }
2252 }
2253
2254 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2255 SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, RetVTs, Ops);
2256 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2257 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
2258
2259 ReplaceNode(N, Ret);
2260 return true;
2261 }
2262
tryTextureIntrinsic(SDNode * N)2263 bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
2264 unsigned Opc = 0;
2265
2266 switch (N->getOpcode()) {
2267 default: return false;
2268 case NVPTXISD::Tex1DFloatS32:
2269 Opc = NVPTX::TEX_1D_F32_S32_RR;
2270 break;
2271 case NVPTXISD::Tex1DFloatFloat:
2272 Opc = NVPTX::TEX_1D_F32_F32_RR;
2273 break;
2274 case NVPTXISD::Tex1DFloatFloatLevel:
2275 Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
2276 break;
2277 case NVPTXISD::Tex1DFloatFloatGrad:
2278 Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
2279 break;
2280 case NVPTXISD::Tex1DS32S32:
2281 Opc = NVPTX::TEX_1D_S32_S32_RR;
2282 break;
2283 case NVPTXISD::Tex1DS32Float:
2284 Opc = NVPTX::TEX_1D_S32_F32_RR;
2285 break;
2286 case NVPTXISD::Tex1DS32FloatLevel:
2287 Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
2288 break;
2289 case NVPTXISD::Tex1DS32FloatGrad:
2290 Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
2291 break;
2292 case NVPTXISD::Tex1DU32S32:
2293 Opc = NVPTX::TEX_1D_U32_S32_RR;
2294 break;
2295 case NVPTXISD::Tex1DU32Float:
2296 Opc = NVPTX::TEX_1D_U32_F32_RR;
2297 break;
2298 case NVPTXISD::Tex1DU32FloatLevel:
2299 Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
2300 break;
2301 case NVPTXISD::Tex1DU32FloatGrad:
2302 Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
2303 break;
2304 case NVPTXISD::Tex1DArrayFloatS32:
2305 Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
2306 break;
2307 case NVPTXISD::Tex1DArrayFloatFloat:
2308 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
2309 break;
2310 case NVPTXISD::Tex1DArrayFloatFloatLevel:
2311 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
2312 break;
2313 case NVPTXISD::Tex1DArrayFloatFloatGrad:
2314 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
2315 break;
2316 case NVPTXISD::Tex1DArrayS32S32:
2317 Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
2318 break;
2319 case NVPTXISD::Tex1DArrayS32Float:
2320 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
2321 break;
2322 case NVPTXISD::Tex1DArrayS32FloatLevel:
2323 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
2324 break;
2325 case NVPTXISD::Tex1DArrayS32FloatGrad:
2326 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
2327 break;
2328 case NVPTXISD::Tex1DArrayU32S32:
2329 Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
2330 break;
2331 case NVPTXISD::Tex1DArrayU32Float:
2332 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
2333 break;
2334 case NVPTXISD::Tex1DArrayU32FloatLevel:
2335 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
2336 break;
2337 case NVPTXISD::Tex1DArrayU32FloatGrad:
2338 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
2339 break;
2340 case NVPTXISD::Tex2DFloatS32:
2341 Opc = NVPTX::TEX_2D_F32_S32_RR;
2342 break;
2343 case NVPTXISD::Tex2DFloatFloat:
2344 Opc = NVPTX::TEX_2D_F32_F32_RR;
2345 break;
2346 case NVPTXISD::Tex2DFloatFloatLevel:
2347 Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
2348 break;
2349 case NVPTXISD::Tex2DFloatFloatGrad:
2350 Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
2351 break;
2352 case NVPTXISD::Tex2DS32S32:
2353 Opc = NVPTX::TEX_2D_S32_S32_RR;
2354 break;
2355 case NVPTXISD::Tex2DS32Float:
2356 Opc = NVPTX::TEX_2D_S32_F32_RR;
2357 break;
2358 case NVPTXISD::Tex2DS32FloatLevel:
2359 Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
2360 break;
2361 case NVPTXISD::Tex2DS32FloatGrad:
2362 Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
2363 break;
2364 case NVPTXISD::Tex2DU32S32:
2365 Opc = NVPTX::TEX_2D_U32_S32_RR;
2366 break;
2367 case NVPTXISD::Tex2DU32Float:
2368 Opc = NVPTX::TEX_2D_U32_F32_RR;
2369 break;
2370 case NVPTXISD::Tex2DU32FloatLevel:
2371 Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
2372 break;
2373 case NVPTXISD::Tex2DU32FloatGrad:
2374 Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
2375 break;
2376 case NVPTXISD::Tex2DArrayFloatS32:
2377 Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
2378 break;
2379 case NVPTXISD::Tex2DArrayFloatFloat:
2380 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
2381 break;
2382 case NVPTXISD::Tex2DArrayFloatFloatLevel:
2383 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
2384 break;
2385 case NVPTXISD::Tex2DArrayFloatFloatGrad:
2386 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
2387 break;
2388 case NVPTXISD::Tex2DArrayS32S32:
2389 Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
2390 break;
2391 case NVPTXISD::Tex2DArrayS32Float:
2392 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
2393 break;
2394 case NVPTXISD::Tex2DArrayS32FloatLevel:
2395 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
2396 break;
2397 case NVPTXISD::Tex2DArrayS32FloatGrad:
2398 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
2399 break;
2400 case NVPTXISD::Tex2DArrayU32S32:
2401 Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
2402 break;
2403 case NVPTXISD::Tex2DArrayU32Float:
2404 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
2405 break;
2406 case NVPTXISD::Tex2DArrayU32FloatLevel:
2407 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
2408 break;
2409 case NVPTXISD::Tex2DArrayU32FloatGrad:
2410 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
2411 break;
2412 case NVPTXISD::Tex3DFloatS32:
2413 Opc = NVPTX::TEX_3D_F32_S32_RR;
2414 break;
2415 case NVPTXISD::Tex3DFloatFloat:
2416 Opc = NVPTX::TEX_3D_F32_F32_RR;
2417 break;
2418 case NVPTXISD::Tex3DFloatFloatLevel:
2419 Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
2420 break;
2421 case NVPTXISD::Tex3DFloatFloatGrad:
2422 Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
2423 break;
2424 case NVPTXISD::Tex3DS32S32:
2425 Opc = NVPTX::TEX_3D_S32_S32_RR;
2426 break;
2427 case NVPTXISD::Tex3DS32Float:
2428 Opc = NVPTX::TEX_3D_S32_F32_RR;
2429 break;
2430 case NVPTXISD::Tex3DS32FloatLevel:
2431 Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
2432 break;
2433 case NVPTXISD::Tex3DS32FloatGrad:
2434 Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
2435 break;
2436 case NVPTXISD::Tex3DU32S32:
2437 Opc = NVPTX::TEX_3D_U32_S32_RR;
2438 break;
2439 case NVPTXISD::Tex3DU32Float:
2440 Opc = NVPTX::TEX_3D_U32_F32_RR;
2441 break;
2442 case NVPTXISD::Tex3DU32FloatLevel:
2443 Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
2444 break;
2445 case NVPTXISD::Tex3DU32FloatGrad:
2446 Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
2447 break;
2448 case NVPTXISD::TexCubeFloatFloat:
2449 Opc = NVPTX::TEX_CUBE_F32_F32_RR;
2450 break;
2451 case NVPTXISD::TexCubeFloatFloatLevel:
2452 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
2453 break;
2454 case NVPTXISD::TexCubeS32Float:
2455 Opc = NVPTX::TEX_CUBE_S32_F32_RR;
2456 break;
2457 case NVPTXISD::TexCubeS32FloatLevel:
2458 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
2459 break;
2460 case NVPTXISD::TexCubeU32Float:
2461 Opc = NVPTX::TEX_CUBE_U32_F32_RR;
2462 break;
2463 case NVPTXISD::TexCubeU32FloatLevel:
2464 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
2465 break;
2466 case NVPTXISD::TexCubeArrayFloatFloat:
2467 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
2468 break;
2469 case NVPTXISD::TexCubeArrayFloatFloatLevel:
2470 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
2471 break;
2472 case NVPTXISD::TexCubeArrayS32Float:
2473 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
2474 break;
2475 case NVPTXISD::TexCubeArrayS32FloatLevel:
2476 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
2477 break;
2478 case NVPTXISD::TexCubeArrayU32Float:
2479 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
2480 break;
2481 case NVPTXISD::TexCubeArrayU32FloatLevel:
2482 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
2483 break;
2484 case NVPTXISD::Tld4R2DFloatFloat:
2485 Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
2486 break;
2487 case NVPTXISD::Tld4G2DFloatFloat:
2488 Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
2489 break;
2490 case NVPTXISD::Tld4B2DFloatFloat:
2491 Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
2492 break;
2493 case NVPTXISD::Tld4A2DFloatFloat:
2494 Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
2495 break;
2496 case NVPTXISD::Tld4R2DS64Float:
2497 Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
2498 break;
2499 case NVPTXISD::Tld4G2DS64Float:
2500 Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
2501 break;
2502 case NVPTXISD::Tld4B2DS64Float:
2503 Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
2504 break;
2505 case NVPTXISD::Tld4A2DS64Float:
2506 Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
2507 break;
2508 case NVPTXISD::Tld4R2DU64Float:
2509 Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
2510 break;
2511 case NVPTXISD::Tld4G2DU64Float:
2512 Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
2513 break;
2514 case NVPTXISD::Tld4B2DU64Float:
2515 Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
2516 break;
2517 case NVPTXISD::Tld4A2DU64Float:
2518 Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
2519 break;
2520 case NVPTXISD::TexUnified1DFloatS32:
2521 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
2522 break;
2523 case NVPTXISD::TexUnified1DFloatFloat:
2524 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
2525 break;
2526 case NVPTXISD::TexUnified1DFloatFloatLevel:
2527 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
2528 break;
2529 case NVPTXISD::TexUnified1DFloatFloatGrad:
2530 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
2531 break;
2532 case NVPTXISD::TexUnified1DS32S32:
2533 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
2534 break;
2535 case NVPTXISD::TexUnified1DS32Float:
2536 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
2537 break;
2538 case NVPTXISD::TexUnified1DS32FloatLevel:
2539 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
2540 break;
2541 case NVPTXISD::TexUnified1DS32FloatGrad:
2542 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
2543 break;
2544 case NVPTXISD::TexUnified1DU32S32:
2545 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
2546 break;
2547 case NVPTXISD::TexUnified1DU32Float:
2548 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
2549 break;
2550 case NVPTXISD::TexUnified1DU32FloatLevel:
2551 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
2552 break;
2553 case NVPTXISD::TexUnified1DU32FloatGrad:
2554 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
2555 break;
2556 case NVPTXISD::TexUnified1DArrayFloatS32:
2557 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
2558 break;
2559 case NVPTXISD::TexUnified1DArrayFloatFloat:
2560 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
2561 break;
2562 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
2563 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
2564 break;
2565 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
2566 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
2567 break;
2568 case NVPTXISD::TexUnified1DArrayS32S32:
2569 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
2570 break;
2571 case NVPTXISD::TexUnified1DArrayS32Float:
2572 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
2573 break;
2574 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
2575 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
2576 break;
2577 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
2578 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
2579 break;
2580 case NVPTXISD::TexUnified1DArrayU32S32:
2581 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
2582 break;
2583 case NVPTXISD::TexUnified1DArrayU32Float:
2584 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
2585 break;
2586 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
2587 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
2588 break;
2589 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
2590 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
2591 break;
2592 case NVPTXISD::TexUnified2DFloatS32:
2593 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
2594 break;
2595 case NVPTXISD::TexUnified2DFloatFloat:
2596 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
2597 break;
2598 case NVPTXISD::TexUnified2DFloatFloatLevel:
2599 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
2600 break;
2601 case NVPTXISD::TexUnified2DFloatFloatGrad:
2602 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
2603 break;
2604 case NVPTXISD::TexUnified2DS32S32:
2605 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
2606 break;
2607 case NVPTXISD::TexUnified2DS32Float:
2608 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
2609 break;
2610 case NVPTXISD::TexUnified2DS32FloatLevel:
2611 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
2612 break;
2613 case NVPTXISD::TexUnified2DS32FloatGrad:
2614 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
2615 break;
2616 case NVPTXISD::TexUnified2DU32S32:
2617 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
2618 break;
2619 case NVPTXISD::TexUnified2DU32Float:
2620 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
2621 break;
2622 case NVPTXISD::TexUnified2DU32FloatLevel:
2623 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
2624 break;
2625 case NVPTXISD::TexUnified2DU32FloatGrad:
2626 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
2627 break;
2628 case NVPTXISD::TexUnified2DArrayFloatS32:
2629 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
2630 break;
2631 case NVPTXISD::TexUnified2DArrayFloatFloat:
2632 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
2633 break;
2634 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
2635 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
2636 break;
2637 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
2638 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
2639 break;
2640 case NVPTXISD::TexUnified2DArrayS32S32:
2641 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
2642 break;
2643 case NVPTXISD::TexUnified2DArrayS32Float:
2644 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
2645 break;
2646 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
2647 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
2648 break;
2649 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
2650 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
2651 break;
2652 case NVPTXISD::TexUnified2DArrayU32S32:
2653 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
2654 break;
2655 case NVPTXISD::TexUnified2DArrayU32Float:
2656 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
2657 break;
2658 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
2659 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
2660 break;
2661 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
2662 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
2663 break;
2664 case NVPTXISD::TexUnified3DFloatS32:
2665 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
2666 break;
2667 case NVPTXISD::TexUnified3DFloatFloat:
2668 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
2669 break;
2670 case NVPTXISD::TexUnified3DFloatFloatLevel:
2671 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
2672 break;
2673 case NVPTXISD::TexUnified3DFloatFloatGrad:
2674 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
2675 break;
2676 case NVPTXISD::TexUnified3DS32S32:
2677 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
2678 break;
2679 case NVPTXISD::TexUnified3DS32Float:
2680 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
2681 break;
2682 case NVPTXISD::TexUnified3DS32FloatLevel:
2683 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
2684 break;
2685 case NVPTXISD::TexUnified3DS32FloatGrad:
2686 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
2687 break;
2688 case NVPTXISD::TexUnified3DU32S32:
2689 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
2690 break;
2691 case NVPTXISD::TexUnified3DU32Float:
2692 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
2693 break;
2694 case NVPTXISD::TexUnified3DU32FloatLevel:
2695 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
2696 break;
2697 case NVPTXISD::TexUnified3DU32FloatGrad:
2698 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
2699 break;
2700 case NVPTXISD::TexUnifiedCubeFloatFloat:
2701 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
2702 break;
2703 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
2704 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
2705 break;
2706 case NVPTXISD::TexUnifiedCubeS32Float:
2707 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
2708 break;
2709 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
2710 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
2711 break;
2712 case NVPTXISD::TexUnifiedCubeU32Float:
2713 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
2714 break;
2715 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
2716 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
2717 break;
2718 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
2719 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
2720 break;
2721 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
2722 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
2723 break;
2724 case NVPTXISD::TexUnifiedCubeArrayS32Float:
2725 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
2726 break;
2727 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
2728 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
2729 break;
2730 case NVPTXISD::TexUnifiedCubeArrayU32Float:
2731 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
2732 break;
2733 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
2734 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
2735 break;
2736 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
2737 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
2738 break;
2739 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
2740 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
2741 break;
2742 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
2743 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
2744 break;
2745 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
2746 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
2747 break;
2748 case NVPTXISD::Tld4UnifiedR2DS64Float:
2749 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
2750 break;
2751 case NVPTXISD::Tld4UnifiedG2DS64Float:
2752 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
2753 break;
2754 case NVPTXISD::Tld4UnifiedB2DS64Float:
2755 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
2756 break;
2757 case NVPTXISD::Tld4UnifiedA2DS64Float:
2758 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
2759 break;
2760 case NVPTXISD::Tld4UnifiedR2DU64Float:
2761 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
2762 break;
2763 case NVPTXISD::Tld4UnifiedG2DU64Float:
2764 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
2765 break;
2766 case NVPTXISD::Tld4UnifiedB2DU64Float:
2767 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
2768 break;
2769 case NVPTXISD::Tld4UnifiedA2DU64Float:
2770 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
2771 break;
2772 case NVPTXISD::TexUnifiedCubeFloatFloatGrad:
2773 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_R;
2774 break;
2775 case NVPTXISD::TexUnifiedCubeS32FloatGrad:
2776 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_R;
2777 break;
2778 case NVPTXISD::TexUnifiedCubeU32FloatGrad:
2779 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_R;
2780 break;
2781 case NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad:
2782 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_R;
2783 break;
2784 case NVPTXISD::TexUnifiedCubeArrayS32FloatGrad:
2785 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_R;
2786 break;
2787 case NVPTXISD::TexUnifiedCubeArrayU32FloatGrad:
2788 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_R;
2789 break;
2790 }
2791
2792 // Copy over operands
2793 SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
2794 Ops.push_back(N->getOperand(0)); // Move chain to the back.
2795
2796 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
2797 return true;
2798 }
2799
// Select a surface-load (suld) intrinsic node into its register-operand
// machine instruction, if this node is one. Returns false (leaving N
// untouched) for any other opcode.
bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
  unsigned Opc = 0;
  // Map the NVPTXISD surface-load opcode to the corresponding machine opcode.
  // The cases are grouped by out-of-bounds handling mode (.clamp, .trap,
  // .zero), then by geometry (1d, 1d array, 2d, 2d array, 3d), then by
  // element type (scalar/v2/v4 of i8/i16/i32/i64; note there is no v4 i64).
  switch (N->getOpcode()) {
  default: return false;
  // --- .clamp mode: out-of-range coordinates are clamped into bounds. ---
  case NVPTXISD::Suld1DI8Clamp:
    Opc = NVPTX::SULD_1D_I8_CLAMP_R;
    break;
  case NVPTXISD::Suld1DI16Clamp:
    Opc = NVPTX::SULD_1D_I16_CLAMP_R;
    break;
  case NVPTXISD::Suld1DI32Clamp:
    Opc = NVPTX::SULD_1D_I32_CLAMP_R;
    break;
  case NVPTXISD::Suld1DI64Clamp:
    Opc = NVPTX::SULD_1D_I64_CLAMP_R;
    break;
  case NVPTXISD::Suld1DV2I8Clamp:
    Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
    break;
  case NVPTXISD::Suld1DV2I16Clamp:
    Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
    break;
  case NVPTXISD::Suld1DV2I32Clamp:
    Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
    break;
  case NVPTXISD::Suld1DV2I64Clamp:
    Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
    break;
  case NVPTXISD::Suld1DV4I8Clamp:
    Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
    break;
  case NVPTXISD::Suld1DV4I16Clamp:
    Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
    break;
  case NVPTXISD::Suld1DV4I32Clamp:
    Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayI8Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayI16Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayI32Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayI64Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayV2I8Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayV2I16Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayV2I32Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayV2I64Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayV4I8Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayV4I16Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
    break;
  case NVPTXISD::Suld1DArrayV4I32Clamp:
    Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
    break;
  case NVPTXISD::Suld2DI8Clamp:
    Opc = NVPTX::SULD_2D_I8_CLAMP_R;
    break;
  case NVPTXISD::Suld2DI16Clamp:
    Opc = NVPTX::SULD_2D_I16_CLAMP_R;
    break;
  case NVPTXISD::Suld2DI32Clamp:
    Opc = NVPTX::SULD_2D_I32_CLAMP_R;
    break;
  case NVPTXISD::Suld2DI64Clamp:
    Opc = NVPTX::SULD_2D_I64_CLAMP_R;
    break;
  case NVPTXISD::Suld2DV2I8Clamp:
    Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
    break;
  case NVPTXISD::Suld2DV2I16Clamp:
    Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
    break;
  case NVPTXISD::Suld2DV2I32Clamp:
    Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
    break;
  case NVPTXISD::Suld2DV2I64Clamp:
    Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
    break;
  case NVPTXISD::Suld2DV4I8Clamp:
    Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
    break;
  case NVPTXISD::Suld2DV4I16Clamp:
    Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
    break;
  case NVPTXISD::Suld2DV4I32Clamp:
    Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayI8Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayI16Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayI32Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayI64Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayV2I8Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayV2I16Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayV2I32Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayV2I64Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayV4I8Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayV4I16Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
    break;
  case NVPTXISD::Suld2DArrayV4I32Clamp:
    Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
    break;
  case NVPTXISD::Suld3DI8Clamp:
    Opc = NVPTX::SULD_3D_I8_CLAMP_R;
    break;
  case NVPTXISD::Suld3DI16Clamp:
    Opc = NVPTX::SULD_3D_I16_CLAMP_R;
    break;
  case NVPTXISD::Suld3DI32Clamp:
    Opc = NVPTX::SULD_3D_I32_CLAMP_R;
    break;
  case NVPTXISD::Suld3DI64Clamp:
    Opc = NVPTX::SULD_3D_I64_CLAMP_R;
    break;
  case NVPTXISD::Suld3DV2I8Clamp:
    Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
    break;
  case NVPTXISD::Suld3DV2I16Clamp:
    Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
    break;
  case NVPTXISD::Suld3DV2I32Clamp:
    Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
    break;
  case NVPTXISD::Suld3DV2I64Clamp:
    Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
    break;
  case NVPTXISD::Suld3DV4I8Clamp:
    Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
    break;
  case NVPTXISD::Suld3DV4I16Clamp:
    Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
    break;
  case NVPTXISD::Suld3DV4I32Clamp:
    Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
    break;
  // --- .trap mode: out-of-range accesses raise a trap. ---
  case NVPTXISD::Suld1DI8Trap:
    Opc = NVPTX::SULD_1D_I8_TRAP_R;
    break;
  case NVPTXISD::Suld1DI16Trap:
    Opc = NVPTX::SULD_1D_I16_TRAP_R;
    break;
  case NVPTXISD::Suld1DI32Trap:
    Opc = NVPTX::SULD_1D_I32_TRAP_R;
    break;
  case NVPTXISD::Suld1DI64Trap:
    Opc = NVPTX::SULD_1D_I64_TRAP_R;
    break;
  case NVPTXISD::Suld1DV2I8Trap:
    Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
    break;
  case NVPTXISD::Suld1DV2I16Trap:
    Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
    break;
  case NVPTXISD::Suld1DV2I32Trap:
    Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
    break;
  case NVPTXISD::Suld1DV2I64Trap:
    Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
    break;
  case NVPTXISD::Suld1DV4I8Trap:
    Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
    break;
  case NVPTXISD::Suld1DV4I16Trap:
    Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
    break;
  case NVPTXISD::Suld1DV4I32Trap:
    Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayI8Trap:
    Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayI16Trap:
    Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayI32Trap:
    Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayI64Trap:
    Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayV2I8Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayV2I16Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayV2I32Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayV2I64Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayV4I8Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayV4I16Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
    break;
  case NVPTXISD::Suld1DArrayV4I32Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
    break;
  case NVPTXISD::Suld2DI8Trap:
    Opc = NVPTX::SULD_2D_I8_TRAP_R;
    break;
  case NVPTXISD::Suld2DI16Trap:
    Opc = NVPTX::SULD_2D_I16_TRAP_R;
    break;
  case NVPTXISD::Suld2DI32Trap:
    Opc = NVPTX::SULD_2D_I32_TRAP_R;
    break;
  case NVPTXISD::Suld2DI64Trap:
    Opc = NVPTX::SULD_2D_I64_TRAP_R;
    break;
  case NVPTXISD::Suld2DV2I8Trap:
    Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
    break;
  case NVPTXISD::Suld2DV2I16Trap:
    Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
    break;
  case NVPTXISD::Suld2DV2I32Trap:
    Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
    break;
  case NVPTXISD::Suld2DV2I64Trap:
    Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
    break;
  case NVPTXISD::Suld2DV4I8Trap:
    Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
    break;
  case NVPTXISD::Suld2DV4I16Trap:
    Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
    break;
  case NVPTXISD::Suld2DV4I32Trap:
    Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayI8Trap:
    Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayI16Trap:
    Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayI32Trap:
    Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayI64Trap:
    Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayV2I8Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayV2I16Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayV2I32Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayV2I64Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayV4I8Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayV4I16Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
    break;
  case NVPTXISD::Suld2DArrayV4I32Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
    break;
  case NVPTXISD::Suld3DI8Trap:
    Opc = NVPTX::SULD_3D_I8_TRAP_R;
    break;
  case NVPTXISD::Suld3DI16Trap:
    Opc = NVPTX::SULD_3D_I16_TRAP_R;
    break;
  case NVPTXISD::Suld3DI32Trap:
    Opc = NVPTX::SULD_3D_I32_TRAP_R;
    break;
  case NVPTXISD::Suld3DI64Trap:
    Opc = NVPTX::SULD_3D_I64_TRAP_R;
    break;
  case NVPTXISD::Suld3DV2I8Trap:
    Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
    break;
  case NVPTXISD::Suld3DV2I16Trap:
    Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
    break;
  case NVPTXISD::Suld3DV2I32Trap:
    Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
    break;
  case NVPTXISD::Suld3DV2I64Trap:
    Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
    break;
  case NVPTXISD::Suld3DV4I8Trap:
    Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
    break;
  case NVPTXISD::Suld3DV4I16Trap:
    Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
    break;
  case NVPTXISD::Suld3DV4I32Trap:
    Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
    break;
  // --- .zero mode: out-of-range loads return zero. ---
  case NVPTXISD::Suld1DI8Zero:
    Opc = NVPTX::SULD_1D_I8_ZERO_R;
    break;
  case NVPTXISD::Suld1DI16Zero:
    Opc = NVPTX::SULD_1D_I16_ZERO_R;
    break;
  case NVPTXISD::Suld1DI32Zero:
    Opc = NVPTX::SULD_1D_I32_ZERO_R;
    break;
  case NVPTXISD::Suld1DI64Zero:
    Opc = NVPTX::SULD_1D_I64_ZERO_R;
    break;
  case NVPTXISD::Suld1DV2I8Zero:
    Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
    break;
  case NVPTXISD::Suld1DV2I16Zero:
    Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
    break;
  case NVPTXISD::Suld1DV2I32Zero:
    Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
    break;
  case NVPTXISD::Suld1DV2I64Zero:
    Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
    break;
  case NVPTXISD::Suld1DV4I8Zero:
    Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
    break;
  case NVPTXISD::Suld1DV4I16Zero:
    Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
    break;
  case NVPTXISD::Suld1DV4I32Zero:
    Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayI8Zero:
    Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayI16Zero:
    Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayI32Zero:
    Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayI64Zero:
    Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV2I8Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV2I16Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV2I32Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV2I64Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV4I8Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV4I16Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
    break;
  case NVPTXISD::Suld1DArrayV4I32Zero:
    Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
    break;
  case NVPTXISD::Suld2DI8Zero:
    Opc = NVPTX::SULD_2D_I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DI16Zero:
    Opc = NVPTX::SULD_2D_I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DI32Zero:
    Opc = NVPTX::SULD_2D_I32_ZERO_R;
    break;
  case NVPTXISD::Suld2DI64Zero:
    Opc = NVPTX::SULD_2D_I64_ZERO_R;
    break;
  case NVPTXISD::Suld2DV2I8Zero:
    Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DV2I16Zero:
    Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DV2I32Zero:
    Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
    break;
  case NVPTXISD::Suld2DV2I64Zero:
    Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
    break;
  case NVPTXISD::Suld2DV4I8Zero:
    Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DV4I16Zero:
    Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DV4I32Zero:
    Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayI8Zero:
    Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayI16Zero:
    Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayI32Zero:
    Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayI64Zero:
    Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV2I8Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV2I16Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV2I32Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV2I64Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV4I8Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV4I16Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
    break;
  case NVPTXISD::Suld2DArrayV4I32Zero:
    Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
    break;
  case NVPTXISD::Suld3DI8Zero:
    Opc = NVPTX::SULD_3D_I8_ZERO_R;
    break;
  case NVPTXISD::Suld3DI16Zero:
    Opc = NVPTX::SULD_3D_I16_ZERO_R;
    break;
  case NVPTXISD::Suld3DI32Zero:
    Opc = NVPTX::SULD_3D_I32_ZERO_R;
    break;
  case NVPTXISD::Suld3DI64Zero:
    Opc = NVPTX::SULD_3D_I64_ZERO_R;
    break;
  case NVPTXISD::Suld3DV2I8Zero:
    Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
    break;
  case NVPTXISD::Suld3DV2I16Zero:
    Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
    break;
  case NVPTXISD::Suld3DV2I32Zero:
    Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
    break;
  case NVPTXISD::Suld3DV2I64Zero:
    Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
    break;
  case NVPTXISD::Suld3DV4I8Zero:
    Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
    break;
  case NVPTXISD::Suld3DV4I16Zero:
    Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
    break;
  case NVPTXISD::Suld3DV4I32Zero:
    Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
    break;
  }

  // Copy over operands, dropping the incoming chain (operand 0) from the
  // front and re-appending it at the end, as the machine node expects.
  SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
  Ops.push_back(N->getOperand(0)); // Move chain to the back.

  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
  return true;
}
3308
3309
3310 /// SelectBFE - Look for instruction sequences that can be made more efficient
3311 /// by using the 'bfe' (bit-field extract) PTX instruction
tryBFE(SDNode * N)3312 bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
3313 SDLoc DL(N);
3314 SDValue LHS = N->getOperand(0);
3315 SDValue RHS = N->getOperand(1);
3316 SDValue Len;
3317 SDValue Start;
3318 SDValue Val;
3319 bool IsSigned = false;
3320
3321 if (N->getOpcode() == ISD::AND) {
3322 // Canonicalize the operands
3323 // We want 'and %val, %mask'
3324 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3325 std::swap(LHS, RHS);
3326 }
3327
3328 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3329 if (!Mask) {
3330 // We need a constant mask on the RHS of the AND
3331 return false;
3332 }
3333
3334 // Extract the mask bits
3335 uint64_t MaskVal = Mask->getZExtValue();
3336 if (!isMask_64(MaskVal)) {
3337 // We *could* handle shifted masks here, but doing so would require an
3338 // 'and' operation to fix up the low-order bits so we would trade
3339 // shr+and for bfe+and, which has the same throughput
3340 return false;
3341 }
3342
3343 // How many bits are in our mask?
3344 int64_t NumBits = countr_one(MaskVal);
3345 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3346
3347 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3348 // We have a 'srl/and' pair, extract the effective start bit and length
3349 Val = LHS.getNode()->getOperand(0);
3350 Start = LHS.getNode()->getOperand(1);
3351 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3352 if (StartConst) {
3353 uint64_t StartVal = StartConst->getZExtValue();
3354 // How many "good" bits do we have left? "good" is defined here as bits
3355 // that exist in the original value, not shifted in.
3356 int64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3357 if (NumBits > GoodBits) {
3358 // Do not handle the case where bits have been shifted in. In theory
3359 // we could handle this, but the cost is likely higher than just
3360 // emitting the srl/and pair.
3361 return false;
3362 }
3363 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
3364 } else {
3365 // Do not handle the case where the shift amount (can be zero if no srl
3366 // was found) is not constant. We could handle this case, but it would
3367 // require run-time logic that would be more expensive than just
3368 // emitting the srl/and pair.
3369 return false;
3370 }
3371 } else {
3372 // Do not handle the case where the LHS of the and is not a shift. While
3373 // it would be trivial to handle this case, it would just transform
3374 // 'and' -> 'bfe', but 'and' has higher-throughput.
3375 return false;
3376 }
3377 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3378 if (LHS->getOpcode() == ISD::AND) {
3379 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3380 if (!ShiftCnst) {
3381 // Shift amount must be constant
3382 return false;
3383 }
3384
3385 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3386
3387 SDValue AndLHS = LHS->getOperand(0);
3388 SDValue AndRHS = LHS->getOperand(1);
3389
3390 // Canonicalize the AND to have the mask on the RHS
3391 if (isa<ConstantSDNode>(AndLHS)) {
3392 std::swap(AndLHS, AndRHS);
3393 }
3394
3395 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3396 if (!MaskCnst) {
3397 // Mask must be constant
3398 return false;
3399 }
3400
3401 uint64_t MaskVal = MaskCnst->getZExtValue();
3402 uint64_t NumZeros;
3403 uint64_t NumBits;
3404 if (isMask_64(MaskVal)) {
3405 NumZeros = 0;
3406 // The number of bits in the result bitfield will be the number of
3407 // trailing ones (the AND) minus the number of bits we shift off
3408 NumBits = llvm::countr_one(MaskVal) - ShiftAmt;
3409 } else if (isShiftedMask_64(MaskVal)) {
3410 NumZeros = llvm::countr_zero(MaskVal);
3411 unsigned NumOnes = llvm::countr_one(MaskVal >> NumZeros);
3412 // The number of bits in the result bitfield will be the number of
3413 // trailing zeros plus the number of set bits in the mask minus the
3414 // number of bits we shift off
3415 NumBits = NumZeros + NumOnes - ShiftAmt;
3416 } else {
3417 // This is not a mask we can handle
3418 return false;
3419 }
3420
3421 if (ShiftAmt < NumZeros) {
3422 // Handling this case would require extra logic that would make this
3423 // transformation non-profitable
3424 return false;
3425 }
3426
3427 Val = AndLHS;
3428 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
3429 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3430 } else if (LHS->getOpcode() == ISD::SHL) {
3431 // Here, we have a pattern like:
3432 //
3433 // (sra (shl val, NN), MM)
3434 // or
3435 // (srl (shl val, NN), MM)
3436 //
3437 // If MM >= NN, we can efficiently optimize this with bfe
3438 Val = LHS->getOperand(0);
3439
3440 SDValue ShlRHS = LHS->getOperand(1);
3441 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3442 if (!ShlCnst) {
3443 // Shift amount must be constant
3444 return false;
3445 }
3446 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3447
3448 SDValue ShrRHS = RHS;
3449 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3450 if (!ShrCnst) {
3451 // Shift amount must be constant
3452 return false;
3453 }
3454 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3455
3456 // To avoid extra codegen and be profitable, we need Outer >= Inner
3457 if (OuterShiftAmt < InnerShiftAmt) {
3458 return false;
3459 }
3460
3461 // If the outer shift is more than the type size, we have no bitfield to
3462 // extract (since we also check that the inner shift is <= the outer shift
3463 // then this also implies that the inner shift is < the type size)
3464 if (OuterShiftAmt >= Val.getValueSizeInBits()) {
3465 return false;
3466 }
3467
3468 Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
3469 MVT::i32);
3470 Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
3471 DL, MVT::i32);
3472
3473 if (N->getOpcode() == ISD::SRA) {
3474 // If we have a arithmetic right shift, we need to use the signed bfe
3475 // variant
3476 IsSigned = true;
3477 }
3478 } else {
3479 // No can do...
3480 return false;
3481 }
3482 } else {
3483 // No can do...
3484 return false;
3485 }
3486
3487
3488 unsigned Opc;
3489 // For the BFE operations we form here from "and" and "srl", always use the
3490 // unsigned variants.
3491 if (Val.getValueType() == MVT::i32) {
3492 if (IsSigned) {
3493 Opc = NVPTX::BFE_S32rii;
3494 } else {
3495 Opc = NVPTX::BFE_U32rii;
3496 }
3497 } else if (Val.getValueType() == MVT::i64) {
3498 if (IsSigned) {
3499 Opc = NVPTX::BFE_S64rii;
3500 } else {
3501 Opc = NVPTX::BFE_U64rii;
3502 }
3503 } else {
3504 // We cannot handle this type
3505 return false;
3506 }
3507
3508 SDValue Ops[] = {
3509 Val, Start, Len
3510 };
3511
3512 ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
3513 return true;
3514 }
3515
3516 // SelectDirectAddr - Match a direct address for DAG.
3517 // A direct address could be a globaladdress or externalsymbol.
SelectDirectAddr(SDValue N,SDValue & Address)3518 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3519 // Return true if TGA or ES.
3520 if (N.getOpcode() == ISD::TargetGlobalAddress ||
3521 N.getOpcode() == ISD::TargetExternalSymbol) {
3522 Address = N;
3523 return true;
3524 }
3525 if (N.getOpcode() == NVPTXISD::Wrapper) {
3526 Address = N.getOperand(0);
3527 return true;
3528 }
3529 // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
3530 if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
3531 if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
3532 CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
3533 CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
3534 return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
3535 }
3536 return false;
3537 }
3538
3539 // symbol+offset
SelectADDRsi_imp(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset,MVT mvt)3540 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3541 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3542 if (Addr.getOpcode() == ISD::ADD) {
3543 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3544 SDValue base = Addr.getOperand(0);
3545 if (SelectDirectAddr(base, Base)) {
3546 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3547 mvt);
3548 return true;
3549 }
3550 }
3551 }
3552 return false;
3553 }
3554
3555 // symbol+offset
SelectADDRsi(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)3556 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3557 SDValue &Base, SDValue &Offset) {
3558 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3559 }
3560
3561 // symbol+offset
SelectADDRsi64(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)3562 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3563 SDValue &Base, SDValue &Offset) {
3564 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3565 }
3566
3567 // register+offset
SelectADDRri_imp(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset,MVT mvt)3568 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3569 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3570 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3571 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3572 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
3573 return true;
3574 }
3575 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3576 Addr.getOpcode() == ISD::TargetGlobalAddress)
3577 return false; // direct calls.
3578
3579 if (Addr.getOpcode() == ISD::ADD) {
3580 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3581 return false;
3582 }
3583 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3584 if (FrameIndexSDNode *FIN =
3585 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
3586 // Constant offset from frame ref.
3587 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3588 else
3589 Base = Addr.getOperand(0);
3590 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3591 mvt);
3592 return true;
3593 }
3594 }
3595 return false;
3596 }
3597
3598 // register+offset
SelectADDRri(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)3599 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3600 SDValue &Base, SDValue &Offset) {
3601 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3602 }
3603
3604 // register+offset
SelectADDRri64(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)3605 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3606 SDValue &Base, SDValue &Offset) {
3607 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3608 }
3609
ChkMemSDNodeAddressSpace(SDNode * N,unsigned int spN) const3610 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3611 unsigned int spN) const {
3612 const Value *Src = nullptr;
3613 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
3614 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3615 return true;
3616 Src = mN->getMemOperand()->getValue();
3617 }
3618 if (!Src)
3619 return false;
3620 if (auto *PT = dyn_cast<PointerType>(Src->getType()))
3621 return (PT->getAddressSpace() == spN);
3622 return false;
3623 }
3624
3625 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3626 /// inline asm expressions.
SelectInlineAsmMemoryOperand(const SDValue & Op,InlineAsm::ConstraintCode ConstraintID,std::vector<SDValue> & OutOps)3627 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
3628 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
3629 std::vector<SDValue> &OutOps) {
3630 SDValue Op0, Op1;
3631 switch (ConstraintID) {
3632 default:
3633 return true;
3634 case InlineAsm::ConstraintCode::m: // memory
3635 if (SelectDirectAddr(Op, Op0)) {
3636 OutOps.push_back(Op0);
3637 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
3638 return false;
3639 }
3640 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3641 OutOps.push_back(Op0);
3642 OutOps.push_back(Op1);
3643 return false;
3644 }
3645 break;
3646 }
3647 return true;
3648 }
3649
3650 /// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
3651 /// conversion from \p SrcTy to \p DestTy.
/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
/// conversion from \p SrcTy to \p DestTy.
///
/// For integer conversions, \p LdNode (may be null) decides signedness: a
/// sign-extending load selects the signed cvt variants, anything else the
/// unsigned ones. Floating-point conversions (f16 widening) ignore LdNode.
/// Unsupported type pairs are unreachable (callers must pre-filter).
unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
                                             LoadSDNode *LdNode) {
  // Signed only when the value comes from a sign-extending load.
  bool IsSigned = LdNode && LdNode->getExtensionType() == ISD::SEXTLOAD;
  switch (SrcTy.SimpleTy) {
  default:
    llvm_unreachable("Unhandled source type");
  case MVT::i8:
    // i8 widening conversions.
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i16:
      return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
    case MVT::i32:
      return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
    case MVT::i64:
      return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
    }
  case MVT::i16:
    // i16 narrowing/widening conversions.
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i8:
      return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
    case MVT::i32:
      return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
    case MVT::i64:
      return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
    }
  case MVT::i32:
    // i32 narrowing/widening conversions.
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i8:
      return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
    case MVT::i16:
      return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
    case MVT::i64:
      return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
    }
  case MVT::i64:
    // i64 narrowing conversions.
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::i8:
      return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
    case MVT::i16:
      return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
    case MVT::i32:
      return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
    }
  case MVT::f16:
    // f16 widening conversions (signedness does not apply).
    switch (DestTy.SimpleTy) {
    default:
      llvm_unreachable("Unhandled dest type");
    case MVT::f32:
      return NVPTX::CVT_f32_f16;
    case MVT::f64:
      return NVPTX::CVT_f64_f16;
    }
  }
}
3713