1//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 0.0f.  The test is a C++ `==`, so -0.0 compares equal as well.
def immFloat0 : PatLeaf<(fpimm), [{
  const float Val = N->getValueAPF().convertToFloat();
  return Val == 0.0f;
}]>;
13
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 1.0f.
def immFloat1 : PatLeaf<(fpimm), [{
  const float Val = N->getValueAPF().convertToFloat();
  return Val == 1.0f;
}]>;
18
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 0.0.  The test is a C++ `==`, so -0.0 compares equal as well.
def immDouble0 : PatLeaf<(fpimm), [{
  const double Val = N->getValueAPF().convertToDouble();
  return Val == 0.0;
}]>;
23
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 1.0.
def immDouble1 : PatLeaf<(fpimm), [{
  const double Val = N->getValueAPF().convertToDouble();
  return Val == 1.0;
}]>;
28
// Predicate bodies keyed by address space.  Each fragment forwards the node N
// to ChkMemSDNodeAddressSpace together with one ADDRESS_SPACE_* constant and
// returns that call's result.
def AS_match {
  code generic = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];

  code shared = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];

  code global = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}
40
// Supplies `ptx.version`, a dag that is replaced with the PTX version
// currently in use.
class PTX {
  // Transform applied to an immediate: the immediate's own value is not read;
  // the result is an i32 immediate holding Subtarget->getPTXVersion().
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  }]>;

  // The (i32 0) operand is only a placeholder for PTXVerXform to rewrite.
  dag version = (PTXVerXform (i32 0));
}

def ptx : PTX;
50
// Builds a list of n sequentially numbered names sharing a common prefix.
// E.g. RegSeq<3, "r">.ret -> ["r0", "r1", "r2"]
//
// The list is produced recursively: the list for n-1 is extended with
// prefix # (n-1); n == 0 ends the recursion with an empty list.
class RegSeq<int n, string prefix> {
  list<string> ret =
      !if(n,
          !listconcat(RegSeq<!add(n, -1), prefix>.ret,
                      [prefix # !add(n, -1)]),
          []);
}
58
59//-----------------------------------
60// Synchronization and shuffle functions
61//-----------------------------------
62let isConvergent = 1 in {
// int_nvvm_barrier0: emits `bar.sync 0` and takes no operands.
def INT_BARRIER0
    : NVPTXInst<(outs), (ins),
                "bar.sync \t0;",
                [(int_nvvm_barrier0)]>;

// int_nvvm_barrier_n: emits `bar.sync` with one register operand.
def INT_BARRIERN
    : NVPTXInst<(outs), (ins Int32Regs:$src1),
                "bar.sync \t$src1;",
                [(int_nvvm_barrier_n Int32Regs:$src1)]>;

// int_nvvm_barrier: emits `bar.sync` with two register operands.
def INT_BARRIER
    : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                "bar.sync \t$src1, $src2;",
                [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// The three barrier-0 reductions below each expand to a braced PTX sequence.
// The sequence first turns the 32-bit $pred into a predicate register with
// `setp.ne.u32 ..., 0`, then issues the bar.red instruction.

// int_nvvm_barrier0_popc: bar.red.popc.u32 writes $dst directly.
def INT_BARRIER0_POPC
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t" #
                ".reg .pred \t%p1; \n\t" #
                "setp.ne.u32 \t%p1, $pred, 0; \n\t" #
                "bar.red.popc.u32 \t$dst, 0, %p1; \n\t" #
                "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_popc Int32Regs:$pred))]>;

// int_nvvm_barrier0_and: bar.red.and.pred yields a predicate, which selp.u32
// converts to 1 or 0 in $dst.
def INT_BARRIER0_AND
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t" #
                ".reg .pred \t%p1; \n\t" #
                ".reg .pred \t%p2; \n\t" #
                "setp.ne.u32 \t%p1, $pred, 0; \n\t" #
                "bar.red.and.pred \t%p2, 0, %p1; \n\t" #
                "selp.u32 \t$dst, 1, 0, %p2; \n\t" #
                "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_and Int32Regs:$pred))]>;

// int_nvvm_barrier0_or: bar.red.or.pred yields a predicate, which selp.u32
// converts to 1 or 0 in $dst.
def INT_BARRIER0_OR
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t" #
                ".reg .pred \t%p1; \n\t" #
                ".reg .pred \t%p2; \n\t" #
                "setp.ne.u32 \t%p1, $pred, 0; \n\t" #
                "bar.red.or.pred \t%p2, 0, %p1; \n\t" #
                "selp.u32 \t$dst, 1, 0, %p2; \n\t" #
                "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_or Int32Regs:$pred))]>;
97
// int_nvvm_bar_sync with an immediate operand: emits `bar.sync $i`.
def INT_BAR_SYNC
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.sync \t$i;",
                [(int_nvvm_bar_sync imm:$i)]>;
100
// int_nvvm_bar_warp_sync: emits `bar.warp.sync $i`.  The _I form takes the
// operand as an immediate, the _R form as a register.  Both are gated on
// hasPTX60 and hasSM30.
def INT_BAR_WARP_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync imm:$i)]>,
      Requires<[hasPTX60, hasSM30]>;

def INT_BAR_WARP_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
107
// int_nvvm_barrier_sync: emits `barrier.sync $i`.  The _I form takes the
// operand as an immediate, the _R form as a register.  Both are gated on
// hasPTX60 and hasSM30.
def INT_BARRIER_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync imm:$i)]>,
      Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync Int32Regs:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
114
// int_nvvm_barrier_sync_cnt: emits `barrier.sync $id, $cnt`.  The two-letter
// suffix gives the operand kind of $id and then $cnt (R = register,
// I = immediate).  All four forms are gated on hasPTX60 and hasSM30.
def INT_BARRIER_SYNC_CNT_RR
    : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_CNT_RI
    : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_CNT_IR
    : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_CNT_II
    : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
131
132
// shfl.{up,down,bfly,idx}.b32
//
// Parameters:
//   regclass - register class of both the shuffled value and the result.
//   mode     - text spliced into the mnemonic between "shfl." and ".b32".
//   IntOp    - intrinsic selected by the patterns.
//
// Variants: `reg` takes $offset and $mask in registers, `imm1` takes $offset
// as an immediate, `imm2` takes $mask as an immediate, and `imm3` takes both
// as immediates.
multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  // The last two parameters to shfl can be regs or imms.  ptxas is smart
  // enough to inline constant registers, so strictly speaking we don't need to
  // handle immediates here.  But it's easy enough, and it makes our ptx more
  // readable.
  def reg : NVPTXInst<
      (outs regclass:$dst),
      (ins regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
      "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
      [(set regclass:$dst,
            (IntOp regclass:$src, Int32Regs:$offset, Int32Regs:$mask))]>;

  def imm1 : NVPTXInst<
      (outs regclass:$dst),
      (ins regclass:$src, i32imm:$offset, Int32Regs:$mask),
      "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
      [(set regclass:$dst,
            (IntOp regclass:$src, imm:$offset, Int32Regs:$mask))]>;

  def imm2 : NVPTXInst<
      (outs regclass:$dst),
      (ins regclass:$src, Int32Regs:$offset, i32imm:$mask),
      "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
      [(set regclass:$dst,
            (IntOp regclass:$src, Int32Regs:$offset, imm:$mask))]>;

  def imm3 : NVPTXInst<
      (outs regclass:$dst),
      (ins regclass:$src, i32imm:$offset, i32imm:$mask),
      "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
      [(set regclass:$dst,
            (IntOp regclass:$src, imm:$offset, imm:$mask))]>;
}

// One instantiation per (mode, element type) pair.
defm INT_SHFL_DOWN_I32
    : SHFL<Int32Regs, "down", int_nvvm_shfl_down_i32>;
defm INT_SHFL_DOWN_F32
    : SHFL<Float32Regs, "down", int_nvvm_shfl_down_f32>;
defm INT_SHFL_UP_I32
    : SHFL<Int32Regs, "up", int_nvvm_shfl_up_i32>;
defm INT_SHFL_UP_F32
    : SHFL<Float32Regs, "up", int_nvvm_shfl_up_f32>;
defm INT_SHFL_BFLY_I32
    : SHFL<Int32Regs, "bfly", int_nvvm_shfl_bfly_i32>;
defm INT_SHFL_BFLY_F32
    : SHFL<Float32Regs, "bfly", int_nvvm_shfl_bfly_f32>;
defm INT_SHFL_IDX_I32
    : SHFL<Int32Regs, "idx", int_nvvm_shfl_idx_i32>;
defm INT_SHFL_IDX_F32
    : SHFL<Float32Regs, "idx", int_nvvm_shfl_idx_f32>;
172
// shfl.sync.{up,down,bfly,idx}.b32
//
// Parameters:
//   regclass - register class of both the shuffled value and the result.
//   mode     - text spliced into the mnemonic between "shfl.sync." and ".b32".
//   IntOp    - intrinsic selected by the patterns.
//
// The three-letter variant name gives the operand kind of $threadmask,
// $offset and $mask, in that order (r = register, i = immediate).  Note that
// $threadmask is the first intrinsic operand but the last one printed.
multiclass SHFL_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  // Threadmask and the last two parameters to shfl.sync can be regs or imms.
  // ptxas is smart enough to inline constant registers, so strictly speaking we
  // don't need to handle immediates here.  But it's easy enough, and it makes
  // our ptx more readable.
  def rrr : NVPTXInst<
      (outs regclass:$dst),
      (ins Int32Regs:$threadmask, regclass:$src,
           Int32Regs:$offset, Int32Regs:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp Int32Regs:$threadmask, regclass:$src,
                   Int32Regs:$offset, Int32Regs:$mask))]>;

  def rri : NVPTXInst<
      (outs regclass:$dst),
      (ins Int32Regs:$threadmask, regclass:$src,
           Int32Regs:$offset, i32imm:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp Int32Regs:$threadmask, regclass:$src,
                   Int32Regs:$offset, imm:$mask))]>;

  def rir : NVPTXInst<
      (outs regclass:$dst),
      (ins Int32Regs:$threadmask, regclass:$src,
           i32imm:$offset, Int32Regs:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp Int32Regs:$threadmask, regclass:$src,
                   imm:$offset, Int32Regs:$mask))]>;

  def rii : NVPTXInst<
      (outs regclass:$dst),
      (ins Int32Regs:$threadmask, regclass:$src,
           i32imm:$offset, i32imm:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp Int32Regs:$threadmask, regclass:$src,
                   imm:$offset, imm:$mask))]>;

  def irr : NVPTXInst<
      (outs regclass:$dst),
      (ins i32imm:$threadmask, regclass:$src,
           Int32Regs:$offset, Int32Regs:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp imm:$threadmask, regclass:$src,
                   Int32Regs:$offset, Int32Regs:$mask))]>;

  def iri : NVPTXInst<
      (outs regclass:$dst),
      (ins i32imm:$threadmask, regclass:$src,
           Int32Regs:$offset, i32imm:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp imm:$threadmask, regclass:$src,
                   Int32Regs:$offset, imm:$mask))]>;

  def iir : NVPTXInst<
      (outs regclass:$dst),
      (ins i32imm:$threadmask, regclass:$src,
           i32imm:$offset, Int32Regs:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp imm:$threadmask, regclass:$src,
                   imm:$offset, Int32Regs:$mask))]>;

  def iii : NVPTXInst<
      (outs regclass:$dst),
      (ins i32imm:$threadmask, regclass:$src,
           i32imm:$offset, i32imm:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp imm:$threadmask, regclass:$src,
                   imm:$offset, imm:$mask))]>;
}

// On sm_70 these don't have to be convergent, so we may eventually want to
// implement a non-convergent variant of these intrinsics.
defm INT_SHFL_SYNC_DOWN_I32
    : SHFL_SYNC<Int32Regs, "down", int_nvvm_shfl_sync_down_i32>;
defm INT_SHFL_SYNC_DOWN_F32
    : SHFL_SYNC<Float32Regs, "down", int_nvvm_shfl_sync_down_f32>;
defm INT_SHFL_SYNC_UP_I32
    : SHFL_SYNC<Int32Regs, "up", int_nvvm_shfl_sync_up_i32>;
defm INT_SHFL_SYNC_UP_F32
    : SHFL_SYNC<Float32Regs, "up", int_nvvm_shfl_sync_up_f32>;
defm INT_SHFL_SYNC_BFLY_I32
    : SHFL_SYNC<Int32Regs, "bfly", int_nvvm_shfl_sync_bfly_i32>;
defm INT_SHFL_SYNC_BFLY_F32
    : SHFL_SYNC<Float32Regs, "bfly", int_nvvm_shfl_sync_bfly_f32>;
defm INT_SHFL_SYNC_IDX_I32
    : SHFL_SYNC<Int32Regs, "idx", int_nvvm_shfl_sync_idx_i32>;
defm INT_SHFL_SYNC_IDX_F32
    : SHFL_SYNC<Float32Regs, "idx", int_nvvm_shfl_sync_idx_f32>;
245
246
// vote.{all,any,uni,ballot}
//
// Parameters:
//   regclass - register class of the result.
//   mode     - text appended to "vote." to form the mnemonic.
//   IntOp    - intrinsic selected by the pattern.
//
// The single anonymous instruction takes a predicate in Int1Regs and is gated
// on hasPTX60 and hasSM30.
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<
            (outs regclass:$dest),
            (ins Int1Regs:$pred),
            "vote." # mode # " \t$dest, $pred;",
            [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_ALL
    : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
defm VOTE_ANY
    : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
defm VOTE_UNI
    : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
defm VOTE_BALLOT
    : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
259
// vote.sync.{all,any,uni,ballot}
//
// Parameters:
//   regclass - register class of the result.
//   mode     - text appended to "vote.sync." to form the mnemonic.
//   IntOp    - intrinsic selected by the patterns.
//
// Variant `i` takes $mask as an immediate and variant `r` takes it in a
// register.  $mask is the first intrinsic operand but is printed last.  Both
// variants are gated on hasPTX60 and hasSM30.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<
              (outs regclass:$dest),
              (ins i32imm:$mask, Int1Regs:$pred),
              "vote.sync." # mode # " \t$dest, $pred, $mask;",
              [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;

  def r : NVPTXInst<
              (outs regclass:$dest),
              (ins Int32Regs:$mask, Int1Regs:$pred),
              "vote.sync." # mode # " \t$dest, $pred, $mask;",
              [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_SYNC_ALL
    : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY
    : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI
    : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT
    : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
276
// match.any.sync.{b32,b64}
//
// Parameters:
//   regclass - register class of the result and of a register $value.
//   ptxtype  - text appended to "match.any.sync." to form the mnemonic.
//   IntOp    - intrinsic selected by the patterns.
//   ImmOp    - operand type used when $value is an immediate.
//
// The two-letter variant name gives the operand kind of $value and then
// $mask (r = register, i = immediate); e.g. `ir` is an immediate $value with
// a register $mask.  All variants are gated on hasPTX60 and hasSM70.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype,
                          Intrinsic IntOp, Operand ImmOp> {
  def ii : NVPTXInst<
               (outs regclass:$dest),
               (ins i32imm:$mask, ImmOp:$value),
               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
               [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;

  def ir : NVPTXInst<
               (outs regclass:$dest),
               (ins Int32Regs:$mask, ImmOp:$value),
               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
               [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;

  def ri : NVPTXInst<
               (outs regclass:$dest),
               (ins i32imm:$mask, regclass:$value),
               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
               [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;

  def rr : NVPTXInst<
               (outs regclass:$dest),
               (ins Int32Regs:$mask, regclass:$value),
               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
               [(set regclass:$dest,
                     (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ANY_SYNC_32
    : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32, i32imm>;
defm MATCH_ANY_SYNC_64
    : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64, i64imm>;
301
// match.all.sync.{b32,b64}, producing two results printed as `$dest|$pred`.
//
// Parameters:
//   regclass - register class of $dest and of a register $value.
//   ptxtype  - text appended to "match.all.sync." to form the mnemonic.
//   IntOp    - intrinsic selected by the patterns.
//   ImmOp    - operand type used when $value is an immediate.
//
// The second result, $pred, lives in Int1Regs.  The two-letter variant name
// gives the operand kind of $value and then $mask (r = register,
// i = immediate).  All variants are gated on hasPTX60 and hasSM70.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype,
                           Intrinsic IntOp, Operand ImmOp> {
  def ii : NVPTXInst<
               (outs regclass:$dest, Int1Regs:$pred),
               (ins i32imm:$mask, ImmOp:$value),
               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
               [(set regclass:$dest, Int1Regs:$pred,
                     (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;

  def ir : NVPTXInst<
               (outs regclass:$dest, Int1Regs:$pred),
               (ins Int32Regs:$mask, ImmOp:$value),
               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
               [(set regclass:$dest, Int1Regs:$pred,
                     (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;

  def ri : NVPTXInst<
               (outs regclass:$dest, Int1Regs:$pred),
               (ins i32imm:$mask, regclass:$value),
               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
               [(set regclass:$dest, Int1Regs:$pred,
                     (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;

  def rr : NVPTXInst<
               (outs regclass:$dest, Int1Regs:$pred),
               (ins Int32Regs:$mask, regclass:$value),
               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
               [(set regclass:$dest, Int1Regs:$pred,
                     (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ALLP_SYNC_32
    : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, i32imm>;
defm MATCH_ALLP_SYNC_64
    : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p, i64imm>;
329
330} // isConvergent = 1
331
332//-----------------------------------
333// Explicit Memory Fence Functions
334//-----------------------------------
// An operand-less instruction that prints StrOp verbatim and is selected for
// the operand-less intrinsic IntOP.
class MEMBAR<string StrOp, Intrinsic IntOP>
    : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
342
343
344//-----------------------------------
345// Math Functions
346//-----------------------------------
347
// Map min(1.0, max(0.0, x)) to sat(x).
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x): when x is NaN,
// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.
//
// The four f32 patterns cover both operand orders of the outer fmin and of
// the inner fmax; each selects CVT_f32_f32 with CvtSAT.

// fmin(1.0, fmax(0.0, a))
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(1.0, fmax(a, 0.0))
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(fmax(0.0, a), 1.0)
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, Float32Regs:$a),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(fmax(a, 0.0), 1.0)
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f Float32Regs:$a, immFloat0),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
366
// f64 counterparts: fmin(1.0, fmax(0.0, a)) in every operand order selects
// CVT_f64_f64 with CvtSAT.

// fmin(1.0, fmax(0.0, a))
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(1.0, fmax(a, 0.0))
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(fmax(0.0, a), 1.0)
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, Float64Regs:$a),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(fmax(a, 0.0), 1.0)
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d Float64Regs:$a, immDouble0),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
379
380
// One-operand instruction selected for the intrinsic IntOP.
//
// We need a full string for OpcStr here because we need to deal with case like
// INT_PTX_RECIP.  OpcStr is therefore the complete assembly text and refers
// to the operands as $dst and $src0.
class F_MATH_1<string OpcStr,
               NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass,
               Intrinsic IntOP>
    : NVPTXInst<(outs target_regclass:$dst),
                (ins src_regclass:$src0),
                OpcStr,
                [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
388
// Two-operand instruction selected for the intrinsic IntOP.
//
// We need a full string for OpcStr here because we need to deal with the case
// like INT_PTX_NATIVE_POWR_F.  OpcStr is therefore the complete assembly text
// and refers to the operands as $dst, $src0 and $src1.
class F_MATH_2<string OpcStr,
               NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass,
               NVPTXRegClass s1_regclass,
               Intrinsic IntOP>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
397
// Three-operand instruction selected for the intrinsic IntOP.  OpcStr is the
// complete assembly text and refers to the operands as $dst, $src0, $src1 and
// $src2.
class F_MATH_3<string OpcStr,
               NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass,
               NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass,
               Intrinsic IntOP>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1,
                             s2_regclass:$src2))]>;
406
407//
408// MISC
409//
410
// int_nvvm_prmt: prmt.b32 with three 32-bit register sources.
def INT_NVVM_PRMT
    : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs,
               int_nvvm_prmt>;
413
414//
415// Min Max
416//
417
// f32 min, plain and .ftz.
def INT_NVVM_FMIN_F
    : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
    : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

// f32 max, plain and .ftz.
def INT_NVVM_FMAX_F
    : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
    : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

// f64 min and max.
def INT_NVVM_FMIN_D
    : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
    : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;
432
433
434//
435// Multiplication
436//
437
// mul.hi for signed/unsigned 32-bit operands.
def INT_NVVM_MULHI_I
    : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
    : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

// mul.hi for signed/unsigned 64-bit operands.
def INT_NVVM_MULHI_LL
    : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
    : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 multiply with an explicit rounding modifier (rn, rz, rm, rp), each in
// an .ftz and a plain form.
def INT_NVVM_MUL_RN_FTZ_F
    : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
    : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
    : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
    : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
    : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
    : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
    : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
    : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 multiply with an explicit rounding modifier.
def INT_NVVM_MUL_RN_D
    : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
    : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
    : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
    : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

// mul24.lo for signed/unsigned 32-bit operands.
def INT_NVVM_MUL24_I
    : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
    : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
478
479//
480// Div
481//
482
// f32 div.approx, .ftz and plain.
def INT_NVVM_DIV_APPROX_FTZ_F
    : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
    : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// f32 divide with an explicit rounding modifier (rn, rz, rm, rp), each in an
// .ftz and a plain form.
def INT_NVVM_DIV_RN_FTZ_F
    : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
    : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
    : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
    : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
    : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
    : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
    : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
    : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

// f64 divide with an explicit rounding modifier.
def INT_NVVM_DIV_RN_D
    : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
    : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
    : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
    : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
514
515//
516// Sad
517//
518
// sad.s32 / sad.u32 with three 32-bit register sources.
def INT_NVVM_SAD_I
    : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs,
               int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
    : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs,
               int_nvvm_sad_ui>;
523
524//
525// Floor  Ceil
526//
527
// floor: same-type CVT using the CvtRMI / CvtRMI_FTZ modes.
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;

def : Pat<(int_nvvm_floor_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;

def : Pat<(int_nvvm_floor_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

// ceil: same-type CVT using the CvtRPI / CvtRPI_FTZ modes.
def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;

def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;

def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
541
542//
543// Abs
544//
545
// f32 abs, .ftz and plain.
def INT_NVVM_FABS_FTZ_F
    : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
    : F_MATH_1<"abs.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_f>;

// f64 abs.
def INT_NVVM_FABS_D
    : F_MATH_1<"abs.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_fabs_d>;
553
554//
555// Round
556//
557
// round: same-type CVT using the CvtRNI / CvtRNI_FTZ modes.
def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;

def : Pat<(int_nvvm_round_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;

def : Pat<(int_nvvm_round_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
564
565//
566// Trunc
567//
568
// trunc: same-type CVT using the CvtRZI / CvtRZI_FTZ modes.
def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;

def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;

def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
575
576//
577// Saturate
578//
579
// saturate: same-type CVT using the CvtSAT / CvtSAT_FTZ modes.
def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;

def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
586
587//
588// Exp2  Log2
589//
590
// ex2.approx for f32 (.ftz and plain) and f64.
// NOTE(review): the PTX ISA appears to document ex2.approx and lg2.approx for
// .f32 only -- confirm that the .f64 mnemonics below are accepted by ptxas.
def INT_NVVM_EX2_APPROX_FTZ_F
    : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
    : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
    : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

// lg2.approx for f32 (.ftz and plain) and f64.
def INT_NVVM_LG2_APPROX_FTZ_F
    : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
    : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
    : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
604
605//
606// Sin  Cos
607//
608
// f32 sin.approx, .ftz and plain.
def INT_NVVM_SIN_APPROX_FTZ_F
    : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
    : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

// f32 cos.approx, .ftz and plain.
def INT_NVVM_COS_APPROX_FTZ_F
    : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
    : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
618
619//
620// Fma
621//
622
// f32 fma with an explicit rounding modifier (rn, rz, rm, rp), each in an
// .ftz and a plain form.
def INT_NVVM_FMA_RN_FTZ_F
    : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F
    : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F
    : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F
    : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F
    : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F
    : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F
    : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F
    : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rp_f>;

// f64 fma with an explicit rounding modifier.
def INT_NVVM_FMA_RN_D
    : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D
    : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D
    : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D
    : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rp_d>;
652
653//
654// Rcp
655//
656
// f32 rcp with an explicit rounding modifier (rn, rz, rm, rp), each in an
// .ftz and a plain form.
def INT_NVVM_RCP_RN_FTZ_F
    : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F
    : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
    : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
    : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
    : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
    : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
    : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
    : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

// f64 rcp with an explicit rounding modifier.
def INT_NVVM_RCP_RN_D
    : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
    : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
    : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
    : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

// f64 rcp.approx.ftz.
def INT_NVVM_RCP_APPROX_FTZ_D
    : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
685
686//
687// Sqrt
688//
689
// f32 sqrt records: four rounding suffixes plus .approx, each with and
// without .ftz.
def INT_NVVM_SQRT_RN_FTZ_F
  : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
  : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
  : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
  : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
  : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
  : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
  : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
  : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F
  : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
  : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

// f64 sqrt records: one per rounding suffix.
def INT_NVVM_SQRT_RN_D
  : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
  : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
  : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
  : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;
719
// int_nvvm_sqrt_f carries no rounding/ftz suffix of its own. The four patterns
// below differ only in their Requires<> lists:
//   [doF32FTZ, do_SQRTF32_RN] -> INT_NVVM_SQRT_RN_FTZ_F
//   [do_SQRTF32_RN]           -> INT_NVVM_SQRT_RN_F
//   [doF32FTZ]                -> INT_NVVM_SQRT_APPROX_FTZ_F
//   (no predicate)            -> INT_NVVM_SQRT_APPROX_F
// NOTE(review): the predicate lists overlap; how the selector ranks them is
// not visible in this excerpt, so the original order is preserved.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$src),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$src)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$src),
          (INT_NVVM_SQRT_RN_F Float32Regs:$src)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$src),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$src)>,
      Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$src),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$src)>;
729
730//
731// Rsqrt
732//
733
// rsqrt records: only .approx forms are defined here (f32 with and without
// .ftz, and f64).
def INT_NVVM_RSQRT_APPROX_FTZ_F
  : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F
  : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
  : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
741
742//
743// Add
744//
745
// f32 add records: one per rounding suffix, each with and without .ftz.
def INT_NVVM_ADD_RN_FTZ_F
  : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
  : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F
  : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
  : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F
  : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
  : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F
  : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
  : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// f64 add records: one per rounding suffix.
def INT_NVVM_ADD_RN_D
  : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
  : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
  : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D
  : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
771
772//
773// Convert
774//
775
// int_nvvm_d2f_*: each intrinsic is rewritten to CVT_f32_f64 on the same
// Float64Regs operand, with the conversion-mode operand that carries the
// intrinsic's own rounding / ftz suffix.
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$src),
          (CVT_f32_f64 Float64Regs:$src, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn Float64Regs:$src),
          (CVT_f32_f64 Float64Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$src),
          (CVT_f32_f64 Float64Regs:$src, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz Float64Regs:$src),
          (CVT_f32_f64 Float64Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$src),
          (CVT_f32_f64 Float64Regs:$src, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm Float64Regs:$src),
          (CVT_f32_f64 Float64Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$src),
          (CVT_f32_f64 Float64Regs:$src, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp Float64Regs:$src),
          (CVT_f32_f64 Float64Regs:$src, CvtRP)>;
792
// int_nvvm_d2i_*: rewritten to CVT_s32_f64 with the CvtR?I mode operand that
// matches the intrinsic's rounding suffix.
def : Pat<(int_nvvm_d2i_rn Float64Regs:$src),
          (CVT_s32_f64 Float64Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz Float64Regs:$src),
          (CVT_s32_f64 Float64Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm Float64Regs:$src),
          (CVT_s32_f64 Float64Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp Float64Regs:$src),
          (CVT_s32_f64 Float64Regs:$src, CvtRPI)>;

// int_nvvm_d2ui_*: same scheme, rewritten to CVT_u32_f64.
def : Pat<(int_nvvm_d2ui_rn Float64Regs:$src),
          (CVT_u32_f64 Float64Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$src),
          (CVT_u32_f64 Float64Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$src),
          (CVT_u32_f64 Float64Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$src),
          (CVT_u32_f64 Float64Regs:$src, CvtRPI)>;
810
// int_nvvm_i2d_*: rewritten to CVT_f64_s32 on an Int32Regs operand, with the
// mode operand matching the intrinsic's rounding suffix.
def : Pat<(int_nvvm_i2d_rn Int32Regs:$src),
          (CVT_f64_s32 Int32Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz Int32Regs:$src),
          (CVT_f64_s32 Int32Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm Int32Regs:$src),
          (CVT_f64_s32 Int32Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp Int32Regs:$src),
          (CVT_f64_s32 Int32Regs:$src, CvtRP)>;

// int_nvvm_ui2d_*: same scheme, rewritten to CVT_f64_u32.
def : Pat<(int_nvvm_ui2d_rn Int32Regs:$src),
          (CVT_f64_u32 Int32Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$src),
          (CVT_f64_u32 Int32Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$src),
          (CVT_f64_u32 Int32Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$src),
          (CVT_f64_u32 Int32Regs:$src, CvtRP)>;
828
// int_nvvm_f2i_*: rewritten to CVT_s32_f32; the mode operand mirrors the
// intrinsic's rounding suffix and carries _FTZ when the intrinsic does.
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$src),
          (CVT_s32_f32 Float32Regs:$src, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$src),
          (CVT_s32_f32 Float32Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$src),
          (CVT_s32_f32 Float32Regs:$src, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$src),
          (CVT_s32_f32 Float32Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$src),
          (CVT_s32_f32 Float32Regs:$src, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$src),
          (CVT_s32_f32 Float32Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$src),
          (CVT_s32_f32 Float32Regs:$src, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$src),
          (CVT_s32_f32 Float32Regs:$src, CvtRPI)>;

// int_nvvm_f2ui_*: same scheme, rewritten to CVT_u32_f32.
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$src),
          (CVT_u32_f32 Float32Regs:$src, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$src),
          (CVT_u32_f32 Float32Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$src),
          (CVT_u32_f32 Float32Regs:$src, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$src),
          (CVT_u32_f32 Float32Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$src),
          (CVT_u32_f32 Float32Regs:$src, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$src),
          (CVT_u32_f32 Float32Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$src),
          (CVT_u32_f32 Float32Regs:$src, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$src),
          (CVT_u32_f32 Float32Regs:$src, CvtRPI)>;
862
// int_nvvm_i2f_*: rewritten to CVT_f32_s32 on an Int32Regs operand, with the
// mode operand matching the intrinsic's rounding suffix.
def : Pat<(int_nvvm_i2f_rn Int32Regs:$src),
          (CVT_f32_s32 Int32Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$src),
          (CVT_f32_s32 Int32Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$src),
          (CVT_f32_s32 Int32Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$src),
          (CVT_f32_s32 Int32Regs:$src, CvtRP)>;

// int_nvvm_ui2f_*: same scheme, rewritten to CVT_f32_u32.
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$src),
          (CVT_f32_u32 Int32Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$src),
          (CVT_f32_u32 Int32Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$src),
          (CVT_f32_u32 Int32Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$src),
          (CVT_f32_u32 Int32Regs:$src, CvtRP)>;
880
// int_nvvm_lohi_i2d: a single mov.b64 whose source is the brace-grouped pair
// of the two Int32Regs inputs and whose destination is a Float64Regs register.
def INT_NVVM_LOHI_I2D
  : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
             Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

// int_nvvm_d2i_lo / int_nvvm_d2i_hi: mov.b64 of the Float64Regs input into a
// two-element group. $dst takes the first slot for _lo and the second slot for
// _hi; the other slot goes to a scratch .b32 register, %temp, declared inside
// the emitted block.
def INT_NVVM_D2I_LO
  : F_MATH_1<"{{\n\t" #
             ".reg .b32 %temp; \n\t" #
             "mov.b64 \t{$dst, %temp}, $src0;\n\t" #
             "}}",
             Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
def INT_NVVM_D2I_HI
  : F_MATH_1<"{{\n\t" #
             ".reg .b32 %temp; \n\t" #
             "mov.b64 \t{%temp, $dst}, $src0;\n\t" #
             "}}",
             Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
896
// int_nvvm_f2ll_*: rewritten to CVT_s64_f32; the mode operand mirrors the
// intrinsic's rounding suffix and carries _FTZ when the intrinsic does.
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$src),
          (CVT_s64_f32 Float32Regs:$src, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$src),
          (CVT_s64_f32 Float32Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$src),
          (CVT_s64_f32 Float32Regs:$src, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$src),
          (CVT_s64_f32 Float32Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$src),
          (CVT_s64_f32 Float32Regs:$src, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$src),
          (CVT_s64_f32 Float32Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$src),
          (CVT_s64_f32 Float32Regs:$src, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$src),
          (CVT_s64_f32 Float32Regs:$src, CvtRPI)>;

// int_nvvm_f2ull_*: same scheme, rewritten to CVT_u64_f32.
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$src),
          (CVT_u64_f32 Float32Regs:$src, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$src),
          (CVT_u64_f32 Float32Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$src),
          (CVT_u64_f32 Float32Regs:$src, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$src),
          (CVT_u64_f32 Float32Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$src),
          (CVT_u64_f32 Float32Regs:$src, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$src),
          (CVT_u64_f32 Float32Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$src),
          (CVT_u64_f32 Float32Regs:$src, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$src),
          (CVT_u64_f32 Float32Regs:$src, CvtRPI)>;
930
// int_nvvm_d2ll_*: rewritten to CVT_s64_f64 with the CvtR?I mode operand that
// matches the intrinsic's rounding suffix.
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$src),
          (CVT_s64_f64 Float64Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$src),
          (CVT_s64_f64 Float64Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$src),
          (CVT_s64_f64 Float64Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$src),
          (CVT_s64_f64 Float64Regs:$src, CvtRPI)>;

// int_nvvm_d2ull_*: same scheme, rewritten to CVT_u64_f64.
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$src),
          (CVT_u64_f64 Float64Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$src),
          (CVT_u64_f64 Float64Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$src),
          (CVT_u64_f64 Float64Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$src),
          (CVT_u64_f64 Float64Regs:$src, CvtRPI)>;
948
// int_nvvm_ll2f_*: rewritten to CVT_f32_s64 on an Int64Regs operand, with the
// mode operand matching the intrinsic's rounding suffix.
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$src),
          (CVT_f32_s64 Int64Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$src),
          (CVT_f32_s64 Int64Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$src),
          (CVT_f32_s64 Int64Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$src),
          (CVT_f32_s64 Int64Regs:$src, CvtRP)>;

// int_nvvm_ull2f_*: same scheme, rewritten to CVT_f32_u64.
def : Pat<(int_nvvm_ull2f_rn Int64Regs:$src),
          (CVT_f32_u64 Int64Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$src),
          (CVT_f32_u64 Int64Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$src),
          (CVT_f32_u64 Int64Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$src),
          (CVT_f32_u64 Int64Regs:$src, CvtRP)>;
966
// int_nvvm_ll2d_*: rewritten to CVT_f64_s64 on an Int64Regs operand, with the
// mode operand matching the intrinsic's rounding suffix.
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$src),
          (CVT_f64_s64 Int64Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$src),
          (CVT_f64_s64 Int64Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$src),
          (CVT_f64_s64 Int64Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$src),
          (CVT_f64_s64 Int64Regs:$src, CvtRP)>;

// int_nvvm_ull2d_*: same scheme, rewritten to CVT_f64_u64.
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$src),
          (CVT_f64_u64 Int64Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$src),
          (CVT_f64_u64 Int64Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$src),
          (CVT_f64_u64 Int64Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$src),
          (CVT_f64_u64 Int64Regs:$src, CvtRP)>;
984
985
// int_nvvm_f2h_rn[_ftz]: the result pattern nests two instructions, a
// CVT_f16_f32 of the input whose output is fed to BITCONVERT_16_F2I.
def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$src),
          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$src, CvtRN_FTZ))>;
def : Pat<(int_nvvm_f2h_rn Float32Regs:$src),
          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$src, CvtRN))>;
990
991//
992// Bitcast
993//
994
// 32-bit bitcast intrinsics: both directions emit the same "mov.b32" string;
// only the register classes passed to F_MATH_1 are swapped.
def INT_NVVM_BITCAST_F2I
  : F_MATH_1<"mov.b32 \t$dst, $src0;",
             Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F
  : F_MATH_1<"mov.b32 \t$dst, $src0;",
             Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;

// 64-bit bitcast intrinsics: same scheme with "mov.b64" and the 64-bit
// register classes.
def INT_NVVM_BITCAST_LL2D
  : F_MATH_1<"mov.b64 \t$dst, $src0;",
             Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL
  : F_MATH_1<"mov.b64 \t$dst, $src0;",
             Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;
1004
1005//
1006// FNS
1007//
1008
// Common shape of every fns.b32 instruction record.
//   ins      - input operand dag; the asm string refers to $mask, $base and
//              $offset, so the dag has to bind those three names.
//   Operands - dag whose result is assigned to the Int32Regs output $dst in
//              the selection pattern.
// Every record derived from this class is gated on hasPTX60 and hasSM30.
class INT_FNS_MBO<dag ins, dag Operands>
  : NVPTXInst<(outs Int32Regs:$dst),
              ins,
              "fns.b32 \t$dst, $mask, $base, $offset;",
              [(set Int32Regs:$dst, Operands)]>,
    Requires<[hasPTX60, hasSM30]>;
1014
// All eight register/immediate combinations of (mask, base, offset) for
// int_nvvm_fns. The three-letter suffix names the operand kinds in that
// order: r = Int32Regs register, i = i32imm operand matched by imm.
def INT_FNS_rrr : INT_FNS_MBO<
  (ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
  (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_rri : INT_FNS_MBO<
  (ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
  (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_rir : INT_FNS_MBO<
  (ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
  (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_rii : INT_FNS_MBO<
  (ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
  (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr : INT_FNS_MBO<
  (ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
  (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_iri : INT_FNS_MBO<
  (ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
  (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_iir : INT_FNS_MBO<
  (ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
  (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii : INT_FNS_MBO<
  (ins i32imm:$mask, i32imm:$base, i32imm:$offset),
  (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
1031
1032//-----------------------------------
1033// Atomic Functions
1034//-----------------------------------
1035
// PatFrag wrappers that attach one of the AS_match address-space predicates
// (global, shared, generic) to an arbitrary fragment. AS_match's code
// snippets call ChkMemSDNodeAddressSpace on the matched node.
class ATOMIC_GLOBAL_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.global>;
class ATOMIC_SHARED_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.shared>;
class ATOMIC_GENERIC_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.generic>;
1042
// Two-operand atomic (address + value) for a single pointer register class.
// Emits two instruction records that share the asm template
//   atom<SpaceStr><OpcStr><TypeStr> \t$dst, [$addr], $b;
//   reg - $b is a register of class regclass
//   imm - $b is an IMMType operand, matched in the pattern through IMM
// Both records are gated on the predicate list Pred.
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          list<Predicate> Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b),
                "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;",
                [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
      Requires<Pred>;
  def imm
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b),
                "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;",
                [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
      Requires<Pred>;
}
// Instantiates F_ATOMIC_2_imp twice, once with Int32Regs and once with
// Int64Regs as the pointer register class, under the p32 / p64 name prefixes.
// Pred defaults to the empty list.
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      SDNode IMM, list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, IMM, Pred>;
  defm p64
    : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, IMM, Pred>;
}
1063
// Two-operand atomic that negates its value operand first. The emitted asm
// is a braced block that
//   1. declares a scratch register "temp" of type .s<TypeStr>,
//   2. writes neg.s<TypeStr> of $b into temp,
//   3. issues atom<SpaceStr><OpcStr>.u<TypeStr> on [$addr] with temp.
// TypeStr is therefore a bare width such as "32", not a dotted type suffix.
// Only a register form is defined; IMMType is accepted but not used here.
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, Operand IMMType,
                              list<Predicate> Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b),
                "{{ \n\t" #
                ".reg \t.s" # TypeStr # " temp; \n\t" #
                "neg.s" # TypeStr # " \ttemp, $b; \n\t" #
                "atom" # SpaceStr # OpcStr # ".u" # TypeStr #
                " \t$dst, [$addr], temp; \n\t" #
                "}}",
                [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
      Requires<Pred>;
}
// Instantiates F_ATOMIC_2_NEG_imp for both pointer register classes
// (Int32Regs as p32, Int64Regs as p64). Pred defaults to the empty list.
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
                          string TypeStr, string OpcStr, PatFrag IntOp,
                          Operand IMMType, list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                         IntOp, IMMType, Pred>;
  defm p64
    : F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                         IntOp, IMMType, Pred>;
}
1086
// Three-operand atomic (address + two values) for a single pointer register
// class. All four records share the asm template
//   atom<SpaceStr><OpcStr><TypeStr> \t$dst, [$addr], $b, $c;
// and differ in which value operands are immediates:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Immediate operands use IMMType in the ins list and are matched with imm.
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType,
                          list<Predicate> Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b, regclass:$c),
                "atom" # SpaceStr # OpcStr # TypeStr #
                " \t$dst, [$addr], $b, $c;",
                [(set regclass:$dst,
                      (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
      Requires<Pred>;

  def imm1
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b, regclass:$c),
                "atom" # SpaceStr # OpcStr # TypeStr #
                " \t$dst, [$addr], $b, $c;",
                [(set regclass:$dst,
                      (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
      Requires<Pred>;

  def imm2
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b, IMMType:$c),
                "atom" # SpaceStr # OpcStr # TypeStr #
                " \t$dst, [$addr], $b, $c;",
                [(set regclass:$dst,
                      (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
      Requires<Pred>;

  def imm3
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
                "atom" # SpaceStr # OpcStr # TypeStr #
                " \t$dst, [$addr], $b, $c;",
                [(set regclass:$dst,
                      (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
      Requires<Pred>;
}
// Instantiates F_ATOMIC_3_imp for both pointer register classes (Int32Regs as
// p32, Int64Regs as p64). Pred defaults to the empty list.
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, Pred>;
  defm p64
    : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, Pred>;
}
1122
1123// atom_add
1124
// Address-space-qualified fragments for atomic add. Suffixes: _g wraps the
// node in ATOMIC_GLOBAL_CHK, _s in ATOMIC_SHARED_CHK, _gen in
// ATOMIC_GENERIC_CHK.

// 32-bit integer add.
def atomic_load_add_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_add_32 node:$ptr, node:$val)>;
def atomic_load_add_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_add_32 node:$ptr, node:$val)>;
def atomic_load_add_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_add_32 node:$ptr, node:$val)>;

// 64-bit integer add.
def atomic_load_add_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_add_64 node:$ptr, node:$val)>;
def atomic_load_add_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_add_64 node:$ptr, node:$val)>;
def atomic_load_add_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_add_64 node:$ptr, node:$val)>;

// Floating-point add: these wrap atomic_load_fadd and have no width in their
// names; the same fragments are reused for the f32 and f64 instructions.
def atomic_load_add_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_fadd node:$ptr, node:$val)>;
def atomic_load_add_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_fadd node:$ptr, node:$val)>;
def atomic_load_add_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_fadd node:$ptr, node:$val)>;
1143
// atom.add instruction families. Arguments to F_ATOMIC_2 are: value register
// class, address-space qualifier (empty for the generic form), PTX type
// suffix, opcode suffix, matching fragment, immediate operand type, and the
// immediate node used in the pattern.
//
// NOTE(review): each *_GEN_*_USE_G family uses the same generic fragment and
// the same (empty) predicate list as its *_GEN_* sibling but emits ".global".
// Nothing in this excerpt says which of the two is chosen; confirm before
// touching either.

// u32
defm INT_PTX_ATOM_ADD_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
               atomic_load_add_32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm>;

// u64
defm INT_PTX_ATOM_ADD_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
               atomic_load_add_64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm>;

// f32 (immediates are matched with fpimm)
defm INT_PTX_ATOM_ADD_G_F32
  : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
               atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32
  : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
               atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32
  : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
               atomic_load_add_gen, f32imm, fpimm>;

// f64 (gated on hasAtomAddF64)
defm INT_PTX_ATOM_ADD_G_F64
  : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
               atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64
  : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
               atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64
  : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
               atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
1175
1176// atom_sub
1177
// Address-space-qualified fragments for atomic subtract (_g global,
// _s shared, _gen generic), in 32- and 64-bit widths.
def atomic_load_sub_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_32 node:$ptr, node:$val)>;
def atomic_load_sub_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_32 node:$ptr, node:$val)>;
def atomic_load_sub_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_sub_32 node:$ptr, node:$val)>;
def atomic_load_sub_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_64 node:$ptr, node:$val)>;
def atomic_load_sub_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_64 node:$ptr, node:$val)>;
def atomic_load_sub_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_sub_64 node:$ptr, node:$val)>;
1190
// Atomic subtract families, built with F_ATOMIC_2_NEG and the ".add" opcode
// suffix: the multiclass negates the value operand before issuing the atom.
// TypeStr is the bare width ("32" / "64") because the multiclass adds the
// ".s" / ".u" prefixes itself.
defm INT_PTX_ATOM_SUB_G_32
  : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_32_g, i32imm>;
defm INT_PTX_ATOM_SUB_G_64
  : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_64_g, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_32
  : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
                   atomic_load_sub_32_gen, i32imm>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G
  : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_32_gen, i32imm>;
defm INT_PTX_ATOM_SUB_S_32
  : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
                   atomic_load_sub_32_s, i32imm>;
defm INT_PTX_ATOM_SUB_S_64
  : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
                   atomic_load_sub_64_s, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_64
  : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
                   atomic_load_sub_64_gen, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G
  : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_64_gen, i64imm>;
1207
1208// atom_swap
1209
// Address-space-qualified fragments for atomic swap (_g global, _s shared,
// _gen generic), in 32- and 64-bit widths.
def atomic_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_32 node:$ptr, node:$val)>;
def atomic_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_32 node:$ptr, node:$val)>;
def atomic_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_swap_32 node:$ptr, node:$val)>;
def atomic_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_64 node:$ptr, node:$val)>;
def atomic_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_64 node:$ptr, node:$val)>;
def atomic_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_swap_64 node:$ptr, node:$val)>;
1222
// atom.exch instruction families; swap uses the untyped .b32 / .b64 suffixes.
// The *_USE_G families pair the generic fragment with the ".global" qualifier.
defm INT_PTX_ATOM_SWAP_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
               atomic_swap_32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
               atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
               atomic_swap_64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
               atomic_swap_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_64_gen, i64imm, imm>;
1239
1240// atom_max
1241
// Address-space-qualified fragments for atomic max (_g global, _s shared,
// _gen generic).

// Wrappers around atomic_load_max_{32,64}.
def atomic_load_max_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_32 node:$ptr, node:$val)>;
def atomic_load_max_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_32 node:$ptr, node:$val)>;
def atomic_load_max_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_max_32 node:$ptr, node:$val)>;
def atomic_load_max_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_64 node:$ptr, node:$val)>;
def atomic_load_max_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_64 node:$ptr, node:$val)>;
def atomic_load_max_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_max_64 node:$ptr, node:$val)>;

// Wrappers around atomic_load_umax_{32,64}.
def atomic_load_umax_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_32 node:$ptr, node:$val)>;
def atomic_load_umax_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_32 node:$ptr, node:$val)>;
def atomic_load_umax_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_umax_32 node:$ptr, node:$val)>;
def atomic_load_umax_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_64 node:$ptr, node:$val)>;
def atomic_load_umax_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_64 node:$ptr, node:$val)>;
def atomic_load_umax_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_umax_64 node:$ptr, node:$val)>;
1266
// atom.max instruction families. The max fragments are emitted with the
// .s32 / .s64 type suffixes and the umax fragments with .u32 / .u64. The
// *_USE_G families pair the generic fragment with the ".global" qualifier.

// atomic_load_max_* -> .s32 / .s64
defm INT_PTX_ATOM_LOAD_MAX_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".max",
               atomic_load_max_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
               atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
               atomic_load_max_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".max",
               atomic_load_max_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
               atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
               atomic_load_max_64_gen, i64imm, imm>;

// atomic_load_umax_* -> .u32 / .u64
defm INT_PTX_ATOM_LOAD_UMAX_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".max",
               atomic_load_umax_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
               atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
               atomic_load_umax_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".max",
               atomic_load_umax_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
               atomic_load_umax_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
               atomic_load_umax_64_gen, i64imm, imm>;
1299
// atom_min
//
// Address-space-checked fragments wrapping the signed (atomic_load_min_*) and
// unsigned (atomic_load_umin_*) minimum nodes, for 32- and 64-bit widths.

def atomic_load_min_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umin_64 node:$a, node:$b)>;
1326
// Signed 32-bit atom.min.  The *_GEN_*_USE_G variant matches the generic
// address-space fragment but is instantiated with the ".global" space string.
defm INT_PTX_ATOM_LOAD_MIN_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".min",
               atomic_load_min_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
               atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_32_gen, i32imm, imm>;

// Signed 64-bit atom.min.
defm INT_PTX_ATOM_LOAD_MIN_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
               atomic_load_min_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".min",
               atomic_load_min_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
               atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
               atomic_load_min_64_gen, i64imm, imm>;

// Unsigned 32-bit atom.min.
defm INT_PTX_ATOM_LOAD_UMIN_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".min",
               atomic_load_umin_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
               atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_32_gen, i32imm, imm>;

// Unsigned 64-bit atom.min.
defm INT_PTX_ATOM_LOAD_UMIN_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
               atomic_load_umin_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".min",
               atomic_load_umin_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
               atomic_load_umin_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
               atomic_load_umin_64_gen, i64imm, imm>;
1359
// atom_inc  atom_dec
//
// Address-space-checked fragments over the 32-bit NVVM increment and
// decrement intrinsics.

def atomic_load_inc_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1374
// atom.inc on .u32.
defm INT_PTX_ATOM_INC_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_g, i32imm, imm>;
defm INT_PTX_ATOM_INC_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
               atomic_load_inc_32_s, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm>;

// atom.dec on .u32.
defm INT_PTX_ATOM_DEC_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_g, i32imm, imm>;
defm INT_PTX_ATOM_DEC_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
               atomic_load_dec_32_s, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm>;
1391
// atom_and
//
// Address-space-checked fragments over the 32- and 64-bit atomic AND nodes.

def atomic_load_and_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_and_64 node:$a, node:$b)>;
1406
// atom.and on .b32.
defm INT_PTX_ATOM_AND_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_g, i32imm, imm>;
defm INT_PTX_ATOM_AND_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
               atomic_load_and_32_s, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm>;

// atom.and on .b64.
defm INT_PTX_ATOM_AND_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_64_g, i64imm, imm>;
defm INT_PTX_ATOM_AND_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
               atomic_load_and_64_s, i64imm, imm>;
defm INT_PTX_ATOM_AND_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
               atomic_load_and_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_64_gen, i64imm, imm>;
1423
// atom_or
//
// Address-space-checked fragments over the 32- and 64-bit atomic OR nodes.

def atomic_load_or_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_or_64 node:$a, node:$b)>;
1438
// atom.or on .b32.  Definition order (G, GEN, GEN_USE_G, S) is kept as-is.
defm INT_PTX_ATOM_OR_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_32_g, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
               atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
               atomic_load_or_32_s, i32imm, imm>;

// atom.or on .b64.
defm INT_PTX_ATOM_OR_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_64_g, i64imm, imm>;
defm INT_PTX_ATOM_OR_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
               atomic_load_or_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_OR_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
               atomic_load_or_64_s, i64imm, imm>;
1455
// atom_xor
//
// Address-space-checked fragments over the 32- and 64-bit atomic XOR nodes.

def atomic_load_xor_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_xor_64 node:$a, node:$b)>;
1470
// atom.xor on .b32.
defm INT_PTX_ATOM_XOR_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_32_g, i32imm, imm>;
defm INT_PTX_ATOM_XOR_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
               atomic_load_xor_32_s, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
               atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_32_gen, i32imm, imm>;

// atom.xor on .b64.
defm INT_PTX_ATOM_XOR_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_64_g, i64imm, imm>;
defm INT_PTX_ATOM_XOR_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
               atomic_load_xor_64_s, i64imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
               atomic_load_xor_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_64_gen, i64imm, imm>;
1487
// atom_cas
//
// Address-space-checked, three-operand fragments over the 32- and 64-bit
// compare-and-swap nodes.

def atomic_cmp_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1502
// atom.cas on .b32.
defm INT_PTX_ATOM_CAS_G_32
  : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_32_g, i32imm>;
defm INT_PTX_ATOM_CAS_S_32
  : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
               atomic_cmp_swap_32_s, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32
  : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
               atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G
  : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_32_gen, i32imm>;

// atom.cas on .b64.
defm INT_PTX_ATOM_CAS_G_64
  : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_64_g, i64imm>;
defm INT_PTX_ATOM_CAS_S_64
  : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
               atomic_cmp_swap_64_s, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64
  : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
               atomic_cmp_swap_64_gen, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G
  : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_64_gen, i64imm>;
1519
// Support for scoped atomic operations.  Matches
// int_nvvm_atomic_{op}_{space}_{type}_{scope}
// and converts it into the appropriate instruction.
// NOTE: not all possible combinations are implemented
//  'space' is limited to generic as it's the only one needed to support CUDA.
//  'scope' = 'gpu' is default and is handled by regular atomic instructions.
//
// Common base for the two- and three-operand scoped atomics: a single
// `regclass` result, caller-supplied input operands and match pattern, gated
// by the predicate list `Preds`.
class ATOM23_impl<string AsmStr, NVPTXRegClass regclass,
                  list<Predicate> Preds, dag ins, dag Operands>
  : NVPTXInst<(outs regclass:$result), ins, AsmStr,
              [(set regclass:$result, Operands)]>,
    Requires<Preds>;
1532
// Define instruction variants for all addressing modes.
// Two-operand form: for each of the 32- and 64-bit address register classes,
// emit a register-operand variant and an immediate-operand variant of $b.
multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Register $b; these carry AddedComplexity = 1, unlike the immediate
  // variants below.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
            (ins Int32Regs:$src, regclass:$b),
            (Intr Int32Regs:$src, regclass:$b)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
            (ins Int64Regs:$src, regclass:$b),
            (Intr Int64Regs:$src, regclass:$b)>;
  }

  // Immediate $b.
  // tablegen can't infer argument types from Intrinsic (though it can
  // from Instruction) so we have to enforce specific type on
  // immediates via explicit cast to ImmTy.
  def : ATOM23_impl<AsmStr, regclass, Preds,
          (ins Int32Regs:$src, ImmType:$b),
          (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
          (ins Int64Regs:$src, ImmType:$b),
          (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
}
1556
// Three-operand counterpart of ATOM2P_impl: for each of the 32- and 64-bit
// address register classes, emit every register/immediate combination of the
// $b and $c operands.
multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Variants for register/immediate permutations of $b and $c

  // reg, reg
  let AddedComplexity = 2 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
            (ins Int32Regs:$src, regclass:$b, regclass:$c),
            (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
            (ins Int64Regs:$src, regclass:$b, regclass:$c),
            (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
  }

  // imm, reg  and  reg, imm
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
            (ins Int32Regs:$src, ImmType:$b, regclass:$c),
            (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
            (ins Int64Regs:$src, ImmType:$b, regclass:$c),
            (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
            (ins Int32Regs:$src, regclass:$b, ImmType:$c),
            (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
            (ins Int64Regs:$src, regclass:$b, ImmType:$c),
            (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
  }

  // imm, imm (no AddedComplexity)
  def : ATOM23_impl<AsmStr, regclass, Preds,
          (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
          (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
          (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
          (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}
1591
// Constructs intrinsic name and instruction asm strings.
//
// The asm string is "atom[.<space>][.<scope>].<op>.<type> \t$result, [$src], $b;"
// where ".<space>" is dropped for SpaceStr == "gen" and ".<scope>" is dropped
// for ScopeStr == "gpu".  The intrinsic looked up is
// int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>], with the scope suffix
// dropped only when ScopeStr is empty.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM2P_impl<
           !strconcat(
             "atom",
             !if(!eq(SpaceStr, "gen"), "", !strconcat(".", SpaceStr)),
             !if(!eq(ScopeStr, "gpu"), "", !strconcat(".", ScopeStr)),
             ".", OpStr, ".", TypeStr,
             " \t$result, [$src], $b;"),
           !cast<Intrinsic>(
             !strconcat(
               "int_nvvm_atomic_", OpStr,
               "_", SpaceStr, "_", IntTypeStr,
               !if(!eq(ScopeStr, ""), "", !strconcat("_", ScopeStr)))),
           regclass, ImmType, Imm, ImmTy, Preds>;
}
// Three-operand variant of the name/asm-string builder: same naming scheme,
// but the asm string carries an extra $c operand and the definitions are
// produced by ATOM3P_impl.
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM3P_impl<
           !strconcat(
             "atom",
             !if(!eq(SpaceStr, "gen"), "", !strconcat(".", SpaceStr)),
             !if(!eq(ScopeStr, "gpu"), "", !strconcat(".", ScopeStr)),
             ".", OpStr, ".", TypeStr,
             " \t$result, [$src], $b, $c;"),
           !cast<Intrinsic>(
             !strconcat(
               "int_nvvm_atomic_", OpStr,
               "_", SpaceStr, "_", IntTypeStr,
               !if(!eq(ScopeStr, ""), "", !strconcat("_", ScopeStr)))),
           regclass, ImmType, Imm, ImmTy, Preds>;
}
1621
// Constructs variants for different address spaces.
// For now we only need variants for generic space pointers.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Only instantiation: SpaceStr = "gen".
  defm _gen_
    : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                  regclass, ImmType, Imm, ImmTy, Preds>;
}
// Three-operand address-space fan-out; like ATOM2A_impl, only the generic
// ("gen") space is instantiated.
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  defm _gen_
    : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                  regclass, ImmType, Imm, ImmTy, Preds>;
}
1636
// Constructs variants for different scopes of atomic op.
// Each scoped variant appends hasAtomScope to the caller's predicate list.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // .gpu scope is default and is currently covered by existing
  // atomics w/o explicitly specified scope.
  defm _cta
    : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
  defm _sys
    : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
}
// Three-operand scope fan-out: instantiates the "cta" and "sys" scopes, each
// with hasAtomScope appended to the caller's predicate list.
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // No need to define ".gpu"-scoped atomics.  They do the same thing
  // as the regular, non-scoped atomics defined elsewhere.
  defm _cta
    : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
  defm _sys
    : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
}
1662
// atom.add
// Integer variants use intrinsic type "i", floating-point variants "f".
// Only the f64 variant carries an extra predicate (hasAtomAddF64).
multiclass ATOM2_add_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64",
                          Int64Regs, i64imm, imm, i64, []>;
  defm _f32 : ATOM2S_impl<OpStr, "f", "f32",
                          Float32Regs, f32imm, fpimm, f32, []>;
  defm _f64 : ATOM2S_impl<OpStr, "f", "f64",
                          Float64Regs, f64imm, fpimm, f64, [hasAtomAddF64]>;
}
1673
// atom.{and,or,xor}
// The 64-bit variant is additionally predicated on hasAtomBitwise64.
multiclass ATOM2_bitwise_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, [hasAtomBitwise64]>;
}
1680
// atom.exch
// 32- and 64-bit variants; neither adds an extra predicate.
multiclass ATOM2_exch_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, []>;
}
1686
// atom.{min,max}
// Both 64-bit variants are additionally predicated on hasAtomMinMax64.
multiclass ATOM2_minmax_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _s64 : ATOM2S_impl<OpStr, "i", "s64",
                          Int64Regs, i64imm, imm, i64, [hasAtomMinMax64]>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64",
                          Int64Regs, i64imm, imm, i64, [hasAtomMinMax64]>;
}
1696
// atom.{inc,dec}
// Only a u32 variant is defined.
multiclass ATOM2_incdec_impl<string OpStr> {
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
}
1701
// atom.cas
// Three-operand op; 32- and 64-bit variants with no extra predicates.
multiclass ATOM3_cas_impl<string OpStr> {
  defm _b32 : ATOM3S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM3S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, []>;
}
1707
// Instantiate the scoped atomics, one defm per PTX operation string.
defm INT_PTX_SATOM_ADD  : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND  : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS  : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC  : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH : ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC  : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX  : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN  : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR   : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR  : ATOM2_bitwise_impl<"xor">;
1718
1719//-----------------------------------
1720// Support for ldu on sm_20 or later
1721//-----------------------------------
1722
1723// Don't annotate ldu instructions as mayLoad, as they load from memory that is
1724// read-only in a kernel.
1725
1726// Scalar
1727
// Scalar ldu.global, one record per address operand form.  TyStr supplies the
// type suffix together with the operand text; it is appended to "ldu.global.".
// All records have an empty pattern list and require hasLDU.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  // Address in a 32-bit register.
  def areg : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                       "ldu.global." # TyStr, []>,
             Requires<[hasLDU]>;
  // Address in a 64-bit register.
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  // imemAny address operand.
  def avar : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                       "ldu.global." # TyStr, []>,
             Requires<[hasLDU]>;
  // MEMri address operand.
  def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                      "ldu.global." # TyStr, []>,
            Requires<[hasLDU]>;
  // MEMri64 address operand.
  def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                        "ldu.global." # TyStr, []>,
              Requires<[hasLDU]>;
}
1745
// Scalar ldu instantiations.  Note that i8 results use Int16Regs, and the
// f16 / f16x2 variants use the untyped b16 / b32 suffixes.
defm INT_PTX_LDU_GLOBAL_i8
  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16
  : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f16
  : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDU_GLOBAL_f16x2
  : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_GLOBAL_f32
  : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64
  : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDU_GLOBAL_p32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_p64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1756
1757// vector
1758
// Elementized vector ldu
// Two-element form: two `regclass` results, one record per address operand
// form.  Unlike the scalar LDU_G records, these carry no Requires<> clause.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _ari32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                         (ins MEMri:$src),
                         "ldu.global." # TyStr, []>;
  def _ari64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                         (ins MEMri64:$src),
                         "ldu.global." # TyStr, []>;
  def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                        (ins imemAny:$src),
                        "ldu.global." # TyStr, []>;
}
1777
// Four-element elementized vector ldu: four `regclass` results, one record
// per address operand form, each with an empty pattern list.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _ari32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                               regclass:$dst3, regclass:$dst4),
                         (ins MEMri:$src),
                         "ldu.global." # TyStr, []>;
  def _ari64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                               regclass:$dst3, regclass:$dst4),
                         (ins MEMri64:$src),
                         "ldu.global." # TyStr, []>;
  def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                              regclass:$dst3, regclass:$dst4),
                        (ins imemAny:$src),
                        "ldu.global." # TyStr, []>;
}
1795
// Two-element vector ldu instantiations.
defm INT_PTX_LDU_G_v2i8_ELE : VLDU_G_ELE_V2<
  "v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<
  "v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<
  "v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f16_ELE : VLDU_G_ELE_V2<
  "v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDU_G_v2f16x2_ELE : VLDU_G_ELE_V2<
  "v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<
  "v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE : VLDU_G_ELE_V2<
  "v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<
  "v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// Four-element vector ldu instantiations.
defm INT_PTX_LDU_G_v4i8_ELE : VLDU_G_ELE_V4<
  "v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE : VLDU_G_ELE_V4<
  "v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE : VLDU_G_ELE_V4<
  "v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<
  "v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<
  "v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<
  "v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
1829
1830
1831//-----------------------------------
1832// Support for ldg on sm_35 or later
1833//-----------------------------------
1834
1835// Don't annotate ld.global.nc as mayLoad, because these loads go through the
1836// non-coherent texture cache, and therefore the values read must be read-only
1837// during the lifetime of the kernel.
1838
// Scalar ld.global.nc, one record per address operand form.  TyStr supplies
// the type suffix together with the operand text; it is appended to
// "ld.global.nc.".  All records have an empty pattern list and require hasLDG.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  // Address in a 32-bit register.
  def areg : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                       "ld.global.nc." # TyStr, []>,
             Requires<[hasLDG]>;
  // Address in a 64-bit register.
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  // imemAny address operand.
  def avar : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                       "ld.global.nc." # TyStr, []>,
             Requires<[hasLDG]>;
  // MEMri address operand.
  def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                      "ld.global.nc." # TyStr, []>,
            Requires<[hasLDG]>;
  // MEMri64 address operand.
  def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                        "ld.global.nc." # TyStr, []>,
              Requires<[hasLDG]>;
}
1856
// Scalar ldg instantiations.  Note that i8 results use Int16Regs, and the
// f16 / f16x2 variants use the untyped b16 / b32 suffixes.
defm INT_PTX_LDG_GLOBAL_i8  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f16 : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDG_GLOBAL_f16x2 : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDG_GLOBAL_p32 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_p64 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1877
1878// vector
1879
// Elementized vector ldg
// Two-element form: two `regclass` results, one record per address operand
// form.  Unlike the scalar LDG_G records, these carry no Requires<> clause.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                         (ins MEMri:$src),
                         "ld.global.nc." # TyStr, []>;
  def _ari64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                         (ins MEMri64:$src),
                         "ld.global.nc." # TyStr, []>;
  def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                        (ins imemAny:$src),
                        "ld.global.nc." # TyStr, []>;
}
1898
// Four-result ld.global.nc instructions. TyStr is appended to "ld.global.nc."
// to form the asm string. One def is produced per address operand kind
// (Int32Regs, Int64Regs, MEMri, MEMri64, imemAny); none has a selection
// pattern.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
      (ins Int32Regs:$src),
      "ld.global.nc." # TyStr, []>;
  def _areg64 : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
      (ins Int64Regs:$src),
      "ld.global.nc." # TyStr, []>;
  def _ari32 : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
      (ins MEMri:$src),
      "ld.global.nc." # TyStr, []>;
  def _ari64 : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
      (ins MEMri64:$src),
      "ld.global.nc." # TyStr, []>;
  def _avar : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
      (ins imemAny:$src),
      "ld.global.nc." # TyStr, []>;
}
1916
1917// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// v2 instantiations: two destination registers per load.
defm INT_PTX_LDG_G_v2i8_ELE : VLDG_G_ELE_V2<
    "v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE : VLDG_G_ELE_V2<
    "v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<
    "v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f16_ELE : VLDG_G_ELE_V2<
    "v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDG_G_v2f16x2_ELE : VLDG_G_ELE_V2<
    "v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<
    "v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE : VLDG_G_ELE_V2<
    "v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE : VLDG_G_ELE_V2<
    "v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// v4 instantiations: four destination registers per load. No 64-bit element
// variants are instantiated here.
defm INT_PTX_LDG_G_v4i8_ELE : VLDG_G_ELE_V4<
    "v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE : VLDG_G_ELE_V4<
    "v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<
    "v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v4f16_ELE : VLDG_G_ELE_V4<
    "v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
defm INT_PTX_LDG_G_v4f16x2_ELE : VLDG_G_ELE_V4<
    "v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<
    "v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
1946
1947
// cvta.<Str> instructions selected for intrinsic Intrin. Three forms are
// produced: 32-bit -> 32-bit, 64-bit -> 64-bit, and a 32-bit -> 64-bit form
// that is only available under the useShortPtr predicate.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  def _yes : NVPTXInst<
      (outs Int32Regs:$result), (ins Int32Regs:$src),
      "cvta." # Str # ".u32 \t$result, $src;",
      [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64 : NVPTXInst<
      (outs Int64Regs:$result), (ins Int64Regs:$src),
      "cvta." # Str # ".u64 \t$result, $src;",
      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  // The 32-bit source is first widened with cvt.u64.u32 into a scratch
  // register %tmp declared inside the emitted brace scope, then converted with
  // the 64-bit cvta.
  def _yes_6432 : NVPTXInst<
      (outs Int64Regs:$result), (ins Int32Regs:$src),
      !strconcat("{{ .reg .b64 %tmp;\n\t",
                 "  cvt.u64.u32 \t%tmp, $src;\n\t",
                 "  cvta.", Str, ".u64 \t$result, %tmp; }}"),
      [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
      Requires<[useShortPtr]>;
}
1962
// cvta.to.<Str> instructions selected for intrinsic Intrin. Three forms are
// produced: 32-bit -> 32-bit, 64-bit -> 64-bit, and a 64-bit -> 32-bit form
// that is only available under the useShortPtr predicate.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  def _yes : NVPTXInst<
      (outs Int32Regs:$result), (ins Int32Regs:$src),
      "cvta.to." # Str # ".u32 \t$result, $src;",
      [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64 : NVPTXInst<
      (outs Int64Regs:$result), (ins Int64Regs:$src),
      "cvta.to." # Str # ".u64 \t$result, $src;",
      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  // The 64-bit cvta.to result goes into a scratch register %tmp declared
  // inside the emitted brace scope, and is then narrowed with cvt.u32.u64.
  def _yes_3264 : NVPTXInst<
      (outs Int32Regs:$result), (ins Int64Regs:$src),
      !strconcat("{{ .reg .b64 %tmp;\n\t",
                 "  cvta.to.", Str, ".u64 \t%tmp, $src;\n\t",
                 "  cvt.u32.u64 \t$result, %tmp; }}"),
      [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
      Requires<[useShortPtr]>;
}
1977
// cvta.<space>: one NG_TO_G instantiation per *_to_gen intrinsic.
defm cvta_local     : NG_TO_G<"local",  int_nvvm_ptr_local_to_gen>;
defm cvta_shared    : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global    : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const     : NG_TO_G<"const",  int_nvvm_ptr_constant_to_gen>;

// cvta.to.<space>: one G_TO_NG instantiation per gen_to_* intrinsic.
defm cvta_to_local  : G_TO_NG<"local",  int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const  : G_TO_NG<"const",  int_nvvm_ptr_gen_to_constant>;
1987
1988
1989// nvvm.ptr.gen.to.param
// int_nvvm_ptr_gen_to_param is emitted as a plain register move, in a 32-bit
// and a 64-bit form.
def nvvm_ptr_gen_to_param : NVPTXInst<
    (outs Int32Regs:$result), (ins Int32Regs:$src),
    "mov.u32 \t$result, $src;",
    [(set Int32Regs:$result, (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64 : NVPTXInst<
    (outs Int64Regs:$result), (ins Int64Regs:$src),
    "mov.u64 \t$result, $src;",
    [(set Int64Regs:$result, (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
2000
2001
// nvvm.move intrinsics
// Integer moves use the untyped .b16/.b32/.b64 mov forms.
def nvvm_move_i16 : NVPTXInst<
    (outs Int16Regs:$r), (ins Int16Regs:$s),
    "mov.b16 \t$r, $s;",
    [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32 : NVPTXInst<
    (outs Int32Regs:$r), (ins Int32Regs:$s),
    "mov.b32 \t$r, $s;",
    [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64 : NVPTXInst<
    (outs Int64Regs:$r), (ins Int64Regs:$s),
    "mov.b64 \t$r, $s;",
    [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;

// Floating-point moves use mov.f32 / mov.f64.
def nvvm_move_float : NVPTXInst<
    (outs Float32Regs:$r), (ins Float32Regs:$s),
    "mov.f32 \t$r, $s;",
    [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double : NVPTXInst<
    (outs Float64Regs:$r), (ins Float64Regs:$s),
    "mov.f64 \t$r, $s;",
    [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;

// int_nvvm_move_ptr is matched at both pointer widths and uses
// mov.u32 / mov.u64.
def nvvm_move_ptr32 : NVPTXInst<
    (outs Int32Regs:$r), (ins Int32Regs:$s),
    "mov.u32 \t$r, $s;",
    [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64 : NVPTXInst<
    (outs Int64Regs:$r), (ins Int64Regs:$s),
    "mov.u64 \t$r, $s;",
    [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;
2031
2032// @TODO: Are these actually needed, or will we always just see symbols
2033// copied to registers first?
2034/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2035                             "mov.u32 \t$r, $s;",
2036                             [(set Int32Regs:$r,
2037                             (int_nvvm_move_ptr texternalsym:$s))]>;
2038def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2039                             "mov.u64 \t$r, $s;",
2040                             [(set Int64Regs:$r,
2041                             (int_nvvm_move_ptr texternalsym:$s))]>;*/
2042
2043
2044// MoveParam        %r1, param
2045// ptr_local_to_gen %r2, %r1
2046// ptr_gen_to_local %r3, %r2
2047// ->
2048// mov %r1, param
2049
2050// @TODO: Revisit this.  There is a type
2051// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
2052// instructions are not currently defined. However, we can use the ptr
2053// variants and the asm printer will do the right thing.
// Fold gen_to_local(local_to_gen(MoveParam sym)) into a single pointer move of
// the symbol, for 64-bit and 32-bit results respectively.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;
2060
// Pattern-less 64-bit move of an imem operand into a register.
def texsurf_handles : NVPTXInst<(outs Int64Regs:$result),
                                (ins imem:$src),
                                "mov.u64 \t$result, $src;",
                                []>;
2064
2065//-----------------------------------
2066// Compiler Error Warn
2067// - Just ignore them in codegen
2068//-----------------------------------
2069
// compiler.warn / compiler.error are matched for 32-bit and 64-bit operands
// and emit only a PTX comment line.
def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<
    (outs), (ins Int32Regs:$a),
    "// llvm.nvvm.compiler.warn()",
    [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<
    (outs), (ins Int64Regs:$a),
    "// llvm.nvvm.compiler.warn()",
    [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<
    (outs), (ins Int32Regs:$a),
    "// llvm.nvvm.compiler.error()",
    [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<
    (outs), (ins Int64Regs:$a),
    "// llvm.nvvm.compiler.error()",
    [(int_nvvm_compiler_error Int64Regs:$a)]>;
2082
2083
2084// isspacep
2085
// Each isspacep.<space> instruction writes an Int1Regs result and exists for
// 32-bit and 64-bit address operands. Only the .const forms carry the
// hasPTX31 predicate.
def ISSPACEP_CONST_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.const \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
    Requires<[hasPTX31]>;
def ISSPACEP_CONST_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.const \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
    Requires<[hasPTX31]>;

def ISSPACEP_GLOBAL_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.global \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
def ISSPACEP_GLOBAL_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.global \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;

def ISSPACEP_LOCAL_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.local \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
def ISSPACEP_LOCAL_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.local \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;

def ISSPACEP_SHARED_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.shared \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
def ISSPACEP_SHARED_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.shared \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
2120
2121
2122// Special register reads
// Pattern-less 32-bit move from a SpecialRegs operand; it is referenced
// explicitly by the envreg patterns that follow.
def MOV_SPECIAL : NVPTXInst<
    (outs Int32Regs:$d), (ins SpecialRegs:$r),
    "mov.b32 \t$d, $r;",
    []>;
2126
// Map each int_nvvm_read_ptx_sreg_envregN intrinsic (N = 0..31) to a
// MOV_SPECIAL of the matching ENVREGN register.
def : Pat<(int_nvvm_read_ptx_sreg_envreg0),  (MOV_SPECIAL ENVREG0)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg1),  (MOV_SPECIAL ENVREG1)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg2),  (MOV_SPECIAL ENVREG2)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg3),  (MOV_SPECIAL ENVREG3)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg4),  (MOV_SPECIAL ENVREG4)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg5),  (MOV_SPECIAL ENVREG5)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg6),  (MOV_SPECIAL ENVREG6)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg7),  (MOV_SPECIAL ENVREG7)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg8),  (MOV_SPECIAL ENVREG8)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg9),  (MOV_SPECIAL ENVREG9)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2159
2160
2161// rotate builtin support
2162
// 32-bit rotate selected when hasHWROT32 holds: a wrapping left funnel shift
// with $src supplied as both data inputs. Immediate-amount form.
def ROTATE_B32_HW_IMM : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins Int32Regs:$src, i32imm:$amt),
    "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
    [(set Int32Regs:$dst,
          (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
    Requires<[hasHWROT32]>;

// Register-amount form of the same rotate.
def ROTATE_B32_HW_REG : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins Int32Regs:$src, Int32Regs:$amt),
    "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
    [(set Int32Regs:$dst,
          (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
    Requires<[hasHWROT32]>;
2178
// Fallbacks for int_nvvm_rotate_b32 under noHWROT32. The immediate form passes
// both $amt and SUB_FRM_32 of $amt to ROT32imm_sw; the register form defers to
// ROTL32reg_sw.
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
          (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
2186
// Extract one 32-bit half of a 64-bit register by unpacking it with mov.b64
// into {$dst, %dummy} (low half) or {%dummy, $dst} (high half). %dummy is a
// scratch register declared inside the emitted brace scope.
let hasSideEffects = 0 in {
  def GET_LO_INT64 : NVPTXInst<
      (outs Int32Regs:$dst), (ins Int64Regs:$src),
      "{{\n\t"
      # ".reg .b32 %dummy;\n\t"
      # "mov.b64 \t{$dst,%dummy}, $src;\n\t"
      # "}}",
      []>;

  def GET_HI_INT64 : NVPTXInst<
      (outs Int32Regs:$dst), (ins Int64Regs:$src),
      "{{\n\t"
      # ".reg .b32 %dummy;\n\t"
      # "mov.b64 \t{%dummy,$dst}, $src;\n\t"
      # "}}",
      []>;
}
2202
// Build a 64-bit register from two 32-bit registers with mov.b64; $lo is the
// first element of the packed pair and $hi the second.
let hasSideEffects = 0 in {
  def PACK_TWO_INT32 : NVPTXInst<
      (outs Int64Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi),
      "mov.b64 \t$dst, {{$lo, $hi}};",
      []>;
}
2208
// swap_lo_hi_b64: repack $src with its halves exchanged, so the extracted high
// half becomes the $lo operand and the extracted low half the $hi operand.
def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
          (PACK_TWO_INT32
             (GET_HI_INT64 Int64Regs:$src),
             (GET_LO_INT64 Int64Regs:$src))>;
2212
2213// Funnel shift, requires >= sm_32.  Does not trap if amt is out of range, so
2214// no side effects.
let hasSideEffects = 0 in {
  // Left funnel shift, wrap mode: immediate and register shift amounts.
  def SHF_L_WRAP_B32_IMM : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
      "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;

  def SHF_L_WRAP_B32_REG : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
      "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;

  // Right funnel shift, wrap mode: immediate and register shift amounts.
  def SHF_R_WRAP_B32_IMM : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
      "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_REG : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
      "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;
}
2240
2241// HW version of rotate 64
// rotate_b64 under hasHWROT32: each 32-bit half of the result is one left
// funnel shift over the two halves of $src, with the half order swapped
// between the two shifts; the halves are then repacked. Immediate amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
             (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                 (GET_LO_INT64 Int64Regs:$src),
                                 imm:$amt),
             (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                 (GET_HI_INT64 Int64Regs:$src),
                                 imm:$amt))>,
      Requires<[hasHWROT32]>;

// Same expansion with the amount in a register.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
             (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                 (GET_LO_INT64 Int64Regs:$src),
                                 Int32Regs:$amt),
             (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                 (GET_HI_INT64 Int64Regs:$src),
                                 Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;
2257
2258
// rotate_right_b64 under hasHWROT32: each 32-bit half of the result is one
// right funnel shift over the two halves of $src, with the half order swapped
// between the two shifts; the halves are then repacked. Immediate amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
             (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                 (GET_HI_INT64 Int64Regs:$src),
                                 imm:$amt),
             (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                 (GET_LO_INT64 Int64Regs:$src),
                                 imm:$amt))>,
      Requires<[hasHWROT32]>;

// Same expansion with the amount in a register.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
             (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                 (GET_HI_INT64 Int64Regs:$src),
                                 Int32Regs:$amt),
             (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                 (GET_LO_INT64 Int64Regs:$src),
                                 Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;
2274
2275// SW version of rotate 64
// Immediate rotate_b64 under noHWROT32. ROT64imm_sw takes the rotate amount
// and its complementary shift amount. For a 64-bit rotate the complement must
// come from the 64-bit transform SUB_FRM_64, as in the rotate_right_b64
// immediate pattern. SUB_FRM_32 is the transform the 32-bit rotate pattern
// pairs with ROT32imm_sw, and would yield the wrong second shift amount here.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
      Requires<[noHWROT32]>;
// Register-amount rotate_b64 under noHWROT32.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;

// Immediate rotate_right_b64 under noHWROT32: the same ROT64imm_sw instruction
// with the two amount operands exchanged (SUB_FRM_64 of $amt first, $amt
// second).
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;

// Register-amount rotate_right_b64 under noHWROT32.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
2288
2289
2290//-----------------------------------
2291// Texture Intrinsics
2292//-----------------------------------
2293
2294// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2295// also defined in NVPTXReplaceImageHandles.cpp
2296
2297// texmode_independent
2298let IsTex = 1, IsTexModeUnified = 0 in {
2299// Texture fetch instructions using handles
// tex.1d with four Float32Regs results. $t and $s are the two Int64Regs
// operands placed first inside the bracketed operand list; $x is the single
// coordinate. Variants: s32 coordinate, f32 coordinate, f32 with $lod
// (tex.level), and f32 with $gradx/$grady (tex.grad). None has a pattern.
def TEX_1D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
    "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
    []>;
def TEX_1D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
    "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
    []>;
def TEX_1D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x\\}], $lod;",
    []>;
def TEX_1D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
// tex.1d with four Int32Regs results and the .s32 result type in the asm
// string. Variants: s32 coordinate, f32 coordinate, f32 with $lod
// (tex.level), and f32 with $gradx/$grady (tex.grad). None has a pattern.
def TEX_1D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
    "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
    []>;
def TEX_1D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
    "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
    []>;
def TEX_1D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x\\}], $lod;",
    []>;
def TEX_1D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
// tex.1d with four Int32Regs results and the .u32 result type in the asm
// string. Variants: s32 coordinate, f32 coordinate, f32 with $lod
// (tex.level), and f32 with $gradx/$grady (tex.grad). None has a pattern.
def TEX_1D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
    "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
    []>;
def TEX_1D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
    "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
    []>;
def TEX_1D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x\\}], $lod;",
    []>;
def TEX_1D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
2383
// tex.a1d with four Float32Regs results. Compared with the tex.1d forms there
// is an extra Int32Regs operand $l, emitted ahead of $x inside the coordinate
// braces. Variants: s32 coordinate, f32 coordinate, f32 with $lod
// (tex.level), and f32 with $gradx/$grady (tex.grad). None has a pattern.
def TEX_1D_ARRAY_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}];",
    []>;
def TEX_1D_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}];",
    []>;
def TEX_1D_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$lod),
    "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}], $lod;",
    []>;
def TEX_1D_ARRAY_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
// tex.a1d with four Int32Regs results and the .s32 result type in the asm
// string; $l is emitted ahead of $x inside the coordinate braces. Variants:
// s32 coordinate, f32 coordinate, f32 with $lod (tex.level), and f32 with
// $gradx/$grady (tex.grad). None has a pattern.
def TEX_1D_ARRAY_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}];",
    []>;
def TEX_1D_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}];",
    []>;
def TEX_1D_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$lod),
    "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}], $lod;",
    []>;
def TEX_1D_ARRAY_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
// tex.a1d with four Int32Regs results and the .u32 result type in the asm
// string; $l is emitted ahead of $x inside the coordinate braces. Variants:
// s32 coordinate, f32 coordinate, f32 with $lod (tex.level), and f32 with
// $gradx/$grady (tex.grad). None has a pattern.
def TEX_1D_ARRAY_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}];",
    []>;
def TEX_1D_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}];",
    []>;
def TEX_1D_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$lod),
    "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}], $lod;",
    []>;
def TEX_1D_ARRAY_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
2474
// tex.2d with four Float32Regs results and two coordinates $x, $y. Variants:
// s32 coordinates, f32 coordinates, f32 with $lod (tex.level), and f32 with
// two-component gradient vectors (tex.grad). None has a pattern.
def TEX_2D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TEX_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TEX_2D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y\\}], $lod;",
    []>;
def TEX_2D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
2507def TEX_2D_S32_S32
2508  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2509                    Int32Regs:$b, Int32Regs:$a),
2510              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2511              "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2512              "[$t, $s, \\{$x, $y\\}];",
2513              []>;
2514def TEX_2D_S32_F32
2515  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2516                    Int32Regs:$b, Int32Regs:$a),
2517              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2518              "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2519              "[$t, $s, \\{$x, $y\\}];",
2520              []>;
// Prints "tex.level.2d.v4.s32.f32"; $lod (Float32Regs) trails the bracketed
// address that holds $t, $s and the ($x, $y) coordinate pair.
def TEX_2D_S32_F32_LEVEL : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y,
       Float32Regs:$lod),
  "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y\\}], $lod;",
  []>;
// Prints "tex.grad.2d.v4.s32.f32"; the two-element vectors
// {$gradx0, $gradx1} and {$grady0, $grady1} trail the bracketed address.
def TEX_2D_S32_F32_GRAD : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y,
       Float32Regs:$gradx0, Float32Regs:$gradx1,
       Float32Regs:$grady0, Float32Regs:$grady1),
  "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
  "\\{$grady0, $grady1\\};",
  []>;
// Prints "tex.2d.v4.u32.s32" with four Int32Regs results and the Int32Regs
// coordinate pair ($x, $y) after the Int64 operands $t and $s.
def TEX_2D_U32_S32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Int32Regs:$x, Int32Regs:$y),
  "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y\\}];",
  []>;
// Prints "tex.2d.v4.u32.f32" with four Int32Regs results and the
// Float32Regs coordinate pair ($x, $y) after the Int64 operands $t and $s.
def TEX_2D_U32_F32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y),
  "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y\\}];",
  []>;
// Prints "tex.level.2d.v4.u32.f32"; $lod (Float32Regs) trails the bracketed
// address that holds $t, $s and the ($x, $y) coordinate pair.
def TEX_2D_U32_F32_LEVEL : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y,
       Float32Regs:$lod),
  "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y\\}], $lod;",
  []>;
// Prints "tex.grad.2d.v4.u32.f32"; the two-element vectors
// {$gradx0, $gradx1} and {$grady0, $grady1} trail the bracketed address.
def TEX_2D_U32_F32_GRAD : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y,
       Float32Regs:$gradx0, Float32Regs:$gradx1,
       Float32Regs:$grady0, Float32Regs:$grady1),
  "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
  "\\{$grady0, $grady1\\};",
  []>;
2571
// Prints "tex.a2d.v4.f32.s32" with four Float32Regs results. The coordinate
// vector is printed 4-wide as {$l, $x, $y, $y}: $y fills the last slot too
// (presumably padding that PTX ignores -- confirm against the PTX ISA).
def TEX_2D_ARRAY_F32_S32 : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
       Int32Regs:$x, Int32Regs:$y),
  "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$l, $x, $y, $y\\}];",
  []>;
// Prints "tex.a2d.v4.f32.f32" with four Float32Regs results. The coordinate
// vector is printed 4-wide as {$l, $x, $y, $y}: $l is Int32Regs, $x/$y are
// Float32Regs, and $y is repeated in the last slot.
def TEX_2D_ARRAY_F32_F32 : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
       Float32Regs:$x, Float32Regs:$y),
  "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$l, $x, $y, $y\\}];",
  []>;
// Prints "tex.level.a2d.v4.f32.f32"; $lod trails the bracketed address.
// The coordinate vector is printed 4-wide with $y repeated in the last slot.
def TEX_2D_ARRAY_F32_F32_LEVEL : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
       Float32Regs:$x, Float32Regs:$y,
       Float32Regs:$lod),
  "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
  []>;
// Prints "tex.grad.a2d.v4.f32.f32"; two 2-element gradient vectors trail the
// address. The coordinate vector is 4-wide with $y repeated in the last slot.
def TEX_2D_ARRAY_F32_F32_GRAD : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
       Float32Regs:$x, Float32Regs:$y,
       Float32Regs:$gradx0, Float32Regs:$gradx1,
       Float32Regs:$grady0, Float32Regs:$grady1),
  "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
  "\\{$grady0, $grady1\\};",
  []>;
// Prints "tex.a2d.v4.s32.s32" with four Int32Regs results. The coordinate
// vector is printed 4-wide as {$l, $x, $y, $y}, repeating $y at the end.
def TEX_2D_ARRAY_S32_S32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
       Int32Regs:$x, Int32Regs:$y),
  "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$l, $x, $y, $y\\}];",
  []>;
// Prints "tex.a2d.v4.s32.f32" with four Int32Regs results. The coordinate
// vector is printed 4-wide as {$l, $x, $y, $y}, repeating $y at the end.
def TEX_2D_ARRAY_S32_F32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
       Float32Regs:$x, Float32Regs:$y),
  "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$l, $x, $y, $y\\}];",
  []>;
// Prints "tex.level.a2d.v4.s32.f32"; $lod trails the bracketed address.
// The coordinate vector is printed 4-wide with $y repeated in the last slot.
def TEX_2D_ARRAY_S32_F32_LEVEL : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
       Float32Regs:$x, Float32Regs:$y,
       Float32Regs:$lod),
  "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
  []>;
// Prints "tex.grad.a2d.v4.s32.f32"; two 2-element gradient vectors trail the
// address. The coordinate vector is 4-wide with $y repeated in the last slot.
def TEX_2D_ARRAY_S32_F32_GRAD : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
       Float32Regs:$x, Float32Regs:$y,
       Float32Regs:$gradx0, Float32Regs:$gradx1,
       Float32Regs:$grady0, Float32Regs:$grady1),
  "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
  "\\{$grady0, $grady1\\};",
  []>;
// Prints "tex.a2d.v4.u32.s32" with four Int32Regs results. The coordinate
// vector is printed 4-wide as {$l, $x, $y, $y}, repeating $y at the end.
def TEX_2D_ARRAY_U32_S32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
       Int32Regs:$x, Int32Regs:$y),
  "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$l, $x, $y, $y\\}];",
  []>;
// Prints "tex.a2d.v4.u32.f32" with four Int32Regs results. The coordinate
// vector is printed 4-wide as {$l, $x, $y, $y}, repeating $y at the end.
def TEX_2D_ARRAY_U32_F32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
       Float32Regs:$x, Float32Regs:$y),
  "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$l, $x, $y, $y\\}];",
  []>;
// Prints "tex.level.a2d.v4.u32.f32"; $lod trails the bracketed address.
// The coordinate vector is printed 4-wide with $y repeated in the last slot.
def TEX_2D_ARRAY_U32_F32_LEVEL : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
       Float32Regs:$x, Float32Regs:$y,
       Float32Regs:$lod),
  "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
  []>;
// Prints "tex.grad.a2d.v4.u32.f32"; two 2-element gradient vectors trail the
// address. The coordinate vector is 4-wide with $y repeated in the last slot.
def TEX_2D_ARRAY_U32_F32_GRAD : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
       Float32Regs:$x, Float32Regs:$y,
       Float32Regs:$gradx0, Float32Regs:$gradx1,
       Float32Regs:$grady0, Float32Regs:$grady1),
  "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
  "\\{$grady0, $grady1\\};",
  []>;
2676
// Prints "tex.3d.v4.f32.s32" with four Float32Regs results. The Int32Regs
// coordinates are printed 4-wide as {$x, $y, $z, $z}: $z fills the last slot
// (presumably padding that PTX ignores -- confirm against the PTX ISA).
def TEX_3D_F32_S32 : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
  "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y, $z, $z\\}];",
  []>;
// Prints "tex.3d.v4.f32.f32" with four Float32Regs results. The Float32Regs
// coordinates are printed 4-wide as {$x, $y, $z, $z}, repeating $z.
def TEX_3D_F32_F32 : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
  "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y, $z, $z\\}];",
  []>;
// Prints "tex.level.3d.v4.f32.f32"; $lod trails the bracketed address.
// The coordinate vector is printed 4-wide with $z repeated in the last slot.
def TEX_3D_F32_F32_LEVEL : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
       Float32Regs:$lod),
  "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
  []>;
// Prints "tex.grad.3d.v4.f32.f32". The coordinate vector and both gradient
// vectors are printed 4-wide; each repeats its third element ($z, $gradx2,
// $grady2) in the fourth slot.
def TEX_3D_F32_F32_GRAD : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
       Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
       Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
  "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y, $z, $z\\}], "
  "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
  "\\{$grady0, $grady1, $grady2, $grady2\\};",
  []>;
// Prints "tex.3d.v4.s32.s32" with four Int32Regs results. The Int32Regs
// coordinates are printed 4-wide as {$x, $y, $z, $z}, repeating $z.
def TEX_3D_S32_S32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
  "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y, $z, $z\\}];",
  []>;
// Prints "tex.3d.v4.s32.f32" with four Int32Regs results. The Float32Regs
// coordinates are printed 4-wide as {$x, $y, $z, $z}, repeating $z.
def TEX_3D_S32_F32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
  "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y, $z, $z\\}];",
  []>;
// Prints "tex.level.3d.v4.s32.f32"; $lod trails the bracketed address.
// The coordinate vector is printed 4-wide with $z repeated in the last slot.
def TEX_3D_S32_F32_LEVEL : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
       Float32Regs:$lod),
  "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
  []>;
// Prints "tex.grad.3d.v4.s32.f32". The coordinate vector and both gradient
// vectors are printed 4-wide; each repeats its third element ($z, $gradx2,
// $grady2) in the fourth slot.
def TEX_3D_S32_F32_GRAD : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
       Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
       Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
  "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y, $z, $z\\}], "
  "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
  "\\{$grady0, $grady1, $grady2, $grady2\\};",
  []>;
// Prints "tex.3d.v4.u32.s32" with four Int32Regs results. The Int32Regs
// coordinates are printed 4-wide as {$x, $y, $z, $z}, repeating $z.
def TEX_3D_U32_S32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
  "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y, $z, $z\\}];",
  []>;
// Prints "tex.3d.v4.u32.f32" with four Int32Regs results. The Float32Regs
// coordinates are printed 4-wide as {$x, $y, $z, $z}, repeating $z.
def TEX_3D_U32_F32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
  "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y, $z, $z\\}];",
  []>;
// Prints "tex.level.3d.v4.u32.f32"; $lod trails the bracketed address.
// The coordinate vector is printed 4-wide with $z repeated in the last slot.
def TEX_3D_U32_F32_LEVEL : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
       Float32Regs:$lod),
  "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
  []>;
// Prints "tex.grad.3d.v4.u32.f32". The coordinate vector and both gradient
// vectors are printed 4-wide; each repeats its third element ($z, $gradx2,
// $grady2) in the fourth slot.
def TEX_3D_U32_F32_GRAD : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
       Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
       Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
  "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y, $z, $z\\}], "
  "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
  "\\{$grady0, $grady1, $grady2, $grady2\\};",
  []>;
2788
// Prints "tex.cube.v4.f32.f32" with four Float32Regs results. The
// Float32Regs coordinates are printed 4-wide as {$x, $y, $z, $z}.
def TEX_CUBE_F32_F32 : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
  "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y, $z, $z\\}];",
  []>;
// Prints "tex.level.cube.v4.f32.f32"; $lod trails the bracketed address.
// The coordinate vector is printed 4-wide with $z repeated in the last slot.
def TEX_CUBE_F32_F32_LEVEL : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
       Float32Regs:$lod),
  "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
  []>;
// Prints "tex.cube.v4.s32.f32" with four Int32Regs results. The
// Float32Regs coordinates are printed 4-wide as {$x, $y, $z, $z}.
def TEX_CUBE_S32_F32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
  "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y, $z, $z\\}];",
  []>;
// Prints "tex.level.cube.v4.s32.f32"; $lod trails the bracketed address.
// The coordinate vector is printed 4-wide with $z repeated in the last slot.
def TEX_CUBE_S32_F32_LEVEL : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
       Float32Regs:$lod),
  "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
  []>;
// Prints "tex.cube.v4.u32.f32" with four Int32Regs results. The
// Float32Regs coordinates are printed 4-wide as {$x, $y, $z, $z}.
def TEX_CUBE_U32_F32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
  "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y, $z, $z\\}];",
  []>;
// Prints "tex.level.cube.v4.u32.f32"; $lod trails the bracketed address.
// The coordinate vector is printed 4-wide with $z repeated in the last slot.
def TEX_CUBE_U32_F32_LEVEL : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
       Float32Regs:$lod),
  "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
  []>;
2840
// Prints "tex.acube.v4.f32.f32" with four Float32Regs results. The
// coordinate vector is {$l, $x, $y, $z}: Int32Regs $l, then Float32Regs.
def TEX_CUBE_ARRAY_F32_F32 : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
  "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$l, $x, $y, $z\\}];",
  []>;
// Prints "tex.level.acube.v4.f32.f32"; $lod (Float32Regs) trails the
// bracketed address whose coordinate vector is {$l, $x, $y, $z}.
def TEX_CUBE_ARRAY_F32_F32_LEVEL : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
       Float32Regs:$lod),
  "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
  []>;
// Prints "tex.acube.v4.s32.f32" with four Int32Regs results. The
// coordinate vector is {$l, $x, $y, $z}: Int32Regs $l, then Float32Regs.
def TEX_CUBE_ARRAY_S32_F32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
  "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$l, $x, $y, $z\\}];",
  []>;
// Prints "tex.level.acube.v4.s32.f32"; $lod (Float32Regs) trails the
// bracketed address whose coordinate vector is {$l, $x, $y, $z}.
def TEX_CUBE_ARRAY_S32_F32_LEVEL : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
       Float32Regs:$lod),
  "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
  []>;
// Prints "tex.acube.v4.u32.f32" with four Int32Regs results. The
// coordinate vector is {$l, $x, $y, $z}: Int32Regs $l, then Float32Regs.
def TEX_CUBE_ARRAY_U32_F32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
  "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$l, $x, $y, $z\\}];",
  []>;
// Prints "tex.level.acube.v4.u32.f32"; $lod (Float32Regs) trails the
// bracketed address whose coordinate vector is {$l, $x, $y, $z}.
def TEX_CUBE_ARRAY_U32_F32_LEVEL : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
       Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
       Float32Regs:$lod),
  "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
  []>;
2892
// Prints "tld4.r.2d.v4.f32.f32" with four Float32Regs results ($v0..$v3)
// and the Float32Regs coordinate pair ($x, $y) after $t and $s.
def TLD4_R_2D_F32_F32 : NVPTXInst<
  (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y),
  "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
  "[$t, $s, \\{$x, $y\\}];",
  []>;
// Prints "tld4.g.2d.v4.f32.f32" with four Float32Regs results ($v0..$v3)
// and the Float32Regs coordinate pair ($x, $y) after $t and $s.
def TLD4_G_2D_F32_F32 : NVPTXInst<
  (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y),
  "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
  "[$t, $s, \\{$x, $y\\}];",
  []>;
// Prints "tld4.b.2d.v4.f32.f32" with four Float32Regs results ($v0..$v3)
// and the Float32Regs coordinate pair ($x, $y) after $t and $s.
def TLD4_B_2D_F32_F32 : NVPTXInst<
  (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y),
  "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
  "[$t, $s, \\{$x, $y\\}];",
  []>;
// Prints "tld4.a.2d.v4.f32.f32" with four Float32Regs results ($v0..$v3)
// and the Float32Regs coordinate pair ($x, $y) after $t and $s.
def TLD4_A_2D_F32_F32 : NVPTXInst<
  (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y),
  "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
  "[$t, $s, \\{$x, $y\\}];",
  []>;
// Prints "tld4.r.2d.v4.s32.f32" with four Int32Regs results ($v0..$v3)
// and the Float32Regs coordinate pair ($x, $y) after $t and $s.
def TLD4_R_2D_S32_F32 : NVPTXInst<
  (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y),
  "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
  "[$t, $s, \\{$x, $y\\}];",
  []>;
// Prints "tld4.g.2d.v4.s32.f32" with four Int32Regs results ($v0..$v3)
// and the Float32Regs coordinate pair ($x, $y) after $t and $s.
def TLD4_G_2D_S32_F32 : NVPTXInst<
  (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y),
  "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
  "[$t, $s, \\{$x, $y\\}];",
  []>;
// Prints "tld4.b.2d.v4.s32.f32" with four Int32Regs results ($v0..$v3)
// and the Float32Regs coordinate pair ($x, $y) after $t and $s.
def TLD4_B_2D_S32_F32 : NVPTXInst<
  (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y),
  "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
  "[$t, $s, \\{$x, $y\\}];",
  []>;
// Prints "tld4.a.2d.v4.s32.f32" with four Int32Regs results ($v0..$v3)
// and the Float32Regs coordinate pair ($x, $y) after $t and $s.
def TLD4_A_2D_S32_F32 : NVPTXInst<
  (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y),
  "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
  "[$t, $s, \\{$x, $y\\}];",
  []>;
// Prints "tld4.r.2d.v4.u32.f32" with four Int32Regs results ($v0..$v3)
// and the Float32Regs coordinate pair ($x, $y) after $t and $s.
def TLD4_R_2D_U32_F32 : NVPTXInst<
  (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y),
  "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
  "[$t, $s, \\{$x, $y\\}];",
  []>;
// Prints "tld4.g.2d.v4.u32.f32" with four Int32Regs results ($v0..$v3)
// and the Float32Regs coordinate pair ($x, $y) after $t and $s.
def TLD4_G_2D_U32_F32 : NVPTXInst<
  (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y),
  "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
  "[$t, $s, \\{$x, $y\\}];",
  []>;
// Prints "tld4.b.2d.v4.u32.f32" with four Int32Regs results ($v0..$v3)
// and the Float32Regs coordinate pair ($x, $y) after $t and $s.
def TLD4_B_2D_U32_F32 : NVPTXInst<
  (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y),
  "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
  "[$t, $s, \\{$x, $y\\}];",
  []>;
// Prints "tld4.a.2d.v4.u32.f32" with four Int32Regs results ($v0..$v3)
// and the Float32Regs coordinate pair ($x, $y) after $t and $s.
def TLD4_A_2D_U32_F32 : NVPTXInst<
  (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
  (ins Int64Regs:$t, Int64Regs:$s,
       Float32Regs:$x, Float32Regs:$y),
  "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
  "[$t, $s, \\{$x, $y\\}];",
  []>;
2977}
2978
2979
2980// texmode_unified
2981let IsTex = 1, IsTexModeUnified = 1 in {
2982// Texture fetch instructions using handles
// Prints "tex.1d.v4.f32.s32" with four Float32Regs results; the address
// holds the single Int64 handle $t and the Int32Regs coordinate $x.
def TEX_UNIFIED_1D_F32_S32 : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t,
       Int32Regs:$x),
  "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
  []>;
// Prints "tex.1d.v4.f32.f32" with four Float32Regs results; the address
// holds the single Int64 handle $t and the Float32Regs coordinate $x.
def TEX_UNIFIED_1D_F32_F32 : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t,
       Float32Regs:$x),
  "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
  []>;
// Prints "tex.level.1d.v4.f32.f32"; $lod (Float32Regs) trails the bracketed
// address that holds the handle $t and the coordinate $x.
def TEX_UNIFIED_1D_F32_F32_LEVEL : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t,
       Float32Regs:$x,
       Float32Regs:$lod),
  "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, \\{$x\\}], $lod;",
  []>;
// Prints "tex.grad.1d.v4.f32.f32"; the single-element vectors {$gradx} and
// {$grady} trail the bracketed address that holds $t and $x.
def TEX_UNIFIED_1D_F32_F32_GRAD : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t,
       Float32Regs:$x,
       Float32Regs:$gradx, Float32Regs:$grady),
  "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
  []>;
// Prints "tex.1d.v4.s32.s32" with four Int32Regs results; the address
// holds the single Int64 handle $t and the Int32Regs coordinate $x.
def TEX_UNIFIED_1D_S32_S32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t,
       Int32Regs:$x),
  "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
  []>;
// Prints "tex.1d.v4.s32.f32" with four Int32Regs results; the address
// holds the single Int64 handle $t and the Float32Regs coordinate $x.
def TEX_UNIFIED_1D_S32_F32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t,
       Float32Regs:$x),
  "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
  []>;
// Prints "tex.level.1d.v4.s32.f32"; $lod (Float32Regs) trails the bracketed
// address that holds the handle $t and the coordinate $x.
def TEX_UNIFIED_1D_S32_F32_LEVEL : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t,
       Float32Regs:$x,
       Float32Regs:$lod),
  "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, \\{$x\\}], $lod;",
  []>;
// Prints "tex.grad.1d.v4.s32.f32"; the single-element vectors {$gradx} and
// {$grady} trail the bracketed address that holds $t and $x.
def TEX_UNIFIED_1D_S32_F32_GRAD : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t,
       Float32Regs:$x,
       Float32Regs:$gradx, Float32Regs:$grady),
  "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
  []>;
// Prints "tex.1d.v4.u32.s32" with four Int32Regs results; the address
// holds the single Int64 handle $t and the Int32Regs coordinate $x.
def TEX_UNIFIED_1D_U32_S32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t,
       Int32Regs:$x),
  "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
  []>;
// Prints "tex.1d.v4.u32.f32" with four Int32Regs results; the address
// holds the single Int64 handle $t and the Float32Regs coordinate $x.
def TEX_UNIFIED_1D_U32_F32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t,
       Float32Regs:$x),
  "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
  []>;
// Prints "tex.level.1d.v4.u32.f32"; $lod (Float32Regs) trails the bracketed
// address that holds the handle $t and the coordinate $x.
def TEX_UNIFIED_1D_U32_F32_LEVEL : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t,
       Float32Regs:$x,
       Float32Regs:$lod),
  "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, \\{$x\\}], $lod;",
  []>;
// Prints "tex.grad.1d.v4.u32.f32"; the single-element vectors {$gradx} and
// {$grady} trail the bracketed address that holds $t and $x.
def TEX_UNIFIED_1D_U32_F32_GRAD : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t,
       Float32Regs:$x,
       Float32Regs:$gradx, Float32Regs:$grady),
  "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
  []>;
3066
// Prints "tex.a1d.v4.f32.s32" with four Float32Regs results; the address
// holds the handle $t and the Int32Regs vector {$l, $x}.
def TEX_UNIFIED_1D_ARRAY_F32_S32 : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t, Int32Regs:$l,
       Int32Regs:$x),
  "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, \\{$l, $x\\}];",
  []>;
// Prints "tex.a1d.v4.f32.f32" with four Float32Regs results; the address
// holds the handle $t and {$l, $x} (Int32Regs $l, Float32Regs $x).
def TEX_UNIFIED_1D_ARRAY_F32_F32 : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t, Int32Regs:$l,
       Float32Regs:$x),
  "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, \\{$l, $x\\}];",
  []>;
// Prints "tex.level.a1d.v4.f32.f32"; $lod (Float32Regs) trails the
// bracketed address that holds the handle $t and {$l, $x}.
def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t, Int32Regs:$l,
       Float32Regs:$x,
       Float32Regs:$lod),
  "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, \\{$l, $x\\}], $lod;",
  []>;
// Prints "tex.grad.a1d.v4.f32.f32"; the single-element vectors {$gradx} and
// {$grady} trail the bracketed address that holds $t and {$l, $x}.
def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD : NVPTXInst<
  (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
  (ins Int64Regs:$t, Int32Regs:$l,
       Float32Regs:$x,
       Float32Regs:$gradx, Float32Regs:$grady),
  "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
  []>;
// Prints "tex.a1d.v4.s32.s32" with four Int32Regs results; the address
// holds the handle $t and the Int32Regs vector {$l, $x}.
def TEX_UNIFIED_1D_ARRAY_S32_S32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int32Regs:$l,
       Int32Regs:$x),
  "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, \\{$l, $x\\}];",
  []>;
// Prints "tex.a1d.v4.s32.f32" with four Int32Regs results; the address
// holds the handle $t and {$l, $x} (Int32Regs $l, Float32Regs $x).
def TEX_UNIFIED_1D_ARRAY_S32_F32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int32Regs:$l,
       Float32Regs:$x),
  "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, \\{$l, $x\\}];",
  []>;
// Prints "tex.level.a1d.v4.s32.f32"; $lod (Float32Regs) trails the
// bracketed address that holds the handle $t and {$l, $x}.
def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int32Regs:$l,
       Float32Regs:$x,
       Float32Regs:$lod),
  "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, \\{$l, $x\\}], $lod;",
  []>;
// Prints "tex.grad.a1d.v4.s32.f32"; the single-element vectors {$gradx} and
// {$grady} trail the bracketed address that holds $t and {$l, $x}.
def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int32Regs:$l,
       Float32Regs:$x,
       Float32Regs:$gradx, Float32Regs:$grady),
  "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
  []>;
// Prints "tex.a1d.v4.u32.s32" with four Int32Regs results; the address
// holds the handle $t and the Int32Regs vector {$l, $x}.
def TEX_UNIFIED_1D_ARRAY_U32_S32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int32Regs:$l,
       Int32Regs:$x),
  "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, \\{$l, $x\\}];",
  []>;
// Prints "tex.a1d.v4.u32.f32" with four Int32Regs results; the address
// holds the handle $t and {$l, $x} (Int32Regs $l, Float32Regs $x).
def TEX_UNIFIED_1D_ARRAY_U32_F32 : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int32Regs:$l,
       Float32Regs:$x),
  "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, \\{$l, $x\\}];",
  []>;
// Prints "tex.level.a1d.v4.u32.f32"; $lod (Float32Regs) trails the
// bracketed address that holds the handle $t and {$l, $x}.
def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int32Regs:$l,
       Float32Regs:$x,
       Float32Regs:$lod),
  "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, \\{$l, $x\\}], $lod;",
  []>;
// Prints "tex.grad.a1d.v4.u32.f32"; the single-element vectors {$gradx} and
// {$grady} trail the bracketed address that holds $t and {$l, $x}.
def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$t, Int32Regs:$l,
       Float32Regs:$x,
       Float32Regs:$gradx, Float32Regs:$grady),
  "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
  "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
  []>;
3157
// tex.*.2d variants with four f32 results. The coordinate vector is
// {$x, $y}. _LEVEL appends $lod; _GRAD appends two two-element vectors,
// {$gradx0, $gradx1} and {$grady0, $grady1}.
def TEX_UNIFIED_2D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
    "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TEX_UNIFIED_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TEX_UNIFIED_2D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$lod),
    "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], $lod;",
    []>;
def TEX_UNIFIED_2D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
// tex.*.2d variants with four s32 results (held in Int32Regs). The
// coordinate vector is {$x, $y}. _LEVEL appends $lod; _GRAD appends
// {$gradx0, $gradx1} and {$grady0, $grady1}.
def TEX_UNIFIED_2D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
    "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TEX_UNIFIED_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TEX_UNIFIED_2D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$lod),
    "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], $lod;",
    []>;
def TEX_UNIFIED_2D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
// tex.*.2d variants with four u32 results (held in Int32Regs). The
// coordinate vector is {$x, $y}. _LEVEL appends $lod; _GRAD appends
// {$gradx0, $gradx1} and {$grady0, $grady1}.
def TEX_UNIFIED_2D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
    "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TEX_UNIFIED_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TEX_UNIFIED_2D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$lod),
    "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], $lod;",
    []>;
def TEX_UNIFIED_2D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
3254
// tex.*.a2d variants with four f32 results. The coordinate vector is
// emitted with four slots, {$l, $x, $y, $y}: $y is repeated to fill the
// last one (presumably a PTX-ignored padding element; see the PTX ISA).
// _LEVEL appends $lod; _GRAD appends {$gradx0, $gradx1} and
// {$grady0, $grady1}.
def TEX_UNIFIED_2D_ARRAY_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
def TEX_UNIFIED_2D_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, Float32Regs:$y),
    "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], $lod;",
    []>;
def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
// tex.*.a2d variants with four s32 results (held in Int32Regs). The
// coordinate vector is emitted as {$l, $x, $y, $y}: $y is repeated to fill
// the fourth slot (presumably PTX-ignored padding; see the PTX ISA).
// _LEVEL appends $lod; _GRAD appends {$gradx0, $gradx1} and
// {$grady0, $grady1}.
def TEX_UNIFIED_2D_ARRAY_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
def TEX_UNIFIED_2D_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, Float32Regs:$y),
    "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], $lod;",
    []>;
def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
// tex.*.a2d variants with four u32 results (held in Int32Regs). The
// coordinate vector is emitted as {$l, $x, $y, $y}: $y is repeated to fill
// the fourth slot (presumably PTX-ignored padding; see the PTX ISA).
// _LEVEL appends $lod; _GRAD appends {$gradx0, $gradx1} and
// {$grady0, $grady1}.
def TEX_UNIFIED_2D_ARRAY_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
def TEX_UNIFIED_2D_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, Float32Regs:$y),
    "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], $lod;",
    []>;
def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
3359
// tex.*.3d variants with four f32 results. Three coordinates are supplied
// but a four-slot vector is emitted, {$x, $y, $z, $z}: $z is repeated in
// the last slot (presumably PTX-ignored padding; see the PTX ISA). The
// _GRAD form pads its two gradient vectors the same way, repeating
// $gradx2 and $grady2.
def TEX_UNIFIED_3D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_3D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_3D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
def TEX_UNIFIED_3D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
         Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;
// tex.*.3d variants with four s32 results (held in Int32Regs). The
// coordinate vector is emitted as {$x, $y, $z, $z}: $z is repeated in the
// fourth slot (presumably PTX-ignored padding; see the PTX ISA). The
// _GRAD form pads its gradient vectors the same way with $gradx2/$grady2.
def TEX_UNIFIED_3D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_3D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_3D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
def TEX_UNIFIED_3D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
         Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;
// tex.*.3d variants with four u32 results (held in Int32Regs). The
// coordinate vector is emitted as {$x, $y, $z, $z}: $z is repeated in the
// fourth slot (presumably PTX-ignored padding; see the PTX ISA). The
// _GRAD form pads its gradient vectors the same way with $gradx2/$grady2.
def TEX_UNIFIED_3D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_3D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_3D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
def TEX_UNIFIED_3D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
         Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;
3471
// tex.cube / tex.level.cube with f32 coordinates, for f32, s32 and u32
// results. The coordinate vector is emitted as {$x, $y, $z, $z}: $z is
// repeated in the fourth slot (presumably PTX-ignored padding; see the PTX
// ISA). Only plain and _LEVEL forms are defined in this group.
def TEX_UNIFIED_CUBE_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_CUBE_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
def TEX_UNIFIED_CUBE_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_CUBE_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
def TEX_UNIFIED_CUBE_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_CUBE_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
3523
// tex.acube / tex.level.acube with f32 coordinates, for f32, s32 and u32
// results. The coordinate vector is {$l, $x, $y, $z}; all four slots carry
// distinct operands, so nothing is repeated here. Only plain and _LEVEL
// forms are defined in this group.
def TEX_UNIFIED_CUBE_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}];",
    []>;
def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
    "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}], $lod;",
    []>;
def TEX_UNIFIED_CUBE_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}];",
    []>;
def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
    "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}], $lod;",
    []>;
def TEX_UNIFIED_CUBE_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}];",
    []>;
def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
    "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}], $lod;",
    []>;
3575
// tld4.{r,g,b,a}.2d with f32 results and f32 coordinates. The four defs
// differ only in the component selector embedded in the mnemonic; each
// writes $v0..$v3 and reads the coordinate vector {$x, $y}.
def TLD4_UNIFIED_R_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_G_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_B_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_A_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// tld4.{r,g,b,a}.2d with s32 results (held in Int32Regs) and f32
// coordinates. The four defs differ only in the component selector
// embedded in the mnemonic; each writes $v0..$v3 and reads {$x, $y}.
def TLD4_UNIFIED_R_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_G_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_B_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_A_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// tld4.{r,g,b,a}.2d with u32 results (held in Int32Regs) and f32
// coordinates. The four defs differ only in the component selector
// embedded in the mnemonic; each writes $v0..$v3 and reads {$x, $y}.
def TLD4_UNIFIED_R_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_G_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_B_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_A_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
3660}
3661
3662
3663
3664//=== Surface load instructions
3665// .clamp variant
// Single-result suld.b.*.clamp loads. Every def in this scope has exactly
// one output register and gets IsSuld = 1 (the meaning of the field value
// is defined outside this section).
//
// Result register class by element width, as used below:
//   b8, b16 -> Int16Regs    b32 -> Int32Regs    b64 -> Int64Regs
//
// Operands are $s followed by Int32Regs coordinates. The a2d and 3d forms
// emit a four-slot coordinate vector and repeat the final coordinate
// ($y or $z) in the last slot (presumably PTX-ignored padding; see the
// PTX ISA).
let IsSuld = 1 in {
  // 1D: coordinate vector {$x}.
  def SULD_1D_I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
      []>;

  // 1D array: coordinate vector {$l, $x}.
  def SULD_1D_ARRAY_I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
      []>;

  // 2D: coordinate vector {$x, $y}.
  def SULD_2D_I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
      []>;

  // 2D array: coordinate vector {$l, $x, $y, $y} ($y repeated).
  def SULD_2D_ARRAY_I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;

  // 3D: coordinate vector {$x, $y, $z, $z} ($z repeated).
  def SULD_3D_I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
}
3772
3773let IsSuld = 2 in {
// Two-result suld.b.1d.v2.*.clamp loads; coordinate vector {$x}.
// b8 and b16 results use Int16Regs, b32 uses Int32Regs, b64 uses Int64Regs.
def SULD_1D_V2I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_V2I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_V2I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_V2I64_CLAMP : NVPTXInst<
    (outs Int64Regs:$r, Int64Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;
3794
// Two-result suld.b.a1d.v2.*.clamp loads; coordinate vector {$l, $x}.
// b8 and b16 results use Int16Regs, b32 uses Int32Regs, b64 uses Int64Regs.
def SULD_1D_ARRAY_V2I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_V2I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_V2I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_V2I64_CLAMP : NVPTXInst<
    (outs Int64Regs:$r, Int64Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;
3815
3816def SULD_2D_V2I8_CLAMP
3817  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3818              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3819              "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3820              []>;
3821def SULD_2D_V2I16_CLAMP
3822  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3823              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3824              "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3825              []>;
3826def SULD_2D_V2I32_CLAMP
3827  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3828              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3829              "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3830              []>;
3831def SULD_2D_V2I64_CLAMP
3832  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3833              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3834              "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3835              []>;
3836
3837def SULD_2D_ARRAY_V2I8_CLAMP
3838  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3839              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3840              "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
3841              "[$s, \\{$l, $x, $y, $y\\}];",
3842              []>;
3843def SULD_2D_ARRAY_V2I16_CLAMP
3844  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3845              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3846              "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
3847              "[$s, \\{$l, $x, $y, $y\\}];",
3848              []>;
3849def SULD_2D_ARRAY_V2I32_CLAMP
3850  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3851              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3852              "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
3853              "[$s, \\{$l, $x, $y, $y\\}];",
3854              []>;
3855def SULD_2D_ARRAY_V2I64_CLAMP
3856  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3857              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3858              "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
3859              "[$s, \\{$l, $x, $y, $y\\}];",
3860              []>;
3861
3862def SULD_3D_V2I8_CLAMP
3863  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3864              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3865              "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3866              []>;
3867def SULD_3D_V2I16_CLAMP
3868  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3869              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3870              "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3871              []>;
3872def SULD_3D_V2I32_CLAMP
3873  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3874              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3875              "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3876              []>;
3877def SULD_3D_V2I64_CLAMP
3878  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3879              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3880              "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3881              []>;
3882}
3883
// Four-result (.v4) `suld.b` records that print the `.clamp` suffix.
// Every def in this group inherits IsSuld = 3 and has an empty pattern list.
// Only b8/b16/b32 element widths are defined for the .v4 shape in this group.
// NOTE(review): $s looks like the surface handle, $l the array layer and
// $x/$y/$z the coordinates -- confirm against the lowering code.
let IsSuld = 3 in {

// 1d: coordinate vector {$x}.
def SULD_1D_V4I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_V4I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_V4I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
    []>;

// a1d: coordinate vector {$l, $x}.
def SULD_1D_ARRAY_V4I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_V4I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_V4I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
    []>;

// 2d: coordinate vector {$x, $y}.
def SULD_2D_V4I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_V4I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_V4I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
    []>;

// a2d: four-slot coordinate vector with $y repeated in the last slot
// (presumably padding that PTX ignores -- confirm with the ISA doc).
def SULD_2D_ARRAY_V4I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_V4I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_V4I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;

// 3d: four-slot coordinate vector with $z repeated in the last slot.
def SULD_3D_V4I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_V4I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_V4I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;

} // IsSuld = 3
3975
3976
3977// .trap variant
// Single-result `suld.b` records that print the `.trap` suffix.
// Every def in this group inherits IsSuld = 1 and has an empty pattern list.
// b8 and b16 results both land in Int16Regs; b32/b64 use Int32Regs/Int64Regs.
// NOTE(review): $s looks like the surface handle, $l the array layer and
// $x/$y/$z the coordinates -- confirm against the lowering code.
let IsSuld = 1 in {

// 1d: coordinate vector {$x}.
def SULD_1D_I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_I64_TRAP : NVPTXInst<
    (outs Int64Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
    []>;

// a1d: coordinate vector {$l, $x}.
def SULD_1D_ARRAY_I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_I64_TRAP : NVPTXInst<
    (outs Int64Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
    []>;

// 2d: coordinate vector {$x, $y}.
def SULD_2D_I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_I64_TRAP : NVPTXInst<
    (outs Int64Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
    []>;

// a2d: four-slot coordinate vector with $y repeated in the last slot
// (presumably padding that PTX ignores -- confirm with the ISA doc).
def SULD_2D_ARRAY_I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_I64_TRAP : NVPTXInst<
    (outs Int64Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;

// 3d: four-slot coordinate vector with $z repeated in the last slot.
def SULD_3D_I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_I64_TRAP : NVPTXInst<
    (outs Int64Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;

} // IsSuld = 1
4084
// Two-result (.v2) `suld.b` records that print the `.trap` suffix.
// Every def in this group inherits IsSuld = 2 and has an empty pattern list.
// NOTE(review): $s looks like the surface handle, $l the array layer and
// $x/$y/$z the coordinates -- confirm against the lowering code.
let IsSuld = 2 in {

// 1d: coordinate vector {$x}.
def SULD_1D_V2I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_V2I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_V2I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_V2I64_TRAP : NVPTXInst<
    (outs Int64Regs:$r, Int64Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;

// a1d: coordinate vector {$l, $x}.
def SULD_1D_ARRAY_V2I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_V2I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_V2I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_V2I64_TRAP : NVPTXInst<
    (outs Int64Regs:$r, Int64Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;

// 2d: coordinate vector {$x, $y}.
def SULD_2D_V2I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_V2I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_V2I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_V2I64_TRAP : NVPTXInst<
    (outs Int64Regs:$r, Int64Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
    []>;

// a2d: four-slot coordinate vector with $y repeated in the last slot
// (presumably padding that PTX ignores -- confirm with the ISA doc).
def SULD_2D_ARRAY_V2I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_V2I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_V2I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_V2I64_TRAP : NVPTXInst<
    (outs Int64Regs:$r, Int64Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;

// 3d: four-slot coordinate vector with $z repeated in the last slot.
def SULD_3D_V2I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_V2I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_V2I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_V2I64_TRAP : NVPTXInst<
    (outs Int64Regs:$r, Int64Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;

} // IsSuld = 2
4195
// Four-result (.v4) `suld.b` records that print the `.trap` suffix.
// Every def in this group inherits IsSuld = 3 and has an empty pattern list.
// Only b8/b16/b32 element widths are defined for the .v4 shape in this group.
// NOTE(review): $s looks like the surface handle, $l the array layer and
// $x/$y/$z the coordinates -- confirm against the lowering code.
let IsSuld = 3 in {

// 1d: coordinate vector {$x}.
def SULD_1D_V4I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_V4I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_V4I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
    []>;

// a1d: coordinate vector {$l, $x}.
def SULD_1D_ARRAY_V4I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_V4I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_V4I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
    []>;

// 2d: coordinate vector {$x, $y}.
def SULD_2D_V4I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_V4I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_V4I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
    []>;

// a2d: four-slot coordinate vector with $y repeated in the last slot
// (presumably padding that PTX ignores -- confirm with the ISA doc).
def SULD_2D_ARRAY_V4I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_V4I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_V4I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;

// 3d: four-slot coordinate vector with $z repeated in the last slot.
def SULD_3D_V4I8_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_V4I16_TRAP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_V4I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;

} // IsSuld = 3
4287
4288// .zero variant
// Single-result `suld.b` records that print the `.zero` suffix.
// Every def in this group inherits IsSuld = 1 and has an empty pattern list.
// b8 and b16 results both land in Int16Regs; b32/b64 use Int32Regs/Int64Regs.
// NOTE(review): $s looks like the surface handle, $l the array layer and
// $x/$y/$z the coordinates -- confirm against the lowering code.
let IsSuld = 1 in {

// 1d: coordinate vector {$x}.
def SULD_1D_I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_I64_ZERO : NVPTXInst<
    (outs Int64Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
    []>;

// a1d: coordinate vector {$l, $x}.
def SULD_1D_ARRAY_I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_I64_ZERO : NVPTXInst<
    (outs Int64Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
    []>;

// 2d: coordinate vector {$x, $y}.
def SULD_2D_I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_I64_ZERO : NVPTXInst<
    (outs Int64Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
    []>;

// a2d: four-slot coordinate vector with $y repeated in the last slot
// (presumably padding that PTX ignores -- confirm with the ISA doc).
def SULD_2D_ARRAY_I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_I64_ZERO : NVPTXInst<
    (outs Int64Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;

// 3d: four-slot coordinate vector with $z repeated in the last slot.
def SULD_3D_I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_I64_ZERO : NVPTXInst<
    (outs Int64Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;

} // IsSuld = 1
4395
// Two-result (.v2) `suld.b` records that print the `.zero` suffix.
// Every def in this group inherits IsSuld = 2 and has an empty pattern list.
// NOTE(review): $s looks like the surface handle, $l the array layer and
// $x/$y/$z the coordinates -- confirm against the lowering code.
let IsSuld = 2 in {

// 1d: coordinate vector {$x}.
def SULD_1D_V2I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_V2I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_V2I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_V2I64_ZERO : NVPTXInst<
    (outs Int64Regs:$r, Int64Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;

// a1d: coordinate vector {$l, $x}.
def SULD_1D_ARRAY_V2I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_V2I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_V2I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_V2I64_ZERO : NVPTXInst<
    (outs Int64Regs:$r, Int64Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;

// 2d: coordinate vector {$x, $y}.
def SULD_2D_V2I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_V2I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_V2I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_V2I64_ZERO : NVPTXInst<
    (outs Int64Regs:$r, Int64Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
    []>;

// a2d: four-slot coordinate vector with $y repeated in the last slot
// (presumably padding that PTX ignores -- confirm with the ISA doc).
def SULD_2D_ARRAY_V2I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_V2I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_V2I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_V2I64_ZERO : NVPTXInst<
    (outs Int64Regs:$r, Int64Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;

// 3d: four-slot coordinate vector with $z repeated in the last slot.
def SULD_3D_V2I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_V2I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_V2I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_V2I64_ZERO : NVPTXInst<
    (outs Int64Regs:$r, Int64Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;

} // IsSuld = 2
4506
let IsSuld = 3 in {
// suld.b.*.v4.*.zero records: four results ($r, $g, $b, $a).  Every record
// in this scope gets IsSuld = 3.  The b8 and b16 forms both use Int16Regs
// results; the b32 forms use Int32Regs.

// 1d: operand list [$s, {$x}].
def SULD_1D_V4I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
    []>;

def SULD_1D_V4I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
    []>;

def SULD_1D_V4I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
    []>;

// a1d: operand list [$s, {$l, $x}].
def SULD_1D_ARRAY_V4I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
    []>;

def SULD_1D_ARRAY_V4I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
    []>;

def SULD_1D_ARRAY_V4I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
    []>;

// 2d: operand list [$s, {$x, $y}].
def SULD_2D_V4I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
    []>;

def SULD_2D_V4I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
    []>;

def SULD_2D_V4I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
    []>;

// a2d: operand list [$s, {$l, $x, $y, $y}].
// NOTE(review): $y is printed twice so the brace list has four entries --
// presumably padding for the PTX coordinate vector; confirm against the
// PTX ISA.
def SULD_2D_ARRAY_V4I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;

def SULD_2D_ARRAY_V4I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;

def SULD_2D_ARRAY_V4I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;

// 3d: operand list [$s, {$x, $y, $z, $z}] ($z is likewise printed twice).
def SULD_3D_V4I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;

def SULD_3D_V4I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;

def SULD_3D_V4I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
}
4598
4599//-----------------------------------
4600// Texture Query Intrinsics
4601//-----------------------------------
4602
let IsSurfTexQuery = 1 in {
// txq.<attribute>.b32 records.  Each one takes a single Int64Regs operand
// $a, printed as [$a], and defines an Int32Regs result $d.  None carries a
// selection pattern of its own (the pattern list is empty).
def TXQ_CHANNEL_ORDER : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "txq.channel_order.b32 \t$d, [$a];",
    []>;

def TXQ_CHANNEL_DATA_TYPE : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "txq.channel_data_type.b32 \t$d, [$a];",
    []>;

def TXQ_WIDTH : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "txq.width.b32 \t$d, [$a];",
    []>;

def TXQ_HEIGHT : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "txq.height.b32 \t$d, [$a];",
    []>;

def TXQ_DEPTH : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "txq.depth.b32 \t$d, [$a];",
    []>;

def TXQ_ARRAY_SIZE : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "txq.array_size.b32 \t$d, [$a];",
    []>;

def TXQ_NUM_SAMPLES : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "txq.num_samples.b32 \t$d, [$a];",
    []>;

def TXQ_NUM_MIPMAP_LEVELS : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "txq.num_mipmap_levels.b32 \t$d, [$a];",
    []>;
}
4637
// Selection patterns: each int_nvvm_txq_* intrinsic applied to an Int64Regs
// operand is rewritten to the TXQ_* instruction of the same name.
def : Pat<
    (int_nvvm_txq_channel_order Int64Regs:$a),
    (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_channel_data_type Int64Regs:$a),
    (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_width Int64Regs:$a),
    (TXQ_WIDTH Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_height Int64Regs:$a),
    (TXQ_HEIGHT Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_depth Int64Regs:$a),
    (TXQ_DEPTH Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_array_size Int64Regs:$a),
    (TXQ_ARRAY_SIZE Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_num_samples Int64Regs:$a),
    (TXQ_NUM_SAMPLES Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
    (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
4654
4655
4656//-----------------------------------
4657// Surface Query Intrinsics
4658//-----------------------------------
4659
let IsSurfTexQuery = 1 in {
// suq.<attribute>.b32 records.  Each one takes a single Int64Regs operand
// $a, printed as [$a], and defines an Int32Regs result $d.  None carries a
// selection pattern of its own (the pattern list is empty).
def SUQ_CHANNEL_ORDER : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "suq.channel_order.b32 \t$d, [$a];",
    []>;

def SUQ_CHANNEL_DATA_TYPE : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "suq.channel_data_type.b32 \t$d, [$a];",
    []>;

def SUQ_WIDTH : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "suq.width.b32 \t$d, [$a];",
    []>;

def SUQ_HEIGHT : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "suq.height.b32 \t$d, [$a];",
    []>;

def SUQ_DEPTH : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "suq.depth.b32 \t$d, [$a];",
    []>;

def SUQ_ARRAY_SIZE : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "suq.array_size.b32 \t$d, [$a];",
    []>;
}
4686
// Selection patterns: each int_nvvm_suq_* intrinsic applied to an Int64Regs
// operand is rewritten to the SUQ_* instruction of the same name.
def : Pat<
    (int_nvvm_suq_channel_order Int64Regs:$a),
    (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_channel_data_type Int64Regs:$a),
    (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_width Int64Regs:$a),
    (SUQ_WIDTH Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_height Int64Regs:$a),
    (SUQ_HEIGHT Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_depth Int64Regs:$a),
    (SUQ_DEPTH Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_array_size Int64Regs:$a),
    (SUQ_ARRAY_SIZE Int64Regs:$a)>;
4699
4700
4701//===- Handle Query -------------------------------------------------------===//
4702
// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
//
// istypep.* records.  Each one takes an Int64Regs operand $a, printed bare
// (no brackets), and sets an Int1Regs result $d.  Unlike the query records,
// the selection pattern is attached directly to the instruction.
def ISTYPEP_SAMPLER : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "istypep.samplerref \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;

def ISTYPEP_SURFACE : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "istypep.surfref \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;

def ISTYPEP_TEXTURE : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "istypep.texref \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4716
4717//===- Surface Stores -----------------------------------------------------===//
4718
4719let IsSust = 1 in {
4720// Unformatted
4721// .clamp variant
// sust.b.1d.*.clamp records.  No results; the operand list is printed as
// [$s, {$x}] followed by a brace list of the stored registers.  In each
// (ins ...) dag the first line holds $s and $x and the second line holds
// the stored registers.  The b8 and b16 forms both use Int16Regs data.
def SUST_B_1D_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r),
    "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int64Regs:$r),
    "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;

def SUST_B_1D_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;

def SUST_B_1D_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
4780
4781
// sust.b.a1d.*.clamp records.  No results; the operand list is printed as
// [$s, {$idx, $x}] followed by a brace list of the stored registers.  In
// each (ins ...) dag the first line holds $s, $idx and $x and the second
// line holds the stored registers.  The b8 and b16 forms both use Int16Regs
// data.
def SUST_B_1D_ARRAY_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_ARRAY_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_ARRAY_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r),
    "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_ARRAY_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r),
    "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_ARRAY_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_ARRAY_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_ARRAY_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_ARRAY_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_ARRAY_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
    []>;

def SUST_B_1D_ARRAY_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
    []>;

def SUST_B_1D_ARRAY_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
    []>;
4847
4848
// sust.b.2d.*.clamp records.  No results; the operand list is printed as
// [$s, {$x, $y}] followed by a brace list of the stored registers.  In each
// (ins ...) dag the first line holds $s, $x and $y and the second line
// holds the stored registers.  The b8 and b16 forms both use Int16Regs data.
def SUST_B_2D_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;

def SUST_B_2D_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;

def SUST_B_2D_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;

def SUST_B_2D_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r),
    "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;

def SUST_B_2D_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;

def SUST_B_2D_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;

def SUST_B_2D_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;

def SUST_B_2D_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;

def SUST_B_2D_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
    []>;

def SUST_B_2D_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
    []>;

def SUST_B_2D_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
    []>;
4914
4915
// sust.b.a2d.*.clamp records.  No results; the operand list is printed as
// [$s, {$idx, $x, $y, $y}] followed by a brace list of the stored registers.
// In each (ins ...) dag the first line holds $s, $idx, $x and $y and the
// second line holds the stored registers.  The b8 and b16 forms both use
// Int16Regs data.
// NOTE(review): $y is printed twice so the brace list has four entries --
// presumably padding for the PTX coordinate vector; confirm against the
// PTX ISA.
// The v4 asm strings are kept as two adjacent literals (TableGen joins
// them) because a single literal would run past 80 columns.
def SUST_B_2D_ARRAY_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;

def SUST_B_2D_ARRAY_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;

def SUST_B_2D_ARRAY_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;

def SUST_B_2D_ARRAY_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r),
    "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;

def SUST_B_2D_ARRAY_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
    []>;

def SUST_B_2D_ARRAY_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
    []>;

def SUST_B_2D_ARRAY_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
    []>;

def SUST_B_2D_ARRAY_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
    []>;

def SUST_B_2D_ARRAY_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;

def SUST_B_2D_ARRAY_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;

def SUST_B_2D_ARRAY_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
4989
4990
// sust.b.3d.*.clamp records.  No results; the operand list is printed as
// [$s, {$x, $y, $z, $z}] followed by a brace list of the stored registers.
// In each (ins ...) dag the first line holds $s, $x, $y and $z and the
// second line holds the stored registers.  The b8 and b16 forms both use
// Int16Regs data.
// NOTE(review): $z is printed twice so the brace list has four entries --
// presumably padding for the PTX coordinate vector; confirm against the
// PTX ISA.
// The v4 asm strings are kept as two adjacent literals (TableGen joins
// them) because a single literal would run past 80 columns.
def SUST_B_3D_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;

def SUST_B_3D_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;

def SUST_B_3D_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r),
    "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;

def SUST_B_3D_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r),
    "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;

def SUST_B_3D_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
    []>;

def SUST_B_3D_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
    []>;

def SUST_B_3D_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
    []>;

def SUST_B_3D_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
    []>;

def SUST_B_3D_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;

def SUST_B_3D_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;

def SUST_B_3D_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
5064
5065
5066// .trap variant
// sust.b.1d.*.trap records.  No results; the operand list is printed as
// [$s, {$x}] followed by a brace list of the stored registers.  In each
// (ins ...) dag the first line holds $s and $x and the second line holds
// the stored registers.  The b8 and b16 forms both use Int16Regs data.
def SUST_B_1D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r),
    "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int64Regs:$r),
    "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_V2B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;

def SUST_B_1D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;

def SUST_B_1D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
5125
5126
// sust.b.a1d.*.trap records.  No results; the operand list is printed as
// [$s, {$idx, $x}] followed by a brace list of the stored registers.  In
// each (ins ...) dag the first line holds $s, $idx and $x and the second
// line holds the stored registers.  The b8 and b16 forms both use Int16Regs
// data.
def SUST_B_1D_ARRAY_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_ARRAY_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_ARRAY_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r),
    "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_ARRAY_B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r),
    "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;

def SUST_B_1D_ARRAY_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_ARRAY_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_ARRAY_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_ARRAY_V2B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;

def SUST_B_1D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
    []>;

def SUST_B_1D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
    []>;

def SUST_B_1D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
    []>;
5192
5193
// sust.b.2d.*.trap records.  No results; the operand list is printed as
// [$s, {$x, $y}] followed by a brace list of the stored registers.  In each
// (ins ...) dag the first line holds $s, $x and $y and the second line
// holds the stored registers.  The b8 and b16 forms both use Int16Regs data.
def SUST_B_2D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;

def SUST_B_2D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;

def SUST_B_2D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;

def SUST_B_2D_B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r),
    "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;

def SUST_B_2D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;

def SUST_B_2D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;

def SUST_B_2D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;

def SUST_B_2D_V2B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;

def SUST_B_2D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
    []>;
5245def SUST_B_2D_V4B16_TRAP
5246  : NVPTXInst<(outs),
5247              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5248                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5249             "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5250             "\\{$r, $g, $b, $a\\};",
5251              []>;
5252def SUST_B_2D_V4B32_TRAP
5253  : NVPTXInst<(outs),
5254              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5255                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5256             "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5257             "\\{$r, $g, $b, $a\\};",
5258              []>;
5259
5260
// sust.b.a2d.*.trap stores.
//
// Every record has no outputs and an empty pattern list. Operands are $s
// (Int64Regs), then $idx, $x and $y (Int32Regs), then one, two or four data
// registers ($r[, $g[, $b, $a]]) that all share one register class.
// The asm string lists $y twice so the coordinate vector has four entries.
// NOTE(review): presumably PTX wants a four-element vector here and ignores
// the last lane -- confirm against the PTX ISA before changing it.
//   ty - element-type token placed in the mnemonic ("b8", "b16", ...)
//   rc - register class of the data operands
class SustBA2DTrap<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   rc:$r),
              "sust.b.a2d." # ty # ".trap \t" #
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
              []>;
class SustBA2DTrapV2<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   rc:$r, rc:$g),
              "sust.b.a2d.v2." # ty # ".trap \t" #
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
              []>;
class SustBA2DTrapV4<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   rc:$r, rc:$g, rc:$b, rc:$a),
              "sust.b.a2d.v4." # ty # ".trap \t" #
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
              []>;

// b8 and b16 data both use Int16Regs.
def SUST_B_2D_ARRAY_B8_TRAP    : SustBA2DTrap<"b8",  Int16Regs>;
def SUST_B_2D_ARRAY_B16_TRAP   : SustBA2DTrap<"b16", Int16Regs>;
def SUST_B_2D_ARRAY_B32_TRAP   : SustBA2DTrap<"b32", Int32Regs>;
def SUST_B_2D_ARRAY_B64_TRAP   : SustBA2DTrap<"b64", Int64Regs>;
def SUST_B_2D_ARRAY_V2B8_TRAP  : SustBA2DTrapV2<"b8",  Int16Regs>;
def SUST_B_2D_ARRAY_V2B16_TRAP : SustBA2DTrapV2<"b16", Int16Regs>;
def SUST_B_2D_ARRAY_V2B32_TRAP : SustBA2DTrapV2<"b32", Int32Regs>;
def SUST_B_2D_ARRAY_V2B64_TRAP : SustBA2DTrapV2<"b64", Int64Regs>;
def SUST_B_2D_ARRAY_V4B8_TRAP  : SustBA2DTrapV4<"b8",  Int16Regs>;
def SUST_B_2D_ARRAY_V4B16_TRAP : SustBA2DTrapV4<"b16", Int16Regs>;
def SUST_B_2D_ARRAY_V4B32_TRAP : SustBA2DTrapV4<"b32", Int32Regs>;
5334
5335
// sust.b.3d.*.trap stores.
//
// Every record has no outputs and an empty pattern list. Operands are $s
// (Int64Regs), the Int32Regs coordinates $x, $y and $z, then one, two or
// four data registers ($r[, $g[, $b, $a]]) that all share one register class.
// The asm string lists $z twice so the coordinate vector has four entries.
// NOTE(review): presumably PTX wants a four-element vector here and ignores
// the last lane -- confirm against the PTX ISA before changing it.
//   ty - element-type token placed in the mnemonic ("b8", "b16", ...)
//   rc - register class of the data operands
class SustB3DTrap<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   rc:$r),
              "sust.b.3d." # ty # ".trap \t" #
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
              []>;
class SustB3DTrapV2<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   rc:$r, rc:$g),
              "sust.b.3d.v2." # ty # ".trap \t" #
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
              []>;
class SustB3DTrapV4<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   rc:$r, rc:$g, rc:$b, rc:$a),
              "sust.b.3d.v4." # ty # ".trap \t" #
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
              []>;

// b8 and b16 data both use Int16Regs.
def SUST_B_3D_B8_TRAP    : SustB3DTrap<"b8",  Int16Regs>;
def SUST_B_3D_B16_TRAP   : SustB3DTrap<"b16", Int16Regs>;
def SUST_B_3D_B32_TRAP   : SustB3DTrap<"b32", Int32Regs>;
def SUST_B_3D_B64_TRAP   : SustB3DTrap<"b64", Int64Regs>;
def SUST_B_3D_V2B8_TRAP  : SustB3DTrapV2<"b8",  Int16Regs>;
def SUST_B_3D_V2B16_TRAP : SustB3DTrapV2<"b16", Int16Regs>;
def SUST_B_3D_V2B32_TRAP : SustB3DTrapV2<"b32", Int32Regs>;
def SUST_B_3D_V2B64_TRAP : SustB3DTrapV2<"b64", Int64Regs>;
def SUST_B_3D_V4B8_TRAP  : SustB3DTrapV4<"b8",  Int16Regs>;
def SUST_B_3D_V4B16_TRAP : SustB3DTrapV4<"b16", Int16Regs>;
def SUST_B_3D_V4B32_TRAP : SustB3DTrapV4<"b32", Int32Regs>;
5409
5410
5411// .zero variant
// sust.b.1d.*.zero stores.
//
// Every record has no outputs and an empty pattern list. Operands are $s
// (Int64Regs), the Int32Regs coordinate $x, then one, two or four data
// registers ($r[, $g[, $b, $a]]) that all share one register class.
// The helper classes describe that shape once per vector width:
//   ty - element-type token placed in the mnemonic ("b8", "b16", ...)
//   rc - register class of the data operands
class SustB1DZero<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, rc:$r),
              "sust.b.1d." # ty # ".zero \t" #
              "[$s, \\{$x\\}], \\{$r\\};",
              []>;
class SustB1DZeroV2<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g),
              "sust.b.1d.v2." # ty # ".zero \t" #
              "[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;
class SustB1DZeroV4<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g, rc:$b, rc:$a),
              "sust.b.1d.v4." # ty # ".zero \t" #
              "[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
              []>;

// b8 and b16 data both use Int16Regs.
def SUST_B_1D_B8_ZERO    : SustB1DZero<"b8",  Int16Regs>;
def SUST_B_1D_B16_ZERO   : SustB1DZero<"b16", Int16Regs>;
def SUST_B_1D_B32_ZERO   : SustB1DZero<"b32", Int32Regs>;
def SUST_B_1D_B64_ZERO   : SustB1DZero<"b64", Int64Regs>;
def SUST_B_1D_V2B8_ZERO  : SustB1DZeroV2<"b8",  Int16Regs>;
def SUST_B_1D_V2B16_ZERO : SustB1DZeroV2<"b16", Int16Regs>;
def SUST_B_1D_V2B32_ZERO : SustB1DZeroV2<"b32", Int32Regs>;
def SUST_B_1D_V2B64_ZERO : SustB1DZeroV2<"b64", Int64Regs>;
def SUST_B_1D_V4B8_ZERO  : SustB1DZeroV4<"b8",  Int16Regs>;
def SUST_B_1D_V4B16_ZERO : SustB1DZeroV4<"b16", Int16Regs>;
def SUST_B_1D_V4B32_ZERO : SustB1DZeroV4<"b32", Int32Regs>;
5470
5471
// sust.b.a1d.*.zero stores.
//
// Every record has no outputs and an empty pattern list. Operands are $s
// (Int64Regs), then $idx and $x (Int32Regs), then one, two or four data
// registers ($r[, $g[, $b, $a]]) that all share one register class.
// The helper classes describe that shape once per vector width:
//   ty - element-type token placed in the mnemonic ("b8", "b16", ...)
//   rc - register class of the data operands
class SustBA1DZero<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, rc:$r),
              "sust.b.a1d." # ty # ".zero \t" #
              "[$s, \\{$idx, $x\\}], \\{$r\\};",
              []>;
class SustBA1DZeroV2<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, rc:$r, rc:$g),
              "sust.b.a1d.v2." # ty # ".zero \t" #
              "[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
              []>;
class SustBA1DZeroV4<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
                   rc:$r, rc:$g, rc:$b, rc:$a),
              "sust.b.a1d.v4." # ty # ".zero \t" #
              "[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
              []>;

// b8 and b16 data both use Int16Regs.
def SUST_B_1D_ARRAY_B8_ZERO    : SustBA1DZero<"b8",  Int16Regs>;
def SUST_B_1D_ARRAY_B16_ZERO   : SustBA1DZero<"b16", Int16Regs>;
def SUST_B_1D_ARRAY_B32_ZERO   : SustBA1DZero<"b32", Int32Regs>;
def SUST_B_1D_ARRAY_B64_ZERO   : SustBA1DZero<"b64", Int64Regs>;
def SUST_B_1D_ARRAY_V2B8_ZERO  : SustBA1DZeroV2<"b8",  Int16Regs>;
def SUST_B_1D_ARRAY_V2B16_ZERO : SustBA1DZeroV2<"b16", Int16Regs>;
def SUST_B_1D_ARRAY_V2B32_ZERO : SustBA1DZeroV2<"b32", Int32Regs>;
def SUST_B_1D_ARRAY_V2B64_ZERO : SustBA1DZeroV2<"b64", Int64Regs>;
def SUST_B_1D_ARRAY_V4B8_ZERO  : SustBA1DZeroV4<"b8",  Int16Regs>;
def SUST_B_1D_ARRAY_V4B16_ZERO : SustBA1DZeroV4<"b16", Int16Regs>;
def SUST_B_1D_ARRAY_V4B32_ZERO : SustBA1DZeroV4<"b32", Int32Regs>;
5537
5538
// sust.b.2d.*.zero stores.
//
// Every record has no outputs and an empty pattern list. Operands are $s
// (Int64Regs), the Int32Regs coordinates $x and $y, then one, two or four
// data registers ($r[, $g[, $b, $a]]) that all share one register class.
// The helper classes describe that shape once per vector width:
//   ty - element-type token placed in the mnemonic ("b8", "b16", ...)
//   rc - register class of the data operands
class SustB2DZero<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r),
              "sust.b.2d." # ty # ".zero \t" #
              "[$s, \\{$x, $y\\}], \\{$r\\};",
              []>;
class SustB2DZeroV2<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r, rc:$g),
              "sust.b.2d.v2." # ty # ".zero \t" #
              "[$s, \\{$x, $y\\}], \\{$r, $g\\};",
              []>;
class SustB2DZeroV4<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                   rc:$r, rc:$g, rc:$b, rc:$a),
              "sust.b.2d.v4." # ty # ".zero \t" #
              "[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
              []>;

// b8 and b16 data both use Int16Regs.
def SUST_B_2D_B8_ZERO    : SustB2DZero<"b8",  Int16Regs>;
def SUST_B_2D_B16_ZERO   : SustB2DZero<"b16", Int16Regs>;
def SUST_B_2D_B32_ZERO   : SustB2DZero<"b32", Int32Regs>;
def SUST_B_2D_B64_ZERO   : SustB2DZero<"b64", Int64Regs>;
def SUST_B_2D_V2B8_ZERO  : SustB2DZeroV2<"b8",  Int16Regs>;
def SUST_B_2D_V2B16_ZERO : SustB2DZeroV2<"b16", Int16Regs>;
def SUST_B_2D_V2B32_ZERO : SustB2DZeroV2<"b32", Int32Regs>;
def SUST_B_2D_V2B64_ZERO : SustB2DZeroV2<"b64", Int64Regs>;
def SUST_B_2D_V4B8_ZERO  : SustB2DZeroV4<"b8",  Int16Regs>;
def SUST_B_2D_V4B16_ZERO : SustB2DZeroV4<"b16", Int16Regs>;
def SUST_B_2D_V4B32_ZERO : SustB2DZeroV4<"b32", Int32Regs>;
5604
5605
// sust.b.a2d.*.zero stores.
//
// Every record has no outputs and an empty pattern list. Operands are $s
// (Int64Regs), then $idx, $x and $y (Int32Regs), then one, two or four data
// registers ($r[, $g[, $b, $a]]) that all share one register class.
// The asm string lists $y twice so the coordinate vector has four entries.
// NOTE(review): presumably PTX wants a four-element vector here and ignores
// the last lane -- confirm against the PTX ISA before changing it.
//   ty - element-type token placed in the mnemonic ("b8", "b16", ...)
//   rc - register class of the data operands
class SustBA2DZero<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   rc:$r),
              "sust.b.a2d." # ty # ".zero \t" #
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
              []>;
class SustBA2DZeroV2<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   rc:$r, rc:$g),
              "sust.b.a2d.v2." # ty # ".zero \t" #
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
              []>;
class SustBA2DZeroV4<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   rc:$r, rc:$g, rc:$b, rc:$a),
              "sust.b.a2d.v4." # ty # ".zero \t" #
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
              []>;

// b8 and b16 data both use Int16Regs.
def SUST_B_2D_ARRAY_B8_ZERO    : SustBA2DZero<"b8",  Int16Regs>;
def SUST_B_2D_ARRAY_B16_ZERO   : SustBA2DZero<"b16", Int16Regs>;
def SUST_B_2D_ARRAY_B32_ZERO   : SustBA2DZero<"b32", Int32Regs>;
def SUST_B_2D_ARRAY_B64_ZERO   : SustBA2DZero<"b64", Int64Regs>;
def SUST_B_2D_ARRAY_V2B8_ZERO  : SustBA2DZeroV2<"b8",  Int16Regs>;
def SUST_B_2D_ARRAY_V2B16_ZERO : SustBA2DZeroV2<"b16", Int16Regs>;
def SUST_B_2D_ARRAY_V2B32_ZERO : SustBA2DZeroV2<"b32", Int32Regs>;
def SUST_B_2D_ARRAY_V2B64_ZERO : SustBA2DZeroV2<"b64", Int64Regs>;
def SUST_B_2D_ARRAY_V4B8_ZERO  : SustBA2DZeroV4<"b8",  Int16Regs>;
def SUST_B_2D_ARRAY_V4B16_ZERO : SustBA2DZeroV4<"b16", Int16Regs>;
def SUST_B_2D_ARRAY_V4B32_ZERO : SustBA2DZeroV4<"b32", Int32Regs>;
5679
5680
// sust.b.3d.*.zero stores.
//
// Every record has no outputs and an empty pattern list. Operands are $s
// (Int64Regs), the Int32Regs coordinates $x, $y and $z, then one, two or
// four data registers ($r[, $g[, $b, $a]]) that all share one register class.
// The asm string lists $z twice so the coordinate vector has four entries.
// NOTE(review): presumably PTX wants a four-element vector here and ignores
// the last lane -- confirm against the PTX ISA before changing it.
//   ty - element-type token placed in the mnemonic ("b8", "b16", ...)
//   rc - register class of the data operands
class SustB3DZero<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   rc:$r),
              "sust.b.3d." # ty # ".zero \t" #
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
              []>;
class SustB3DZeroV2<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   rc:$r, rc:$g),
              "sust.b.3d.v2." # ty # ".zero \t" #
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
              []>;
class SustB3DZeroV4<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   rc:$r, rc:$g, rc:$b, rc:$a),
              "sust.b.3d.v4." # ty # ".zero \t" #
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
              []>;

// b8 and b16 data both use Int16Regs.
def SUST_B_3D_B8_ZERO    : SustB3DZero<"b8",  Int16Regs>;
def SUST_B_3D_B16_ZERO   : SustB3DZero<"b16", Int16Regs>;
def SUST_B_3D_B32_ZERO   : SustB3DZero<"b32", Int32Regs>;
def SUST_B_3D_B64_ZERO   : SustB3DZero<"b64", Int64Regs>;
def SUST_B_3D_V2B8_ZERO  : SustB3DZeroV2<"b8",  Int16Regs>;
def SUST_B_3D_V2B16_ZERO : SustB3DZeroV2<"b16", Int16Regs>;
def SUST_B_3D_V2B32_ZERO : SustB3DZeroV2<"b32", Int32Regs>;
def SUST_B_3D_V2B64_ZERO : SustB3DZeroV2<"b64", Int64Regs>;
def SUST_B_3D_V4B8_ZERO  : SustB3DZeroV4<"b8",  Int16Regs>;
def SUST_B_3D_V4B16_ZERO : SustB3DZeroV4<"b16", Int16Regs>;
def SUST_B_3D_V4B32_ZERO : SustB3DZeroV4<"b32", Int32Regs>;
5754
5755
5756
// Formatted surface stores (sust.p.*)
5758
// sust.p.1d.*.trap stores.
//
// Every record has no outputs and an empty pattern list. Operands are $s
// (Int64Regs), the Int32Regs coordinate $x, then one, two or four data
// registers ($r[, $g[, $b, $a]]) that all share one register class.
// The helper classes describe that shape once per vector width:
//   ty - element-type token placed in the mnemonic ("b8", "b16", "b32")
//   rc - register class of the data operands
class SustP1DTrap<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, rc:$r),
              "sust.p.1d." # ty # ".trap \t" #
              "[$s, \\{$x\\}], \\{$r\\};",
              []>;
class SustP1DTrapV2<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g),
              "sust.p.1d.v2." # ty # ".trap \t" #
              "[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;
class SustP1DTrapV4<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g, rc:$b, rc:$a),
              "sust.p.1d.v4." # ty # ".trap \t" #
              "[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
              []>;

// b8 and b16 data both use Int16Regs.
def SUST_P_1D_B8_TRAP    : SustP1DTrap<"b8",  Int16Regs>;
def SUST_P_1D_B16_TRAP   : SustP1DTrap<"b16", Int16Regs>;
def SUST_P_1D_B32_TRAP   : SustP1DTrap<"b32", Int32Regs>;
def SUST_P_1D_V2B8_TRAP  : SustP1DTrapV2<"b8",  Int16Regs>;
def SUST_P_1D_V2B16_TRAP : SustP1DTrapV2<"b16", Int16Regs>;
def SUST_P_1D_V2B32_TRAP : SustP1DTrapV2<"b32", Int32Regs>;
def SUST_P_1D_V4B8_TRAP  : SustP1DTrapV4<"b8",  Int16Regs>;
def SUST_P_1D_V4B16_TRAP : SustP1DTrapV4<"b16", Int16Regs>;
def SUST_P_1D_V4B32_TRAP : SustP1DTrapV4<"b32", Int32Regs>;
5807
5808
// sust.p.a1d.*.trap stores.
//
// Every record has no outputs and an empty pattern list. Operands are $s
// (Int64Regs), then $idx and $x (Int32Regs), then one, two or four data
// registers ($r[, $g[, $b, $a]]) that all share one register class.
// The helper classes describe that shape once per vector width:
//   ty - element-type token placed in the mnemonic ("b8", "b16", "b32")
//   rc - register class of the data operands
class SustPA1DTrap<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, rc:$r),
              "sust.p.a1d." # ty # ".trap \t" #
              "[$s, \\{$idx, $x\\}], \\{$r\\};",
              []>;
class SustPA1DTrapV2<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, rc:$r, rc:$g),
              "sust.p.a1d.v2." # ty # ".trap \t" #
              "[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
              []>;
class SustPA1DTrapV4<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
                   rc:$r, rc:$g, rc:$b, rc:$a),
              "sust.p.a1d.v4." # ty # ".trap \t" #
              "[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
              []>;

// b8 and b16 data both use Int16Regs.
def SUST_P_1D_ARRAY_B8_TRAP    : SustPA1DTrap<"b8",  Int16Regs>;
def SUST_P_1D_ARRAY_B16_TRAP   : SustPA1DTrap<"b16", Int16Regs>;
def SUST_P_1D_ARRAY_B32_TRAP   : SustPA1DTrap<"b32", Int32Regs>;
def SUST_P_1D_ARRAY_V2B8_TRAP  : SustPA1DTrapV2<"b8",  Int16Regs>;
def SUST_P_1D_ARRAY_V2B16_TRAP : SustPA1DTrapV2<"b16", Int16Regs>;
def SUST_P_1D_ARRAY_V2B32_TRAP : SustPA1DTrapV2<"b32", Int32Regs>;
def SUST_P_1D_ARRAY_V4B8_TRAP  : SustPA1DTrapV4<"b8",  Int16Regs>;
def SUST_P_1D_ARRAY_V4B16_TRAP : SustPA1DTrapV4<"b16", Int16Regs>;
def SUST_P_1D_ARRAY_V4B32_TRAP : SustPA1DTrapV4<"b32", Int32Regs>;
5863
5864
// sust.p.2d.*.trap stores.
//
// Every record has no outputs and an empty pattern list. Operands are $s
// (Int64Regs), the Int32Regs coordinates $x and $y, then one, two or four
// data registers ($r[, $g[, $b, $a]]) that all share one register class.
// The helper classes describe that shape once per vector width:
//   ty - element-type token placed in the mnemonic ("b8", "b16", "b32")
//   rc - register class of the data operands
class SustP2DTrap<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r),
              "sust.p.2d." # ty # ".trap \t" #
              "[$s, \\{$x, $y\\}], \\{$r\\};",
              []>;
class SustP2DTrapV2<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r, rc:$g),
              "sust.p.2d.v2." # ty # ".trap \t" #
              "[$s, \\{$x, $y\\}], \\{$r, $g\\};",
              []>;
class SustP2DTrapV4<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                   rc:$r, rc:$g, rc:$b, rc:$a),
              "sust.p.2d.v4." # ty # ".trap \t" #
              "[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
              []>;

// b8 and b16 data both use Int16Regs.
def SUST_P_2D_B8_TRAP    : SustP2DTrap<"b8",  Int16Regs>;
def SUST_P_2D_B16_TRAP   : SustP2DTrap<"b16", Int16Regs>;
def SUST_P_2D_B32_TRAP   : SustP2DTrap<"b32", Int32Regs>;
def SUST_P_2D_V2B8_TRAP  : SustP2DTrapV2<"b8",  Int16Regs>;
def SUST_P_2D_V2B16_TRAP : SustP2DTrapV2<"b16", Int16Regs>;
def SUST_P_2D_V2B32_TRAP : SustP2DTrapV2<"b32", Int32Regs>;
def SUST_P_2D_V4B8_TRAP  : SustP2DTrapV4<"b8",  Int16Regs>;
def SUST_P_2D_V4B16_TRAP : SustP2DTrapV4<"b16", Int16Regs>;
def SUST_P_2D_V4B32_TRAP : SustP2DTrapV4<"b32", Int32Regs>;
5919
5920
// Emits "sust.p.a2d.b8.trap": stores the single component $r to surface $s
// using index $idx and coordinates ($x, $y).  The .b8 datum is declared as
// an Int16Regs operand.  $y is printed twice so the coordinate tuple has
// four entries.
// NOTE(review): presumably the fourth entry is padding -- confirm in PTX ISA.
def SUST_P_2D_ARRAY_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
// Emits "sust.p.a2d.b16.trap": stores the single component $r to surface $s
// using index $idx and coordinates ($x, $y).  $y is printed twice so the
// coordinate tuple has four entries.
// NOTE(review): presumably the fourth entry is padding -- confirm in PTX ISA.
def SUST_P_2D_ARRAY_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
// Emits "sust.p.a2d.b32.trap": stores the single 32-bit component $r to
// surface $s using index $idx and coordinates ($x, $y).  $y is printed twice
// so the coordinate tuple has four entries.
// NOTE(review): presumably the fourth entry is padding -- confirm in PTX ISA.
def SUST_P_2D_ARRAY_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
// Emits "sust.p.a2d.v2.b8.trap": stores the component pair ($r, $g) to
// surface $s using index $idx and coordinates ($x, $y).  The .b8 data
// operands are declared as Int16Regs.  $y is printed twice so the coordinate
// tuple has four entries.
// NOTE(review): presumably the fourth entry is padding -- confirm in PTX ISA.
def SUST_P_2D_ARRAY_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
// Emits "sust.p.a2d.v2.b16.trap": stores the component pair ($r, $g) to
// surface $s using index $idx and coordinates ($x, $y).  $y is printed twice
// so the coordinate tuple has four entries.
// NOTE(review): presumably the fourth entry is padding -- confirm in PTX ISA.
def SUST_P_2D_ARRAY_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
// Emits "sust.p.a2d.v2.b32.trap": stores the 32-bit component pair ($r, $g)
// to surface $s using index $idx and coordinates ($x, $y).  $y is printed
// twice so the coordinate tuple has four entries.
// NOTE(review): presumably the fourth entry is padding -- confirm in PTX ISA.
def SUST_P_2D_ARRAY_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
// Emits "sust.p.a2d.v4.b8.trap": stores the four components
// ($r, $g, $b, $a) to surface $s using index $idx and coordinates ($x, $y).
// The .b8 data operands are declared as Int16Regs.  $y is printed twice so
// the coordinate tuple has four entries.
// NOTE(review): presumably the fourth entry is padding -- confirm in PTX ISA.
def SUST_P_2D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Emits "sust.p.a2d.v4.b16.trap": stores the four components
// ($r, $g, $b, $a) to surface $s using index $idx and coordinates ($x, $y).
// $y is printed twice so the coordinate tuple has four entries.
// NOTE(review): presumably the fourth entry is padding -- confirm in PTX ISA.
def SUST_P_2D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Emits "sust.p.a2d.v4.b32.trap": stores the four 32-bit components
// ($r, $g, $b, $a) to surface $s using index $idx and coordinates ($x, $y).
// $y is printed twice so the coordinate tuple has four entries.
// NOTE(review): presumably the fourth entry is padding -- confirm in PTX ISA.
def SUST_P_2D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
5981
5982
// Emits "sust.p.3d.b8.trap": stores the single component $r to surface $s
// at coordinates ($x, $y, $z).  The .b8 datum is declared as an Int16Regs
// operand.  $z is printed twice so the coordinate tuple has four entries.
// NOTE(review): presumably the fourth entry is padding -- confirm in PTX ISA.
def SUST_P_3D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
// Emits "sust.p.3d.b16.trap": stores the single component $r to surface $s
// at coordinates ($x, $y, $z).  $z is printed twice so the coordinate tuple
// has four entries.
// NOTE(review): presumably the fourth entry is padding -- confirm in PTX ISA.
def SUST_P_3D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
// Emits "sust.p.3d.b32.trap": stores the single 32-bit component $r to
// surface $s at coordinates ($x, $y, $z).  $z is printed twice so the
// coordinate tuple has four entries.
// NOTE(review): presumably the fourth entry is padding -- confirm in PTX ISA.
def SUST_P_3D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r),
    "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
// Emits "sust.p.3d.v2.b8.trap": stores the component pair ($r, $g) to
// surface $s at coordinates ($x, $y, $z).  The .b8 data operands are
// declared as Int16Regs.  $z is printed twice so the coordinate tuple has
// four entries.
// NOTE(review): presumably the fourth entry is padding -- confirm in PTX ISA.
def SUST_P_3D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
// Emits "sust.p.3d.v2.b16.trap": stores the component pair ($r, $g) to
// surface $s at coordinates ($x, $y, $z).  $z is printed twice so the
// coordinate tuple has four entries.
// NOTE(review): presumably the fourth entry is padding -- confirm in PTX ISA.
def SUST_P_3D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
// Emits "sust.p.3d.v2.b32.trap": stores the 32-bit component pair ($r, $g)
// to surface $s at coordinates ($x, $y, $z).  $z is printed twice so the
// coordinate tuple has four entries.
// NOTE(review): presumably the fourth entry is padding -- confirm in PTX ISA.
def SUST_P_3D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
// Emits "sust.p.3d.v4.b8.trap": stores the four components
// ($r, $g, $b, $a) to surface $s at coordinates ($x, $y, $z).  The .b8 data
// operands are declared as Int16Regs.  $z is printed twice so the coordinate
// tuple has four entries.
// NOTE(review): presumably the fourth entry is padding -- confirm in PTX ISA.
def SUST_P_3D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Emits "sust.p.3d.v4.b16.trap": stores the four components
// ($r, $g, $b, $a) to surface $s at coordinates ($x, $y, $z).  $z is printed
// twice so the coordinate tuple has four entries.
// NOTE(review): presumably the fourth entry is padding -- confirm in PTX ISA.
def SUST_P_3D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Emits "sust.p.3d.v4.b32.trap": stores the four 32-bit components
// ($r, $g, $b, $a) to surface $s at coordinates ($x, $y, $z).  $z is printed
// twice so the coordinate tuple has four entries.
// NOTE(review): presumably the fourth entry is padding -- confirm in PTX ISA.
def SUST_P_3D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
6043}
6044
// Surface store instruction patterns
// These patterns are written separately from the instruction definitions:
// placing them directly in the definitions makes TableGen report type errors,
// and the reason for that has not been determined.
6048
// .clamp variant
// 1D surfaces: each int_nvvm_sust_b_1d_*_clamp intrinsic is selected to the
// SUST_B_1D_*_CLAMP instruction of the same shape, with the operands passed
// through in the same order.  i8 and i16 data both use Int16Regs operands.

// Single-component stores.
def : Pat<(int_nvvm_sust_b_1d_i8_clamp
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_B8_CLAMP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i16_clamp
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_B16_CLAMP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i32_clamp
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r),
          (SUST_B_1D_B32_CLAMP
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i64_clamp
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r),
          (SUST_B_1D_B64_CLAMP
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r)>;

// Two-component stores.
def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_CLAMP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_CLAMP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_CLAMP
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_CLAMP
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g)>;

// Four-component stores.
def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_CLAMP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_CLAMP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_CLAMP
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6103
6104
6105
// 1D array surfaces, .clamp: each int_nvvm_sust_b_1d_array_*_clamp intrinsic
// is selected to the SUST_B_1D_ARRAY_*_CLAMP instruction of the same shape.
// Operands are passed through in order: $s, $l, $x, then the data.
// i8 and i16 data both use Int16Regs operands.

// Single-component stores.
def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r)>;

// Two-component stores.
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g)>;

// Four-component stores.
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6163
6164
6165
// 2D surfaces, .clamp: each int_nvvm_sust_b_2d_*_clamp intrinsic is selected
// to the SUST_B_2D_*_CLAMP instruction of the same shape.  Operands are
// passed through in order: $s, $x, $y, then the data.
// i8 and i16 data both use Int16Regs operands.

// Single-component stores.
def : Pat<(int_nvvm_sust_b_2d_i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_B8_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_B16_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i32_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_B_2D_B32_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i64_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r),
          (SUST_B_2D_B64_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r)>;

// Two-component stores.
def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_V2B64_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g)>;

// Four-component stores.
def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6223
6224
6225
// 2D array surfaces, .clamp: each int_nvvm_sust_b_2d_array_*_clamp intrinsic
// is selected to the SUST_B_2D_ARRAY_*_CLAMP instruction of the same shape.
// Operands are passed through in order: $s, $l, $x, $y, then the data.
// i8 and i16 data both use Int16Regs operands.

// Single-component stores.
def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r)>;

// Two-component stores.
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g)>;

// Four-component stores.
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_CLAMP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6296
6297
6298
// 3D surfaces, .clamp: each int_nvvm_sust_b_3d_*_clamp intrinsic is selected
// to the SUST_B_3D_*_CLAMP instruction of the same shape.  Operands are
// passed through in order: $s, $x, $y, $z, then the data.
// i8 and i16 data both use Int16Regs operands.

// Single-component stores.
def : Pat<(int_nvvm_sust_b_3d_i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_B_3D_B8_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_B_3D_B16_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i32_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r),
          (SUST_B_3D_B32_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i64_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r),
          (SUST_B_3D_B64_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r)>;

// Two-component stores.
def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r, Int64Regs:$g)>;

// Four-component stores.
def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_CLAMP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6375
6376
// .trap variant
// 1D surfaces: each int_nvvm_sust_b_1d_*_trap intrinsic is selected to the
// SUST_B_1D_*_TRAP instruction of the same shape, with the operands passed
// through in the same order.  i8 and i16 data both use Int16Regs operands.

// Single-component stores.
def : Pat<(int_nvvm_sust_b_1d_i8_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_B8_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i16_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_B16_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i32_trap
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r),
          (SUST_B_1D_B32_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i64_trap
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r),
          (SUST_B_1D_B64_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r)>;

// Two-component stores.
def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g)>;

// Four-component stores.
def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6431
6432
6433
// 1D array surfaces, .trap: each int_nvvm_sust_b_1d_array_*_trap intrinsic
// is selected to the SUST_B_1D_ARRAY_*_TRAP instruction of the same shape.
// Operands are passed through in order: $s, $l, $x, then the data.
// i8 and i16 data both use Int16Regs operands.

// Single-component stores.
def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r)>;

// Two-component stores.
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g)>;

// Four-component stores.
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6491
6492
6493
// 2D surfaces, .trap: each int_nvvm_sust_b_2d_*_trap intrinsic is selected
// to the SUST_B_2D_*_TRAP instruction of the same shape.  Operands are
// passed through in order: $s, $x, $y, then the data.
// i8 and i16 data both use Int16Regs operands.

// Single-component stores.
def : Pat<(int_nvvm_sust_b_2d_i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_B_2D_B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i64_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r),
          (SUST_B_2D_B64_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r)>;

// Two-component stores.
def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_V2B64_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g)>;

// Four-component stores.
def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6551
6552
6553
// int_nvvm_sust_b_2d_array_*_trap -> SUST_B_2D_ARRAY_*_TRAP.
// Each pattern forwards its operands to the instruction unchanged and in the
// same order: $s (Int64Regs), then $l, $x, $y (Int32Regs), then the data
// operands. The i8 and i16 element variants both use Int16Regs data operands.
def : Pat<
  (int_nvvm_sust_b_2d_array_i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_B_2D_ARRAY_B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_B_2D_ARRAY_B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  (SUST_B_2D_ARRAY_B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i64_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
  (SUST_B_2D_ARRAY_B64_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_ARRAY_V2B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i64_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_ARRAY_V2B64_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_ARRAY_V4B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6624
6625
6626
// int_nvvm_sust_b_3d_*_trap -> SUST_B_3D_*_TRAP.
// Each pattern forwards its operands to the instruction unchanged and in the
// same order: $s (Int64Regs), then $x, $y, $z (Int32Regs), then the data
// operands. The i8 and i16 element variants both use Int16Regs data operands.
def : Pat<
  (int_nvvm_sust_b_3d_i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
  (SUST_B_3D_B8_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
  (SUST_B_3D_B16_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r),
  (SUST_B_3D_B32_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i64_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r),
  (SUST_B_3D_B64_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B8_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B16_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_3D_V2B32_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i64_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_3D_V2B64_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B8_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B16_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_3D_V4B32_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6703
6704
6705// .zero variant
// int_nvvm_sust_b_1d_*_zero -> SUST_B_1D_*_ZERO.
// Each pattern forwards its operands to the instruction unchanged and in the
// same order: $s (Int64Regs), $x (Int32Regs), then the data operands.
// The i8 and i16 element variants both use Int16Regs data operands.
def : Pat<
  (int_nvvm_sust_b_1d_i8_zero
      Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_B8_ZERO
      Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i16_zero
      Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_B16_ZERO
      Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
  (SUST_B_1D_B32_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i64_zero
      Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
  (SUST_B_1D_B64_ZERO
      Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i8_zero
      Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B8_ZERO
      Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i16_zero
      Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B16_ZERO
      Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_V2B32_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i64_zero
      Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_V2B64_ZERO
      Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i8_zero
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B8_ZERO
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i16_zero
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B16_ZERO
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i32_zero
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_V4B32_ZERO
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6759
6760
6761
// int_nvvm_sust_b_1d_array_*_zero -> SUST_B_1D_ARRAY_*_ZERO.
// Each pattern forwards its operands to the instruction unchanged and in the
// same order: $s (Int64Regs), then $l, $x (Int32Regs), then the data operands.
// The i8 and i16 element variants both use Int16Regs data operands.
def : Pat<
  (int_nvvm_sust_b_1d_array_i8_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_ARRAY_B8_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i16_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_ARRAY_B16_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i32_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
  (SUST_B_1D_ARRAY_B32_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i64_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
  (SUST_B_1D_ARRAY_B64_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i8_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B8_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i16_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B16_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i32_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_ARRAY_V2B32_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i64_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_ARRAY_V2B64_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i8_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B8_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i16_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B16_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i32_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_ARRAY_V4B32_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6819
6820
6821
// int_nvvm_sust_b_2d_*_zero -> SUST_B_2D_*_ZERO.
// Each pattern forwards its operands to the instruction unchanged and in the
// same order: $s (Int64Regs), then $x, $y (Int32Regs), then the data operands.
// The i8 and i16 element variants both use Int16Regs data operands.
def : Pat<
  (int_nvvm_sust_b_2d_i8_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_B_2D_B8_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i16_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_B_2D_B16_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  (SUST_B_2D_B32_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i64_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
  (SUST_B_2D_B64_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i8_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B8_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i16_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B16_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_V2B32_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i64_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_V2B64_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i8_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B8_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i16_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B16_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_V4B32_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6879
6880
6881
// int_nvvm_sust_b_2d_array_*_zero -> SUST_B_2D_ARRAY_*_ZERO.
// Each pattern forwards its operands to the instruction unchanged and in the
// same order: $s (Int64Regs), then $l, $x, $y (Int32Regs), then the data
// operands. The i8 and i16 element variants both use Int16Regs data operands.
def : Pat<
  (int_nvvm_sust_b_2d_array_i8_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_B_2D_ARRAY_B8_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i16_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_B_2D_ARRAY_B16_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i32_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  (SUST_B_2D_ARRAY_B32_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i64_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
  (SUST_B_2D_ARRAY_B64_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i8_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B8_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i16_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B16_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i32_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_ARRAY_V2B32_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i64_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_ARRAY_V2B64_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i8_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B8_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i16_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B16_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i32_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_ARRAY_V4B32_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6952
6953
6954
// int_nvvm_sust_b_3d_*_zero -> SUST_B_3D_*_ZERO.
// Each pattern forwards its operands to the instruction unchanged and in the
// same order: $s (Int64Regs), then $x, $y, $z (Int32Regs), then the data
// operands. The i8 and i16 element variants both use Int16Regs data operands.
def : Pat<
  (int_nvvm_sust_b_3d_i8_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
  (SUST_B_3D_B8_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i16_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
  (SUST_B_3D_B16_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r),
  (SUST_B_3D_B32_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i64_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r),
  (SUST_B_3D_B64_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i8_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B8_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i16_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B16_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_3D_V2B32_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i64_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_3D_V2B64_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i8_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B8_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i16_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B16_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_3D_V4B32_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7031
7032
7033
7034
// int_nvvm_sust_p_1d_*_trap -> SUST_P_1D_*_TRAP.
// Each pattern forwards its operands to the instruction unchanged and in the
// same order: $s (Int64Regs), $x (Int32Regs), then the data operands.
// The i8 and i16 element variants both use Int16Regs data operands.
def : Pat<
  (int_nvvm_sust_p_1d_i8_trap
      Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  (SUST_P_1D_B8_TRAP
      Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_i16_trap
      Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  (SUST_P_1D_B16_TRAP
      Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
  (SUST_P_1D_B32_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_v2i8_trap
      Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  (SUST_P_1D_V2B8_TRAP
      Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_v2i16_trap
      Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  (SUST_P_1D_V2B16_TRAP
      Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_v2i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
  (SUST_P_1D_V2B32_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_v4i8_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_1D_V4B8_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_1d_v4i16_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_1D_V4B16_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_1d_v4i32_trap
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_1D_V4B32_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7079
7080
7081
// int_nvvm_sust_p_1d_array_*_trap -> SUST_P_1D_ARRAY_*_TRAP.
// Each pattern forwards its operands to the instruction unchanged and in the
// same order: $s (Int64Regs), then $l, $x (Int32Regs), then the data operands.
// The i8 and i16 element variants both use Int16Regs data operands.
def : Pat<
  (int_nvvm_sust_p_1d_array_i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  (SUST_P_1D_ARRAY_B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  (SUST_P_1D_ARRAY_B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
  (SUST_P_1D_ARRAY_B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v2i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  (SUST_P_1D_ARRAY_V2B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v2i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  (SUST_P_1D_ARRAY_V2B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v2i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
  (SUST_P_1D_ARRAY_V2B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v4i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_1D_ARRAY_V4B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v4i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_1D_ARRAY_V4B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v4i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_1D_ARRAY_V4B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7129
7130
7131
// int_nvvm_sust_p_2d_*_trap -> SUST_P_2D_*_TRAP.
// Each pattern forwards its operands to the instruction unchanged and in the
// same order: $s (Int64Regs), then $x, $y (Int32Regs), then the data operands.
// The i8 and i16 element variants both use Int16Regs data operands.
def : Pat<
  (int_nvvm_sust_p_2d_i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_P_2D_B8_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_2d_i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_P_2D_B16_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_2d_i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  (SUST_P_2D_B32_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_2d_v2i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
  (SUST_P_2D_V2B8_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_v2i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
  (SUST_P_2D_V2B16_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_v2i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
  (SUST_P_2D_V2B32_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_v4i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_2D_V4B8_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_2d_v4i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_2D_V4B16_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_2d_v4i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_2D_V4B32_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7179
7180
7181
// int_nvvm_sust_p_2d_array_*_trap -> SUST_P_2D_ARRAY_*_TRAP.
// Each pattern forwards its operands to the instruction unchanged and in the
// same order: $s (Int64Regs), then $l, $x, $y (Int32Regs), then the data
// operands. The i8 and i16 element variants both use Int16Regs data operands.
def : Pat<
  (int_nvvm_sust_p_2d_array_i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_P_2D_ARRAY_B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_P_2D_ARRAY_B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  (SUST_P_2D_ARRAY_B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_v2i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_P_2D_ARRAY_V2B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_v2i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_P_2D_ARRAY_V2B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_v2i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_P_2D_ARRAY_V2B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_v4i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_2D_ARRAY_V4B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_v4i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_2D_ARRAY_V4B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_v4i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_2D_ARRAY_V4B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7240
7241
7242
// Selection patterns for the single-element int_nvvm_sust_p_3d_{i8,i16,i32}_trap
// intrinsics. Each intrinsic is mapped to SUST_P_3D_B{8,16,32}_TRAP with the
// operands ($s, $x, $y, $z, $r) forwarded unchanged. The i8 and i16 variants
// both take the data operand $r in Int16Regs; the i32 variant uses Int32Regs.

def : Pat<
  (int_nvvm_sust_p_3d_i8_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r),
  (SUST_P_3D_B8_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_3d_i16_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r),
  (SUST_P_3D_B16_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_3d_i32_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r),
  (SUST_P_3D_B32_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r)>;
7263
// Selection patterns for the two-element int_nvvm_sust_p_3d_v2{i8,i16,i32}_trap
// intrinsics. Each intrinsic is mapped to SUST_P_3D_V2B{8,16,32}_TRAP with the
// operands ($s, $x, $y, $z, $r, $g) forwarded unchanged. The i8 and i16
// variants take $r/$g in Int16Regs; the i32 variant uses Int32Regs.

def : Pat<
  (int_nvvm_sust_p_3d_v2i8_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_P_3D_V2B8_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_3d_v2i16_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_P_3D_V2B16_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_3d_v2i32_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_P_3D_V2B32_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r, Int32Regs:$g)>;
7284
// Selection patterns for the four-element int_nvvm_sust_p_3d_v4{i8,i16,i32}_trap
// intrinsics. Each intrinsic is mapped to SUST_P_3D_V4B{8,16,32}_TRAP with the
// operands ($s, $x, $y, $z, then $r/$g/$b/$a) forwarded unchanged. The i8 and
// i16 variants take the data operands in Int16Regs; the i32 variant uses
// Int32Regs.

def : Pat<
  (int_nvvm_sust_p_3d_v4i8_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_3D_V4B8_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_3d_v4i16_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_3D_V4B16_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_3d_v4i32_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_3D_V4B32_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7305
7306//-----------------------------------
7307// Read Special Registers
7308//-----------------------------------
7309
// Instruction that copies the special register %<regname> into the 64-bit
// result register $d. It has no inputs, prints as
// "mov.u64 \t$d, %<regname>;", and is selected for a call to the
// argument-less intrinsic `intop`.
class PTX_READ_SREG_R64<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int64Regs:$d), (ins),
              "mov.u64 \t$d, %" # regname # ";",
              [(set Int64Regs:$d, (intop))]>;
7314
// Instruction that copies the special register %<regname> into the 32-bit
// result register $d. It has no inputs, prints as
// "mov.u32 \t$d, %<regname>;", and is selected for a call to the
// argument-less intrinsic `intop`. The '%' prefix is always emitted, so this
// class cannot be used for names that are written without it.
class PTX_READ_SREG_R32<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int32Regs:$d), (ins),
              "mov.u32 \t$d, %" # regname # ";",
              [(set Int32Regs:$d, (intop))]>;
7319
// TODO: Add vector versions of the special-register reads.
7321
// %tid.{x,y,z,w}: one 32-bit read per component, each selected for the
// matching int_nvvm_read_ptx_sreg_tid_<c> intrinsic.
def INT_PTX_SREG_TID_X
    : PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
def INT_PTX_SREG_TID_Y
    : PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
def INT_PTX_SREG_TID_Z
    : PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
def INT_PTX_SREG_TID_W
    : PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
7330
// %ntid.{x,y,z,w}: one 32-bit read per component, each selected for the
// matching int_nvvm_read_ptx_sreg_ntid_<c> intrinsic.
def INT_PTX_SREG_NTID_X
    : PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
def INT_PTX_SREG_NTID_Y
    : PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
def INT_PTX_SREG_NTID_Z
    : PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
def INT_PTX_SREG_NTID_W
    : PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
7339
// %laneid, %warpid and %nwarpid: 32-bit reads selected for the
// int_nvvm_read_ptx_sreg_{laneid,warpid,nwarpid} intrinsics.
def INT_PTX_SREG_LANEID
    : PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def INT_PTX_SREG_WARPID
    : PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID
    : PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
7346
// %ctaid.{x,y,z,w}: one 32-bit read per component, each selected for the
// matching int_nvvm_read_ptx_sreg_ctaid_<c> intrinsic.
def INT_PTX_SREG_CTAID_X
    : PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
def INT_PTX_SREG_CTAID_Y
    : PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
def INT_PTX_SREG_CTAID_Z
    : PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
def INT_PTX_SREG_CTAID_W
    : PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
7355
// %nctaid.{x,y,z,w}: one 32-bit read per component, each selected for the
// matching int_nvvm_read_ptx_sreg_nctaid_<c> intrinsic.
def INT_PTX_SREG_NCTAID_X
    : PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
def INT_PTX_SREG_NCTAID_Y
    : PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
def INT_PTX_SREG_NCTAID_Z
    : PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
def INT_PTX_SREG_NCTAID_W
    : PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
7364
// %smid, %nsmid and %gridid: 32-bit reads selected for the
// int_nvvm_read_ptx_sreg_{smid,nsmid,gridid} intrinsics.
def INT_PTX_SREG_SMID
    : PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID
    : PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
def INT_PTX_SREG_GRIDID
    : PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
7371
// %lanemask_{eq,le,lt,ge,gt}: one 32-bit read per comparison suffix, each
// selected for the matching int_nvvm_read_ptx_sreg_lanemask_<cmp> intrinsic.
def INT_PTX_SREG_LANEMASK_EQ
    : PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
def INT_PTX_SREG_LANEMASK_LE
    : PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
def INT_PTX_SREG_LANEMASK_LT
    : PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
def INT_PTX_SREG_LANEMASK_GE
    : PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
def INT_PTX_SREG_LANEMASK_GT
    : PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
7382
// %clock is read into a 32-bit register, %clock64 into a 64-bit one (the only
// user of PTX_READ_SREG_R64 in this group of special-register reads).
def INT_PTX_SREG_CLOCK
    : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64
    : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
7387
// %pm0..%pm3: 32-bit reads selected for the int_nvvm_read_ptx_sreg_pm{0..3}
// intrinsics.
def INT_PTX_SREG_PM0
    : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1
    : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
def INT_PTX_SREG_PM2
    : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
def INT_PTX_SREG_PM3
    : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
7392
// Reads the warp size via int_nvvm_read_ptx_sreg_warpsize. The source operand
// is the constant WARP_SZ, which is written without the '%' prefix that
// PTX_READ_SREG_R32 always puts in front of the register name, so the
// instruction is spelled out by hand.
// TODO: It would be nice to use PTX_READ_SREG_R32 here, but it doesn't
// handle the constant.
def INT_PTX_SREG_WARPSIZE
    : NVPTXInst<(outs Int32Regs:$dst), (ins),
                "mov.u32 \t$dst, WARP_SZ;",
                [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
7398
// Describes one 'fragment' of an NVPTX *MMA instruction.
// It re-instantiates WMMA_REGS with the geometry, fragment name and PTX element
// type of `r` (so the target-independent fields stay available) and derives the
// NVPTX-specific pieces needed to build an instruction from them: the register
// class, operand dags, operand names, the asm register list and the predicates.
class WMMA_REGINFO<WMMA_REGS r>
      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
  // NVPTX register class used to carry fragment data. f16 data lives in
  // Float16x2Regs, f32 in Float32Regs, and every integer/bit element type in
  // Int32Regs. Any other element type matches no case and is rejected by !cond.
  NVPTXRegClass regclass = !cond(
    !eq(ptx_elt_type, "f16") : Float16x2Regs,
    !eq(ptx_elt_type, "f32") : Float32Regs,
    !or(!eq(ptx_elt_type, "s32"),
        !eq(ptx_elt_type, "s8"),
        !eq(ptx_elt_type, "u8"),
        !eq(ptx_elt_type, "s4"),
        !eq(ptx_elt_type, "u4"),
        !eq(ptx_elt_type, "b1")) : Int32Regs);

  // One `regclass` entry per element of the inherited `regs` list; the
  // iteration variable itself is intentionally unused.
  list<NVPTXRegClass> ptx_regs = !foreach(elt, regs, regclass);

  // Operand names for the fragment: "r" followed by the fragment name and a
  // running index, e.g. ["ra0", "ra1", ...] for fragment "a".
  list<string> reg_names = RegSeq<!size(ptx_regs), "r" # frag>.ret;

  // Asm operand list "{{$<name0>, $<name1>, ... $<nameN-1>}}". The first name
  // is emitted on its own and the fold prepends ", $" to each remaining one.
  string regstring = "{{$" # !head(reg_names)
                     # !foldl("", !tail(reg_names), acc, reg,
                              acc # ", $" # reg)
                     # "}}";

  // Predicates for this particular fragment variant. Strictly speaking these
  // are per-instruction predicates, but at the moment every fragment that can
  // appear in a given instruction is subject to the same constraints, so an
  // instruction may take the predicates of any one of its fragments. Should
  // that stop being true, the per-fragment lists can be concatenated to
  // require that all fragments of the instruction are viable.
  // The cases are tried in order; a geometry/type pair that matches none of
  // them is rejected by !cond.
  list<Predicate> Predicates = !cond(
    // fp16 -> fp16/fp32 @ m16n16k16
    !and(!eq(geom, "m16n16k16"),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32")))
      : [hasSM70, hasPTX60],

    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
    !and(!or(!eq(geom, "m8n32k16"),
             !eq(geom, "m32n8k16")),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32")))
      : [hasSM70, hasPTX61],

    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom, "m16n16k16"),
             !eq(geom, "m8n32k16"),
             !eq(geom, "m32n8k16")),
         !or(!eq(ptx_elt_type, "u8"),
             !eq(ptx_elt_type, "s8"),
             !eq(ptx_elt_type, "s32")))
      : [hasSM72, hasPTX63],

    // u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128 (b1).
    // Only the geometry is checked for this case.
    !or(!eq(geom, "m8n8k128"),
        !eq(geom, "m8n8k32"))
      : [hasSM75, hasPTX63]);

  // Template dags for using the fragment as instruction outputs or inputs:
  // the same register classes and names under an `outs` or `ins` operator.
  dag Outs = !dag(outs, ptx_regs, reg_names);
  dag Ins = !dag(ins, ptx_regs, reg_names);
}
7462
// Turns an instruction-operand dag into the selection pattern that matches a
// call to intrinsic `Intr` with the same operands.
//   Intr - intrinsic that becomes the operator of the resulting dag.
//   Ins  - operand dag headed by `ins`.
//   ret  - `Ins` with `ins` replaced by `Intr`, and with the memory operand
//          classes replaced by their address patterns:
//          MEMri -> ADDRri, MEMri64 -> ADDRri64, imem -> ADDRvar.
class BuildPatternI<Intrinsic Intr, dag Ins> {
  // The substitutions are applied innermost first, to each node of the dag.
  dag ret = !foreach(arg, Ins,
              !subst(imem, ADDRvar,
                !subst(MEMri64, ADDRri64,
                  !subst(MEMri, ADDRri,
                    !subst(ins, Intr, arg)))));
}
7472
// PatFrag flavour of BuildPatternI: turns an instruction-operand dag into the
// selection pattern that matches pattern fragment `Intr` with the same
// operands.
//   Intr - PatFrag that becomes the operator of the resulting dag.
//   Ins  - operand dag headed by `ins`.
//   ret  - `Ins` with `ins` replaced by `Intr`, and with the memory operand
//          classes replaced by their address patterns:
//          MEMri -> ADDRri, MEMri64 -> ADDRri64, imem -> ADDRvar.
class BuildPatternPF<PatFrag Intr, dag Ins> {
  // The substitutions are applied innermost first, to each node of the dag.
  dag ret = !foreach(arg, Ins,
              !subst(imem, ADDRvar,
                !subst(MEMri64, ADDRri64,
                  !subst(MEMri, ADDRri,
                    !subst(ins, Intr, arg)))));
}
7482
// Base class for all WMMA/MMA instructions. It carries the fields the pattern
// builders need; operand lists and the asm string start out as placeholders
// ((outs), (ins), "?") and are expected to be overridden by subclasses.
//   _Intr - name of the intrinsic record this instruction implements.
//   _Args - list of `ins` dags that together form the instruction arguments.
class WMMA_INSTR<string _Intr, list<dag> _Args>
  : NVPTXInst<(outs), (ins), "?", []> {
  // Intrinsic record looked up by name.
  Intrinsic Intr = !cast<Intrinsic>(_Intr);
  // All argument dags concatenated, in order, into one `ins` dag.
  dag Args = !foldl((ins), _Args, acc, part, !con(acc, part));
  // Pre-built pattern matching (intrinsic arg0, arg1, ...).
  // NOTE(review): Intr is already declared as Intrinsic, so the cast below
  // looks redundant -- confirm before removing it.
  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
}
7492
7493//
7494// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
7495//
7496
7497class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
7498                DAGOperand SrcOp>
7499  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
7500                              [!con((ins SrcOp:$src),
7501                                    !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
7502    Requires<Frag.Predicates> {
7503  // Load/store intrinsics are overloaded on pointer's address space.
7504  // To match the right intrinsic, we need to build AS-constrained PatFrag.
7505  // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
7506  dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
7507  // Build PatFrag that only matches particular address space.
7508  PatFrag IntrFrag = PatFrag<PFOperands,
7509                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
7510                             !cond(!eq(Space, ".shared"): AS_match.shared,
7511                                   !eq(Space, ".global"): AS_match.global,
7512                                   1: AS_match.generic)>;
7513  // Build AS-constrained pattern.
7514  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
7515
7516  let OutOperandList = Frag.Outs;
7517  let InOperandList = !con(Args, (ins MmaCode:$ptx));
7518  let AsmString = "wmma.load."
7519                  # Frag.frag
7520                  # ".sync"
7521                  # "${ptx:aligned}"
7522                  # "." # Layout
7523                  # "." # Frag.geom
7524                  # Space
7525                  # "." # Frag.ptx_elt_type # " \t"
7526                  # Frag.regstring
7527                  # ", [$src]"
7528                  # !if(WithStride, ", $ldm", "")
7529                  # ";";
7530}
7531
7532//
7533// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
7534//
7535class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
7536                   bit WithStride, DAGOperand DstOp>
7537  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
7538               [!con((ins DstOp:$dst),
7539                     Frag.Ins,
7540                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
7541    Requires<Frag.Predicates> {
7542
7543  // Load/store intrinsics are overloaded on pointer's address space.
7544  // To match the right intrinsic, we need to build AS-constrained PatFrag.
7545  // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
7546  dag PFOperands = !con((ops node:$dst),
7547                        !dag(ops, !foreach(tmp, Frag.regs, node), Frag.reg_names),
7548                        !if(WithStride, (ops node:$ldm), (ops)));
7549  // Build PatFrag that only matches particular address space.
7550  PatFrag IntrFrag = PatFrag<PFOperands,
7551                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
7552                             !cond(!eq(Space, ".shared"): AS_match.shared,
7553                                   !eq(Space, ".global"): AS_match.global,
7554                                   1: AS_match.generic)>;
7555  // Build AS-constrained pattern.
7556  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
7557
7558  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
7559  let OutOperandList = (outs);
7560  let AsmString = "wmma.store.d.sync"
7561                  # "${ptx:aligned}"
7562                  # "." # Layout
7563                  # "." # Frag.geom
7564                  # Space
7565                  # "." # Frag.ptx_elt_type
7566                  # " \t[$dst],"
7567                  # Frag.regstring
7568                  # !if(WithStride, ", $ldm", "")
7569                  # ";";
7570}
7571
// Create all load/store variants and collect them in MMA_LDSTs.
// One anonymous instruction is defined for every combination of layout,
// stride flag, address space, address operand class and load (resp. store)
// fragment. The innermost `foreach _` iterates over
// NVVM_MMA_SUPPORTED<...>.ret only to gate the def: it is instantiated once
// per element of that list and the iteration variable is never used
// (presumably the list is empty for unsupported combinations -- see its
// definition).
defset list<WMMA_INSTR> MMA_LDSTs  = {
  foreach ldst_layout = ["row", "col"] in
  foreach with_stride = [0, 1] in
  foreach addr_space = [".global", ".shared", ""] in
  foreach addr_op = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
    // Loads.
    foreach ld_frag = NVVM_MMA_OPS.all_ld_ops in
      foreach _ = NVVM_MMA_SUPPORTED<[ld_frag], ldst_layout>.ret in
        def : WMMA_LOAD<WMMA_REGINFO<ld_frag>, ldst_layout, addr_space,
                        with_stride, addr_op>;
    // Stores.
    foreach st_frag = NVVM_MMA_OPS.all_st_ops in
      foreach _ = NVVM_MMA_SUPPORTED<[st_frag], ldst_layout>.ret in
        def : WMMA_STORE_D<WMMA_REGINFO<st_frag>, ldst_layout, addr_space,
                           with_stride, addr_op>;
  } // addr_op, addr_space, with_stride, ldst_layout
} // defset
7589
// WMMA.MMA
// Instruction taking fragments A, B and C as inputs and producing fragment D.
//   FragA..FragD     - register info for the four fragments.
//   ALayout, BLayout - layout components of the mnemonic for A and B.
//   Satfinite        - when non-zero, ".satfinite" is appended to the
//                      mnemonic.
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite>
  : WMMA_INSTR<WMMA_NAME_MMA<ALayout, BLayout, Satfinite,
                             FragA, FragB, FragC, FragD>.record,
               [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<FragA.Predicates> {
  // Inputs are the A/B/C registers followed by the extra MmaCode:$ptx operand
  // used by "${ptx:aligned}"; results are the D registers.
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = FragD.Outs;

  // Type suffix of the mnemonic. When D is s32 all four types are spelled out
  // as ".s32.<A type>.<B type>.s32"; otherwise only ".<D type>.<C type>" is
  // used.
  string TypeList = !if(!eq(FragD.ptx_elt_type, "s32"),
                        ".s32." # FragA.ptx_elt_type
                        # "." # FragB.ptx_elt_type
                        # ".s32",
                        "." # FragD.ptx_elt_type # "." # FragC.ptx_elt_type);

  // b1 inputs additionally get the ".xor.popc" qualifier. The four register
  // lists are printed on separate lines in the order D, A, B, C.
  let AsmString = "wmma.mma"
                  # !if(!eq(FragA.ptx_elt_type, "b1"), ".xor.popc", "")
                  # ".sync${ptx:aligned}." # ALayout
                  # "." # BLayout
                  # "." # FragA.geom
                  # TypeList
                  # !if(Satfinite, ".satfinite", "")
                  # "\n\t\t" # FragD.regstring
                  # ",\n\t\t" # FragA.regstring
                  # ",\n\t\t" # FragB.regstring
                  # ",\n\t\t" # FragC.regstring
                  # ";";
}
7622
// Create all wmma.mma variants and collect them in MMAs.
// One anonymous instruction is defined for every combination of A layout,
// B layout, satfinite flag and fragment tuple from NVVM_MMA_OPS.all_mma_ops
// (indexed as [0]=A, [1]=B, [2]=C, [3]=D when passed to WMMA_MMA). The
// `foreach _` over NVVM_MMA_SUPPORTED<...>.ret only gates the def: it is
// instantiated once per element of that list and the iteration variable is
// never used.
defset list<WMMA_INSTR> MMAs  = {
  foreach a_layout = ["row", "col"] in
  foreach b_layout = ["row", "col"] in
  foreach sat = [0, 1] in
  foreach mma_op = NVVM_MMA_OPS.all_mma_ops in
  foreach _ = NVVM_MMA_SUPPORTED<mma_op, a_layout, b_layout, sat>.ret in {
    def : WMMA_MMA<WMMA_REGINFO<mma_op[0]>,
                   WMMA_REGINFO<mma_op[1]>,
                   WMMA_REGINFO<mma_op[2]>,
                   WMMA_REGINFO<mma_op[3]>,
                   a_layout, b_layout, sat>;
  } // _, mma_op, sat, b_layout, a_layout
} // defset
7640
7641
// Selection pattern for one WMMA instruction `wi`: matches
// wi.IntrinsicPattern and produces `wi` applied to the same arguments plus
// ptx.version as the trailing operand, under wi's predicates.
//
// Constructing non-flat DAGs is still a pain. A dag node cannot be !subst'ed
// with a dag, so ptx.version has to be appended *after* the foreach has
// replaced 'ins' with the instruction record; hence the !con of two dags that
// are both headed by `wi`.
class WMMA_PAT<WMMA_INSTR wi>
      : Pat<wi.IntrinsicPattern,
            !con(!foreach(arg, wi.Args, !subst(ins, wi, arg)),
                 (wi ptx.version))>,
        Requires<wi.Predicates>;
7650
// Build intrinsic->instruction patterns for all MMA instructions: one
// anonymous WMMA_PAT per record in MMAs, followed by one per record in
// MMA_LDSTs.
foreach wmma_instr = !listconcat(MMAs, MMA_LDSTs) in
  def : WMMA_PAT<wmma_instr>;
7654