1//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Floating-point immediate leaf that matches when the constant, read through
// APFloat::convertToFloat(), compares equal to 0.0f.
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;
13
// Floating-point immediate leaf that matches when the constant, read through
// APFloat::convertToFloat(), compares equal to 1.0f.
def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;
18
// Floating-point immediate leaf that matches when the constant, read through
// APFloat::convertToDouble(), compares equal to 0.0.
def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;
23
// Floating-point immediate leaf that matches when the constant, read through
// APFloat::convertToDouble(), compares equal to 1.0.
def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;
28
// C++ predicate snippets, one per address space. Each one forwards the node N
// and an address-space constant to ChkMemSDNodeAddressSpace and returns its
// result.
def AS_match {
  code generic = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];

  code shared = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];

  code global = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}
40
// Exposes `ptx.version`: a dag node that is replaced with the PTX version
// currently in use.
class PTX {
  // Transform on an immediate: the incoming value is not read; the result is
  // an i32 immediate built from Subtarget->getPTXVersion().
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  }]>;

  // (i32 0) is only a placeholder operand for PTXVerXform to rewrite.
  dag version = (PTXVerXform (i32 0));
}

def ptx : PTX;
50
51// Generates list of n sequential register names.
// E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2" ]
class RegSeq<int n, string prefix> {
  // Recursive construction: the list for n is the list for n-1 followed by
  // the name with index n-1; n == 0 terminates with the empty list.
  list<string> ret =
    !if(!ne(n, 0),
        !listconcat(RegSeq<!sub(n, 1), prefix>.ret, [prefix # !sub(n, 1)]),
        []);
}
58
// Values iterated over for the `threadmask_imm` flag of a shuffle definition:
// both 0 and 1 when `sync` is set, only 0 otherwise.
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync,
                      [0, 1],
                      [0]);
}
62
63//-----------------------------------
64// Synchronization and shuffle functions
65//-----------------------------------
66let isConvergent = true in {
// bar.sync with the literal barrier 0.
def INT_BARRIER0
  : NVPTXInst<(outs), (ins),
              "bar.sync \t0;",
              [(int_nvvm_barrier0)]>;

// bar.sync with one register operand.
def INT_BARRIERN
  : NVPTXInst<(outs), (ins Int32Regs:$src1),
              "bar.sync \t$src1;",
              [(int_nvvm_barrier_n Int32Regs:$src1)]>;

// bar.sync with two register operands.
def INT_BARRIER
  : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
              "bar.sync \t$src1, $src2;",
              [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// Barrier-0 reductions. Each expansion first turns the i32 $pred into a
// predicate register with setp.ne.u32 and then issues bar.red on barrier 0.

// popc variant: bar.red.popc.u32 writes $dst directly.
def INT_BARRIER0_POPC
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
              "{{ \n\t"
              # ".reg .pred \t%p1; \n\t"
              # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
              # "bar.red.popc.u32 \t$dst, 0, %p1; \n\t"
              # "}}",
              [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;

// and variant: the predicate result %p2 is converted to 1/0 with selp.u32.
def INT_BARRIER0_AND
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
              "{{ \n\t"
              # ".reg .pred \t%p1; \n\t"
              # ".reg .pred \t%p2; \n\t"
              # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
              # "bar.red.and.pred \t%p2, 0, %p1; \n\t"
              # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
              # "}}",
              [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;

// or variant: the predicate result %p2 is converted to 1/0 with selp.u32.
def INT_BARRIER0_OR
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
              "{{ \n\t"
              # ".reg .pred \t%p1; \n\t"
              # ".reg .pred \t%p2; \n\t"
              # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
              # "bar.red.or.pred \t%p2, 0, %p1; \n\t"
              # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
              # "}}",
              [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
101
// bar.sync with an immediate operand.
def INT_BAR_SYNC
  : NVPTXInst<(outs), (ins i32imm:$i),
              "bar.sync \t$i;",
              [(int_nvvm_bar_sync imm:$i)]>;
104
// bar.warp.sync with an immediate (_I) or register (_R) operand.
def INT_BAR_WARP_SYNC_I
  : NVPTXInst<(outs), (ins i32imm:$i),
              "bar.warp.sync \t$i;",
              [(int_nvvm_bar_warp_sync imm:$i)]>,
    Requires<[hasPTX60, hasSM30]>;

def INT_BAR_WARP_SYNC_R
  : NVPTXInst<(outs), (ins Int32Regs:$i),
              "bar.warp.sync \t$i;",
              [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
    Requires<[hasPTX60, hasSM30]>;
111
// barrier.sync with an immediate (_I) or register (_R) operand.
def INT_BARRIER_SYNC_I
  : NVPTXInst<(outs), (ins i32imm:$i),
              "barrier.sync \t$i;",
              [(int_nvvm_barrier_sync imm:$i)]>,
    Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_R
  : NVPTXInst<(outs), (ins Int32Regs:$i),
              "barrier.sync \t$i;",
              [(int_nvvm_barrier_sync Int32Regs:$i)]>,
    Requires<[hasPTX60, hasSM30]>;
118
// barrier.sync with $id and $cnt operands. The two-letter suffix gives the
// operand kinds in order ($id, $cnt): R = register, I = immediate.
def INT_BARRIER_SYNC_CNT_RR
  : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
              "barrier.sync \t$id, $cnt;",
              [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
    Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_CNT_RI
  : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
              "barrier.sync \t$id, $cnt;",
              [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
    Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_CNT_IR
  : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
              "barrier.sync \t$id, $cnt;",
              [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
    Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_CNT_II
  : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
              "barrier.sync \t$id, $cnt;",
              [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
    Requires<[hasPTX60, hasSM30]>;
135
// One shfl / shfl.sync instruction variant.
//   sync           - emit the ".sync" form and take a $threadmask operand.
//   mode           - shuffle mode text placed in the mnemonic and intrinsic name.
//   reg            - "i32" or "f32"; picks the register class of $src/$dst.
//   return_pred    - also produce an Int1Regs $pred output ("p" intrinsics).
//   offset_imm     - $offset is an immediate instead of a register.
//   mask_imm       - $mask is an immediate instead of a register.
//   threadmask_imm - $threadmask is an immediate instead of a register.
// The operand lists, asm string and pattern passed to NVPTXInst are
// placeholders; all of them are overridden in the body.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
      : NVPTXInst<(outs), (ins), "?", []> {
  // Register class used for the shuffled value.
  NVPTXRegClass rc = !cond(
    !eq(reg, "i32"): Int32Regs,
    !eq(reg, "f32"): Float32Regs);

  // Intrinsic record name: int_nvvm_shfl_[sync_]<mode>_<reg>[p].
  string IntrName = !strconcat("int_nvvm_shfl_",
                               !if(sync, "sync_", ""),
                               mode,
                               "_", reg,
                               !if(return_pred, "p", ""));
  Intrinsic Intr = !cast<Intrinsic>(IntrName);

  // Inputs in intrinsic order: [threadmask,] src, offset, mask.
  let InOperandList = !con(
    !if(sync,
        !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
        (ins)),
    (ins rc:$src),
    !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
    !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
    );

  let OutOperandList = !if(return_pred,
                           (outs rc:$dst, Int1Regs:$pred),
                           (outs rc:$dst));

  // The thread mask, when present, is printed last in the assembly.
  let AsmString = !strconcat("shfl.",
                             !if(sync, "sync.", ""),
                             mode, ".b32\t",
                             "$dst",
                             !if(return_pred, "|$pred", ""), ", ",
                             "$src, $offset, $mask",
                             !if(sync, ", $threadmask", ""),
                             ";");

  // Pattern derived from the operand lists: `outs` becomes `set`, `ins`
  // becomes the intrinsic, and i32imm operands become `imm`.
  let Pattern = [!con(
      !foreach(tmp, OutOperandList,
             !subst(outs, set,
             !subst(i32imm, imm, tmp))),
      (set !foreach(tmp, InOperandList,
             !subst(ins, Intr,
             !subst(i32imm, imm, tmp))))
  )];
}
175
// Instantiate SHFL_INSTR for every combination of sync/non-sync, shuffle
// mode, value type, predicate-returning form, and register-vs-immediate
// choice for the offset, mask and (sync only) thread mask operands.
//
// The non-sync forms are gated on hasSHFL. The .sync forms additionally
// require PTX 6.0, matching the other *.sync instructions in this file
// (vote.sync, bar.warp.sync, barrier.sync).
foreach sync = [false, true] in {
  foreach mode = ["up", "down", "bfly", "idx"] in {
    foreach regclass = ["i32", "f32"] in {
      foreach return_pred = [false, true] in {
        foreach offset_imm = [false, true] in {
          foreach mask_imm = [false, true] in {
            foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
              def : SHFL_INSTR<sync, mode, regclass, return_pred,
                               offset_imm, mask_imm, threadmask_imm>,
                    Requires<!if(sync,
                                 [hasSM30, hasPTX60],
                                 [hasSM30, hasSHFL])>;
            }
          }
        }
      }
    }
  }
}
193
// vote.{all,any,uni,ballot}
// One anonymous instruction per instantiation: a predicate input and a result
// in `regclass`, with `mode` spliced into the mnemonic.
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
                  !strconcat("vote.", mode, " \t$dest, $pred;"),
                  [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_ALL    : VOTE<Int1Regs,  "all.pred",   int_nvvm_vote_all>;
defm VOTE_ANY    : VOTE<Int1Regs,  "any.pred",   int_nvvm_vote_any>;
defm VOTE_UNI    : VOTE<Int1Regs,  "uni.pred",   int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
206
// vote.sync.{all,any,uni,ballot}
// Two instructions per instantiation: `i` takes the mask as an immediate,
// `r` takes it in a register. The mask is the first intrinsic operand and is
// printed last in the assembly.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<(outs regclass:$dest),
                    (ins i32imm:$mask, Int1Regs:$pred),
                    !strconcat("vote.sync.", mode, " \t$dest, $pred, $mask;"),
                    [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;

  def r : NVPTXInst<(outs regclass:$dest),
                    (ins Int32Regs:$mask, Int1Regs:$pred),
                    !strconcat("vote.sync.", mode, " \t$dest, $pred, $mask;"),
                    [(set regclass:$dest,
                          (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_SYNC_ALL
  : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY
  : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI
  : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT
  : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
223
// match.any.sync.<ptxtype>
// Four instructions per instantiation, covering immediate and register forms
// of $mask and $value:
//   ii: imm mask, imm value     ir: reg mask, imm value
//   ri: imm mask, reg value     rr: reg mask, reg value
// ImmOp is the operand type used when $value is an immediate.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest),
                     (ins i32imm:$mask, ImmOp:$value),
                     !strconcat("match.any.sync.", ptxtype,
                                " \t$dest, $value, $mask;"),
                     [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;

  def ir : NVPTXInst<(outs regclass:$dest),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     !strconcat("match.any.sync.", ptxtype,
                                " \t$dest, $value, $mask;"),
                     [(set regclass:$dest,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;

  def ri : NVPTXInst<(outs regclass:$dest),
                     (ins i32imm:$mask, regclass:$value),
                     !strconcat("match.any.sync.", ptxtype,
                                " \t$dest, $value, $mask;"),
                     [(set regclass:$dest,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;

  def rr : NVPTXInst<(outs regclass:$dest),
                     (ins Int32Regs:$mask, regclass:$value),
                     !strconcat("match.any.sync.", ptxtype,
                                " \t$dest, $value, $mask;"),
                     [(set regclass:$dest,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ANY_SYNC_32
  : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32, i32imm>;
defm MATCH_ANY_SYNC_64
  : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64, i64imm>;
248
// match.all.sync.<ptxtype> returning both $dest and a predicate $pred.
// Four instructions per instantiation, covering immediate and register forms
// of $mask and $value:
//   ii: imm mask, imm value     ir: reg mask, imm value
//   ri: imm mask, reg value     rr: reg mask, reg value
// ImmOp is the operand type used when $value is an immediate.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
                     !strconcat("match.all.sync.", ptxtype,
                                " \t$dest|$pred, $value, $mask;"),
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;

  def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     !strconcat("match.all.sync.", ptxtype,
                                " \t$dest|$pred, $value, $mask;"),
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;

  def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
                     !strconcat("match.all.sync.", ptxtype,
                                " \t$dest|$pred, $value, $mask;"),
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;

  def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
                     !strconcat("match.all.sync.", ptxtype,
                                " \t$dest|$pred, $value, $mask;"),
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ALLP_SYNC_32
  : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, i32imm>;
defm MATCH_ALLP_SYNC_64
  : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p, i64imm>;
276
// redux.sync.<BinOp>.<PTXType> on 32-bit registers; $src and $mask are both
// register operands.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst),
                  (ins Int32Regs:$src, Int32Regs:$mask),
                  !strconcat("redux.sync.", BinOp, ".", PTXType,
                             " $dst, $src, $mask;"),
                  [(set Int32Regs:$dst,
                        (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX70, hasSM80]>;
}

// Unsigned min/max.
defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
// Signed add/min/max.
defm REDUX_SYNC_ADD  : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN  : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX  : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
// Bitwise and/xor/or.
defm REDUX_SYNC_AND  : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR  : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR   : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;
292
293} // isConvergent = true
294
295//-----------------------------------
296// Explicit Memory Fence Functions
297//-----------------------------------
// Operand-less instruction: StrOp is the complete assembly text and IntOP the
// zero-argument intrinsic it is selected from.
class MEMBAR<string StrOp, Intrinsic IntOP>
  : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
305
306
307//-----------------------------------
308// Async Copy Functions
309//-----------------------------------
310
// cp.async.mbarrier.arrive[NoInc][AddrSpace].b64 with the address in a 32-bit
// (_32) or 64-bit (_64) register.
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      "cp.async.mbarrier.arrive" # NoInc # AddrSpace
                        # ".b64 [$addr];",
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX70, hasSM80]>;

  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      "cp.async.mbarrier.arrive" # NoInc # AddrSpace
                        # ".b64 [$addr];",
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_MBARRIER_ARRIVE
  : CP_ASYNC_MBARRIER_ARRIVE<"", "",
                             int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED
  : CP_ASYNC_MBARRIER_ARRIVE<"", ".shared",
                             int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC
  : CP_ASYNC_MBARRIER_ARRIVE<".noinc", "",
                             int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED
  : CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared",
                             int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
330
// cp.async.ca.shared.global with $dst/$src addresses in 32-bit (_32) or
// 64-bit (_64) registers. `cpsize` is printed as the trailing literal operand.
multiclass CP_ASYNC_CA_SHARED_GLOBAL_I<string cpsize, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
                      "cp.async.ca.shared.global [$dst], [$src], "
                        # cpsize # ";",
                      [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
            Requires<[hasPTX70, hasSM80]>;

  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
                      "cp.async.ca.shared.global [$dst], [$src], "
                        # cpsize # ";",
                      [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_CA_SHARED_GLOBAL_4
  : CP_ASYNC_CA_SHARED_GLOBAL_I<"4", int_nvvm_cp_async_ca_shared_global_4>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8
  : CP_ASYNC_CA_SHARED_GLOBAL_I<"8", int_nvvm_cp_async_ca_shared_global_8>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16
  : CP_ASYNC_CA_SHARED_GLOBAL_I<"16", int_nvvm_cp_async_ca_shared_global_16>;
350
// cp.async.cg.shared.global with $dst/$src addresses in 32-bit (_32) or
// 64-bit (_64) registers. `cpsize` is printed as the trailing literal operand.
multiclass CP_ASYNC_CG_SHARED_GLOBAL<string cpsize, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
                      "cp.async.cg.shared.global [$dst], [$src], "
                        # cpsize # ";",
                      [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
            Requires<[hasPTX70, hasSM80]>;

  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
                      "cp.async.cg.shared.global [$dst], [$src], "
                        # cpsize # ";",
                      [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_CG_SHARED_GLOBAL_16
  : CP_ASYNC_CG_SHARED_GLOBAL<"16", int_nvvm_cp_async_cg_shared_global_16>;
364
// cp.async.commit_group: no operands.
def CP_ASYNC_COMMIT_GROUP
  : NVPTXInst<(outs), (ins),
              "cp.async.commit_group;",
              [(int_nvvm_cp_async_commit_group)]>,
    Requires<[hasPTX70, hasSM80]>;

// cp.async.wait_group: $n is matched as a target immediate (timm).
def CP_ASYNC_WAIT_GROUP
  : NVPTXInst<(outs), (ins i32imm:$n),
              "cp.async.wait_group $n;",
              [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
    Requires<[hasPTX70, hasSM80]>;

// cp.async.wait_all: no operands.
def CP_ASYNC_WAIT_ALL
  : NVPTXInst<(outs), (ins),
              "cp.async.wait_all;",
              [(int_nvvm_cp_async_wait_all)]>,
    Requires<[hasPTX70, hasSM80]>;
378
379//-----------------------------------
380// MBarrier Functions
381//-----------------------------------
382
// mbarrier.init[AddrSpace].b64 with the address in a 32-bit (_32) or 64-bit
// (_64) register and a 32-bit $count.
multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
                      "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;",
                      [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX70, hasSM80]>;

  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
                      "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;",
                      [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_INIT
  : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED
  : MBARRIER_INIT<".shared", int_nvvm_mbarrier_init_shared>;
397
// mbarrier.inval[AddrSpace].b64 with the address in a 32-bit (_32) or 64-bit
// (_64) register.
multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      "mbarrier.inval" # AddrSpace # ".b64 [$addr];",
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX70, hasSM80]>;

  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      "mbarrier.inval" # AddrSpace # ".b64 [$addr];",
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_INVAL
  : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED
  : MBARRIER_INVAL<".shared", int_nvvm_mbarrier_inval_shared>;
412
// mbarrier.arrive[AddrSpace].b64 producing a 64-bit $state; the address is in
// a 32-bit (_32) or 64-bit (_64) register.
multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<[hasPTX70, hasSM80]>;

  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE
  : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED
  : MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
427
// mbarrier.arrive.noComplete[AddrSpace].b64 producing a 64-bit $state from an
// address (32-bit or 64-bit register) and a 32-bit $count.
multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX70, hasSM80]>;

  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_NOCOMPLETE
  : MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED
  : MBARRIER_ARRIVE_NOCOMPLETE<".shared",
                               int_nvvm_mbarrier_arrive_noComplete_shared>;
447
// mbarrier.arrive_drop[AddrSpace].b64 producing a 64-bit $state; the address
// is in a 32-bit (_32) or 64-bit (_64) register.
multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      "mbarrier.arrive_drop" # AddrSpace
                        # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<[hasPTX70, hasSM80]>;

  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      "mbarrier.arrive_drop" # AddrSpace
                        # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_DROP
  : MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED
  : MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
465
// mbarrier.arrive_drop.noComplete[AddrSpace].b64 producing a 64-bit $state
// from an address (32-bit or 64-bit register) and a 32-bit $count.
multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive_drop.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX70, hasSM80]>;

  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive_drop.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_DROP_NOCOMPLETE
  : MBARRIER_ARRIVE_DROP_NOCOMPLETE<
      "", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED
  : MBARRIER_ARRIVE_DROP_NOCOMPLETE<
      ".shared", int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
486
// mbarrier.test_wait[AddrSpace].b64 producing a predicate $res from an
// address (32-bit or 64-bit register) and a 64-bit $state.
multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int32Regs:$addr, Int64Regs:$state),
                      "mbarrier.test_wait" # AddrSpace
                        # ".b64 $res, [$addr], $state;",
                      [(set Int1Regs:$res,
                            (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
            Requires<[hasPTX70, hasSM80]>;

  def _64 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int64Regs:$addr, Int64Regs:$state),
                      "mbarrier.test_wait" # AddrSpace
                        # ".b64 $res, [$addr], $state;",
                      [(set Int1Regs:$res,
                            (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_TEST_WAIT
  : MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED
  : MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
502
// mbarrier.pending_count.b64: 64-bit $state in, 32-bit $res out.
class MBARRIER_PENDING_COUNT<Intrinsic Intrin>
  : NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
              "mbarrier.pending_count.b64 $res, $state;",
              [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
    Requires<[hasPTX70, hasSM80]>;

def MBARRIER_PENDING_COUNT
  : MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
511
512//-----------------------------------
513// Math Functions
514//-----------------------------------
515
// Map min(1.0, max(0.0, x)) to sat(x).
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x): when x is NaN,
// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// The same applies to fmax and fmin.
521
// f32: fmin(1.0, fmax(0.0, a)), in each of the four operand orderings, is
// selected as CVT_f32_f32 with the CvtSAT modifier.

// fmin(1, fmax(0, a))
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(1, fmax(a, 0))
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(fmax(0, a), 1)
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, Float32Regs:$a),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(fmax(a, 0), 1)
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f Float32Regs:$a, immFloat0),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
534
// f64: fmin(1.0, fmax(0.0, a)), in each of the four operand orderings, is
// selected as CVT_f64_f64 with the CvtSAT modifier.

// fmin(1, fmax(0, a))
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(1, fmax(a, 0))
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(fmax(0, a), 1)
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, Float64Regs:$a),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(fmax(a, 0), 1)
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d Float64Regs:$a, immDouble0),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
547
548
// One-source intrinsic instruction. OpcStr is the complete assembly string
// (mnemonic and operands) rather than just an opcode, because cases such as
// INT_PTX_RECIP need to control the full text.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP>
  : NVPTXInst<(outs target_regclass:$dst),
              (ins src_regclass:$src0),
              OpcStr,
              [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
556
// Two-source intrinsic instruction. OpcStr is the complete assembly string
// (mnemonic and operands) rather than just an opcode, because cases such as
// INT_PTX_NATIVE_POWR_F need to control the full text.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1),
              OpcStr,
              [(set t_regclass:$dst,
                    (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
565
// Three-source intrinsic instruction; OpcStr is the complete assembly string.
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
              OpcStr,
              [(set t_regclass:$dst,
                    (IntOP s0_regclass:$src0,
                           s1_regclass:$src1,
                           s2_regclass:$src2))]>;
574
575//
576// MISC
577//
578
// int_nvvm_prmt -> prmt.b32 on three 32-bit register sources.
def INT_NVVM_PRMT
  : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
581
582//
583// Min Max
584//
585
// f32 min, plain and .ftz.
def INT_NVVM_FMIN_F
  : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
  : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

// f32 max, plain and .ftz.
def INT_NVVM_FMAX_F
  : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
  : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

// f64 min and max.
def INT_NVVM_FMIN_D
  : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
  : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;
600
601
602//
603// Multiplication
604//
605
// mul.hi on 32-bit registers, signed and unsigned.
def INT_NVVM_MULHI_I
  : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
  : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

// mul.hi on 64-bit registers, signed and unsigned.
def INT_NVVM_MULHI_LL
  : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
  : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
615
// f32 multiply for each rounding-mode suffix (rn, rz, rm, rp), each in an
// .ftz and a plain form.
def INT_NVVM_MUL_RN_FTZ_F
  : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
  : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
  : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
  : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
  : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
  : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
  : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
  : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
632
// f64 multiply for each rounding-mode suffix (rn, rz, rm, rp).
def INT_NVVM_MUL_RN_D
  : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
  : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
  : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
  : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
641
// mul24.lo on 32-bit registers, signed and unsigned.
def INT_NVVM_MUL24_I
  : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
  : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
646
647//
648// Div
649//
650
// f32 div.approx, .ftz and plain.
def INT_NVVM_DIV_APPROX_FTZ_F
  : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
  : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_div_approx_f>;
656
// f32 divide for each rounding-mode suffix (rn, rz, rm, rp), each in an .ftz
// and a plain form.
def INT_NVVM_DIV_RN_FTZ_F
  : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
  : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
  : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
  : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
  : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
  : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
  : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
  : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
673
674def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
675  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
676def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
677  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
678def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
679  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
680def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
681  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
682
683//
684// Sad
685//
686
// Three-source sad instruction on Int32Regs: .s32 for int_nvvm_sad_i, .u32
// for int_nvvm_sad_ui.
def INT_NVVM_SAD_I : F_MATH_3<
  "sad.s32 \t$dst, $src0, $src1, $src2;",
  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI : F_MATH_3<
  "sad.u32 \t$dst, $src0, $src1, $src2;",
  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
691
692//
693// Floor  Ceil
694//
695
// floor is selected as a same-type CVT using the CvtRMI mode operand
// (CvtRMI_FTZ for the ftz intrinsic).
def : Pat<
  (int_nvvm_floor_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_floor_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_floor_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

// ceil is selected the same way, using CvtRPI / CvtRPI_FTZ.
def : Pat<
  (int_nvvm_ceil_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_ceil_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<
  (int_nvvm_ceil_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
709
710//
711// Abs
712//
713
// Absolute value: abs.ftz.f32, abs.f32 and abs.f64, one def per
// int_nvvm_fabs_* intrinsic.
def INT_NVVM_FABS_FTZ_F : F_MATH_1<
  "abs.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F : F_MATH_1<
  "abs.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D : F_MATH_1<
  "abs.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_fabs_d>;
721
722//
723// Round
724//
725
// int_nvvm_round_* is selected as a same-type CVT using the CvtRNI mode
// operand (CvtRNI_FTZ for the ftz intrinsic).
def : Pat<
  (int_nvvm_round_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_round_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_round_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
732
733//
734// Trunc
735//
736
// int_nvvm_trunc_* is selected as a same-type CVT using the CvtRZI mode
// operand (CvtRZI_FTZ for the ftz intrinsic).
def : Pat<
  (int_nvvm_trunc_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_trunc_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_trunc_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
743
744//
745// Saturate
746//
747
// int_nvvm_saturate_* is selected as a same-type CVT using the CvtSAT mode
// operand (CvtSAT_FTZ for the ftz intrinsic).
def : Pat<
  (int_nvvm_saturate_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<
  (int_nvvm_saturate_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<
  (int_nvvm_saturate_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
754
755//
756// Exp2  Log2
757//
758
// ex2.approx: f32 (with and without .ftz) and f64 defs.
def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<
  "ex2.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F : F_MATH_1<
  "ex2.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D : F_MATH_1<
  "ex2.approx.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

// lg2.approx: same three variants as ex2 above.
def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<
  "lg2.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F : F_MATH_1<
  "lg2.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D : F_MATH_1<
  "lg2.approx.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
772
773//
774// Sin  Cos
775//
776
// sin.approx / cos.approx on f32 only, each with an .ftz and a non-.ftz def.
def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<
  "sin.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F : F_MATH_1<
  "sin.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<
  "cos.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F : F_MATH_1<
  "cos.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
786
787//
788// Fma
789//
790
// f32 fused multiply-add: one def per rounding modifier, each with an .ftz
// and a non-.ftz form. F_MATH_3 takes one more source register class than
// F_MATH_2.
def INT_NVVM_FMA_RN_FTZ_F : F_MATH_3<
  "fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F : F_MATH_3<
  "fma.rn.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F : F_MATH_3<
  "fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F : F_MATH_3<
  "fma.rz.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F : F_MATH_3<
  "fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F : F_MATH_3<
  "fma.rm.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F : F_MATH_3<
  "fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F : F_MATH_3<
  "fma.rp.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;

// f64 fused multiply-add, one def per rounding modifier.
def INT_NVVM_FMA_RN_D : F_MATH_3<
  "fma.rn.f64 \t$dst, $src0, $src1, $src2;",
  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D : F_MATH_3<
  "fma.rz.f64 \t$dst, $src0, $src1, $src2;",
  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D : F_MATH_3<
  "fma.rm.f64 \t$dst, $src0, $src1, $src2;",
  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D : F_MATH_3<
  "fma.rp.f64 \t$dst, $src0, $src1, $src2;",
  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
820
821//
822// Rcp
823//
824
// f32 reciprocal: one def per rounding modifier, each with an .ftz and a
// non-.ftz form.
def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<
  "rcp.rn.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F : F_MATH_1<
  "rcp.rn.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<
  "rcp.rz.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F : F_MATH_1<
  "rcp.rz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<
  "rcp.rm.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F : F_MATH_1<
  "rcp.rm.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<
  "rcp.rp.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F : F_MATH_1<
  "rcp.rp.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

// f64 reciprocal, one def per rounding modifier.
def INT_NVVM_RCP_RN_D : F_MATH_1<
  "rcp.rn.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D : F_MATH_1<
  "rcp.rz.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D : F_MATH_1<
  "rcp.rm.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D : F_MATH_1<
  "rcp.rp.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

// The only approximate reciprocal defined here is the f64 .approx.ftz form.
def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<
  "rcp.approx.ftz.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
853
854//
855// Sqrt
856//
857
// f32 square root: one def per rounding modifier (each with an .ftz and a
// non-.ftz form), followed by the two .approx forms.
def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<
  "sqrt.rn.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F : F_MATH_1<
  "sqrt.rn.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<
  "sqrt.rz.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F : F_MATH_1<
  "sqrt.rz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<
  "sqrt.rm.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F : F_MATH_1<
  "sqrt.rm.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<
  "sqrt.rp.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F : F_MATH_1<
  "sqrt.rp.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<
  "sqrt.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F : F_MATH_1<
  "sqrt.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

// f64 square root, one def per rounding modifier.
def INT_NVVM_SQRT_RN_D : F_MATH_1<
  "sqrt.rn.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D : F_MATH_1<
  "sqrt.rz.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D : F_MATH_1<
  "sqrt.rm.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D : F_MATH_1<
  "sqrt.rp.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;
887
// Generic nvvm_sqrt intrinsic (no rounding/ftz suffix of its own). The
// instruction is picked by predicate, listed from the most constrained
// pattern to the unconditional fallback:
//   doF32FTZ + do_SQRTF32_RN -> sqrt.rn.ftz.f32
//   do_SQRTF32_RN            -> sqrt.rn.f32
//   doF32FTZ                 -> sqrt.approx.ftz.f32
//   (none)                   -> sqrt.approx.f32
// NOTE(review): keep this order; the more constrained patterns are presumably
// meant to be tried first -- confirm against the pattern matcher's ordering.
def : Pat<
  (int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
  Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<
  (int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
  Requires<[do_SQRTF32_RN]>;
def : Pat<
  (int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
  Requires<[doF32FTZ]>;
def : Pat<
  (int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
897
898//
899// Rsqrt
900//
901
// rsqrt.approx: f32 (with and without .ftz) and f64 defs.
def INT_NVVM_RSQRT_APPROX_FTZ_F : F_MATH_1<
  "rsqrt.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<
  "rsqrt.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<
  "rsqrt.approx.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
909
910//
911// Add
912//
913
// f32 add with an explicit rounding modifier; every mode has an .ftz and a
// non-.ftz def.
def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<
  "add.rn.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F : F_MATH_2<
  "add.rn.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<
  "add.rz.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F : F_MATH_2<
  "add.rz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<
  "add.rm.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F : F_MATH_2<
  "add.rm.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<
  "add.rp.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F : F_MATH_2<
  "add.rp.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// f64 add, one def per rounding modifier.
def INT_NVVM_ADD_RN_D : F_MATH_2<
  "add.rn.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D : F_MATH_2<
  "add.rz.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D : F_MATH_2<
  "add.rm.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D : F_MATH_2<
  "add.rp.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
939
940//
941// Convert
942//
943
// double -> float: each int_nvvm_d2f_<mode>[_ftz] intrinsic is selected as
// CVT_f32_f64 with the matching CvtR{N,Z,M,P}[_FTZ] mode operand.
def : Pat<
  (int_nvvm_d2f_rn_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rn Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_d2f_rz_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_d2f_rm_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rm Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_d2f_rp_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rp Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
960
// double -> signed 32-bit integer: CVT_s32_f64 with the "I"-suffixed mode
// operand (CvtRNI/CvtRZI/CvtRMI/CvtRPI) matching the intrinsic's suffix.
def : Pat<
  (int_nvvm_d2i_rn Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2i_rz Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2i_rm Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2i_rp Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

// double -> unsigned 32-bit integer: same modes, CVT_u32_f64.
def : Pat<
  (int_nvvm_d2ui_rn Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2ui_rz Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2ui_rm Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2ui_rp Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
978
// signed 32-bit integer -> double: CVT_f64_s32 with CvtRN/CvtRZ/CvtRM/CvtRP.
def : Pat<
  (int_nvvm_i2d_rn Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_i2d_rz Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_i2d_rm Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_i2d_rp Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

// unsigned 32-bit integer -> double: same modes, CVT_f64_u32.
def : Pat<
  (int_nvvm_ui2d_rn Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ui2d_rz Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ui2d_rm Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ui2d_rp Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
996
// float -> signed 32-bit integer: CVT_s32_f32 with the "I"-suffixed mode
// operand; the *_ftz intrinsics use the corresponding *_FTZ operand.
def : Pat<
  (int_nvvm_f2i_rn_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rn Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2i_rz_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2i_rm_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rm Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2i_rp_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rp Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

// float -> unsigned 32-bit integer: same modes, CVT_u32_f32.
def : Pat<
  (int_nvvm_f2ui_rn_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rn Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2ui_rz_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2ui_rm_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rm Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2ui_rp_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rp Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
1030
// signed 32-bit integer -> float: CVT_f32_s32 with CvtRN/CvtRZ/CvtRM/CvtRP.
def : Pat<
  (int_nvvm_i2f_rn Int32Regs:$a),
  (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_i2f_rz Int32Regs:$a),
  (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_i2f_rm Int32Regs:$a),
  (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_i2f_rp Int32Regs:$a),
  (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

// unsigned 32-bit integer -> float: same modes, CVT_f32_u32.
def : Pat<
  (int_nvvm_ui2f_rn Int32Regs:$a),
  (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ui2f_rz Int32Regs:$a),
  (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ui2f_rm Int32Regs:$a),
  (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ui2f_rp Int32Regs:$a),
  (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
1048
// Packs two Int32Regs sources into one Float64Regs destination with a single
// mov.b64 from the vector operand {$src0, $src1}.
def INT_NVVM_LOHI_I2D : F_MATH_2<
  "mov.b64 \t$dst, {{$src0, $src1}};",
  Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

// Unpacks a Float64Regs source with mov.b64 into a two-element vector of
// 32-bit registers. D2I_LO keeps the first element in $dst and D2I_HI keeps
// the second; the unwanted half goes to a scratch register %temp that is
// declared inside the emitted braces.
def INT_NVVM_D2I_LO
  : F_MATH_1<!strconcat("{{\n\t",
                        ".reg .b32 %temp; \n\t",
                        "mov.b64 \t{$dst, %temp}, $src0;\n\t",
                        "}}"),
             Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
def INT_NVVM_D2I_HI
  : F_MATH_1<!strconcat("{{\n\t",
                        ".reg .b32 %temp; \n\t",
                        "mov.b64 \t{%temp, $dst}, $src0;\n\t",
                        "}}"),
             Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
1064
// float -> signed 64-bit integer: CVT_s64_f32 with the "I"-suffixed mode
// operand; the *_ftz intrinsics use the corresponding *_FTZ operand.
def : Pat<
  (int_nvvm_f2ll_rn_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rn Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2ll_rz_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2ll_rm_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rm Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2ll_rp_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rp Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;

// float -> unsigned 64-bit integer: same modes, CVT_u64_f32.
def : Pat<
  (int_nvvm_f2ull_rn_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rn Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2ull_rz_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2ull_rm_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rm Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2ull_rp_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rp Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
1098
// double -> signed 64-bit integer: CVT_s64_f64 with CvtRNI/RZI/RMI/RPI.
def : Pat<
  (int_nvvm_d2ll_rn Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2ll_rz Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2ll_rm Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2ll_rp Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;

// double -> unsigned 64-bit integer: same modes, CVT_u64_f64.
def : Pat<
  (int_nvvm_d2ull_rn Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2ull_rz Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2ull_rm Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2ull_rp Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
1116
// signed 64-bit integer -> float: CVT_f32_s64 with CvtRN/CvtRZ/CvtRM/CvtRP.
def : Pat<
  (int_nvvm_ll2f_rn Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ll2f_rz Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ll2f_rm Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ll2f_rp Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRP)>;

// unsigned 64-bit integer -> float: same modes, CVT_f32_u64.
def : Pat<
  (int_nvvm_ull2f_rn Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ull2f_rz Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ull2f_rm Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ull2f_rp Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
1134
// signed 64-bit integer -> double: CVT_f64_s64 with CvtRN/CvtRZ/CvtRM/CvtRP.
def : Pat<
  (int_nvvm_ll2d_rn Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ll2d_rz Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ll2d_rm Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ll2d_rp Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

// unsigned 64-bit integer -> double: same modes, CVT_f64_u64.
def : Pat<
  (int_nvvm_ull2d_rn Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ull2d_rz Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ull2d_rm Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ull2d_rp Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
1152
1153
// float -> half: the value is converted with CVT_f16_f32 (CvtRN_FTZ or CvtRN)
// and the result is then passed through BITCONVERT_16_F2I.
def : Pat<
  (int_nvvm_f2h_rn_ftz Float32Regs:$a),
  (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
def : Pat<
  (int_nvvm_f2h_rn Float32Regs:$a),
  (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
1158
1159//
1160// Bitcast
1161//
1162
// 32-bit bitcasts between Float32Regs and Int32Regs, emitted as mov.b32.
def INT_NVVM_BITCAST_F2I : F_MATH_1<
  "mov.b32 \t$dst, $src0;",
  Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F : F_MATH_1<
  "mov.b32 \t$dst, $src0;",
  Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;

// 64-bit bitcasts between Float64Regs and Int64Regs, emitted as mov.b64.
def INT_NVVM_BITCAST_LL2D : F_MATH_1<
  "mov.b64 \t$dst, $src0;",
  Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL : F_MATH_1<
  "mov.b64 \t$dst, $src0;",
  Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;
1172
1173//
1174// FNS
1175//
1176
// Common shape of every fns.b32 instruction variant.
//   ins      - input operand dag; it must name $mask, $base and $offset,
//              because the asm string refers to them.
//   Operands - the dag whose result is set into $dst by the selection pattern.
// The result is always an Int32Regs register, and the instruction is gated on
// the hasPTX60 and hasSM30 predicates.
class INT_FNS_MBO<dag ins, dag Operands>
  : NVPTXInst<
      (outs Int32Regs:$dst),
      ins,
      "fns.b32 \t$dst, $mask, $base, $offset;",
      [(set Int32Regs:$dst, Operands)]>,
    Requires<[hasPTX60, hasSM30]>;
1182
// One def per register/immediate combination of (mask, base, offset). The
// three-letter suffix gives the operand kind in that order: r = Int32Regs
// register, i = i32imm immediate matched with `imm`.
def INT_FNS_rrr : INT_FNS_MBO<
  (ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
  (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_rri : INT_FNS_MBO<
  (ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
  (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_rir : INT_FNS_MBO<
  (ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
  (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_rii : INT_FNS_MBO<
  (ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
  (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr : INT_FNS_MBO<
  (ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
  (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_iri : INT_FNS_MBO<
  (ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
  (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_iir : INT_FNS_MBO<
  (ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
  (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii : INT_FNS_MBO<
  (ins i32imm:$mask, i32imm:$base, i32imm:$offset),
  (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
1199
1200//-----------------------------------
1201// Atomic Functions
1202//-----------------------------------
1203
// PatFrag wrappers that attach an address-space predicate to an atomic node.
// The predicate code comes from AS_match (defined near the top of this file),
// which calls ChkMemSDNodeAddressSpace with the global, shared or generic
// address space respectively.
class ATOMIC_GLOBAL_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.global>;
class ATOMIC_SHARED_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.shared>;
class ATOMIC_GENERIC_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.generic>;
1210
// Instruction variants for an atomic operation taking an address and one
// value operand.
//   ptrclass - register class of the $addr operand.
//   regclass - register class of the value operand and of the result.
//   SpaceStr, OpcStr, TypeStr - concatenated in that order after "atom" to
//              form the PTX mnemonic.
//   IntOp    - PatFrag matched by the selection pattern.
//   IMMType  - operand type of $b in the `imm` variant.
//   IMM      - SDNode used to match $b in the `imm` variant.
//   Pred     - predicates required by both variants.
// Both variants emit the same asm string; they differ only in whether $b is a
// register (`reg`) or an immediate (`imm`).
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  Operand IMMType, SDNode IMM, list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
  Requires<Pred>;
  def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
  Requires<Pred>;
}
// Instantiates F_ATOMIC_2_imp twice, once with Int32Regs and once with
// Int64Regs as the register class of the $addr operand (sub-names p32 / p64).
// All other arguments are forwarded unchanged; Pred defaults to no predicates.
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      SDNode IMM, list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                     IMMType, IMM, Pred>;
  defm p64
    : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                     IMMType, IMM, Pred>;
}
1231
// Has 2 operands; the second one is negated before the atomic operation.
// Register-only atomic variant that negates its value operand first. The
// emitted asm is a braced block that declares a scratch register `temp` of
// type .s<TypeStr>, writes neg.s<TypeStr> of $b into it, and then issues
// atom<SpaceStr><OpcStr>.u<TypeStr> on [$addr] with `temp` as the operand.
// TypeStr is therefore a bare bit width here (it is appended to ".s"/".u").
// IMMType is accepted but not referenced: only a `reg` variant is defined.
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, Operand IMMType,
                              list<Predicate> Pred> {
  def reg
    : NVPTXInst<
        (outs regclass:$dst),
        (ins ptrclass:$addr, regclass:$b),
        !strconcat(
          "{{ \n\t",
          ".reg \t.s", TypeStr, " temp; \n\t",
          "neg.s", TypeStr, " \ttemp, $b; \n\t",
          "atom", SpaceStr, OpcStr, ".u", TypeStr,
          " \t$dst, [$addr], temp; \n\t",
          "}}"),
        [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
      Requires<Pred>;
}
// Instantiates F_ATOMIC_2_NEG_imp twice, once with Int32Regs and once with
// Int64Regs as the register class of the $addr operand (sub-names p32 / p64).
// All other arguments are forwarded unchanged; Pred defaults to no predicates.
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
                          string TypeStr, string OpcStr, PatFrag IntOp,
                          Operand IMMType, list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                         IMMType, Pred>;
  defm p64
    : F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                         IMMType, Pred>;
}
1254
1255// has 3 operands
// Instruction variants for an atomic operation taking an address and two
// value operands ($b and $c).
//   ptrclass - register class of the $addr operand.
//   regclass - register class of register value operands and of the result.
//   SpaceStr, OpcStr, TypeStr - concatenated in that order after "atom" to
//              form the PTX mnemonic.
//   IntOp    - PatFrag matched by the selection pattern.
//   IMMType  - operand type used wherever $b or $c is an immediate.
//   Pred     - predicates required by every variant.
// All four variants emit the same asm string. Immediates are always matched
// with the `imm` node; unlike F_ATOMIC_2_imp there is no IMM parameter.
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  Operand IMMType, list<Predicate> Pred> {
  // $b and $c are both registers.
  def reg : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, regclass:$b, regclass:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
  Requires<Pred>;

  // $b is an immediate, $c is a register.
  def imm1 : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, IMMType:$b, regclass:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
  Requires<Pred>;

  // $b is a register, $c is an immediate.
  def imm2 : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, regclass:$b, IMMType:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
  Requires<Pred>;

  // $b and $c are both immediates.
  def imm3 : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
  Requires<Pred>;
}
// Instantiates F_ATOMIC_3_imp twice, once with Int32Regs and once with
// Int64Regs as the register class of the $addr operand (sub-names p32 / p64).
// All other arguments are forwarded unchanged; Pred defaults to no predicates.
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                     IMMType, Pred>;
  defm p64
    : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                     IMMType, Pred>;
}
1290
1291// atom_add
1292
// Address-space-qualified fragments for atomic add. Suffixes: _g = global,
// _s = shared, _gen = generic (per the ATOMIC_*_CHK class each one uses).
// 32-bit integer add.
def atomic_load_add_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_add_32 node:$a, node:$b)>;
// 64-bit integer add.
def atomic_load_add_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_add_64 node:$a, node:$b)>;
// The unsized names wrap atomic_load_fadd (floating-point add), not an
// integer add.
def atomic_load_add_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_fadd node:$a, node:$b)>;
1311
// 32-bit integer atomic add (atom[.space].add.u32). The GEN variant passes an
// empty space string; GEN_32_USE_G matches the same generic fragment but
// emits the ".global" form.
defm INT_PTX_ATOM_ADD_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
               atomic_load_add_32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm>;

// 64-bit integer atomic add (atom[.space].add.u64), same four variants.
defm INT_PTX_ATOM_ADD_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
               atomic_load_add_64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm>;

// f32 atomic add; immediates are matched with fpimm. No predicate list is
// passed, so F_ATOMIC_2's default (empty) Pred applies.
defm INT_PTX_ATOM_ADD_G_F32
  : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
               atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32
  : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
               atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32
  : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
               atomic_load_add_gen, f32imm, fpimm>;

// f64 atomic add; these are additionally gated on hasAtomAddF64.
defm INT_PTX_ATOM_ADD_G_F64
  : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
               atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64
  : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
               atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64
  : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
               atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
1343
1344// atom_sub
1345
// Fragments wrapping atomic_load_sub_{32,64} in the ATOMIC_*_CHK checkers,
// one per (width, checker) pair.
def atomic_load_sub_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_sub_64 node:$a, node:$b)>;
1358
// Subtraction records. Note that the op string handed to F_ATOMIC_2_NEG is
// ".add" and the type string is a bare width ("32"/"64").
// NOTE(review): presumably F_ATOMIC_2_NEG negates the operand before the
// add (per its name) -- confirm against its definition, which is outside
// this section.
defm INT_PTX_ATOM_SUB_G_32
    : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                     atomic_load_sub_32_g, i32imm>;
defm INT_PTX_ATOM_SUB_G_64
    : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                     atomic_load_sub_64_g, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_32
    : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
                     atomic_load_sub_32_gen, i32imm>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G
    : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                     atomic_load_sub_32_gen, i32imm>;
defm INT_PTX_ATOM_SUB_S_32
    : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
                     atomic_load_sub_32_s, i32imm>;
defm INT_PTX_ATOM_SUB_S_64
    : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
                     atomic_load_sub_64_s, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_64
    : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
                     atomic_load_sub_64_gen, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G
    : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                     atomic_load_sub_64_gen, i64imm>;
1375
1376// atom_swap
1377
// Fragments wrapping atomic_swap_{32,64} in the ATOMIC_*_CHK checkers,
// one per (width, checker) pair.
def atomic_swap_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_swap_64 node:$a, node:$b)>;
1390
// Swap records: op string ".exch" with the untyped ".b32"/".b64" type
// strings. The *_USE_G records pair the generic-space fragment with the
// ".global" space string.
defm INT_PTX_ATOM_SWAP_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
                 atomic_swap_32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
                 atomic_swap_32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
                 atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
                 atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
                 atomic_swap_64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
                 atomic_swap_64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
                 atomic_swap_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
                 atomic_swap_64_gen, i64imm, imm>;
1407
1408// atom_max
1409
// Fragments for signed max (atomic_load_max_*) ...
def atomic_load_max_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_max_64 node:$a, node:$b)>;
// ... and for unsigned max (atomic_load_umax_*).
def atomic_load_umax_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_umax_64 node:$a, node:$b)>;
1434
// ".max" records. The signed fragments are paired with the ".s32"/".s64"
// type strings, the unsigned (umax) fragments with ".u32"/".u64".
defm INT_PTX_ATOM_LOAD_MAX_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
                 atomic_load_max_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".max",
                 atomic_load_max_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
                 atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
                 atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
                 atomic_load_max_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".max",
                 atomic_load_max_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
                 atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
                 atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
                 atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".max",
                 atomic_load_umax_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
                 atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
                 atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
                 atomic_load_umax_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".max",
                 atomic_load_umax_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
                 atomic_load_umax_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
                 atomic_load_umax_64_gen, i64imm, imm>;
1467
1468// atom_min
1469
// Fragments for signed min (atomic_load_min_*) ...
def atomic_load_min_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_min_64 node:$a, node:$b)>;
// ... and for unsigned min (atomic_load_umin_*).
def atomic_load_umin_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_umin_64 node:$a, node:$b)>;
1494
// ".min" records. The signed fragments are paired with the ".s32"/".s64"
// type strings, the unsigned (umin) fragments with ".u32"/".u64".
defm INT_PTX_ATOM_LOAD_MIN_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
                 atomic_load_min_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".min",
                 atomic_load_min_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
                 atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
                 atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
                 atomic_load_min_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".min",
                 atomic_load_min_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
                 atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
                 atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
                 atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".min",
                 atomic_load_umin_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
                 atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
                 atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
                 atomic_load_umin_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".min",
                 atomic_load_umin_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
                 atomic_load_umin_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
                 atomic_load_umin_64_gen, i64imm, imm>;
1527
1528// atom_inc  atom_dec
1529
// Unlike the other atomics in this section, inc/dec fragments wrap the
// int_nvvm_atomic_load_{inc,dec}_32 intrinsics. Only 32-bit forms exist
// here.
def atomic_load_inc_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1542
// ".inc" / ".dec" records, all 32-bit with the ".u32" type string. The
// *_USE_G records pair the generic-space fragment with the ".global" space
// string.
defm INT_PTX_ATOM_INC_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
                 atomic_load_inc_32_g, i32imm, imm>;
defm INT_PTX_ATOM_INC_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
                 atomic_load_inc_32_s, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
                 atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
                 atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
                 atomic_load_dec_32_g, i32imm, imm>;
defm INT_PTX_ATOM_DEC_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
                 atomic_load_dec_32_s, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
                 atomic_load_dec_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
                 atomic_load_dec_32_gen, i32imm, imm>;
1559
1560// atom_and
1561
// Fragments wrapping atomic_load_and_{32,64} in the ATOMIC_*_CHK checkers,
// one per (width, checker) pair.
def atomic_load_and_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_and_64 node:$a, node:$b)>;
1574
// ".and" records with the untyped ".b32"/".b64" type strings. The *_USE_G
// records pair the generic-space fragment with the ".global" space string.
defm INT_PTX_ATOM_AND_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
                 atomic_load_and_32_g, i32imm, imm>;
defm INT_PTX_ATOM_AND_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
                 atomic_load_and_32_s, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
                 atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
                 atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
                 atomic_load_and_64_g, i64imm, imm>;
defm INT_PTX_ATOM_AND_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
                 atomic_load_and_64_s, i64imm, imm>;
defm INT_PTX_ATOM_AND_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
                 atomic_load_and_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
                 atomic_load_and_64_gen, i64imm, imm>;
1591
1592// atom_or
1593
// Fragments wrapping atomic_load_or_{32,64} in the ATOMIC_*_CHK checkers,
// one per (width, checker) pair.
def atomic_load_or_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_or_64 node:$a, node:$b)>;
1606
// ".or" records with the untyped ".b32"/".b64" type strings. The *_USE_G
// records pair the generic-space fragment with the ".global" space string.
// Definition order (G, GEN, GEN_USE_G, S) is kept as it was.
defm INT_PTX_ATOM_OR_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
                 atomic_load_or_32_g, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
                 atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
                 atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
                 atomic_load_or_32_s, i32imm, imm>;
defm INT_PTX_ATOM_OR_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
                 atomic_load_or_64_g, i64imm, imm>;
defm INT_PTX_ATOM_OR_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
                 atomic_load_or_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
                 atomic_load_or_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_OR_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
                 atomic_load_or_64_s, i64imm, imm>;
1623
1624// atom_xor
1625
// Fragments wrapping atomic_load_xor_{32,64} in the ATOMIC_*_CHK checkers,
// one per (width, checker) pair.
def atomic_load_xor_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_xor_64 node:$a, node:$b)>;
1638
// ".xor" records with the untyped ".b32"/".b64" type strings. The *_USE_G
// records pair the generic-space fragment with the ".global" space string.
defm INT_PTX_ATOM_XOR_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
                 atomic_load_xor_32_g, i32imm, imm>;
defm INT_PTX_ATOM_XOR_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
                 atomic_load_xor_32_s, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
                 atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
                 atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
                 atomic_load_xor_64_g, i64imm, imm>;
defm INT_PTX_ATOM_XOR_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
                 atomic_load_xor_64_s, i64imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
                 atomic_load_xor_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
                 atomic_load_xor_64_gen, i64imm, imm>;
1655
1656// atom_cas
1657
// Three-operand fragments ($a, $b, $c) wrapping atomic_cmp_swap_{32,64} in
// the ATOMIC_*_CHK checkers, one per (width, checker) pair.
def atomic_cmp_swap_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                        (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                        (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                         (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                        (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                        (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                         (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1670
// ".cas" records, built from F_ATOMIC_3 (no separate immediate-node
// argument is passed here, unlike the F_ATOMIC_2 records). The *_USE_G
// records pair the generic-space fragment with the ".global" space string.
defm INT_PTX_ATOM_CAS_G_32
    : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
                 atomic_cmp_swap_32_g, i32imm>;
defm INT_PTX_ATOM_CAS_S_32
    : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
                 atomic_cmp_swap_32_s, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32
    : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
                 atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G
    : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
                 atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_G_64
    : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
                 atomic_cmp_swap_64_g, i64imm>;
defm INT_PTX_ATOM_CAS_S_64
    : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
                 atomic_cmp_swap_64_s, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64
    : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
                 atomic_cmp_swap_64_gen, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G
    : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
                 atomic_cmp_swap_64_gen, i64imm>;
1687
1688// Support for scoped atomic operations.  Matches
1689// int_nvvm_atomic_{op}_{space}_{type}_{scope}
1690// and converts it into the appropriate instruction.
1691// NOTE: not all possible combinations are implemented
1692//  'space' is limited to generic as it's the only one needed to support CUDA.
1693//  'scope' = 'gpu' is default and is handled by regular atomic instructions.
// Common base for the scoped two- and three-operand atomics.
//   AsmStr   - assembly string of the instruction.
//   regclass - register class of the single output, $result.
//   Preds    - predicates attached through Requires<>.
//   ins      - input operand dag.
//   Operands - dag whose value the selection pattern assigns to $result.
class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
                  dag ins, dag Operands>
    : NVPTXInst<(outs regclass:$result),
                ins,
                AsmStr,
                [(set regclass:$result, Operands)]>,
      Requires<Preds>;
1700
1701// Define instruction variants for all addressing modes.
multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Four anonymous instructions: {register, immediate} $b crossed with
  // {Int32Regs, Int64Regs} address register $src.

  // Register $b: these two carry AddedComplexity = 1.
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, regclass:$b),
                    (Intr Int32Regs:$src, regclass:$b)>;
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, regclass:$b),
                    (Intr Int64Regs:$src, regclass:$b)>;

  // Immediate $b. An Intrinsic does not give tablegen the argument types
  // (an Instruction would), so the immediate is cast to ImmTy explicitly.
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
}
1724
multiclass ATOM3P_impl<string AsmStr,  Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Eight anonymous instructions: every register/immediate combination of
  // $b and $c, each for an Int32Regs and an Int64Regs address register.
  // Immediates are cast to ImmTy explicitly.

  // $b and $c both registers: AddedComplexity = 2.
  let AddedComplexity = 2 in
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, regclass:$b, regclass:$c),
                    (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
  let AddedComplexity = 2 in
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, regclass:$b, regclass:$c),
                    (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;

  // Immediate $b, register $c: AddedComplexity = 1.
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b, regclass:$c),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b, regclass:$c),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;

  // Register $b, immediate $c: AddedComplexity = 1.
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, regclass:$b, ImmType:$c),
                    (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, regclass:$b, ImmType:$c),
                    (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;

  // $b and $c both immediates: no AddedComplexity override.
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}
1759
// Constructs intrinsic name and instruction asm strings.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // Asm string: "atom[.<space>][.<scope>].<op>.<type> \t$result, [$src], $b;"
  // where ".<space>" is dropped for "gen" and ".<scope>" is dropped for
  // "gpu".
  // Intrinsic record looked up by name:
  // int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>], the scope suffix
  // being dropped only when ScopeStr is empty.
  defm : ATOM2P_impl<
      !strconcat("atom",
                 !if(!eq(SpaceStr, "gen"), "", !strconcat(".", SpaceStr)),
                 !if(!eq(ScopeStr, "gpu"), "", !strconcat(".", ScopeStr)),
                 ".", OpStr, ".", TypeStr,
                 " \t$result, [$src], $b;"),
      !cast<Intrinsic>(
          !strconcat("int_nvvm_atomic_", OpStr,
                     "_", SpaceStr, "_", IntTypeStr,
                     !if(!empty(ScopeStr), "", !strconcat("_", ScopeStr)))),
      regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // Three-operand counterpart of the two-operand name builder. Asm string:
  // "atom[.<space>][.<scope>].<op>.<type> \t$result, [$src], $b, $c;"
  // where ".<space>" is dropped for "gen" and ".<scope>" is dropped for
  // "gpu".
  // Intrinsic record looked up by name:
  // int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>], the scope suffix
  // being dropped only when ScopeStr is empty.
  defm : ATOM3P_impl<
      !strconcat("atom",
                 !if(!eq(SpaceStr, "gen"), "", !strconcat(".", SpaceStr)),
                 !if(!eq(ScopeStr, "gpu"), "", !strconcat(".", ScopeStr)),
                 ".", OpStr, ".", TypeStr,
                 " \t$result, [$src], $b, $c;"),
      !cast<Intrinsic>(
          !strconcat("int_nvvm_atomic_", OpStr,
                     "_", SpaceStr, "_", IntTypeStr,
                     !if(!empty(ScopeStr), "", !strconcat("_", ScopeStr)))),
      regclass, ImmType, Imm, ImmTy, Preds>;
}
1789
1790// Constructs variants for different address spaces.
1791// For now we only need variants for generic space pointers.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
  // Single instantiation, with the space string fixed to "gen"; every other
  // argument is forwarded unchanged.
  defm _gen_
      : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                    regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
  // Three-operand variant: single instantiation with the space string fixed
  // to "gen"; every other argument is forwarded unchanged.
  defm _gen_
      : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                    regclass, ImmType, Imm, ImmTy, Preds>;
}
1804
1805// Constructs variants for different scopes of atomic op.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // Only the "cta" and "sys" scopes are instantiated; the default .gpu
  // scope is left to the existing atomics that carry no explicit scope.
  // hasAtomScope is appended to the caller's predicate list in both cases.
  defm _cta
      : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                    regclass, ImmType, Imm, ImmTy,
                    !listconcat(Preds,[hasAtomScope])>;
  defm _sys
      : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                    regclass, ImmType, Imm, ImmTy,
                    !listconcat(Preds,[hasAtomScope])>;
}
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
           NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
           list<Predicate> Preds> {
  // Only the "cta" and "sys" scopes are instantiated; ".gpu"-scoped atomics
  // would duplicate the regular, non-scoped atomics defined elsewhere.
  // hasAtomScope is appended to the caller's predicate list in both cases.
  defm _cta
      : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                    regclass, ImmType, Imm, ImmTy,
                    !listconcat(Preds,[hasAtomScope])>;
  defm _sys
      : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                    regclass, ImmType, Imm, ImmTy,
                    !listconcat(Preds,[hasAtomScope])>;
}
1830
1831// atom.add
multiclass ATOM2_add_impl<string OpStr> {
  // Integer forms use intrinsic type letter "i", floating-point forms "f".
  // There is no _s64 entry; only the f64 form adds a predicate
  // (hasAtomAddF64).
  defm _s32
      : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  defm _u32
      : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  defm _u64
      : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
  defm _f32
      : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32, []>;
  defm _f64
      : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
                    [hasAtomAddF64]>;
}
1841
1842// atom.{and,or,xor}
multiclass ATOM2_bitwise_impl<string OpStr> {
  // The 32-bit form has no extra predicate; the 64-bit form requires
  // hasAtomBitwise64.
  defm _b32
      : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64
      : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
                    [hasAtomBitwise64]>;
}
1848
1849// atom.exch
multiclass ATOM2_exch_impl<string OpStr> {
  // Both widths are instantiated with an empty extra-predicate list.
  defm _b32
      : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64
      : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
}
1854
1855// atom.{min,max}
multiclass ATOM2_minmax_impl<string OpStr> {
  // Signed and unsigned forms at both widths; the two 64-bit forms require
  // hasAtomMinMax64.
  defm _s32
      : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  defm _u32
      : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  defm _s64
      : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
                    [hasAtomMinMax64]>;
  defm _u64
      : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
                    [hasAtomMinMax64]>;
}
1864
1865// atom.{inc,dec}
multiclass ATOM2_incdec_impl<string OpStr> {
  // Only an unsigned 32-bit form is instantiated.
  defm _u32
      : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
}
1869
1870// atom.cas
multiclass ATOM3_cas_impl<string OpStr> {
  // Three-operand op, so this goes through ATOM3S_impl; both widths use an
  // empty extra-predicate list.
  defm _b32
      : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64
      : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
}
1875
// Scoped atomic instantiations, one per op string. The string is spliced
// into both the asm mnemonic and the intrinsic name by the *N_impl
// multiclasses.
defm INT_PTX_SATOM_ADD  : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND  : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS  : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC  : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH : ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC  : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX  : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN  : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR   : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR  : ATOM2_bitwise_impl<"xor">;
1886
1887//-----------------------------------
1888// Support for ldu on sm_20 or later
1889//-----------------------------------
1890
1891// Don't annotate ldu instructions as mayLoad, as they load from memory that is
1892// read-only in a kernel.
1893
1894// Scalar
1895
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  // Five scalar "ldu.global." instructions that differ only in the kind of
  // address operand. TyStr supplies the rest of the asm string (type suffix
  // and operand list). All have an empty pattern list and require hasLDU.

  // Address in a 32-bit / 64-bit register.
  def areg
      : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                  "ldu.global." # TyStr, []>,
        Requires<[hasLDU]>;
  def areg64
      : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                  "ldu.global." # TyStr, []>,
        Requires<[hasLDU]>;
  // Address given as an imemAny operand.
  def avar
      : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                  "ldu.global." # TyStr, []>,
        Requires<[hasLDU]>;
  // Address given as a MEMri / MEMri64 operand.
  def ari
      : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                  "ldu.global." # TyStr, []>,
        Requires<[hasLDU]>;
  def ari64
      : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                  "ldu.global." # TyStr, []>,
        Requires<[hasLDU]>;
}
1913
// Scalar ldu instantiations. Note that i8 results use Int16Regs, the f16
// and f16x2 forms use the untyped b16 / b32 suffixes, and the p32 / p64
// forms reuse the u32 / u64 strings and register classes.
defm INT_PTX_LDU_GLOBAL_i8
    : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16
    : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32
    : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64
    : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f16
    : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDU_GLOBAL_f16x2
    : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_GLOBAL_f32
    : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64
    : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDU_GLOBAL_p32
    : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_p64
    : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1924
1925// vector
1926
1927// Elementized vector ldu
// Two-element vector ldu.global: each def produces two results of class
// `regclass` ($dst1, $dst2) from a single $src address operand.
//   TyStr    - remainder of the asm string after "ldu.global.".
//   regclass - register class of each result element.
// All defs have empty pattern lists and carry no Requires<> predicate.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  // $src is an Int32Regs register.
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src),
                          "ldu.global." # TyStr, []>;
  // $src is an Int64Regs register.
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src),
                          "ldu.global." # TyStr, []>;
  // $src is a MEMri operand.
  def _ari32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                         (ins MEMri:$src),
                         "ldu.global." # TyStr, []>;
  // $src is a MEMri64 operand.
  def _ari64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                         (ins MEMri64:$src),
                         "ldu.global." # TyStr, []>;
  // $src is an imemAny operand.
  def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                        (ins imemAny:$src),
                        "ldu.global." # TyStr, []>;
}
1945
// Four-element vector ldu.global: each def produces four results of class
// `regclass` ($dst1..$dst4) from a single $src address operand.
//   TyStr    - remainder of the asm string after "ldu.global.".
//   regclass - register class of each result element.
// All defs have empty pattern lists and carry no Requires<> predicate.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  // $src is an Int32Regs register.
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src),
                          "ldu.global." # TyStr, []>;
  // $src is an Int64Regs register.
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src),
                          "ldu.global." # TyStr, []>;
  // $src is a MEMri operand.
  def _ari32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                               regclass:$dst3, regclass:$dst4),
                         (ins MEMri:$src),
                         "ldu.global." # TyStr, []>;
  // $src is a MEMri64 operand.
  def _ari64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                               regclass:$dst3, regclass:$dst4),
                         (ins MEMri64:$src),
                         "ldu.global." # TyStr, []>;
  // $src is an imemAny operand.
  def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                              regclass:$dst3, regclass:$dst4),
                        (ins imemAny:$src),
                        "ldu.global." # TyStr, []>;
}
1963
// Vector ldu.global instantiations. The string is the vector/type suffix plus
// operand list (destinations wrapped in "{{ }}"); the second argument is the
// register class of each element. The 8-bit element forms use Int16Regs.

// Two-element forms.
defm INT_PTX_LDU_G_v2i8_ELE
  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v2f16_ELE
  : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDU_G_v2f16x2_ELE
  : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];",
                  Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];",
                  Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];",
                  Float64Regs>;

// Four-element forms.
defm INT_PTX_LDU_G_v4i8_ELE
  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE
  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE
  : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE
  : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;
1997
1998
1999//-----------------------------------
2000// Support for ldg on sm_35 or later
2001//-----------------------------------
2002
2003// Don't annotate ld.global.nc as mayLoad, because these loads go through the
2004// non-coherent texture cache, and therefore the values read must be read-only
2005// during the lifetime of the kernel.
2006
// Scalar ld.global.nc instructions, one def per form of the $src operand.
//   TyStr    - remainder of the asm string after "ld.global.nc." (type suffix
//              followed by the operand list).
//   regclass - register class of $result.
// Every def has an empty pattern list and is predicated on hasLDG.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  // $src is an Int32Regs register.
  def areg : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                       "ld.global.nc." # TyStr, []>,
             Requires<[hasLDG]>;
  // $src is an Int64Regs register.
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  // $src is an imemAny operand.
  def avar : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                       "ld.global.nc." # TyStr, []>,
             Requires<[hasLDG]>;
  // $src is a MEMri operand.
  def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                      "ld.global.nc." # TyStr, []>,
            Requires<[hasLDG]>;
  // $src is a MEMri64 operand.
  def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                        "ld.global.nc." # TyStr, []>,
              Requires<[hasLDG]>;
}
2024
// Scalar ld.global.nc instantiations. The string argument is the PTX type
// suffix followed by the operand list; the second argument is the register
// class of the result. The 8-bit form places its result in Int16Regs, and the
// pointer forms (p32/p64) use the same asm strings as i32/i64.
defm INT_PTX_LDG_GLOBAL_i8    : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f16   : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDG_GLOBAL_f16x2 : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_GLOBAL_f32   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDG_GLOBAL_p32   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_p64   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
2045
2046// vector
2047
2048// Elementized vector ldg
// Two-element vector ld.global.nc: each def produces two results of class
// `regclass` ($dst1, $dst2) from a single $src address operand.
//   TyStr    - remainder of the asm string after "ld.global.nc.".
//   regclass - register class of each result element.
// All defs have empty pattern lists and carry no Requires<> predicate.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  // $src is an Int32Regs register.
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  // $src is an Int64Regs register.
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  // $src is a MEMri operand.
  def _ari32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                         (ins MEMri:$src),
                         "ld.global.nc." # TyStr, []>;
  // $src is a MEMri64 operand.
  def _ari64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                         (ins MEMri64:$src),
                         "ld.global.nc." # TyStr, []>;
  // $src is an imemAny operand.
  def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                        (ins imemAny:$src),
                        "ld.global.nc." # TyStr, []>;
}
2066
// Four-element vector ld.global.nc: each def produces four results of class
// `regclass` ($dst1..$dst4) from a single $src address operand.
//   TyStr    - remainder of the asm string after "ld.global.nc.".
//   regclass - register class of each result element.
// All defs have empty pattern lists and carry no Requires<> predicate.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  // $src is an Int32Regs register.
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  // $src is an Int64Regs register.
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  // $src is a MEMri operand.
  def _ari32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                               regclass:$dst3, regclass:$dst4),
                         (ins MEMri:$src),
                         "ld.global.nc." # TyStr, []>;
  // $src is a MEMri64 operand.
  def _ari64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                               regclass:$dst3, regclass:$dst4),
                         (ins MEMri64:$src),
                         "ld.global.nc." # TyStr, []>;
  // $src is an imemAny operand.
  def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                              regclass:$dst3, regclass:$dst4),
                        (ins imemAny:$src),
                        "ld.global.nc." # TyStr, []>;
}
2084
// Vector ld.global.nc instantiations. The string is the vector/type suffix
// plus operand list (destinations wrapped in "{{ }}"); the second argument is
// the register class of each element.
//
// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper
// loads.

// Two-element forms.
defm INT_PTX_LDG_G_v2i8_ELE
  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v2f16_ELE
  : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDG_G_v2f16x2_ELE
  : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];",
                  Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];",
                  Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];",
                  Float64Regs>;

// Four-element forms.
defm INT_PTX_LDG_G_v4i8_ELE
  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v4f16_ELE
  : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDG_G_v4f16x2_ELE
  : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;
2114
2115
// cvta.<Str> instructions selected for the intrinsic `Intrin`.
//   Str    - address-space name spliced into the "cvta.<Str>" mnemonic.
//   Intrin - intrinsic matched by each def's selection pattern.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  // 32-bit source and result.
  def _yes : NVPTXInst<
      (outs Int32Regs:$result), (ins Int32Regs:$src),
      "cvta." # Str # ".u32 \t$result, $src;",
      [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;

  // 64-bit source and result.
  def _yes_64 : NVPTXInst<
      (outs Int64Regs:$result), (ins Int64Regs:$src),
      "cvta." # Str # ".u64 \t$result, $src;",
      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;

  // 32-bit source, 64-bit result (only with useShortPtr): the source is first
  // widened into a scratch .b64 register with cvt.u64.u32, then converted.
  def _yes_6432 : NVPTXInst<
      (outs Int64Regs:$result), (ins Int32Regs:$src),
      !strconcat("{{ .reg .b64 %tmp;\n\t",
                 "  cvt.u64.u32 \t%tmp, $src;\n\t",
                 "  cvta.", Str, ".u64 \t$result, %tmp; }}"),
      [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
    Requires<[useShortPtr]>;
}
2130
// cvta.to.<Str> instructions selected for the intrinsic `Intrin`.
//   Str    - address-space name spliced into the "cvta.to.<Str>" mnemonic.
//   Intrin - intrinsic matched by each def's selection pattern.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  // 32-bit source and result.
  def _yes : NVPTXInst<
      (outs Int32Regs:$result), (ins Int32Regs:$src),
      "cvta.to." # Str # ".u32 \t$result, $src;",
      [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;

  // 64-bit source and result.
  def _yes_64 : NVPTXInst<
      (outs Int64Regs:$result), (ins Int64Regs:$src),
      "cvta.to." # Str # ".u64 \t$result, $src;",
      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;

  // 64-bit source, 32-bit result (only with useShortPtr): the conversion is
  // done into a scratch .b64 register, then narrowed with cvt.u32.u64.
  def _yes_3264 : NVPTXInst<
      (outs Int32Regs:$result), (ins Int64Regs:$src),
      !strconcat("{{ .reg .b64 %tmp;\n\t",
                 "  cvta.to.", Str, ".u64 \t%tmp, $src;\n\t",
                 "  cvt.u32.u64 \t$result, %tmp; }}"),
      [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
    Requires<[useShortPtr]>;
}
2145
// cvta.<space>: instantiations for the int_nvvm_ptr_<space>_to_gen intrinsics.
defm cvta_local
  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared
  : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global
  : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const
  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;

// cvta.to.<space>: instantiations for the int_nvvm_ptr_gen_to_<space>
// intrinsics.
defm cvta_to_local
  : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared
  : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global
  : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const
  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
2155
2156
// nvvm.ptr.gen.to.param
// Selected as a plain register move (mov.u32 / mov.u64) of the source value.
def nvvm_ptr_gen_to_param : NVPTXInst<
    (outs Int32Regs:$result),
    (ins Int32Regs:$src),
    "mov.u32 \t$result, $src;",
    [(set Int32Regs:$result, (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64 : NVPTXInst<
    (outs Int64Regs:$result),
    (ins Int64Regs:$src),
    "mov.u64 \t$result, $src;",
    [(set Int64Regs:$result, (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
2168
2169
// nvvm.move intrinsics
// Each int_nvvm_move_* intrinsic is selected to a single PTX mov of the
// matching width/type. The integer forms use mov.b16/b32/b64, the
// floating-point forms mov.f32/f64, and the pointer forms mov.u32/u64.
def nvvm_move_i16 : NVPTXInst<
    (outs Int16Regs:$r), (ins Int16Regs:$s),
    "mov.b16 \t$r, $s;",
    [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32 : NVPTXInst<
    (outs Int32Regs:$r), (ins Int32Regs:$s),
    "mov.b32 \t$r, $s;",
    [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64 : NVPTXInst<
    (outs Int64Regs:$r), (ins Int64Regs:$s),
    "mov.b64 \t$r, $s;",
    [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float : NVPTXInst<
    (outs Float32Regs:$r), (ins Float32Regs:$s),
    "mov.f32 \t$r, $s;",
    [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double : NVPTXInst<
    (outs Float64Regs:$r), (ins Float64Regs:$s),
    "mov.f64 \t$r, $s;",
    [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;
// The pointer move intrinsic is matched at both pointer widths.
def nvvm_move_ptr32 : NVPTXInst<
    (outs Int32Regs:$r), (ins Int32Regs:$s),
    "mov.u32 \t$r, $s;",
    [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64 : NVPTXInst<
    (outs Int64Regs:$r), (ins Int64Regs:$s),
    "mov.u64 \t$r, $s;",
    [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;
2199
2200// @TODO: Are these actually needed, or will we always just see symbols
2201// copied to registers first?
2202/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2203                             "mov.u32 \t$r, $s;",
2204                             [(set Int32Regs:$r,
2205                             (int_nvvm_move_ptr texternalsym:$s))]>;
2206def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2207                             "mov.u64 \t$r, $s;",
2208                             [(set Int64Regs:$r,
2209                             (int_nvvm_move_ptr texternalsym:$s))]>;*/
2210
2211
2212// MoveParam        %r1, param
2213// ptr_local_to_gen %r2, %r1
2214// ptr_gen_to_local %r3, %r2
2215// ->
2216// mov %r1, param
2217
2218// @TODO: Revisit this.  There is a type
2219// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
2220// instructions are not currently defined. However, we can use the ptr
2221// variants and the asm printer will do the right thing.
// Fold gen_to_local(local_to_gen(MoveParam sym)) producing an i64 into a
// single nvvm_move_ptr64 of the symbol.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
                (MoveParam texternalsym:$src)))),
               (nvvm_move_ptr64  texternalsym:$src)>;
// Same fold for an i32 result, using nvvm_move_ptr32.
def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
                (MoveParam texternalsym:$src)))),
               (nvvm_move_ptr32  texternalsym:$src)>;
2228
// Moves the imem operand $src into a 64-bit register with mov.u64. There is
// no selection pattern attached to this instruction.
def texsurf_handles : NVPTXInst<
    (outs Int64Regs:$result),
    (ins imem:$src),
    "mov.u64 \t$result, $src;",
    []>;
2232
2233//-----------------------------------
2234// Compiler Error Warn
2235// - Just ignore them in codegen
2236//-----------------------------------
2237
// The compiler.warn / compiler.error intrinsics produce no result and are
// emitted as a PTX comment only. Each is matched for both a 32-bit and a
// 64-bit operand.
def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<
    (outs), (ins Int32Regs:$a),
    "// llvm.nvvm.compiler.warn()",
    [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<
    (outs), (ins Int64Regs:$a),
    "// llvm.nvvm.compiler.warn()",
    [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<
    (outs), (ins Int32Regs:$a),
    "// llvm.nvvm.compiler.error()",
    [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<
    (outs), (ins Int64Regs:$a),
    "// llvm.nvvm.compiler.error()",
    [(int_nvvm_compiler_error Int64Regs:$a)]>;
2250
2251
2252// isspacep
2253
// isspacep.<space>: writes a predicate (Int1Regs) result for the address in
// $a. Each address space has a 32-bit and a 64-bit address form. Only the
// .const forms carry a predicate (hasPTX31).
def ISSPACEP_CONST_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.const \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
  Requires<[hasPTX31]>;
def ISSPACEP_CONST_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.const \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
  Requires<[hasPTX31]>;

def ISSPACEP_GLOBAL_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.global \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
def ISSPACEP_GLOBAL_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.global \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;

def ISSPACEP_LOCAL_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.local \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
def ISSPACEP_LOCAL_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.local \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;

def ISSPACEP_SHARED_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.shared \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
def ISSPACEP_SHARED_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.shared \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
2288
2289
2290// Special register reads
// Copies a SpecialRegs operand into a 32-bit register with mov.b32. It has no
// selection pattern of its own; it is used as the output of explicit Pat<>
// records.
def MOV_SPECIAL : NVPTXInst<
    (outs Int32Regs:$d),
    (ins SpecialRegs:$r),
    "mov.b32 \t$d, $r;",
    []>;
2294
// Each int_nvvm_read_ptx_sreg_envreg<N> intrinsic (N = 0..31) is selected to
// a MOV_SPECIAL of the correspondingly numbered ENVREG<N> register.
def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2327
2328
2329// rotate builtin support
2330
// int_nvvm_rotate_b32 with hasHWROT32: a single shf.l.wrap.b32 in which $src
// is passed as both data operands. Immediate-amount form.
def ROTATE_B32_HW_IMM : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins Int32Regs:$src, i32imm:$amt),
    "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
    [(set Int32Regs:$dst,
          (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
  Requires<[hasHWROT32]>;

// Register-amount form of the same instruction.
def ROTATE_B32_HW_REG : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins Int32Regs:$src, Int32Regs:$amt),
    "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
    [(set Int32Regs:$dst,
          (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
  Requires<[hasHWROT32]>;

// int_nvvm_rotate_b32 with noHWROT32, immediate amount: ROT32imm_sw receives
// the amount and its SUB_FRM_32 transform.
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
  Requires<[noHWROT32]>;

// int_nvvm_rotate_b32 with noHWROT32, register amount.
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
          (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
  Requires<[noHWROT32]>;
2354
// Extract one 32-bit half of a 64-bit register. Both instructions emit a
// braced PTX scope that declares a scratch register %dummy and use the
// vector-unpack form of mov.b64; the unwanted half lands in %dummy.
// Neither has a selection pattern; they are used from Pat<> outputs.
let hasSideEffects = false in {
  // $dst is the first element of the unpacked pair.
  def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
    !strconcat("{{\n\t",
               ".reg .b32 %dummy;\n\t",
               "mov.b64 \t{$dst,%dummy}, $src;\n\t",
               "}}"),
          []> ;

  // $dst is the second element of the unpacked pair.
  def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
    !strconcat("{{\n\t",
               ".reg .b32 %dummy;\n\t",
               "mov.b64 \t{%dummy,$dst}, $src;\n\t",
               "}}"),
          []> ;
}
2370
// Builds a 64-bit register from two 32-bit registers using the vector-pack
// form of mov.b64; $lo is listed first and $hi second. No selection pattern.
let hasSideEffects = false in {
  def PACK_TWO_INT32 : NVPTXInst<
      (outs Int64Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi),
      "mov.b64 \t$dst, {{$lo, $hi}};",
      []>;
}
2376
// int_nvvm_swap_lo_hi_b64: repack the source with its halves exchanged (the
// high half becomes $lo of PACK_TWO_INT32, the low half becomes $hi).
def : Pat<
    (int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
    (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
                    (GET_LO_INT64 Int64Regs:$src))>;
2380
2381// Funnel shift, requires >= sm_32.  Does not trap if amt is out of range, so
2382// no side effects.
let hasSideEffects = false in {
  // shf.l.wrap.b32 with an immediate amount.
  def SHF_L_WRAP_B32_IMM : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
      "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",
      []>,
    Requires<[hasHWROT32]>;

  // shf.l.wrap.b32 with a register amount.
  def SHF_L_WRAP_B32_REG : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
      "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",
      []>,
    Requires<[hasHWROT32]>;

  // shf.r.wrap.b32 with an immediate amount.
  def SHF_R_WRAP_B32_IMM : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
      "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",
      []>,
    Requires<[hasHWROT32]>;

  // shf.r.wrap.b32 with a register amount.
  def SHF_R_WRAP_B32_REG : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
      "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",
      []>,
    Requires<[hasHWROT32]>;
}
2408
// HW version of rotate 64
//
// With hasHWROT32, a 64-bit rotate is built from two 32-bit funnel shifts over
// the halves of $src, recombined with PACK_TWO_INT32 (first operand is $lo,
// second is $hi).

// Rotate left, immediate amount:
//   $lo = SHF_L_WRAP(lo=HI(src), hi=LO(src), amt)
//   $hi = SHF_L_WRAP(lo=LO(src), hi=HI(src), amt)
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), imm:$amt),
            (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
      Requires<[hasHWROT32]>;

// Rotate left, register amount (same operand arrangement as above).
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
            (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                               (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;


// Rotate right, immediate amount:
//   $lo = SHF_R_WRAP(lo=LO(src), hi=HI(src), amt)
//   $hi = SHF_R_WRAP(lo=HI(src), hi=LO(src), amt)
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), imm:$amt),
            (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
      Requires<[hasHWROT32]>;

// Rotate right, register amount (same operand arrangement as above).
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
            (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                               (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;
2442
// SW version of rotate 64
//
// Used when noHWROT32 holds. The immediate forms hand ROT64imm_sw the source
// plus two immediate amounts: the requested amount and its complement. For a
// 64-bit rotate the complement must be taken with respect to 64, so both
// immediate forms use SUB_FRM_64 (SUB_FRM_32 is only appropriate for the
// 32-bit ROT32imm_sw pattern). The left and right rotates differ only in
// which of the two operand slots receives the complemented amount.

// Rotate left, immediate amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
      Requires<[noHWROT32]>;
// Rotate left, register amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
// Rotate right, immediate amount: same instruction with the two amounts
// swapped.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;
// Rotate right, register amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
2456
2457
2458//-----------------------------------
2459// Texture Intrinsics
2460//-----------------------------------
2461
2462// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2463// also defined in NVPTXReplaceImageHandles.cpp
2464
2465// texmode_independent
2466let IsTex = true, IsTexModeUnified = false in {
2467// Texture fetch instructions using handles
// 1D texture fetches returning four f32 components ($r, $g, $b, $a).
// $t and $s are 64-bit operands printed first inside the bracketed address
// (presumably the texture and sampler handles); $x is the coordinate.
// None of these defs has a selection pattern.

// s32 coordinate.
def TEX_1D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
    "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
    []>;
// f32 coordinate.
def TEX_1D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
    "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
    []>;
// f32 coordinate with an extra $lod operand (tex.level).
def TEX_1D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x\\}], $lod;",
    []>;
// f32 coordinate with extra $gradx/$grady operands (tex.grad).
def TEX_1D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
// 1D texture fetches returning four s32 components ($r, $g, $b, $a) in
// Int32Regs. Operand layout matches the other TEX_1D_* defs: $t, $s, then the
// coordinate $x. None of these defs has a selection pattern.

// s32 coordinate.
def TEX_1D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
    "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
    []>;
// f32 coordinate.
def TEX_1D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
    "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
    []>;
// f32 coordinate with an extra $lod operand (tex.level).
def TEX_1D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x\\}], $lod;",
    []>;
// f32 coordinate with extra $gradx/$grady operands (tex.grad).
def TEX_1D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
// 1D texture fetches returning four u32 components ($r, $g, $b, $a) in
// Int32Regs. Operand layout matches the other TEX_1D_* defs: $t, $s, then the
// coordinate $x. None of these defs has a selection pattern.

// s32 coordinate.
def TEX_1D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
    "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
    []>;
// f32 coordinate.
def TEX_1D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
    "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
    []>;
// f32 coordinate with an extra $lod operand (tex.level).
def TEX_1D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x\\}], $lod;",
    []>;
// f32 coordinate with extra $gradx/$grady operands (tex.grad).
def TEX_1D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
2551
// Prints "tex.a1d.v4.f32.s32": 1D-array fetch with an Int32Regs coordinate.
// Results $r/$g/$b/$a are Float32Regs; $t and $s are Int64Regs.
// $l (Int32Regs, presumably the array layer index) is printed before $x
// inside the coordinate vector {$l, $x}.
def TEX_1D_ARRAY_F32_S32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
              "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}];",
              []>;
// Prints "tex.a1d.v4.f32.f32": 1D-array fetch with a Float32Regs coordinate.
// Results are Float32Regs; $t and $s are Int64Regs. $l stays Int32Regs even
// though $x is Float32Regs; both are printed in the vector {$l, $x}.
def TEX_1D_ARRAY_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
              "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}];",
              []>;
// Prints "tex.level.a1d.v4.f32.f32": 1D-array fetch with an extra $lod.
// Results are Float32Regs; $t and $s are Int64Regs; $l is Int32Regs; $x and
// $lod are Float32Regs. $lod is printed bare after the [...] operand.
def TEX_1D_ARRAY_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$lod),
              "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}], $lod;",
              []>;
// Prints "tex.grad.a1d.v4.f32.f32": 1D-array fetch with explicit gradients.
// Results are Float32Regs; $t and $s are Int64Regs; $l is Int32Regs; $x,
// $gradx and $grady are Float32Regs. Each gradient is a one-element vector.
def TEX_1D_ARRAY_F32_F32_GRAD
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;
// Prints "tex.a1d.v4.s32.s32": 1D-array fetch with an Int32Regs coordinate.
// Results $r/$g/$b/$a are Int32Regs; $t and $s are Int64Regs; $l and $x are
// Int32Regs and are printed as the vector {$l, $x}.
def TEX_1D_ARRAY_S32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
              "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}];",
              []>;
// Prints "tex.a1d.v4.s32.f32": 1D-array fetch with a Float32Regs coordinate.
// Results are Int32Regs; $t and $s are Int64Regs; $l is Int32Regs and $x is
// Float32Regs, printed together as {$l, $x}.
def TEX_1D_ARRAY_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
              "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}];",
              []>;
// Prints "tex.level.a1d.v4.s32.f32": 1D-array fetch with an extra $lod.
// Results are Int32Regs; $t and $s are Int64Regs; $l is Int32Regs; $x and
// $lod are Float32Regs. $lod is printed bare after the [...] operand.
def TEX_1D_ARRAY_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$lod),
              "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}], $lod;",
              []>;
// Prints "tex.grad.a1d.v4.s32.f32": 1D-array fetch with explicit gradients.
// Results are Int32Regs; $t and $s are Int64Regs; $l is Int32Regs; $x,
// $gradx and $grady are Float32Regs. Each gradient is a one-element vector.
def TEX_1D_ARRAY_S32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;
// Prints "tex.a1d.v4.u32.s32": 1D-array fetch with an Int32Regs coordinate.
// Results $r/$g/$b/$a are Int32Regs; $t and $s are Int64Regs; $l and $x are
// Int32Regs and are printed as the vector {$l, $x}.
def TEX_1D_ARRAY_U32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
              "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}];",
              []>;
// Prints "tex.a1d.v4.u32.f32": 1D-array fetch with a Float32Regs coordinate.
// Results are Int32Regs; $t and $s are Int64Regs; $l is Int32Regs and $x is
// Float32Regs, printed together as {$l, $x}.
def TEX_1D_ARRAY_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
              "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}];",
              []>;
// Prints "tex.level.a1d.v4.u32.f32": 1D-array fetch with an extra $lod.
// Results are Int32Regs; $t and $s are Int64Regs; $l is Int32Regs; $x and
// $lod are Float32Regs. $lod is printed bare after the [...] operand.
def TEX_1D_ARRAY_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$lod),
              "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}], $lod;",
              []>;
// Prints "tex.grad.a1d.v4.u32.f32": 1D-array fetch with explicit gradients.
// Results are Int32Regs; $t and $s are Int64Regs; $l is Int32Regs; $x,
// $gradx and $grady are Float32Regs. Each gradient is a one-element vector.
def TEX_1D_ARRAY_U32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;
2642
// Prints "tex.2d.v4.f32.s32": 2D fetch with Int32Regs coordinates $x, $y.
// Results $r/$g/$b/$a are Float32Regs; $t and $s are Int64Regs and precede
// the coordinate vector {$x, $y} inside the [...] operand.
def TEX_2D_F32_S32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
              "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// Prints "tex.2d.v4.f32.f32": 2D fetch with Float32Regs coordinates $x, $y.
// Results are Float32Regs; $t and $s are Int64Regs.
def TEX_2D_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// Prints "tex.level.2d.v4.f32.f32": 2D fetch with an extra $lod operand.
// Results are Float32Regs; $t and $s are Int64Regs; $x, $y and $lod are
// Float32Regs. $lod is printed bare after the [...] operand.
def TEX_2D_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$lod),
              "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}], $lod;",
              []>;
// Prints "tex.grad.2d.v4.f32.f32": 2D fetch with explicit gradients.
// Results are Float32Regs; $t and $s are Int64Regs; all remaining inputs are
// Float32Regs. The gradients are printed as two-element vectors
// {$gradx0, $gradx1} and {$grady0, $grady1} after the [...] operand.
def TEX_2D_F32_F32_GRAD
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
              "\\{$grady0, $grady1\\};",
              []>;
// Prints "tex.2d.v4.s32.s32": 2D fetch with Int32Regs coordinates $x, $y.
// Results $r/$g/$b/$a are Int32Regs; $t and $s are Int64Regs.
def TEX_2D_S32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
              "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// Prints "tex.2d.v4.s32.f32": 2D fetch with Float32Regs coordinates $x, $y.
// Results are Int32Regs; $t and $s are Int64Regs.
def TEX_2D_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// Prints "tex.level.2d.v4.s32.f32": 2D fetch with an extra $lod operand.
// Results are Int32Regs; $t and $s are Int64Regs; $x, $y and $lod are
// Float32Regs. $lod is printed bare after the [...] operand.
def TEX_2D_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$lod),
              "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}], $lod;",
              []>;
// Prints "tex.grad.2d.v4.s32.f32": 2D fetch with explicit gradients.
// Results are Int32Regs; $t and $s are Int64Regs; all remaining inputs are
// Float32Regs. The gradients are printed as two-element vectors
// {$gradx0, $gradx1} and {$grady0, $grady1} after the [...] operand.
def TEX_2D_S32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
              "\\{$grady0, $grady1\\};",
              []>;
// Prints "tex.2d.v4.u32.s32": 2D fetch with Int32Regs coordinates $x, $y.
// Results $r/$g/$b/$a are Int32Regs; $t and $s are Int64Regs.
def TEX_2D_U32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
              "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// Prints "tex.2d.v4.u32.f32": 2D fetch with Float32Regs coordinates $x, $y.
// Results are Int32Regs; $t and $s are Int64Regs.
def TEX_2D_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// Prints "tex.level.2d.v4.u32.f32": 2D fetch with an extra $lod operand.
// Results are Int32Regs; $t and $s are Int64Regs; $x, $y and $lod are
// Float32Regs. $lod is printed bare after the [...] operand.
def TEX_2D_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$lod),
              "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}], $lod;",
              []>;
// Prints "tex.grad.2d.v4.u32.f32": 2D fetch with explicit gradients.
// Results are Int32Regs; $t and $s are Int64Regs; all remaining inputs are
// Float32Regs. The gradients are printed as two-element vectors
// {$gradx0, $gradx1} and {$grady0, $grady1} after the [...] operand.
def TEX_2D_U32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
              "\\{$grady0, $grady1\\};",
              []>;
2739
// Prints "tex.a2d.v4.f32.s32": 2D-array fetch with Int32Regs coordinates.
// Results are Float32Regs; $t and $s are Int64Regs; $l, $x, $y are Int32Regs.
// The coordinate vector is printed as {$l, $x, $y, $y}: $y appears twice so
// the vector has four elements (presumably padding the a2d form expects --
// confirm against the PTX ISA).
def TEX_2D_ARRAY_F32_S32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                   Int32Regs:$y),
              "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}];",
              []>;
// Prints "tex.a2d.v4.f32.f32": 2D-array fetch with Float32Regs coordinates.
// Results are Float32Regs; $t and $s are Int64Regs; $l is Int32Regs.
// The coordinate vector is printed as {$l, $x, $y, $y}: $y appears twice so
// the vector has four elements (presumably padding -- confirm).
def TEX_2D_ARRAY_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y),
              "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}];",
              []>;
// Prints "tex.level.a2d.v4.f32.f32": 2D-array fetch with an extra $lod.
// Results are Float32Regs; $t and $s are Int64Regs; $l is Int32Regs; $x, $y
// and $lod are Float32Regs. The coordinate vector {$l, $x, $y, $y} repeats
// $y to reach four elements (presumably padding -- confirm).
def TEX_2D_ARRAY_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y, Float32Regs:$lod),
              "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
              []>;
// Prints "tex.grad.a2d.v4.f32.f32": 2D-array fetch with explicit gradients.
// Results are Float32Regs; $t and $s are Int64Regs; $l is Int32Regs; the
// other inputs are Float32Regs. The coordinate vector {$l, $x, $y, $y}
// repeats $y (presumably padding -- confirm); the gradients are printed as
// two-element vectors {$gradx0, $gradx1} and {$grady0, $grady1}.
def TEX_2D_ARRAY_F32_F32_GRAD
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
              "\\{$grady0, $grady1\\};",
              []>;
// Prints "tex.a2d.v4.s32.s32": 2D-array fetch with Int32Regs coordinates.
// Results are Int32Regs; $t and $s are Int64Regs; $l, $x, $y are Int32Regs.
// The coordinate vector {$l, $x, $y, $y} repeats $y to reach four elements
// (presumably padding -- confirm).
def TEX_2D_ARRAY_S32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                   Int32Regs:$y),
              "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}];",
              []>;
// Prints "tex.a2d.v4.s32.f32": 2D-array fetch with Float32Regs coordinates.
// Results are Int32Regs; $t and $s are Int64Regs; $l is Int32Regs.
// The coordinate vector {$l, $x, $y, $y} repeats $y to reach four elements
// (presumably padding -- confirm).
def TEX_2D_ARRAY_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y),
              "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}];",
              []>;
// Prints "tex.level.a2d.v4.s32.f32": 2D-array fetch with an extra $lod.
// Results are Int32Regs; $t and $s are Int64Regs; $l is Int32Regs; $x, $y
// and $lod are Float32Regs. The coordinate vector {$l, $x, $y, $y} repeats
// $y to reach four elements (presumably padding -- confirm).
def TEX_2D_ARRAY_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y, Float32Regs:$lod),
              "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
              []>;
// Prints "tex.grad.a2d.v4.s32.f32": 2D-array fetch with explicit gradients.
// Results are Int32Regs; $t and $s are Int64Regs; $l is Int32Regs; the other
// inputs are Float32Regs. The coordinate vector {$l, $x, $y, $y} repeats $y
// (presumably padding -- confirm); the gradients are printed as two-element
// vectors {$gradx0, $gradx1} and {$grady0, $grady1}.
def TEX_2D_ARRAY_S32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
              "\\{$grady0, $grady1\\};",
              []>;
// Prints "tex.a2d.v4.u32.s32": 2D-array fetch with Int32Regs coordinates.
// Results are Int32Regs; $t and $s are Int64Regs; $l, $x, $y are Int32Regs.
// The coordinate vector {$l, $x, $y, $y} repeats $y to reach four elements
// (presumably padding -- confirm).
def TEX_2D_ARRAY_U32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                   Int32Regs:$y),
              "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}];",
              []>;
// Prints "tex.a2d.v4.u32.f32": 2D-array fetch with Float32Regs coordinates.
// Results are Int32Regs; $t and $s are Int64Regs; $l is Int32Regs.
// The coordinate vector {$l, $x, $y, $y} repeats $y to reach four elements
// (presumably padding -- confirm).
def TEX_2D_ARRAY_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y),
              "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}];",
              []>;
// Prints "tex.level.a2d.v4.u32.f32": 2D-array fetch with an extra $lod.
// Results are Int32Regs; $t and $s are Int64Regs; $l is Int32Regs; $x, $y
// and $lod are Float32Regs. The coordinate vector {$l, $x, $y, $y} repeats
// $y to reach four elements (presumably padding -- confirm).
def TEX_2D_ARRAY_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y, Float32Regs:$lod),
              "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
              []>;
// Prints "tex.grad.a2d.v4.u32.f32": 2D-array fetch with explicit gradients.
// Results are Int32Regs; $t and $s are Int64Regs; $l is Int32Regs; the other
// inputs are Float32Regs. The coordinate vector {$l, $x, $y, $y} repeats $y
// (presumably padding -- confirm); the gradients are printed as two-element
// vectors {$gradx0, $gradx1} and {$grady0, $grady1}.
def TEX_2D_ARRAY_U32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
              "\\{$grady0, $grady1\\};",
              []>;
2844
// Prints "tex.3d.v4.f32.s32": 3D fetch with Int32Regs coordinates.
// Results are Float32Regs; $t and $s are Int64Regs.
// The coordinate vector is printed as {$x, $y, $z, $z}: $z appears twice so
// the vector has four elements (presumably padding the 3d form expects --
// confirm against the PTX ISA).
def TEX_3D_F32_S32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$z),
              "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}];",
              []>;
// Prints "tex.3d.v4.f32.f32": 3D fetch with Float32Regs coordinates.
// Results are Float32Regs; $t and $s are Int64Regs.
// The coordinate vector {$x, $y, $z, $z} repeats $z to reach four elements
// (presumably padding -- confirm).
def TEX_3D_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z),
              "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}];",
              []>;
// Prints "tex.level.3d.v4.f32.f32": 3D fetch with an extra $lod operand.
// Results are Float32Regs; $t and $s are Int64Regs; $x, $y, $z and $lod are
// Float32Regs. The coordinate vector {$x, $y, $z, $z} repeats $z to reach
// four elements (presumably padding -- confirm).
def TEX_3D_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z, Float32Regs:$lod),
              "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
// Prints "tex.grad.3d.v4.f32.f32": 3D fetch with explicit gradients.
// Results are Float32Regs; $t and $s are Int64Regs; all remaining inputs are
// Float32Regs. Three components are supplied for the coordinate and for each
// gradient, but every vector is printed with four elements by repeating the
// last one ($z, $gradx2, $grady2) -- presumably padding; confirm.
def TEX_3D_F32_F32_GRAD
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$gradx2, Float32Regs:$grady0,
                   Float32Regs:$grady1, Float32Regs:$grady2),
              "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}], "
              "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
              "\\{$grady0, $grady1, $grady2, $grady2\\};",
              []>;
// Prints "tex.3d.v4.s32.s32": 3D fetch with Int32Regs coordinates.
// Results are Int32Regs; $t and $s are Int64Regs.
// The coordinate vector {$x, $y, $z, $z} repeats $z to reach four elements
// (presumably padding -- confirm).
def TEX_3D_S32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$z),
              "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}];",
              []>;
// Prints "tex.3d.v4.s32.f32": 3D fetch with Float32Regs coordinates.
// Results are Int32Regs; $t and $s are Int64Regs.
// The coordinate vector {$x, $y, $z, $z} repeats $z to reach four elements
// (presumably padding -- confirm).
def TEX_3D_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z),
              "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}];",
              []>;
// Prints "tex.level.3d.v4.s32.f32": 3D fetch with an extra $lod operand.
// Results are Int32Regs; $t and $s are Int64Regs; $x, $y, $z and $lod are
// Float32Regs. The coordinate vector {$x, $y, $z, $z} repeats $z to reach
// four elements (presumably padding -- confirm).
def TEX_3D_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z, Float32Regs:$lod),
              "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
// Prints "tex.grad.3d.v4.s32.f32": 3D fetch with explicit gradients.
// Results are Int32Regs; $t and $s are Int64Regs; all remaining inputs are
// Float32Regs. Three components are supplied for the coordinate and for each
// gradient, but every vector is printed with four elements by repeating the
// last one ($z, $gradx2, $grady2) -- presumably padding; confirm.
def TEX_3D_S32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$gradx2, Float32Regs:$grady0,
                   Float32Regs:$grady1, Float32Regs:$grady2),
              "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}], "
              "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
              "\\{$grady0, $grady1, $grady2, $grady2\\};",
              []>;
// Prints "tex.3d.v4.u32.s32": 3D fetch with Int32Regs coordinates.
// Results are Int32Regs; $t and $s are Int64Regs.
// The coordinate vector {$x, $y, $z, $z} repeats $z to reach four elements
// (presumably padding -- confirm).
def TEX_3D_U32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$z),
              "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}];",
              []>;
// Prints "tex.3d.v4.u32.f32": 3D fetch with Float32Regs coordinates.
// Results are Int32Regs; $t and $s are Int64Regs.
// The coordinate vector {$x, $y, $z, $z} repeats $z to reach four elements
// (presumably padding -- confirm).
def TEX_3D_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z),
              "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}];",
              []>;
// Prints "tex.level.3d.v4.u32.f32": 3D fetch with an extra $lod operand.
// Results are Int32Regs; $t and $s are Int64Regs; $x, $y, $z and $lod are
// Float32Regs. The coordinate vector {$x, $y, $z, $z} repeats $z to reach
// four elements (presumably padding -- confirm).
def TEX_3D_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z, Float32Regs:$lod),
              "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
// Prints "tex.grad.3d.v4.u32.f32": 3D fetch with explicit gradients.
// Results are Int32Regs; $t and $s are Int64Regs; all remaining inputs are
// Float32Regs. Three components are supplied for the coordinate and for each
// gradient, but every vector is printed with four elements by repeating the
// last one ($z, $gradx2, $grady2) -- presumably padding; confirm.
def TEX_3D_U32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$gradx2, Float32Regs:$grady0,
                   Float32Regs:$grady1, Float32Regs:$grady2),
              "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}], "
              "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
              "\\{$grady0, $grady1, $grady2, $grady2\\};",
              []>;
2956
// Prints "tex.cube.v4.f32.f32": cube fetch with Float32Regs coordinates.
// Results are Float32Regs; $t and $s are Int64Regs.
// The coordinate vector {$x, $y, $z, $z} repeats $z to reach four elements
// (presumably padding the cube form expects -- confirm against the PTX ISA).
def TEX_CUBE_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s,
               Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}];",
              []>;
// Prints "tex.level.cube.v4.f32.f32": cube fetch with an extra $lod operand.
// Results are Float32Regs; $t and $s are Int64Regs; $x, $y, $z and $lod are
// Float32Regs. The coordinate vector {$x, $y, $z, $z} repeats $z to reach
// four elements (presumably padding -- confirm).
def TEX_CUBE_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
// Prints "tex.cube.v4.s32.f32": cube fetch with Float32Regs coordinates.
// Results are Int32Regs; $t and $s are Int64Regs.
// The coordinate vector {$x, $y, $z, $z} repeats $z to reach four elements
// (presumably padding -- confirm).
def TEX_CUBE_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}];",
              []>;
// Prints "tex.level.cube.v4.s32.f32": cube fetch with an extra $lod operand.
// Results are Int32Regs; $t and $s are Int64Regs; $x, $y, $z and $lod are
// Float32Regs. The coordinate vector {$x, $y, $z, $z} repeats $z to reach
// four elements (presumably padding -- confirm).
def TEX_CUBE_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
// Prints "tex.cube.v4.u32.f32": cube fetch with Float32Regs coordinates.
// Results are Int32Regs; $t and $s are Int64Regs.
// The coordinate vector {$x, $y, $z, $z} repeats $z to reach four elements
// (presumably padding -- confirm).
def TEX_CUBE_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}];",
              []>;
// Prints "tex.level.cube.v4.u32.f32": cube fetch with an extra $lod operand.
// Results are Int32Regs; $t and $s are Int64Regs; $x, $y, $z and $lod are
// Float32Regs. The coordinate vector {$x, $y, $z, $z} repeats $z to reach
// four elements (presumably padding -- confirm).
def TEX_CUBE_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
3008
// Prints "tex.acube.v4.f32.f32": cube-array fetch, Float32Regs coordinates.
// Results are Float32Regs; $t and $s are Int64Regs; $l is Int32Regs
// (presumably the array layer index). The coordinate vector {$l, $x, $y, $z}
// already has four distinct elements, so nothing is repeated here.
def TEX_CUBE_ARRAY_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
               Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $z\\}];",
              []>;
// Prints "tex.level.acube.v4.f32.f32": cube-array fetch with an extra $lod.
// Results are Float32Regs; $t and $s are Int64Regs; $l is Int32Regs; $x, $y,
// $z and $lod are Float32Regs. The coordinate vector is {$l, $x, $y, $z} and
// $lod is printed bare after the [...] operand.
def TEX_CUBE_ARRAY_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
              []>;
// Prints "tex.acube.v4.s32.f32": cube-array fetch, Float32Regs coordinates.
// Results are Int32Regs; $t and $s are Int64Regs; $l is Int32Regs.
// The coordinate vector is printed as {$l, $x, $y, $z}.
def TEX_CUBE_ARRAY_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $z\\}];",
              []>;
// Prints "tex.level.acube.v4.s32.f32": cube-array fetch with an extra $lod.
// Results are Int32Regs; $t and $s are Int64Regs; $l is Int32Regs; $x, $y,
// $z and $lod are Float32Regs. The coordinate vector is {$l, $x, $y, $z} and
// $lod is printed bare after the [...] operand.
def TEX_CUBE_ARRAY_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
              []>;
// Prints "tex.acube.v4.u32.f32": cube-array fetch, Float32Regs coordinates.
// Results are Int32Regs; $t and $s are Int64Regs; $l is Int32Regs.
// The coordinate vector is printed as {$l, $x, $y, $z}.
def TEX_CUBE_ARRAY_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $z\\}];",
              []>;
// Prints "tex.level.acube.v4.u32.f32": cube-array fetch with an extra $lod.
// Results are Int32Regs; $t and $s are Int64Regs; $l is Int32Regs; $x, $y,
// $z and $lod are Float32Regs. The coordinate vector is {$l, $x, $y, $z} and
// $lod is printed bare after the [...] operand.
def TEX_CUBE_ARRAY_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
              []>;
3060
// Prints "tld4.r.2d.v4.f32.f32" (the ".r" variant of the 2D tld4 mnemonic).
// Results $v0..$v3 are Float32Regs; $t and $s are Int64Regs; coordinates
// $x and $y are Float32Regs, printed as {$x, $y} inside the [...] operand.
def TLD4_R_2D_F32_F32
  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
                    Float32Regs:$v2, Float32Regs:$v3),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// Prints "tld4.g.2d.v4.f32.f32" (the ".g" variant of the 2D tld4 mnemonic).
// Results $v0..$v3 are Float32Regs; $t and $s are Int64Regs; coordinates
// $x and $y are Float32Regs, printed as {$x, $y} inside the [...] operand.
def TLD4_G_2D_F32_F32
  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
                    Float32Regs:$v2, Float32Regs:$v3),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// Prints "tld4.b.2d.v4.f32.f32" (the ".b" variant of the 2D tld4 mnemonic).
// Results $v0..$v3 are Float32Regs; $t and $s are Int64Regs; coordinates
// $x and $y are Float32Regs, printed as {$x, $y} inside the [...] operand.
def TLD4_B_2D_F32_F32
  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
                    Float32Regs:$v2, Float32Regs:$v3),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// Prints "tld4.a.2d.v4.f32.f32" (the ".a" variant of the 2D tld4 mnemonic).
// Results $v0..$v3 are Float32Regs; $t and $s are Int64Regs; coordinates
// $x and $y are Float32Regs, printed as {$x, $y} inside the [...] operand.
def TLD4_A_2D_F32_F32
  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
                    Float32Regs:$v2, Float32Regs:$v3),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// Prints "tld4.r.2d.v4.s32.f32" (the ".r" variant of the 2D tld4 mnemonic).
// Results $v0..$v3 are Int32Regs; $t and $s are Int64Regs; coordinates
// $x and $y are Float32Regs, printed as {$x, $y} inside the [...] operand.
def TLD4_R_2D_S32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// Prints "tld4.g.2d.v4.s32.f32" (the ".g" variant of the 2D tld4 mnemonic).
// Results $v0..$v3 are Int32Regs; $t and $s are Int64Regs; coordinates
// $x and $y are Float32Regs, printed as {$x, $y} inside the [...] operand.
def TLD4_G_2D_S32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// Prints "tld4.b.2d.v4.s32.f32" (the ".b" variant of the 2D tld4 mnemonic).
// Results $v0..$v3 are Int32Regs; $t and $s are Int64Regs; coordinates
// $x and $y are Float32Regs, printed as {$x, $y} inside the [...] operand.
def TLD4_B_2D_S32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// Prints "tld4.a.2d.v4.s32.f32" (the ".a" variant of the 2D tld4 mnemonic).
// Results $v0..$v3 are Int32Regs; $t and $s are Int64Regs; coordinates
// $x and $y are Float32Regs, printed as {$x, $y} inside the [...] operand.
def TLD4_A_2D_S32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// Prints "tld4.r.2d.v4.u32.f32" (the ".r" variant of the 2D tld4 mnemonic).
// Results $v0..$v3 are Int32Regs; $t and $s are Int64Regs; coordinates
// $x and $y are Float32Regs, printed as {$x, $y} inside the [...] operand.
def TLD4_R_2D_U32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// Prints "tld4.g.2d.v4.u32.f32" (the ".g" variant of the 2D tld4 mnemonic).
// Results $v0..$v3 are Int32Regs; $t and $s are Int64Regs; coordinates
// $x and $y are Float32Regs, printed as {$x, $y} inside the [...] operand.
def TLD4_G_2D_U32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// Prints "tld4.b.2d.v4.u32.f32" (the ".b" variant of the 2D tld4 mnemonic).
// Results $v0..$v3 are Int32Regs; $t and $s are Int64Regs; coordinates
// $x and $y are Float32Regs, printed as {$x, $y} inside the [...] operand.
def TLD4_B_2D_U32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// Prints "tld4.a.2d.v4.u32.f32" (the ".a" variant of the 2D tld4 mnemonic).
// Results $v0..$v3 are Int32Regs; $t and $s are Int64Regs; coordinates
// $x and $y are Float32Regs, printed as {$x, $y} inside the [...] operand.
def TLD4_A_2D_U32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
3145}
3146
3147
3148// texmode_unified
3149let IsTex = true, IsTexModeUnified = true in {
3150// Texture fetch instructions using handles
// Unified-mode variant (defined under IsTexModeUnified = true).
// Prints "tex.1d.v4.f32.s32" with the operand [$t, {$x}]: only the Int64Regs
// $t is passed, and there is no $s operand. Results $r/$g/$b/$a are
// Float32Regs; the coordinate $x is Int32Regs.
def TEX_UNIFIED_1D_F32_S32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$x),
              "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
              []>;
// Unified-mode variant (defined under IsTexModeUnified = true).
// Prints "tex.1d.v4.f32.f32" with the operand [$t, {$x}] and no $s operand.
// Results are Float32Regs; $t is Int64Regs; the coordinate $x is Float32Regs.
def TEX_UNIFIED_1D_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x),
              "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
              []>;
// Unified-mode variant (defined under IsTexModeUnified = true).
// Prints "tex.level.1d.v4.f32.f32" with the operand [$t, {$x}] followed by a
// bare $lod; there is no $s operand. Results are Float32Regs; $t is
// Int64Regs; $x and $lod are Float32Regs.
def TEX_UNIFIED_1D_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
              "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x\\}], $lod;",
              []>;
// Unified-mode variant (defined under IsTexModeUnified = true).
// Prints "tex.grad.1d.v4.f32.f32" with the operand [$t, {$x}] followed by the
// one-element vectors {$gradx} and {$grady}; there is no $s operand.
// Results are Float32Regs; $t is Int64Regs; other inputs are Float32Regs.
def TEX_UNIFIED_1D_F32_F32_GRAD
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;
// Unified-mode variant (defined under IsTexModeUnified = true).
// Prints "tex.1d.v4.s32.s32" with the operand [$t, {$x}] and no $s operand.
// Results are Int32Regs; $t is Int64Regs; the coordinate $x is Int32Regs.
def TEX_UNIFIED_1D_S32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$x),
              "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
              []>;
3184def TEX_UNIFIED_1D_S32_F32
3185  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3186                    Int32Regs:$b, Int32Regs:$a),
3187              (ins Int64Regs:$t, Float32Regs:$x),
3188              "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3189              []>;
3190def TEX_UNIFIED_1D_S32_F32_LEVEL
3191  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3192                    Int32Regs:$b, Int32Regs:$a),
3193              (ins Int64Regs:$t, Float32Regs:$x,
3194                   Float32Regs:$lod),
3195              "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3196              "[$t, \\{$x\\}], $lod;",
3197              []>;
3198def TEX_UNIFIED_1D_S32_F32_GRAD
3199  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3200                    Int32Regs:$b, Int32Regs:$a),
3201              (ins Int64Regs:$t, Float32Regs:$x,
3202                   Float32Regs:$gradx, Float32Regs:$grady),
3203              "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3204              "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
3205              []>;
3206def TEX_UNIFIED_1D_U32_S32
3207  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3208                    Int32Regs:$b, Int32Regs:$a),
3209              (ins Int64Regs:$t, Int32Regs:$x),
3210              "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3211              []>;
3212def TEX_UNIFIED_1D_U32_F32
3213  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3214                    Int32Regs:$b, Int32Regs:$a),
3215              (ins Int64Regs:$t, Float32Regs:$x),
3216              "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3217              []>;
3218def TEX_UNIFIED_1D_U32_F32_LEVEL
3219  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3220                    Int32Regs:$b, Int32Regs:$a),
3221              (ins Int64Regs:$t, Float32Regs:$x,
3222                   Float32Regs:$lod),
3223              "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3224              "[$t, \\{$x\\}], $lod;",
3225              []>;
3226def TEX_UNIFIED_1D_U32_F32_GRAD
3227  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3228                    Int32Regs:$b, Int32Regs:$a),
3229              (ins Int64Regs:$t, Float32Regs:$x,
3230                   Float32Regs:$gradx, Float32Regs:$grady),
3231              "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3232              "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
3233              []>;
3234
// Unified-mode 1D array texture fetches.
//
// Same shape as the 1D fetches, with an extra s32 operand $l printed ahead
// of the coordinate inside the address vector: [$t, {$l, $x}]. The helper
// classes carry the shared operand lists and asm tail; each def passes only
// its PTX mnemonic and the register classes that vary.

// Plain fetch; the coordinate register class is a parameter (s32 or f32).
class TEX_UNIFIED_1D_ARRAY_base<string inst, RegisterClass outtype,
                                RegisterClass intype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Int32Regs:$l, intype:$x),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$l, $x\\}];",
              []>;

// Fetch with an explicit f32 $lod operand printed after the address.
class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$lod),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$l, $x\\}], $lod;",
              []>;

// Fetch with two f32 gradient operands, each printed as a 1-element vector.
class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;

def TEX_UNIFIED_1D_ARRAY_F32_S32
  : TEX_UNIFIED_1D_ARRAY_base<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
def TEX_UNIFIED_1D_ARRAY_F32_F32
  : TEX_UNIFIED_1D_ARRAY_base<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL_base<"tex.level.a1d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD_base<"tex.grad.a1d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_1D_ARRAY_S32_S32
  : TEX_UNIFIED_1D_ARRAY_base<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_1D_ARRAY_S32_F32
  : TEX_UNIFIED_1D_ARRAY_base<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL_base<"tex.level.a1d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD_base<"tex.grad.a1d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_1D_ARRAY_U32_S32
  : TEX_UNIFIED_1D_ARRAY_base<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_1D_ARRAY_U32_F32
  : TEX_UNIFIED_1D_ARRAY_base<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL_base<"tex.level.a1d.v4.u32.f32", Int32Regs>;
def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD_base<"tex.grad.a1d.v4.u32.f32", Int32Regs>;
3325
// Unified-mode 2D texture fetches.
//
// Each instruction writes four 32-bit results ($r, $g, $b, $a) and reads a
// 64-bit handle $t followed by two coordinates ($x, $y). The helper classes
// carry the shared operand lists and asm tail; each def passes only its PTX
// mnemonic and the register classes that vary.

// Plain fetch; both coordinates share one register class (s32 or f32).
class TEX_UNIFIED_2D_base<string inst, RegisterClass outtype,
                          RegisterClass intype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, intype:$x, intype:$y),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$x, $y\\}];",
              []>;

// Fetch with an explicit f32 $lod operand printed after the address.
class TEX_UNIFIED_2D_LEVEL_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$lod),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$x, $y\\}], $lod;",
              []>;

// Fetch with two 2-element f32 gradient vectors printed after the address.
class TEX_UNIFIED_2D_GRAD_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
                     "\\{$grady0, $grady1\\};",
              []>;

def TEX_UNIFIED_2D_F32_S32
  : TEX_UNIFIED_2D_base<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
def TEX_UNIFIED_2D_F32_F32
  : TEX_UNIFIED_2D_base<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
def TEX_UNIFIED_2D_F32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL_base<"tex.level.2d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_2D_F32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD_base<"tex.grad.2d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_2D_S32_S32
  : TEX_UNIFIED_2D_base<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_2D_S32_F32
  : TEX_UNIFIED_2D_base<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_2D_S32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL_base<"tex.level.2d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_2D_S32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD_base<"tex.grad.2d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_2D_U32_S32
  : TEX_UNIFIED_2D_base<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_2D_U32_F32
  : TEX_UNIFIED_2D_base<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_2D_U32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL_base<"tex.level.2d.v4.u32.f32", Int32Regs>;
def TEX_UNIFIED_2D_U32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD_base<"tex.grad.2d.v4.u32.f32", Int32Regs>;
3422
// Unified-mode 2D array texture fetches.
//
// Operands are a 64-bit handle $t, an s32 operand $l and two coordinates
// ($x, $y). The address vector is printed with four elements,
// {$l, $x, $y, $y}: $y is deliberately repeated to fill the last slot.
// NOTE(review): the PTX ISA describes the fourth element of the a2d
// coordinate vector as ignored -- confirm against the `tex` documentation.
//
// The helper classes carry the shared operand lists and asm tail; each def
// passes only its PTX mnemonic and the register classes that vary.

// Plain fetch; both coordinates share one register class (s32 or f32).
class TEX_UNIFIED_2D_ARRAY_base<string inst, RegisterClass outtype,
                                RegisterClass intype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Int32Regs:$l, intype:$x, intype:$y),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$l, $x, $y, $y\\}];",
              []>;

// Fetch with an explicit f32 $lod operand printed after the address.
class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y, Float32Regs:$lod),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$l, $x, $y, $y\\}], $lod;",
              []>;

// Fetch with two 2-element f32 gradient vectors printed after the address.
class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
                     "\\{$grady0, $grady1\\};",
              []>;

def TEX_UNIFIED_2D_ARRAY_F32_S32
  : TEX_UNIFIED_2D_ARRAY_base<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
def TEX_UNIFIED_2D_ARRAY_F32_F32
  : TEX_UNIFIED_2D_ARRAY_base<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL_base<"tex.level.a2d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD_base<"tex.grad.a2d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_2D_ARRAY_S32_S32
  : TEX_UNIFIED_2D_ARRAY_base<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_2D_ARRAY_S32_F32
  : TEX_UNIFIED_2D_ARRAY_base<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL_base<"tex.level.a2d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD_base<"tex.grad.a2d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_2D_ARRAY_U32_S32
  : TEX_UNIFIED_2D_ARRAY_base<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_2D_ARRAY_U32_F32
  : TEX_UNIFIED_2D_ARRAY_base<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL_base<"tex.level.a2d.v4.u32.f32", Int32Regs>;
def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD_base<"tex.grad.a2d.v4.u32.f32", Int32Regs>;
3527
// Unified-mode 3D texture fetches.
//
// Operands are a 64-bit handle $t and three coordinates ($x, $y, $z). The
// coordinate vector -- and, for the GRAD forms, each gradient vector -- is
// printed with four elements, the last register being repeated
// ({$x, $y, $z, $z}).
// NOTE(review): the PTX ISA describes the fourth element of these vectors as
// ignored -- confirm against the `tex` documentation.
//
// The helper classes carry the shared operand lists and asm tail; each def
// passes only its PTX mnemonic and the register classes that vary.

// Plain fetch; all coordinates share one register class (s32 or f32).
class TEX_UNIFIED_3D_base<string inst, RegisterClass outtype,
                          RegisterClass intype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, intype:$x, intype:$y, intype:$z),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$x, $y, $z, $z\\}];",
              []>;

// Fetch with an explicit f32 $lod operand printed after the address.
class TEX_UNIFIED_3D_LEVEL_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z, Float32Regs:$lod),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$x, $y, $z, $z\\}], $lod;",
              []>;

// Fetch with two 3-component f32 gradients, each printed as a 4-element
// vector with the third component repeated.
class TEX_UNIFIED_3D_GRAD_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$gradx2, Float32Regs:$grady0,
                   Float32Regs:$grady1, Float32Regs:$grady2),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$x, $y, $z, $z\\}], "
                     "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
                     "\\{$grady0, $grady1, $grady2, $grady2\\};",
              []>;

def TEX_UNIFIED_3D_F32_S32
  : TEX_UNIFIED_3D_base<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
def TEX_UNIFIED_3D_F32_F32
  : TEX_UNIFIED_3D_base<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
def TEX_UNIFIED_3D_F32_F32_LEVEL
  : TEX_UNIFIED_3D_LEVEL_base<"tex.level.3d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_3D_F32_F32_GRAD
  : TEX_UNIFIED_3D_GRAD_base<"tex.grad.3d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_3D_S32_S32
  : TEX_UNIFIED_3D_base<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_3D_S32_F32
  : TEX_UNIFIED_3D_base<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_3D_S32_F32_LEVEL
  : TEX_UNIFIED_3D_LEVEL_base<"tex.level.3d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_3D_S32_F32_GRAD
  : TEX_UNIFIED_3D_GRAD_base<"tex.grad.3d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_3D_U32_S32
  : TEX_UNIFIED_3D_base<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_3D_U32_F32
  : TEX_UNIFIED_3D_base<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_3D_U32_F32_LEVEL
  : TEX_UNIFIED_3D_LEVEL_base<"tex.level.3d.v4.u32.f32", Int32Regs>;
def TEX_UNIFIED_3D_U32_F32_GRAD
  : TEX_UNIFIED_3D_GRAD_base<"tex.grad.3d.v4.u32.f32", Int32Regs>;
3639
// Unified-mode cube texture fetches.
//
// Operands are a 64-bit handle $t and three f32 coordinates ($x, $y, $z);
// only f32 coordinates are defined for this geometry. The coordinate vector
// is printed with four elements, {$x, $y, $z, $z}, repeating $z in the last
// slot.
// NOTE(review): the PTX ISA describes the fourth element as ignored --
// confirm against the `tex` documentation.
//
// The helper classes carry the shared operand lists and asm tail; each def
// passes only its PTX mnemonic and the result register class.

// Plain fetch.
class TEX_UNIFIED_CUBE_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$x, $y, $z, $z\\}];",
              []>;

// Fetch with an explicit f32 $lod operand printed after the address.
class TEX_UNIFIED_CUBE_LEVEL_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$x, $y, $z, $z\\}], $lod;",
              []>;

def TEX_UNIFIED_CUBE_F32_F32
  : TEX_UNIFIED_CUBE_base<"tex.cube.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_CUBE_F32_F32_LEVEL
  : TEX_UNIFIED_CUBE_LEVEL_base<"tex.level.cube.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_CUBE_S32_F32
  : TEX_UNIFIED_CUBE_base<"tex.cube.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_CUBE_S32_F32_LEVEL
  : TEX_UNIFIED_CUBE_LEVEL_base<"tex.level.cube.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_CUBE_U32_F32
  : TEX_UNIFIED_CUBE_base<"tex.cube.v4.u32.f32", Int32Regs>;
def TEX_UNIFIED_CUBE_U32_F32_LEVEL
  : TEX_UNIFIED_CUBE_LEVEL_base<"tex.level.cube.v4.u32.f32", Int32Regs>;
3691
// Unified-mode cube array texture fetches.
//
// Operands are a 64-bit handle $t, an s32 operand $l and three f32
// coordinates ($x, $y, $z), printed as the 4-element address vector
// {$l, $x, $y, $z}. Only f32 coordinates are defined for this geometry.
//
// The helper classes carry the shared operand lists and asm tail; each def
// passes only its PTX mnemonic and the result register class.

// Plain fetch.
class TEX_UNIFIED_CUBE_ARRAY_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$l, $x, $y, $z\\}];",
              []>;

// Fetch with an explicit f32 $lod operand printed after the address.
class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$l, $x, $y, $z\\}], $lod;",
              []>;

def TEX_UNIFIED_CUBE_ARRAY_F32_F32
  : TEX_UNIFIED_CUBE_ARRAY_base<"tex.acube.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
  : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<"tex.level.acube.v4.f32.f32",
                                      Float32Regs>;
def TEX_UNIFIED_CUBE_ARRAY_S32_F32
  : TEX_UNIFIED_CUBE_ARRAY_base<"tex.acube.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
  : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<"tex.level.acube.v4.s32.f32",
                                      Int32Regs>;
def TEX_UNIFIED_CUBE_ARRAY_U32_F32
  : TEX_UNIFIED_CUBE_ARRAY_base<"tex.acube.v4.u32.f32", Int32Regs>;
def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
  : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<"tex.level.acube.v4.u32.f32",
                                      Int32Regs>;
3743
3744def TLD4_UNIFIED_R_2D_F32_F32
3745  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3746                    Float32Regs:$v2, Float32Regs:$v3),
3747              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3748              "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3749              "[$t, \\{$x, $y\\}];",
3750              []>;
3751def TLD4_UNIFIED_G_2D_F32_F32
3752  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3753                    Float32Regs:$v2, Float32Regs:$v3),
3754              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3755              "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3756              "[$t, \\{$x, $y\\}];",
3757              []>;
3758def TLD4_UNIFIED_B_2D_F32_F32
3759  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3760                    Float32Regs:$v2, Float32Regs:$v3),
3761              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3762              "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3763              "[$t, \\{$x, $y\\}];",
3764              []>;
3765def TLD4_UNIFIED_A_2D_F32_F32
3766  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3767                    Float32Regs:$v2, Float32Regs:$v3),
3768              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3769              "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3770              "[$t, \\{$x, $y\\}];",
3771              []>;
3772def TLD4_UNIFIED_R_2D_S32_F32
3773  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3774                    Int32Regs:$v2, Int32Regs:$v3),
3775              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3776              "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3777              "[$t, \\{$x, $y\\}];",
3778              []>;
3779def TLD4_UNIFIED_G_2D_S32_F32
3780  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3781                    Int32Regs:$v2, Int32Regs:$v3),
3782              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3783              "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3784              "[$t, \\{$x, $y\\}];",
3785              []>;
3786def TLD4_UNIFIED_B_2D_S32_F32
3787  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3788                    Int32Regs:$v2, Int32Regs:$v3),
3789              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3790              "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3791              "[$t, \\{$x, $y\\}];",
3792              []>;
3793def TLD4_UNIFIED_A_2D_S32_F32
3794  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3795                    Int32Regs:$v2, Int32Regs:$v3),
3796              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3797              "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3798              "[$t, \\{$x, $y\\}];",
3799              []>;
3800def TLD4_UNIFIED_R_2D_U32_F32
3801  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3802                    Int32Regs:$v2, Int32Regs:$v3),
3803              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3804              "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3805              "[$t, \\{$x, $y\\}];",
3806              []>;
3807def TLD4_UNIFIED_G_2D_U32_F32
3808  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3809                    Int32Regs:$v2, Int32Regs:$v3),
3810              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3811              "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3812              "[$t, \\{$x, $y\\}];",
3813              []>;
// `tld4.b.2d.v4.u32.f32` (the _UNIFIED_ flavor, per the record name).
// Results: four Int32Regs values $v0..$v3. Operands: Int64Regs $t plus the
// Float32Regs coordinate pair ($x, $y). The pattern list is left empty.
def TLD4_UNIFIED_B_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
    []>;
// `tld4.a.2d.v4.u32.f32` (the _UNIFIED_ flavor, per the record name).
// Results: four Int32Regs values $v0..$v3. Operands: Int64Regs $t plus the
// Float32Regs coordinate pair ($x, $y). The pattern list is left empty.
def TLD4_UNIFIED_A_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
    []>;
3828}
3829
3830
3831
3832//=== Surface load instructions
3833// .clamp variant
// Single-result `suld.b.<geom>.b<N>.clamp` records for the 1d, a1d, 2d, a2d
// and 3d geometries. b8 and b16 results live in Int16Regs, b32 in Int32Regs
// and b64 in Int64Regs. $s is the Int64Regs operand printed first inside the
// brackets; the remaining Int32Regs operands form the coordinate vector.
// Every record carries an empty pattern list.
// NOTE(review): IsSuld is spelled `true` here, while the .v2/.v4 groups in
// this file assign 2 and 3 -- presumably `true` simply acts as 1 for this
// field; confirm against the field's declaration.
let IsSuld = true in {
  // 1d: one coordinate, $x.
  def SULD_1D_I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b8.clamp " "\\{$r\\}, [$s, \\{$x\\}];", []>;
  def SULD_1D_I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b16.clamp " "\\{$r\\}, [$s, \\{$x\\}];", []>;
  def SULD_1D_I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r), (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b32.clamp " "\\{$r\\}, [$s, \\{$x\\}];", []>;
  def SULD_1D_I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r), (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b64.clamp " "\\{$r\\}, [$s, \\{$x\\}];", []>;

  // a1d: $l is printed ahead of $x (presumably the array layer -- confirm).
  def SULD_1D_ARRAY_I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b8.clamp " "\\{$r\\}, [$s, \\{$l, $x\\}];", []>;
  def SULD_1D_ARRAY_I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b16.clamp " "\\{$r\\}, [$s, \\{$l, $x\\}];", []>;
  def SULD_1D_ARRAY_I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b32.clamp " "\\{$r\\}, [$s, \\{$l, $x\\}];", []>;
  def SULD_1D_ARRAY_I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b64.clamp " "\\{$r\\}, [$s, \\{$l, $x\\}];", []>;

  // 2d: coordinate pair ($x, $y).
  def SULD_2D_I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b8.clamp " "\\{$r\\}, [$s, \\{$x, $y\\}];", []>;
  def SULD_2D_I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b16.clamp " "\\{$r\\}, [$s, \\{$x, $y\\}];", []>;
  def SULD_2D_I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b32.clamp " "\\{$r\\}, [$s, \\{$x, $y\\}];", []>;
  def SULD_2D_I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b64.clamp " "\\{$r\\}, [$s, \\{$x, $y\\}];", []>;

  // a2d: the coordinate vector is printed with four entries, $y appearing
  // twice (presumably padding to the vector width PTX expects -- confirm).
  def SULD_2D_ARRAY_I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b8.clamp " "\\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;
  def SULD_2D_ARRAY_I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b16.clamp " "\\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;
  def SULD_2D_ARRAY_I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b32.clamp " "\\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;
  def SULD_2D_ARRAY_I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b64.clamp " "\\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;

  // 3d: four printed entries again, this time with $z appearing twice.
  def SULD_3D_I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b8.clamp " "\\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
  def SULD_3D_I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b16.clamp " "\\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
  def SULD_3D_I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b32.clamp " "\\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
  def SULD_3D_I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b64.clamp " "\\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
}
3940
// Two-result (`.v2`) `suld.b.<geom>.v2.b<N>.clamp` records for the 1d, a1d,
// 2d, a2d and 3d geometries; all are tagged IsSuld = 2. The result pair
// ($r, $g) uses Int16Regs for b8/b16, Int32Regs for b32 and Int64Regs for
// b64. Every record carries an empty pattern list.
let IsSuld = 2 in {
  // 1d: one coordinate, $x.
  def SULD_1D_V2I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b8.clamp "
      "\\{$r, $g\\}, [$s, \\{$x\\}];", []>;
  def SULD_1D_V2I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b16.clamp "
      "\\{$r, $g\\}, [$s, \\{$x\\}];", []>;
  def SULD_1D_V2I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b32.clamp "
      "\\{$r, $g\\}, [$s, \\{$x\\}];", []>;
  def SULD_1D_V2I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b64.clamp "
      "\\{$r, $g\\}, [$s, \\{$x\\}];", []>;

  // a1d: $l is printed ahead of $x (presumably the array layer -- confirm).
  def SULD_1D_ARRAY_V2I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b8.clamp "
      "\\{$r, $g\\}, [$s, \\{$l, $x\\}];", []>;
  def SULD_1D_ARRAY_V2I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b16.clamp "
      "\\{$r, $g\\}, [$s, \\{$l, $x\\}];", []>;
  def SULD_1D_ARRAY_V2I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b32.clamp "
      "\\{$r, $g\\}, [$s, \\{$l, $x\\}];", []>;
  def SULD_1D_ARRAY_V2I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b64.clamp "
      "\\{$r, $g\\}, [$s, \\{$l, $x\\}];", []>;

  // 2d: coordinate pair ($x, $y).
  def SULD_2D_V2I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b8.clamp "
      "\\{$r, $g\\}, [$s, \\{$x, $y\\}];", []>;
  def SULD_2D_V2I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b16.clamp "
      "\\{$r, $g\\}, [$s, \\{$x, $y\\}];", []>;
  def SULD_2D_V2I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b32.clamp "
      "\\{$r, $g\\}, [$s, \\{$x, $y\\}];", []>;
  def SULD_2D_V2I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b64.clamp "
      "\\{$r, $g\\}, [$s, \\{$x, $y\\}];", []>;

  // a2d: the coordinate vector is printed with four entries, $y appearing
  // twice (presumably padding to the vector width PTX expects -- confirm).
  def SULD_2D_ARRAY_V2I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b8.clamp "
      "\\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;
  def SULD_2D_ARRAY_V2I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b16.clamp "
      "\\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;
  def SULD_2D_ARRAY_V2I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b32.clamp "
      "\\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;
  def SULD_2D_ARRAY_V2I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b64.clamp "
      "\\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;

  // 3d: four printed entries again, this time with $z appearing twice.
  def SULD_3D_V2I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b8.clamp "
      "\\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
  def SULD_3D_V2I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b16.clamp "
      "\\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
  def SULD_3D_V2I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b32.clamp "
      "\\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
  def SULD_3D_V2I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b64.clamp "
      "\\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
}
4051
// Four-result (`.v4`) `suld.b.<geom>.v4.b<N>.clamp` records for the 1d, a1d,
// 2d, a2d and 3d geometries; all are tagged IsSuld = 3. Only b8, b16 and b32
// are defined in this group. The results ($r, $g, $b, $a) use Int16Regs for
// b8/b16 and Int32Regs for b32. Every record carries an empty pattern list.
let IsSuld = 3 in {
  // 1d: one coordinate, $x.
  def SULD_1D_V4I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v4.b8.clamp "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", []>;
  def SULD_1D_V4I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v4.b16.clamp "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", []>;
  def SULD_1D_V4I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v4.b32.clamp "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", []>;

  // a1d: $l is printed ahead of $x (presumably the array layer -- confirm).
  def SULD_1D_ARRAY_V4I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v4.b8.clamp "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];", []>;
  def SULD_1D_ARRAY_V4I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v4.b16.clamp "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];", []>;
  def SULD_1D_ARRAY_V4I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v4.b32.clamp "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];", []>;

  // 2d: coordinate pair ($x, $y).
  def SULD_2D_V4I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v4.b8.clamp "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", []>;
  def SULD_2D_V4I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v4.b16.clamp "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", []>;
  def SULD_2D_V4I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v4.b32.clamp "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", []>;

  // a2d: the coordinate vector is printed with four entries, $y appearing
  // twice (presumably padding to the vector width PTX expects -- confirm).
  def SULD_2D_ARRAY_V4I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v4.b8.clamp "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;
  def SULD_2D_ARRAY_V4I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v4.b16.clamp "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;
  def SULD_2D_ARRAY_V4I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v4.b32.clamp "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;

  // 3d: four printed entries again, this time with $z appearing twice.
  def SULD_3D_V4I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v4.b8.clamp "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
  def SULD_3D_V4I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v4.b16.clamp "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
  def SULD_3D_V4I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v4.b32.clamp "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
}
4143
4144
4145// .trap variant
// Single-result `suld.b.<geom>.b<N>.trap` records for the 1d, a1d, 2d, a2d
// and 3d geometries. b8 and b16 results live in Int16Regs, b32 in Int32Regs
// and b64 in Int64Regs. $s is the Int64Regs operand printed first inside the
// brackets; the remaining Int32Regs operands form the coordinate vector.
// Every record carries an empty pattern list.
// NOTE(review): IsSuld is spelled `true` here, while the .v2/.v4 groups in
// this file assign 2 and 3 -- presumably `true` simply acts as 1 for this
// field; confirm against the field's declaration.
let IsSuld = true in {
  // 1d: one coordinate, $x.
  def SULD_1D_I8_TRAP : NVPTXInst<
      (outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b8.trap " "\\{$r\\}, [$s, \\{$x\\}];", []>;
  def SULD_1D_I16_TRAP : NVPTXInst<
      (outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b16.trap " "\\{$r\\}, [$s, \\{$x\\}];", []>;
  def SULD_1D_I32_TRAP : NVPTXInst<
      (outs Int32Regs:$r), (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b32.trap " "\\{$r\\}, [$s, \\{$x\\}];", []>;
  def SULD_1D_I64_TRAP : NVPTXInst<
      (outs Int64Regs:$r), (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b64.trap " "\\{$r\\}, [$s, \\{$x\\}];", []>;

  // a1d: $l is printed ahead of $x (presumably the array layer -- confirm).
  def SULD_1D_ARRAY_I8_TRAP : NVPTXInst<
      (outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b8.trap " "\\{$r\\}, [$s, \\{$l, $x\\}];", []>;
  def SULD_1D_ARRAY_I16_TRAP : NVPTXInst<
      (outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b16.trap " "\\{$r\\}, [$s, \\{$l, $x\\}];", []>;
  def SULD_1D_ARRAY_I32_TRAP : NVPTXInst<
      (outs Int32Regs:$r), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b32.trap " "\\{$r\\}, [$s, \\{$l, $x\\}];", []>;
  def SULD_1D_ARRAY_I64_TRAP : NVPTXInst<
      (outs Int64Regs:$r), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b64.trap " "\\{$r\\}, [$s, \\{$l, $x\\}];", []>;

  // 2d: coordinate pair ($x, $y).
  def SULD_2D_I8_TRAP : NVPTXInst<
      (outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b8.trap " "\\{$r\\}, [$s, \\{$x, $y\\}];", []>;
  def SULD_2D_I16_TRAP : NVPTXInst<
      (outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b16.trap " "\\{$r\\}, [$s, \\{$x, $y\\}];", []>;
  def SULD_2D_I32_TRAP : NVPTXInst<
      (outs Int32Regs:$r), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b32.trap " "\\{$r\\}, [$s, \\{$x, $y\\}];", []>;
  def SULD_2D_I64_TRAP : NVPTXInst<
      (outs Int64Regs:$r), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b64.trap " "\\{$r\\}, [$s, \\{$x, $y\\}];", []>;

  // a2d: the coordinate vector is printed with four entries, $y appearing
  // twice (presumably padding to the vector width PTX expects -- confirm).
  def SULD_2D_ARRAY_I8_TRAP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b8.trap " "\\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;
  def SULD_2D_ARRAY_I16_TRAP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b16.trap " "\\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;
  def SULD_2D_ARRAY_I32_TRAP : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b32.trap " "\\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;
  def SULD_2D_ARRAY_I64_TRAP : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b64.trap " "\\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;

  // 3d: four printed entries again, this time with $z appearing twice.
  def SULD_3D_I8_TRAP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b8.trap " "\\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
  def SULD_3D_I16_TRAP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b16.trap " "\\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
  def SULD_3D_I32_TRAP : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b32.trap " "\\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
  def SULD_3D_I64_TRAP : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b64.trap " "\\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
}
4252
// Two-result (`.v2`) `suld.b.<geom>.v2.b<N>.trap` records for the 1d, a1d,
// 2d, a2d and 3d geometries; all are tagged IsSuld = 2. The result pair
// ($r, $g) uses Int16Regs for b8/b16, Int32Regs for b32 and Int64Regs for
// b64. Every record carries an empty pattern list.
let IsSuld = 2 in {
  // 1d: one coordinate, $x.
  def SULD_1D_V2I8_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b8.trap "
      "\\{$r, $g\\}, [$s, \\{$x\\}];", []>;
  def SULD_1D_V2I16_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b16.trap "
      "\\{$r, $g\\}, [$s, \\{$x\\}];", []>;
  def SULD_1D_V2I32_TRAP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b32.trap "
      "\\{$r, $g\\}, [$s, \\{$x\\}];", []>;
  def SULD_1D_V2I64_TRAP : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b64.trap "
      "\\{$r, $g\\}, [$s, \\{$x\\}];", []>;

  // a1d: $l is printed ahead of $x (presumably the array layer -- confirm).
  def SULD_1D_ARRAY_V2I8_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b8.trap "
      "\\{$r, $g\\}, [$s, \\{$l, $x\\}];", []>;
  def SULD_1D_ARRAY_V2I16_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b16.trap "
      "\\{$r, $g\\}, [$s, \\{$l, $x\\}];", []>;
  def SULD_1D_ARRAY_V2I32_TRAP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b32.trap "
      "\\{$r, $g\\}, [$s, \\{$l, $x\\}];", []>;
  def SULD_1D_ARRAY_V2I64_TRAP : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b64.trap "
      "\\{$r, $g\\}, [$s, \\{$l, $x\\}];", []>;

  // 2d: coordinate pair ($x, $y).
  def SULD_2D_V2I8_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b8.trap "
      "\\{$r, $g\\}, [$s, \\{$x, $y\\}];", []>;
  def SULD_2D_V2I16_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b16.trap "
      "\\{$r, $g\\}, [$s, \\{$x, $y\\}];", []>;
  def SULD_2D_V2I32_TRAP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b32.trap "
      "\\{$r, $g\\}, [$s, \\{$x, $y\\}];", []>;
  def SULD_2D_V2I64_TRAP : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b64.trap "
      "\\{$r, $g\\}, [$s, \\{$x, $y\\}];", []>;

  // a2d: the coordinate vector is printed with four entries, $y appearing
  // twice (presumably padding to the vector width PTX expects -- confirm).
  def SULD_2D_ARRAY_V2I8_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b8.trap "
      "\\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;
  def SULD_2D_ARRAY_V2I16_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b16.trap "
      "\\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;
  def SULD_2D_ARRAY_V2I32_TRAP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b32.trap "
      "\\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;
  def SULD_2D_ARRAY_V2I64_TRAP : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b64.trap "
      "\\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;

  // 3d: four printed entries again, this time with $z appearing twice.
  def SULD_3D_V2I8_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b8.trap "
      "\\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
  def SULD_3D_V2I16_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b16.trap "
      "\\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
  def SULD_3D_V2I32_TRAP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b32.trap "
      "\\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
  def SULD_3D_V2I64_TRAP : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b64.trap "
      "\\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
}
4363
// Four-result (`.v4`) `suld.b.<geom>.v4.b<N>.trap` records for the 1d, a1d,
// 2d, a2d and 3d geometries; all are tagged IsSuld = 3. Only b8, b16 and b32
// are defined in this group. The results ($r, $g, $b, $a) use Int16Regs for
// b8/b16 and Int32Regs for b32. Every record carries an empty pattern list.
let IsSuld = 3 in {
  // 1d: one coordinate, $x.
  def SULD_1D_V4I8_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v4.b8.trap "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", []>;
  def SULD_1D_V4I16_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v4.b16.trap "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", []>;
  def SULD_1D_V4I32_TRAP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v4.b32.trap "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", []>;

  // a1d: $l is printed ahead of $x (presumably the array layer -- confirm).
  def SULD_1D_ARRAY_V4I8_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v4.b8.trap "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];", []>;
  def SULD_1D_ARRAY_V4I16_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v4.b16.trap "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];", []>;
  def SULD_1D_ARRAY_V4I32_TRAP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v4.b32.trap "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];", []>;

  // 2d: coordinate pair ($x, $y).
  def SULD_2D_V4I8_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v4.b8.trap "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", []>;
  def SULD_2D_V4I16_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v4.b16.trap "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", []>;
  def SULD_2D_V4I32_TRAP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v4.b32.trap "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", []>;

  // a2d: the coordinate vector is printed with four entries, $y appearing
  // twice (presumably padding to the vector width PTX expects -- confirm).
  def SULD_2D_ARRAY_V4I8_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v4.b8.trap "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;
  def SULD_2D_ARRAY_V4I16_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v4.b16.trap "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;
  def SULD_2D_ARRAY_V4I32_TRAP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v4.b32.trap "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];", []>;

  // 3d: four printed entries again, this time with $z appearing twice.
  def SULD_3D_V4I8_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v4.b8.trap "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
  def SULD_3D_V4I16_TRAP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v4.b16.trap "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
  def SULD_3D_V4I32_TRAP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v4.b32.trap "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];", []>;
}
4455
4456// .zero variant
4457let IsSuld = true in {
// `suld.b.1d.b8.zero`: one Int16Regs result $r, read via Int64Regs $s with a
// single Int32Regs coordinate $x. The pattern list is left empty.
def SULD_1D_I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b8.zero " "\\{$r\\}, [$s, \\{$x\\}];", []>;
// `suld.b.1d.b16.zero`: one Int16Regs result $r, read via Int64Regs $s with a
// single Int32Regs coordinate $x. The pattern list is left empty.
def SULD_1D_I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b16.zero " "\\{$r\\}, [$s, \\{$x\\}];", []>;
// `suld.b.1d.b32.zero`: one Int32Regs result $r, read via Int64Regs $s with a
// single Int32Regs coordinate $x. The pattern list is left empty.
def SULD_1D_I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r), (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b32.zero " "\\{$r\\}, [$s, \\{$x\\}];", []>;
// `suld.b.1d.b64.zero`: one Int64Regs result $r, read via Int64Regs $s with a
// single Int32Regs coordinate $x. The pattern list is left empty.
def SULD_1D_I64_ZERO : NVPTXInst<
    (outs Int64Regs:$r), (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b64.zero " "\\{$r\\}, [$s, \\{$x\\}];", []>;
4478
// `suld.b.a1d.b8.zero`: one Int16Regs result $r. The coordinate vector prints
// $l ahead of $x (presumably the array layer -- confirm). No pattern attached.
def SULD_1D_ARRAY_I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b8.zero " "\\{$r\\}, [$s, \\{$l, $x\\}];", []>;
// `suld.b.a1d.b16.zero`: one Int16Regs result $r. The coordinate vector prints
// $l ahead of $x (presumably the array layer -- confirm). No pattern attached.
def SULD_1D_ARRAY_I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b16.zero " "\\{$r\\}, [$s, \\{$l, $x\\}];", []>;
// `suld.b.a1d.b32.zero`: one Int32Regs result $r. The coordinate vector prints
// $l ahead of $x (presumably the array layer -- confirm). No pattern attached.
def SULD_1D_ARRAY_I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b32.zero " "\\{$r\\}, [$s, \\{$l, $x\\}];", []>;
// `suld.b.a1d.b64.zero`: one Int64Regs result $r. The coordinate vector prints
// $l ahead of $x (presumably the array layer -- confirm). No pattern attached.
def SULD_1D_ARRAY_I64_ZERO : NVPTXInst<
    (outs Int64Regs:$r), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b64.zero " "\\{$r\\}, [$s, \\{$l, $x\\}];", []>;
4499
// `suld.b.2d.b8.zero`: one Int16Regs result $r, read via Int64Regs $s with the
// Int32Regs coordinate pair ($x, $y). The pattern list is left empty.
def SULD_2D_I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b8.zero " "\\{$r\\}, [$s, \\{$x, $y\\}];", []>;
// `suld.b.2d.b16.zero`: one Int16Regs result $r, read via Int64Regs $s with
// the Int32Regs coordinate pair ($x, $y). The pattern list is left empty.
def SULD_2D_I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b16.zero " "\\{$r\\}, [$s, \\{$x, $y\\}];", []>;
// `suld.b.2d.b32.zero`: one Int32Regs result $r, read via Int64Regs $s with
// the Int32Regs coordinate pair ($x, $y). The pattern list is left empty.
def SULD_2D_I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b32.zero " "\\{$r\\}, [$s, \\{$x, $y\\}];", []>;
// `suld.b.2d.b64.zero`: one Int64Regs result $r, read via Int64Regs $s with
// the Int32Regs coordinate pair ($x, $y). The pattern list is left empty.
def SULD_2D_I64_ZERO : NVPTXInst<
    (outs Int64Regs:$r), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b64.zero " "\\{$r\\}, [$s, \\{$x, $y\\}];", []>;
4520
// suld.b.a2d.{b8,b16,b32,b64}.zero, single-result forms.
// Inputs: $s (Int64Regs) followed by $l, $x and $y (Int32Regs). The
// coordinate list is printed with four entries, {$l, $x, $y, $y}: $y is
// emitted twice on purpose in every record of this group.
// NOTE(review): presumably this pads the list to the four-element vector
// form PTX expects for this geometry -- confirm against the PTX ISA.
// The b8 and b16 forms both return their value in Int16Regs.
def SULD_2D_ARRAY_I8_ZERO : NVPTXInst<
  (outs Int16Regs:$r),
  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
  "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
  []>;
def SULD_2D_ARRAY_I16_ZERO : NVPTXInst<
  (outs Int16Regs:$r),
  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
  "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
  []>;
def SULD_2D_ARRAY_I32_ZERO : NVPTXInst<
  (outs Int32Regs:$r),
  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
  "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
  []>;
def SULD_2D_ARRAY_I64_ZERO : NVPTXInst<
  (outs Int64Regs:$r),
  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
  "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
  []>;
4541
// suld.b.3d.{b8,b16,b32,b64}.zero, single-result forms.
// Inputs: $s (Int64Regs) followed by $x, $y and $z (Int32Regs). The
// coordinate list is printed with four entries, {$x, $y, $z, $z}: $z is
// emitted twice on purpose in every record of this group.
// NOTE(review): presumably this pads the list to the four-element vector
// form PTX expects for this geometry -- confirm against the PTX ISA.
// The b8 and b16 forms both return their value in Int16Regs.
def SULD_3D_I8_ZERO : NVPTXInst<
  (outs Int16Regs:$r),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
  "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
  []>;
def SULD_3D_I16_ZERO : NVPTXInst<
  (outs Int16Regs:$r),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
  "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
  []>;
def SULD_3D_I32_ZERO : NVPTXInst<
  (outs Int32Regs:$r),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
  "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
  []>;
def SULD_3D_I64_ZERO : NVPTXInst<
  (outs Int64Regs:$r),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
  "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
  []>;
4562}
4563
// Two-result (.v2) suld.b.*.zero records. Every record in this scope gets
// IsSuld = 2 and returns its values in $r and $g.
// NOTE(review): the meaning of the value 2 is defined where IsSuld is
// consumed, which is outside this section -- confirm there.
// The b8 and b16 forms both use Int16Regs for their results. All pattern
// lists are empty.
let IsSuld = 2 in {

// 1D: coordinate list {$x}.
def SULD_1D_V2I8_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$x),
  "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
  []>;
def SULD_1D_V2I16_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$x),
  "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
  []>;
def SULD_1D_V2I32_ZERO : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$x),
  "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
  []>;
def SULD_1D_V2I64_ZERO : NVPTXInst<
  (outs Int64Regs:$r, Int64Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$x),
  "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
  []>;

// 1D array: coordinate list {$l, $x}.
def SULD_1D_ARRAY_V2I8_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
  "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
  []>;
def SULD_1D_ARRAY_V2I16_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
  "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
  []>;
def SULD_1D_ARRAY_V2I32_ZERO : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
  "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
  []>;
def SULD_1D_ARRAY_V2I64_ZERO : NVPTXInst<
  (outs Int64Regs:$r, Int64Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
  "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
  []>;

// 2D: coordinate list {$x, $y}.
def SULD_2D_V2I8_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
  "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
  []>;
def SULD_2D_V2I16_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
  "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
  []>;
def SULD_2D_V2I32_ZERO : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
  "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
  []>;
def SULD_2D_V2I64_ZERO : NVPTXInst<
  (outs Int64Regs:$r, Int64Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
  "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
  []>;

// 2D array: coordinate list {$l, $x, $y, $y}; $y is printed twice on
// purpose. NOTE(review): presumably padding to a four-element coordinate
// vector -- confirm against the PTX ISA.
def SULD_2D_ARRAY_V2I8_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
  "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
  []>;
def SULD_2D_ARRAY_V2I16_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
  "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
  []>;
def SULD_2D_ARRAY_V2I32_ZERO : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
  "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
  []>;
def SULD_2D_ARRAY_V2I64_ZERO : NVPTXInst<
  (outs Int64Regs:$r, Int64Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
  "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
  []>;

// 3D: coordinate list {$x, $y, $z, $z}; $z is printed twice on purpose.
// NOTE(review): presumably padding to a four-element coordinate vector --
// confirm against the PTX ISA.
def SULD_3D_V2I8_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
  "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
  []>;
def SULD_3D_V2I16_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
  "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
  []>;
def SULD_3D_V2I32_ZERO : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
  "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
  []>;
def SULD_3D_V2I64_ZERO : NVPTXInst<
  (outs Int64Regs:$r, Int64Regs:$g),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
  "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
  []>;

} // IsSuld = 2
4674
// Four-result (.v4) suld.b.*.zero records. Every record in this scope gets
// IsSuld = 3 and returns its values in $r, $g, $b and $a.
// NOTE(review): the meaning of the value 3 is defined where IsSuld is
// consumed, which is outside this section -- confirm there.
// Only b8, b16 and b32 forms are defined here; b8 and b16 both use
// Int16Regs for their results. All pattern lists are empty.
let IsSuld = 3 in {

// 1D: coordinate list {$x}.
def SULD_1D_V4I8_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (ins Int64Regs:$s, Int32Regs:$x),
  "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
  []>;
def SULD_1D_V4I16_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (ins Int64Regs:$s, Int32Regs:$x),
  "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
  []>;
def SULD_1D_V4I32_ZERO : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$s, Int32Regs:$x),
  "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
  []>;

// 1D array: coordinate list {$l, $x}.
def SULD_1D_ARRAY_V4I8_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
  "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
  []>;
def SULD_1D_ARRAY_V4I16_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
  "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
  []>;
def SULD_1D_ARRAY_V4I32_ZERO : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
  "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
  []>;

// 2D: coordinate list {$x, $y}.
def SULD_2D_V4I8_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
  "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
  []>;
def SULD_2D_V4I16_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
  "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
  []>;
def SULD_2D_V4I32_ZERO : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
  "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
  []>;

// 2D array: coordinate list {$l, $x, $y, $y}; $y is printed twice on
// purpose. NOTE(review): presumably padding to a four-element coordinate
// vector -- confirm against the PTX ISA.
def SULD_2D_ARRAY_V4I8_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
  "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
  []>;
def SULD_2D_ARRAY_V4I16_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
  "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
  []>;
def SULD_2D_ARRAY_V4I32_ZERO : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
  "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
  []>;

// 3D: coordinate list {$x, $y, $z, $z}; $z is printed twice on purpose.
// NOTE(review): presumably padding to a four-element coordinate vector --
// confirm against the PTX ISA.
def SULD_3D_V4I8_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
  "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
  []>;
def SULD_3D_V4I16_ZERO : NVPTXInst<
  (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
  "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
  []>;
def SULD_3D_V4I32_ZERO : NVPTXInst<
  (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
  "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
  []>;

} // IsSuld = 3
4766
4767//-----------------------------------
4768// Texture Query Intrinsics
4769//-----------------------------------
4770
// txq.<property>.b32 records. Each one takes a single Int64Regs operand $a
// (printed as [$a]) and produces an Int32Regs result $d. Every record in
// this scope is tagged IsSurfTexQuery = true. The pattern lists are empty;
// selection is done by the separate `Pat` records that reference these
// instructions by name.
let IsSurfTexQuery = true in {

def TXQ_CHANNEL_ORDER : NVPTXInst<
  (outs Int32Regs:$d), (ins Int64Regs:$a),
  "txq.channel_order.b32 \t$d, [$a];",
  []>;

def TXQ_CHANNEL_DATA_TYPE : NVPTXInst<
  (outs Int32Regs:$d), (ins Int64Regs:$a),
  "txq.channel_data_type.b32 \t$d, [$a];",
  []>;

def TXQ_WIDTH : NVPTXInst<
  (outs Int32Regs:$d), (ins Int64Regs:$a),
  "txq.width.b32 \t$d, [$a];",
  []>;

def TXQ_HEIGHT : NVPTXInst<
  (outs Int32Regs:$d), (ins Int64Regs:$a),
  "txq.height.b32 \t$d, [$a];",
  []>;

def TXQ_DEPTH : NVPTXInst<
  (outs Int32Regs:$d), (ins Int64Regs:$a),
  "txq.depth.b32 \t$d, [$a];",
  []>;

def TXQ_ARRAY_SIZE : NVPTXInst<
  (outs Int32Regs:$d), (ins Int64Regs:$a),
  "txq.array_size.b32 \t$d, [$a];",
  []>;

def TXQ_NUM_SAMPLES : NVPTXInst<
  (outs Int32Regs:$d), (ins Int64Regs:$a),
  "txq.num_samples.b32 \t$d, [$a];",
  []>;

def TXQ_NUM_MIPMAP_LEVELS : NVPTXInst<
  (outs Int32Regs:$d), (ins Int64Regs:$a),
  "txq.num_mipmap_levels.b32 \t$d, [$a];",
  []>;

} // IsSurfTexQuery = true
4805
// Selection patterns for the texture-query intrinsics: each
// int_nvvm_txq_<property> node applied to an Int64Regs operand is
// rewritten to the TXQ_<PROPERTY> instruction on that same operand.
def : Pat<
  (int_nvvm_txq_channel_order Int64Regs:$a),
  (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_channel_data_type Int64Regs:$a),
  (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_width Int64Regs:$a),
  (TXQ_WIDTH Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_height Int64Regs:$a),
  (TXQ_HEIGHT Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_depth Int64Regs:$a),
  (TXQ_DEPTH Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_array_size Int64Regs:$a),
  (TXQ_ARRAY_SIZE Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_num_samples Int64Regs:$a),
  (TXQ_NUM_SAMPLES Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
  (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
4822
4823
4824//-----------------------------------
4825// Surface Query Intrinsics
4826//-----------------------------------
4827
// suq.<property>.b32 records. Each one takes a single Int64Regs operand $a
// (printed as [$a]) and produces an Int32Regs result $d. Every record in
// this scope is tagged IsSurfTexQuery = true. The pattern lists are empty;
// selection is done by the separate `Pat` records that reference these
// instructions by name.
let IsSurfTexQuery = true in {

def SUQ_CHANNEL_ORDER : NVPTXInst<
  (outs Int32Regs:$d), (ins Int64Regs:$a),
  "suq.channel_order.b32 \t$d, [$a];",
  []>;

def SUQ_CHANNEL_DATA_TYPE : NVPTXInst<
  (outs Int32Regs:$d), (ins Int64Regs:$a),
  "suq.channel_data_type.b32 \t$d, [$a];",
  []>;

def SUQ_WIDTH : NVPTXInst<
  (outs Int32Regs:$d), (ins Int64Regs:$a),
  "suq.width.b32 \t$d, [$a];",
  []>;

def SUQ_HEIGHT : NVPTXInst<
  (outs Int32Regs:$d), (ins Int64Regs:$a),
  "suq.height.b32 \t$d, [$a];",
  []>;

def SUQ_DEPTH : NVPTXInst<
  (outs Int32Regs:$d), (ins Int64Regs:$a),
  "suq.depth.b32 \t$d, [$a];",
  []>;

def SUQ_ARRAY_SIZE : NVPTXInst<
  (outs Int32Regs:$d), (ins Int64Regs:$a),
  "suq.array_size.b32 \t$d, [$a];",
  []>;

} // IsSurfTexQuery = true
4854
// Selection patterns for the surface-query intrinsics: each
// int_nvvm_suq_<property> node applied to an Int64Regs operand is
// rewritten to the SUQ_<PROPERTY> instruction on that same operand.
def : Pat<
  (int_nvvm_suq_channel_order Int64Regs:$a),
  (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
def : Pat<
  (int_nvvm_suq_channel_data_type Int64Regs:$a),
  (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
def : Pat<
  (int_nvvm_suq_width Int64Regs:$a),
  (SUQ_WIDTH Int64Regs:$a)>;
def : Pat<
  (int_nvvm_suq_height Int64Regs:$a),
  (SUQ_HEIGHT Int64Regs:$a)>;
def : Pat<
  (int_nvvm_suq_depth Int64Regs:$a),
  (SUQ_DEPTH Int64Regs:$a)>;
def : Pat<
  (int_nvvm_suq_array_size Int64Regs:$a),
  (SUQ_ARRAY_SIZE Int64Regs:$a)>;
4867
4868
4869//===- Handle Query -------------------------------------------------------===//
4870
// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
//
// istypep.{samplerref,surfref,texref}: each record takes an Int64Regs
// operand $a (printed bare, without brackets) and sets an Int1Regs result
// $d. Unlike the query instructions, the selection pattern is embedded in
// the record itself and matches the corresponding int_nvvm_istypep_*
// intrinsic.
def ISTYPEP_SAMPLER : NVPTXInst<
  (outs Int1Regs:$d), (ins Int64Regs:$a),
  "istypep.samplerref \t$d, $a;",
  [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;

def ISTYPEP_SURFACE : NVPTXInst<
  (outs Int1Regs:$d), (ins Int64Regs:$a),
  "istypep.surfref \t$d, $a;",
  [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;

def ISTYPEP_TEXTURE : NVPTXInst<
  (outs Int1Regs:$d), (ins Int64Regs:$a),
  "istypep.texref \t$d, $a;",
  [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4884
4885//===- Surface Stores -----------------------------------------------------===//
4886
4887let IsSust = true in {
4888// Unformatted
4889// .clamp variant
// sust.b.1d.{b8,b16,b32,b64}.clamp, single-value forms. No results; inputs
// are $s (Int64Regs), the coordinate $x (Int32Regs) and the value $r, and
// the instruction is printed as "[$s, {$x}], {$r}". The b8 and b16 forms
// both take $r in Int16Regs. All pattern lists are empty.
def SUST_B_1D_B8_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
  []>;
def SUST_B_1D_B16_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
  []>;
def SUST_B_1D_B32_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
  "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
  []>;
def SUST_B_1D_B64_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
  "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
  []>;
// sust.b.1d.v2.{b8,b16,b32,b64}.clamp, two-value forms. No results; the
// stored values are $r and $g, printed as "[$s, {$x}], {$r, $g}". The b8
// and b16 forms both take their values in Int16Regs.
def SUST_B_1D_V2B8_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
  []>;
def SUST_B_1D_V2B16_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
  []>;
def SUST_B_1D_V2B32_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
  "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
  []>;
def SUST_B_1D_V2B64_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
  "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
  []>;
// sust.b.1d.v4.{b8,b16,b32}.clamp, four-value forms. No results; the
// stored values are $r, $g, $b and $a, printed as
// "[$s, {$x}], {$r, $g, $b, $a}". The b8 and b16 forms both take their
// values in Int16Regs.
def SUST_B_1D_V4B8_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
  []>;
def SUST_B_1D_V4B16_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
  []>;
def SUST_B_1D_V4B32_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x,
       Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
  []>;
4948
4949
// sust.b.a1d.{b8,b16,b32,b64}.clamp, single-value forms. No results;
// inputs are $s (Int64Regs), $idx and $x (Int32Regs) and the value $r,
// printed as "[$s, {$idx, $x}], {$r}". The b8 and b16 forms both take $r
// in Int16Regs. All pattern lists are empty.
def SUST_B_1D_ARRAY_B8_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
  "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
  []>;
def SUST_B_1D_ARRAY_B16_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
  "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
  []>;
def SUST_B_1D_ARRAY_B32_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
  "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
  []>;
def SUST_B_1D_ARRAY_B64_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
  "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
  []>;
// sust.b.a1d.v2.{b8,b16,b32,b64}.clamp, two-value forms. No results; the
// stored values are $r and $g, printed as "[$s, {$idx, $x}], {$r, $g}".
// The b8 and b16 forms both take their values in Int16Regs.
def SUST_B_1D_ARRAY_V2B8_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int16Regs:$r, Int16Regs:$g),
  "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
  []>;
def SUST_B_1D_ARRAY_V2B16_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int16Regs:$r, Int16Regs:$g),
  "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
  []>;
def SUST_B_1D_ARRAY_V2B32_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int32Regs:$r, Int32Regs:$g),
  "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
  []>;
def SUST_B_1D_ARRAY_V2B64_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int64Regs:$r, Int64Regs:$g),
  "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
  []>;
// sust.b.a1d.v4.{b8,b16,b32}.clamp, four-value forms. No results; the
// stored values are $r, $g, $b and $a, printed as
// "[$s, {$idx, $x}], {$r, $g, $b, $a}". The b8 and b16 forms both take
// their values in Int16Regs.
def SUST_B_1D_ARRAY_V4B8_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
  []>;
def SUST_B_1D_ARRAY_V4B16_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
  []>;
def SUST_B_1D_ARRAY_V4B32_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
  []>;
5015
5016
// sust.b.2d.{b8,b16,b32,b64}.clamp, single-value forms. No results; inputs
// are $s (Int64Regs), $x and $y (Int32Regs) and the value $r, printed as
// "[$s, {$x, $y}], {$r}". The b8 and b16 forms both take $r in Int16Regs.
// All pattern lists are empty.
def SUST_B_2D_B8_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
  []>;
def SUST_B_2D_B16_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
  []>;
def SUST_B_2D_B32_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
  []>;
def SUST_B_2D_B64_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
  "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
  []>;
// sust.b.2d.v2.{b8,b16,b32,b64}.clamp, two-value forms. No results; the
// stored values are $r and $g, printed as "[$s, {$x, $y}], {$r, $g}".
// The b8 and b16 forms both take their values in Int16Regs.
def SUST_B_2D_V2B8_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g),
  "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
  []>;
def SUST_B_2D_V2B16_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g),
  "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
  []>;
def SUST_B_2D_V2B32_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int32Regs:$r, Int32Regs:$g),
  "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
  []>;
def SUST_B_2D_V2B64_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int64Regs:$r, Int64Regs:$g),
  "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
  []>;
// sust.b.2d.v4.{b8,b16,b32}.clamp, four-value forms. No results; the
// stored values are $r, $g, $b and $a, printed as
// "[$s, {$x, $y}], {$r, $g, $b, $a}". The b8 and b16 forms both take
// their values in Int16Regs.
def SUST_B_2D_V4B8_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
  []>;
def SUST_B_2D_V4B16_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
  []>;
def SUST_B_2D_V4B32_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
  []>;
5082
5083
// sust.b.a2d.{b8,b16,b32,b64}.clamp, single-value forms. No results;
// inputs are $s (Int64Regs), $idx, $x and $y (Int32Regs) and the value $r.
// The coordinate list is printed with four entries, {$idx, $x, $y, $y}:
// $y is emitted twice on purpose in every record of this group.
// NOTE(review): presumably padding to a four-element coordinate vector --
// confirm against the PTX ISA.
// The b8 and b16 forms both take $r in Int16Regs.
def SUST_B_2D_ARRAY_B8_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
  []>;
def SUST_B_2D_ARRAY_B16_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
  []>;
def SUST_B_2D_ARRAY_B32_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
  []>;
def SUST_B_2D_ARRAY_B64_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
  "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
  []>;
// sust.b.a2d.v2.{b8,b16,b32,b64}.clamp, two-value forms. No results; the
// stored values are $r and $g. The coordinate list is printed as
// {$idx, $x, $y, $y}, with $y emitted twice on purpose.
// NOTE(review): presumably padding to a four-element coordinate vector --
// confirm against the PTX ISA.
// The b8 and b16 forms both take their values in Int16Regs.
def SUST_B_2D_ARRAY_V2B8_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g),
  "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
  []>;
def SUST_B_2D_ARRAY_V2B16_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g),
  "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
  []>;
def SUST_B_2D_ARRAY_V2B32_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int32Regs:$r, Int32Regs:$g),
  "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
  []>;
def SUST_B_2D_ARRAY_V2B64_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int64Regs:$r, Int64Regs:$g),
  "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
  []>;
// sust.b.a2d.v4.{b8,b16,b32}.clamp, four-value forms. No results; the
// stored values are $r, $g, $b and $a. The coordinate list is printed as
// {$idx, $x, $y, $y}, with $y emitted twice on purpose.
// NOTE(review): presumably padding to a four-element coordinate vector --
// confirm against the PTX ISA.
// The asm string is written as two adjacent literals (address part, then
// value list), which TableGen concatenates into one string.
def SUST_B_2D_ARRAY_V4B8_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
def SUST_B_2D_ARRAY_V4B16_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
def SUST_B_2D_ARRAY_V4B32_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
5157
5158
// sust.b.3d.{b8,b16,b32,b64}.clamp, single-value forms. No results; inputs
// are $s (Int64Regs), $x, $y and $z (Int32Regs) and the value $r. The
// coordinate list is printed with four entries, {$x, $y, $z, $z}: $z is
// emitted twice on purpose in every record of this group.
// NOTE(review): presumably padding to a four-element coordinate vector --
// confirm against the PTX ISA.
// The b8 and b16 forms both take $r in Int16Regs.
def SUST_B_3D_B8_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
  "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
  []>;
def SUST_B_3D_B16_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
  "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
  []>;
def SUST_B_3D_B32_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r),
  "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
  []>;
def SUST_B_3D_B64_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r),
  "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
  []>;
// sust.b.3d.v2.{b8,b16,b32,b64}.clamp, two-value forms. No results; the
// stored values are $r and $g. The coordinate list is printed as
// {$x, $y, $z, $z}, with $z emitted twice on purpose.
// NOTE(review): presumably padding to a four-element coordinate vector --
// confirm against the PTX ISA.
// The b8 and b16 forms both take their values in Int16Regs.
def SUST_B_3D_V2B8_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int16Regs:$r, Int16Regs:$g),
  "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
  []>;
def SUST_B_3D_V2B16_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int16Regs:$r, Int16Regs:$g),
  "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
  []>;
def SUST_B_3D_V2B32_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int32Regs:$r, Int32Regs:$g),
  "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
  []>;
def SUST_B_3D_V2B64_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int64Regs:$r, Int64Regs:$g),
  "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
  []>;
// sust.b.3d.v4.{b8,b16,b32}.clamp, four-value forms. No results; the
// stored values are $r, $g, $b and $a. The coordinate list is printed as
// {$x, $y, $z, $z}, with $z emitted twice on purpose.
// NOTE(review): presumably padding to a four-element coordinate vector --
// confirm against the PTX ISA.
// The b8 and b16 forms both take their values in Int16Regs.
def SUST_B_3D_V4B8_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
  []>;
def SUST_B_3D_V4B16_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
  []>;
def SUST_B_3D_V4B32_CLAMP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
  []>;
5232
5233
5234// .trap variant
// sust.b.1d.*.trap records.
// Every record has no outputs and an empty pattern list. The operands are the
// surface operand $s (Int64Regs), the single coordinate $x (Int32Regs) and
// one, two or four data operands. b8 and b16 data use Int16Regs, b32 uses
// Int32Regs and b64 uses Int64Regs.

// Scalar forms: one data operand ($r).
def SUST_B_1D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r),
    "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int64Regs:$r),
    "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;

// v2 forms: two data operands ($r, $g).
def SUST_B_1D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V2B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;

// v4 forms: four data operands ($r, $g, $b, $a).
def SUST_B_1D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
5293
5294
// sust.b.a1d.*.trap records.
// Every record has no outputs and an empty pattern list. The operands are the
// surface operand $s (Int64Regs), the Int32Regs operands $idx and $x that are
// printed as the coordinate vector {$idx, $x}, and one, two or four data
// operands. b8 and b16 data use Int16Regs, b32 uses Int32Regs and b64 uses
// Int64Regs.

// Scalar forms: one data operand ($r).
def SUST_B_1D_ARRAY_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_ARRAY_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_ARRAY_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r),
    "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_ARRAY_B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r),
    "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;

// v2 forms: two data operands ($r, $g).
def SUST_B_1D_ARRAY_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_ARRAY_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_ARRAY_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_ARRAY_V2B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;

// v4 forms: four data operands ($r, $g, $b, $a).
def SUST_B_1D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
5360
5361
// sust.b.2d.*.trap records.
// Every record has no outputs and an empty pattern list. The operands are the
// surface operand $s (Int64Regs), the coordinates $x and $y (Int32Regs) and
// one, two or four data operands. b8 and b16 data use Int16Regs, b32 uses
// Int32Regs and b64 uses Int64Regs.

// Scalar forms: one data operand ($r).
def SUST_B_2D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r),
    "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;

// v2 forms: two data operands ($r, $g).
def SUST_B_2D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_V2B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;

// v4 forms: four data operands ($r, $g, $b, $a).
def SUST_B_2D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
5427
5428
// sust.b.a2d.*.trap records.
// Every record has no outputs and an empty pattern list. The operands are the
// surface operand $s (Int64Regs), the Int32Regs operands $idx, $x and $y, and
// one, two or four data operands. b8 and b16 data use Int16Regs, b32 uses
// Int32Regs and b64 uses Int64Regs. The assembly string prints $y twice, so
// the coordinate vector {$idx, $x, $y, $y} has four entries.
// NOTE(review): the repeated $y presumably only pads the vector to the width
// PTX expects for a2d geometry -- confirm against the PTX ISA.

// Scalar forms: one data operand ($r).
def SUST_B_2D_ARRAY_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_ARRAY_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_ARRAY_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_ARRAY_B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r),
    "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;

// v2 forms: two data operands ($r, $g).
def SUST_B_2D_ARRAY_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_2D_ARRAY_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_2D_ARRAY_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_2D_ARRAY_V2B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;

// v4 forms: four data operands ($r, $g, $b, $a).
def SUST_B_2D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
5502
5503
// sust.b.3d.*.trap records.
// Every record has no outputs and an empty pattern list. The operands are the
// surface operand $s (Int64Regs), the coordinates $x, $y, $z (Int32Regs) and
// one, two or four data operands. b8 and b16 data use Int16Regs, b32 uses
// Int32Regs and b64 uses Int64Regs. The assembly string prints $z twice, so
// the coordinate vector has four entries.
// NOTE(review): the repeated $z presumably only pads the vector to the width
// PTX expects for 3d geometry -- confirm against the PTX ISA.

// Scalar forms: one data operand ($r).
def SUST_B_3D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_B_3D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_B_3D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r),
    "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_B_3D_B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r),
    "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;

// v2 forms: two data operands ($r, $g).
def SUST_B_3D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_3D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_3D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_3D_V2B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;

// v4 forms: four data operands ($r, $g, $b, $a).
def SUST_B_3D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_3D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_3D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
5577
5578
5579// .zero variant
// sust.b.1d.*.zero records.
// Every record has no outputs and an empty pattern list. The operands are the
// surface operand $s (Int64Regs), the single coordinate $x (Int32Regs) and
// one, two or four data operands. b8 and b16 data use Int16Regs, b32 uses
// Int32Regs and b64 uses Int64Regs.

// Scalar forms: one data operand ($r).
def SUST_B_1D_B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r),
    "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int64Regs:$r),
    "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
    []>;

// v2 forms: two data operands ($r, $g).
def SUST_B_1D_V2B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V2B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V2B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V2B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;

// v4 forms: four data operands ($r, $g, $b, $a).
def SUST_B_1D_V4B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_V4B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_V4B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
5638
5639
// sust.b.a1d.*.zero records.
// Every record has no outputs and an empty pattern list. The operands are the
// surface operand $s (Int64Regs), the Int32Regs operands $idx and $x that are
// printed as the coordinate vector {$idx, $x}, and one, two or four data
// operands. b8 and b16 data use Int16Regs, b32 uses Int32Regs and b64 uses
// Int64Regs.

// Scalar forms: one data operand ($r).
def SUST_B_1D_ARRAY_B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_ARRAY_B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_ARRAY_B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r),
    "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_ARRAY_B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r),
    "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;

// v2 forms: two data operands ($r, $g).
def SUST_B_1D_ARRAY_V2B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_ARRAY_V2B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_ARRAY_V2B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_ARRAY_V2B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;

// v4 forms: four data operands ($r, $g, $b, $a).
def SUST_B_1D_ARRAY_V4B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_ARRAY_V4B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_ARRAY_V4B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
5705
5706
// sust.b.2d.*.zero records.
// Every record has no outputs and an empty pattern list. The operands are the
// surface operand $s (Int64Regs), the coordinates $x and $y (Int32Regs) and
// one, two or four data operands. b8 and b16 data use Int16Regs, b32 uses
// Int32Regs and b64 uses Int64Regs.

// Scalar forms: one data operand ($r).
def SUST_B_2D_B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r),
    "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;

// v2 forms: two data operands ($r, $g).
def SUST_B_2D_V2B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_V2B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_V2B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_V2B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;

// v4 forms: four data operands ($r, $g, $b, $a).
def SUST_B_2D_V4B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_V4B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_V4B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
5772
5773
// sust.b.a2d.*.zero records.
// Every record has no outputs and an empty pattern list. The operands are the
// surface operand $s (Int64Regs), the Int32Regs operands $idx, $x and $y, and
// one, two or four data operands. b8 and b16 data use Int16Regs, b32 uses
// Int32Regs and b64 uses Int64Regs. The assembly string prints $y twice, so
// the coordinate vector {$idx, $x, $y, $y} has four entries.
// NOTE(review): the repeated $y presumably only pads the vector to the width
// PTX expects for a2d geometry -- confirm against the PTX ISA.

// Scalar forms: one data operand ($r).
def SUST_B_2D_ARRAY_B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_ARRAY_B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_ARRAY_B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_ARRAY_B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r),
    "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;

// v2 forms: two data operands ($r, $g).
def SUST_B_2D_ARRAY_V2B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_2D_ARRAY_V2B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_2D_ARRAY_V2B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_2D_ARRAY_V2B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;

// v4 forms: four data operands ($r, $g, $b, $a).
def SUST_B_2D_ARRAY_V4B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_ARRAY_V4B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_ARRAY_V4B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
5847
5848
// sust.b.3d.*.zero records.
// Every record has no outputs and an empty pattern list. The operands are the
// surface operand $s (Int64Regs), the coordinates $x, $y, $z (Int32Regs) and
// one, two or four data operands. b8 and b16 data use Int16Regs, b32 uses
// Int32Regs and b64 uses Int64Regs. The assembly string prints $z twice, so
// the coordinate vector has four entries.
// NOTE(review): the repeated $z presumably only pads the vector to the width
// PTX expects for 3d geometry -- confirm against the PTX ISA.

// Scalar forms: one data operand ($r).
def SUST_B_3D_B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_B_3D_B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_B_3D_B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r),
    "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_B_3D_B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r),
    "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;

// v2 forms: two data operands ($r, $g).
def SUST_B_3D_V2B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_3D_V2B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_3D_V2B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_3D_V2B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;

// v4 forms: four data operands ($r, $g, $b, $a).
def SUST_B_3D_V4B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_3D_V4B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_3D_V4B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
5922
5923
5924
// Formatted (sust.p) surface stores
5926
// sust.p.1d.*.trap: one record per element width (b8/b16/b32) and per
// arity (scalar, v2, v4).
// $s is the surface operand, $x the single 32-bit coordinate and
// $r/$g/$b/$a the data registers (b8 data is held in Int16Regs).
// The pattern list is empty: these records only carry operands and asm text.
def SUST_P_1D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_P_1D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_P_1D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
    "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_P_1D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_P_1D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_P_1D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_P_1D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_1D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_1D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
5975
5976
// sust.p.a1d.*.trap: one record per element width (b8/b16/b32) and per
// arity (scalar, v2, v4).
// $s is the surface operand; $idx and $x form the coordinate vector, with
// $idx printed first; $r/$g/$b/$a are the data registers (b8 data is held in
// Int16Regs).
// The pattern list is empty: these records only carry operands and asm text.
def SUST_P_1D_ARRAY_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
    "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_P_1D_ARRAY_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
    "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_P_1D_ARRAY_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
    "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_P_1D_ARRAY_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_P_1D_ARRAY_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_P_1D_ARRAY_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_P_1D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_1D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_1D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
6031
6032
// sust.p.2d.*.trap: one record per element width (b8/b16/b32) and per
// arity (scalar, v2, v4).
// $s is the surface operand, $x/$y the 32-bit coordinates and $r/$g/$b/$a
// the data registers (b8 data is held in Int16Regs).
// The pattern list is empty: these records only carry operands and asm text.
def SUST_P_2D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_P_2D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_P_2D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
    "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_P_2D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_P_2D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_P_2D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_P_2D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_2D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_2D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
6087
6088
// sust.p.a2d.*.trap: one record per element width (b8/b16/b32) and per
// arity (scalar, v2, v4).
// $s is the surface operand; $idx, $x and $y form the coordinate vector,
// with $idx printed first; $r/$g/$b/$a are the data registers (b8 data is
// held in Int16Regs).
// The pattern list is empty: these records only carry operands and asm text.
// NOTE(review): $y is printed twice in the coordinate vector.  This looks
// deliberate (presumably the a2d form takes a four-element coordinate tuple
// whose last element is padding) -- confirm against the PTX ISA before
// changing it.
def SUST_P_2D_ARRAY_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_P_2D_ARRAY_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_P_2D_ARRAY_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
    "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_P_2D_ARRAY_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_P_2D_ARRAY_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_P_2D_ARRAY_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_P_2D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_2D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_2D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
6149
6150
// sust.p.3d.*.trap: one record per element width (b8/b16/b32) and per
// arity (scalar, v2, v4).
// $s is the surface operand, $x/$y/$z the 32-bit coordinates and
// $r/$g/$b/$a the data registers (b8 data is held in Int16Regs).
// The pattern list is empty: these records only carry operands and asm text.
// NOTE(review): $z is printed twice in the coordinate vector.  This looks
// deliberate (presumably the 3d form takes a four-element coordinate tuple
// whose last element is padding) -- confirm against the PTX ISA before
// changing it.
def SUST_P_3D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
    "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_P_3D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
    "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_P_3D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r),
    "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_P_3D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_P_3D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_P_3D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_P_3D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_3D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_P_3D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
6211}
6212
// Surface store instruction patterns
// These are written as standalone Pat records rather than being attached to
// the instruction definitions: putting them inline makes TableGen report type
// errors (it is not known why).
6216
6217// .clamp variant
// int_nvvm_sust_b_1d_*_clamp -> SUST_B_1D_*_CLAMP.
// Each intrinsic operand is forwarded unchanged and in the same order:
// $s, $x, then the data registers.  The i8 intrinsics take their data in
// Int16Regs, the same register class as the i16 ones.
def : Pat<
    (int_nvvm_sust_b_1d_i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
    (SUST_B_1D_B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_i64_clamp
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
    (SUST_B_1D_B64_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_V2B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_V2B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
    (SUST_B_1D_V2B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i64_clamp
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
    (SUST_B_1D_V2B64_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v4i8_clamp
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_V4B8_CLAMP
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_1d_v4i16_clamp
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_V4B16_CLAMP
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_1d_v4i32_clamp
        Int64Regs:$s, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (SUST_B_1D_V4B32_CLAMP
        Int64Regs:$s, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6271
6272
6273
// int_nvvm_sust_b_1d_array_*_clamp -> SUST_B_1D_ARRAY_*_CLAMP.
// Each intrinsic operand is forwarded unchanged and in the same order:
// $s, $l, $x, then the data registers.  The i8 intrinsics take their data in
// Int16Regs, the same register class as the i16 ones.
def : Pat<
    (int_nvvm_sust_b_1d_array_i8_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_ARRAY_B8_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_i16_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_ARRAY_B16_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_i32_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
    (SUST_B_1D_ARRAY_B32_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_i64_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
    (SUST_B_1D_ARRAY_B64_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v2i8_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_ARRAY_V2B8_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v2i16_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_ARRAY_V2B16_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v2i32_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
    (SUST_B_1D_ARRAY_V2B32_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v2i64_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
    (SUST_B_1D_ARRAY_V2B64_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v4i8_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_ARRAY_V4B8_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v4i16_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_ARRAY_V4B16_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v4i32_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (SUST_B_1D_ARRAY_V4B32_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6331
6332
6333
// int_nvvm_sust_b_2d_*_clamp -> SUST_B_2D_*_CLAMP.
// Each intrinsic operand is forwarded unchanged and in the same order:
// $s, $x, $y, then the data registers.  The i8 intrinsics take their data in
// Int16Regs, the same register class as the i16 ones.
def : Pat<
    (int_nvvm_sust_b_2d_i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    (SUST_B_2D_B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    (SUST_B_2D_B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
    (SUST_B_2D_B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_i64_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
    (SUST_B_2D_B64_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_v2i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_2D_V2B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_v2i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_2D_V2B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_v2i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
    (SUST_B_2D_V2B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_v2i64_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
    (SUST_B_2D_V2B64_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_v4i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_2D_V4B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_2d_v4i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_2D_V4B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_2d_v4i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (SUST_B_2D_V4B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6391
6392
6393
// int_nvvm_sust_b_2d_array_*_clamp -> SUST_B_2D_ARRAY_*_CLAMP.
// Each intrinsic operand is forwarded unchanged and in the same order:
// $s, $l, $x, $y, then the data registers.  The i8 intrinsics take their
// data in Int16Regs, the same register class as the i16 ones.
def : Pat<
    (int_nvvm_sust_b_2d_array_i8_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    (SUST_B_2D_ARRAY_B8_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_i16_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    (SUST_B_2D_ARRAY_B16_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_i32_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
    (SUST_B_2D_ARRAY_B32_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_i64_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
    (SUST_B_2D_ARRAY_B64_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v2i8_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g),
    (SUST_B_2D_ARRAY_V2B8_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v2i16_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g),
    (SUST_B_2D_ARRAY_V2B16_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v2i32_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g),
    (SUST_B_2D_ARRAY_V2B32_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v2i64_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int64Regs:$r, Int64Regs:$g),
    (SUST_B_2D_ARRAY_V2B64_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v4i8_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_2D_ARRAY_V4B8_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v4i16_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_2D_ARRAY_V4B16_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v4i32_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (SUST_B_2D_ARRAY_V4B32_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6464
6465
6466
// int_nvvm_sust_b_3d_*_clamp -> SUST_B_3D_*_CLAMP.
// Each intrinsic operand is forwarded unchanged and in the same order:
// $s, $x, $y, $z, then the data registers.  The i8 intrinsics take their
// data in Int16Regs, the same register class as the i16 ones.
def : Pat<
    (int_nvvm_sust_b_3d_i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
    (SUST_B_3D_B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_3d_i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
    (SUST_B_3D_B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_3d_i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r),
    (SUST_B_3D_B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_3d_i64_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r),
    (SUST_B_3D_B64_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_3d_v2i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g),
    (SUST_B_3D_V2B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_3d_v2i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g),
    (SUST_B_3D_V2B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_3d_v2i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int32Regs:$r, Int32Regs:$g),
    (SUST_B_3D_V2B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_3d_v2i64_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int64Regs:$r, Int64Regs:$g),
    (SUST_B_3D_V2B64_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_3d_v4i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_3D_V4B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_3d_v4i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_3D_V4B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_3d_v4i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (SUST_B_3D_V4B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6543
6544
6545// .trap variant
// int_nvvm_sust_b_1d_*_trap -> SUST_B_1D_*_TRAP.
// Each intrinsic operand is forwarded unchanged and in the same order:
// $s, $x, then the data registers.  The i8 intrinsics take their data in
// Int16Regs, the same register class as the i16 ones.
def : Pat<
    (int_nvvm_sust_b_1d_i8_trap
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_B8_TRAP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_i16_trap
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_B16_TRAP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_i32_trap
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
    (SUST_B_1D_B32_TRAP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_i64_trap
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
    (SUST_B_1D_B64_TRAP
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i8_trap
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_V2B8_TRAP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i16_trap
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_V2B16_TRAP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i32_trap
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
    (SUST_B_1D_V2B32_TRAP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i64_trap
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
    (SUST_B_1D_V2B64_TRAP
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v4i8_trap
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_V4B8_TRAP
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_1d_v4i16_trap
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_V4B16_TRAP
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_1d_v4i32_trap
        Int64Regs:$s, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (SUST_B_1D_V4B32_TRAP
        Int64Regs:$s, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6599
6600
6601
// int_nvvm_sust_b_1d_array_*_trap -> SUST_B_1D_ARRAY_*_TRAP
// (scalar, v2 and v4i8 forms).
// Each intrinsic operand is forwarded unchanged and in the same order:
// $s, $l, $x, then the data registers.  The i8 intrinsics take their data in
// Int16Regs, the same register class as the i16 ones.
def : Pat<
    (int_nvvm_sust_b_1d_array_i8_trap
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_ARRAY_B8_TRAP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_i16_trap
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_ARRAY_B16_TRAP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_i32_trap
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
    (SUST_B_1D_ARRAY_B32_TRAP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_i64_trap
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
    (SUST_B_1D_ARRAY_B64_TRAP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v2i8_trap
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_ARRAY_V2B8_TRAP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v2i16_trap
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_ARRAY_V2B16_TRAP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v2i32_trap
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
    (SUST_B_1D_ARRAY_V2B32_TRAP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v2i64_trap
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
    (SUST_B_1D_ARRAY_V2B64_TRAP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v4i8_trap
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_ARRAY_V4B8_TRAP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6647
6648def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
6649           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6650           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6651          (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6652           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6653
6654def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
6655           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6656           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6657          (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6658           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6659
6660
6661
// sust.b 2d, .trap.
// Every int_nvvm_sust_b_2d_*_trap intrinsic is selected to the
// SUST_B_2D_*_TRAP instruction of the same shape, with operands
// ($s, $x, $y, data...) forwarded unchanged. 8- and 16-bit data are both
// carried in Int16Regs; 32- and 64-bit data use Int32Regs / Int64Regs.

// Scalar and two-element forms exist for 8/16/32/64-bit data.
foreach sz = ["8", "16", "32", "64"] in {
  defvar DataRC = !if(!eq(sz, "64"), Int64Regs,
                      !if(!eq(sz, "32"), Int32Regs, Int16Regs));

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_i" # sz # "_trap")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, DataRC:$r),
            (!cast<Instruction>("SUST_B_2D_B" # sz # "_TRAP")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, DataRC:$r)>;

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_v2i" # sz # "_trap")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_B_2D_V2B" # sz # "_TRAP")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r, DataRC:$g)>;
}

// Four-element forms exist for 8/16/32-bit data only.
foreach sz = ["8", "16", "32"] in {
  defvar DataRC = !if(!eq(sz, "32"), Int32Regs, Int16Regs);

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_v4i" # sz # "_trap")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_B_2D_V4B" # sz # "_TRAP")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
6719
6720
6721
// sust.b 2d_array, .trap.
// Every int_nvvm_sust_b_2d_array_*_trap intrinsic is selected to the
// SUST_B_2D_ARRAY_*_TRAP instruction of the same shape, with operands
// ($s, $l, $x, $y, data...) forwarded unchanged. 8- and 16-bit data are both
// carried in Int16Regs; 32- and 64-bit data use Int32Regs / Int64Regs.

// Scalar and two-element forms exist for 8/16/32/64-bit data.
foreach sz = ["8", "16", "32", "64"] in {
  defvar DataRC = !if(!eq(sz, "64"), Int64Regs,
                      !if(!eq(sz, "32"), Int32Regs, Int16Regs));

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_i" # sz # "_trap")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r),
            (!cast<Instruction>("SUST_B_2D_ARRAY_B" # sz # "_TRAP")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r)>;

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_v2i" # sz # "_trap")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_B_2D_ARRAY_V2B" # sz # "_TRAP")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r, DataRC:$g)>;
}

// Four-element forms exist for 8/16/32-bit data only.
foreach sz = ["8", "16", "32"] in {
  defvar DataRC = !if(!eq(sz, "32"), Int32Regs, Int16Regs);

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_v4i" # sz # "_trap")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_B_2D_ARRAY_V4B" # sz # "_TRAP")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
6792
6793
6794
// sust.b 3d, .trap.
// Every int_nvvm_sust_b_3d_*_trap intrinsic is selected to the
// SUST_B_3D_*_TRAP instruction of the same shape, with operands
// ($s, $x, $y, $z, data...) forwarded unchanged. 8- and 16-bit data are both
// carried in Int16Regs; 32- and 64-bit data use Int32Regs / Int64Regs.

// Scalar and two-element forms exist for 8/16/32/64-bit data.
foreach sz = ["8", "16", "32", "64"] in {
  defvar DataRC = !if(!eq(sz, "64"), Int64Regs,
                      !if(!eq(sz, "32"), Int32Regs, Int16Regs));

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_i" # sz # "_trap")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             DataRC:$r),
            (!cast<Instruction>("SUST_B_3D_B" # sz # "_TRAP")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             DataRC:$r)>;

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_v2i" # sz # "_trap")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_B_3D_V2B" # sz # "_TRAP")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             DataRC:$r, DataRC:$g)>;
}

// Four-element forms exist for 8/16/32-bit data only.
foreach sz = ["8", "16", "32"] in {
  defvar DataRC = !if(!eq(sz, "32"), Int32Regs, Int16Regs);

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_v4i" # sz # "_trap")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_B_3D_V4B" # sz # "_TRAP")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
6871
6872
6873// .zero variant
// sust.b 1d, .zero.
// Every int_nvvm_sust_b_1d_*_zero intrinsic is selected to the
// SUST_B_1D_*_ZERO instruction of the same shape, with operands
// ($s, $x, data...) forwarded unchanged. 8- and 16-bit data are both
// carried in Int16Regs; 32- and 64-bit data use Int32Regs / Int64Regs.

// Scalar and two-element forms exist for 8/16/32/64-bit data.
foreach sz = ["8", "16", "32", "64"] in {
  defvar DataRC = !if(!eq(sz, "64"), Int64Regs,
                      !if(!eq(sz, "32"), Int32Regs, Int16Regs));

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_i" # sz # "_zero")
             Int64Regs:$s, Int32Regs:$x, DataRC:$r),
            (!cast<Instruction>("SUST_B_1D_B" # sz # "_ZERO")
             Int64Regs:$s, Int32Regs:$x, DataRC:$r)>;

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_v2i" # sz # "_zero")
             Int64Regs:$s, Int32Regs:$x, DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_B_1D_V2B" # sz # "_ZERO")
             Int64Regs:$s, Int32Regs:$x, DataRC:$r, DataRC:$g)>;
}

// Four-element forms exist for 8/16/32-bit data only.
foreach sz = ["8", "16", "32"] in {
  defvar DataRC = !if(!eq(sz, "32"), Int32Regs, Int16Regs);

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_v4i" # sz # "_zero")
             Int64Regs:$s, Int32Regs:$x,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_B_1D_V4B" # sz # "_ZERO")
             Int64Regs:$s, Int32Regs:$x,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
6927
6928
6929
// sust.b 1d_array, .zero.
// Every int_nvvm_sust_b_1d_array_*_zero intrinsic is selected to the
// SUST_B_1D_ARRAY_*_ZERO instruction of the same shape, with operands
// ($s, $l, $x, data...) forwarded unchanged. 8- and 16-bit data are both
// carried in Int16Regs; 32- and 64-bit data use Int32Regs / Int64Regs.

// Scalar and two-element forms exist for 8/16/32/64-bit data.
foreach sz = ["8", "16", "32", "64"] in {
  defvar DataRC = !if(!eq(sz, "64"), Int64Regs,
                      !if(!eq(sz, "32"), Int32Regs, Int16Regs));

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_array_i" # sz # "_zero")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, DataRC:$r),
            (!cast<Instruction>("SUST_B_1D_ARRAY_B" # sz # "_ZERO")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, DataRC:$r)>;

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_array_v2i" # sz # "_zero")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_B_1D_ARRAY_V2B" # sz # "_ZERO")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             DataRC:$r, DataRC:$g)>;
}

// Four-element forms exist for 8/16/32-bit data only.
foreach sz = ["8", "16", "32"] in {
  defvar DataRC = !if(!eq(sz, "32"), Int32Regs, Int16Regs);

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_array_v4i" # sz # "_zero")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_B_1D_ARRAY_V4B" # sz # "_ZERO")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
6987
6988
6989
// sust.b 2d, .zero.
// Every int_nvvm_sust_b_2d_*_zero intrinsic is selected to the
// SUST_B_2D_*_ZERO instruction of the same shape, with operands
// ($s, $x, $y, data...) forwarded unchanged. 8- and 16-bit data are both
// carried in Int16Regs; 32- and 64-bit data use Int32Regs / Int64Regs.

// Scalar and two-element forms exist for 8/16/32/64-bit data.
foreach sz = ["8", "16", "32", "64"] in {
  defvar DataRC = !if(!eq(sz, "64"), Int64Regs,
                      !if(!eq(sz, "32"), Int32Regs, Int16Regs));

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_i" # sz # "_zero")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, DataRC:$r),
            (!cast<Instruction>("SUST_B_2D_B" # sz # "_ZERO")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, DataRC:$r)>;

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_v2i" # sz # "_zero")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_B_2D_V2B" # sz # "_ZERO")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r, DataRC:$g)>;
}

// Four-element forms exist for 8/16/32-bit data only.
foreach sz = ["8", "16", "32"] in {
  defvar DataRC = !if(!eq(sz, "32"), Int32Regs, Int16Regs);

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_v4i" # sz # "_zero")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_B_2D_V4B" # sz # "_ZERO")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
7047
7048
7049
// sust.b 2d_array, .zero.
// Every int_nvvm_sust_b_2d_array_*_zero intrinsic is selected to the
// SUST_B_2D_ARRAY_*_ZERO instruction of the same shape, with operands
// ($s, $l, $x, $y, data...) forwarded unchanged. 8- and 16-bit data are both
// carried in Int16Regs; 32- and 64-bit data use Int32Regs / Int64Regs.

// Scalar and two-element forms exist for 8/16/32/64-bit data.
foreach sz = ["8", "16", "32", "64"] in {
  defvar DataRC = !if(!eq(sz, "64"), Int64Regs,
                      !if(!eq(sz, "32"), Int32Regs, Int16Regs));

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_i" # sz # "_zero")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r),
            (!cast<Instruction>("SUST_B_2D_ARRAY_B" # sz # "_ZERO")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r)>;

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_v2i" # sz # "_zero")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_B_2D_ARRAY_V2B" # sz # "_ZERO")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r, DataRC:$g)>;
}

// Four-element forms exist for 8/16/32-bit data only.
foreach sz = ["8", "16", "32"] in {
  defvar DataRC = !if(!eq(sz, "32"), Int32Regs, Int16Regs);

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_v4i" # sz # "_zero")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_B_2D_ARRAY_V4B" # sz # "_ZERO")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
7120
7121
7122
// sust.b 3d, .zero.
// Every int_nvvm_sust_b_3d_*_zero intrinsic is selected to the
// SUST_B_3D_*_ZERO instruction of the same shape, with operands
// ($s, $x, $y, $z, data...) forwarded unchanged. 8- and 16-bit data are both
// carried in Int16Regs; 32- and 64-bit data use Int32Regs / Int64Regs.

// Scalar and two-element forms exist for 8/16/32/64-bit data.
foreach sz = ["8", "16", "32", "64"] in {
  defvar DataRC = !if(!eq(sz, "64"), Int64Regs,
                      !if(!eq(sz, "32"), Int32Regs, Int16Regs));

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_i" # sz # "_zero")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             DataRC:$r),
            (!cast<Instruction>("SUST_B_3D_B" # sz # "_ZERO")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             DataRC:$r)>;

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_v2i" # sz # "_zero")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_B_3D_V2B" # sz # "_ZERO")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             DataRC:$r, DataRC:$g)>;
}

// Four-element forms exist for 8/16/32-bit data only.
foreach sz = ["8", "16", "32"] in {
  defvar DataRC = !if(!eq(sz, "32"), Int32Regs, Int16Regs);

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_v4i" # sz # "_zero")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_B_3D_V4B" # sz # "_ZERO")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
7199
7200
7201
7202
// sust.p 1d, .trap.
// Every int_nvvm_sust_p_1d_*_trap intrinsic is selected to the
// SUST_P_1D_*_TRAP instruction of the same shape, with operands
// ($s, $x, data...) forwarded unchanged. Scalar, two-element and
// four-element forms all exist for 8/16/32-bit data; 8- and 16-bit data are
// both carried in Int16Regs, 32-bit data in Int32Regs.
foreach sz = ["8", "16", "32"] in {
  defvar DataRC = !if(!eq(sz, "32"), Int32Regs, Int16Regs);

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_i" # sz # "_trap")
             Int64Regs:$s, Int32Regs:$x, DataRC:$r),
            (!cast<Instruction>("SUST_P_1D_B" # sz # "_TRAP")
             Int64Regs:$s, Int32Regs:$x, DataRC:$r)>;

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_v2i" # sz # "_trap")
             Int64Regs:$s, Int32Regs:$x, DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_P_1D_V2B" # sz # "_TRAP")
             Int64Regs:$s, Int32Regs:$x, DataRC:$r, DataRC:$g)>;

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_v4i" # sz # "_trap")
             Int64Regs:$s, Int32Regs:$x,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_P_1D_V4B" # sz # "_TRAP")
             Int64Regs:$s, Int32Regs:$x,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
7247
7248
7249
// sust.p 1d_array, .trap.
// Every int_nvvm_sust_p_1d_array_*_trap intrinsic is selected to the
// SUST_P_1D_ARRAY_*_TRAP instruction of the same shape, with operands
// ($s, $l, $x, data...) forwarded unchanged. Scalar, two-element and
// four-element forms all exist for 8/16/32-bit data; 8- and 16-bit data are
// both carried in Int16Regs, 32-bit data in Int32Regs.
foreach sz = ["8", "16", "32"] in {
  defvar DataRC = !if(!eq(sz, "32"), Int32Regs, Int16Regs);

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_array_i" # sz # "_trap")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, DataRC:$r),
            (!cast<Instruction>("SUST_P_1D_ARRAY_B" # sz # "_TRAP")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, DataRC:$r)>;

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_array_v2i" # sz # "_trap")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_P_1D_ARRAY_V2B" # sz # "_TRAP")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             DataRC:$r, DataRC:$g)>;

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_array_v4i" # sz # "_trap")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_P_1D_ARRAY_V4B" # sz # "_TRAP")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
7297
7298
7299
// int_nvvm_sust_p_2d_*_trap -> SUST_P_2D_*_TRAP.
// Each pattern forwards its operands unchanged: $s (Int64Regs), $x and $y
// (Int32Regs), followed by one, two or four data values ($r, $g, $b, $a).
// Both i8 and i16 data are carried in Int16Regs; i32 data uses Int32Regs.

// One data value.
def : Pat<(int_nvvm_sust_p_2d_i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_P_2D_B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_P_2D_B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_P_2D_B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

// Two data values.
def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

// Four data values.
def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7347
7348
7349
// int_nvvm_sust_p_2d_array_*_trap -> SUST_P_2D_ARRAY_*_TRAP.
// Each pattern forwards its operands unchanged: $s (Int64Regs), $l, $x and $y
// (Int32Regs), followed by one, two or four data values ($r, $g, $b, $a).
// Both i8 and i16 data are carried in Int16Regs; i32 data uses Int32Regs.

// One data value.
def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_P_2D_ARRAY_B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_P_2D_ARRAY_B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_P_2D_ARRAY_B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

// Two data values.
def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_ARRAY_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_ARRAY_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_ARRAY_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

// Four data values.
def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_ARRAY_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_ARRAY_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_ARRAY_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7408
7409
7410
// int_nvvm_sust_p_3d_*_trap -> SUST_P_3D_*_TRAP.
// Each pattern forwards its operands unchanged: $s (Int64Regs), $x, $y and $z
// (Int32Regs), followed by one, two or four data values ($r, $g, $b, $a).
// Both i8 and i16 data are carried in Int16Regs; i32 data uses Int32Regs.

// One data value.
def : Pat<(int_nvvm_sust_p_3d_i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_P_3D_B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_P_3D_B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r),
          (SUST_P_3D_B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r)>;

// Two data values.
def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_P_3D_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g)>;

// Four data values.
def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_3D_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7473
7474//-----------------------------------
7475// Read Special Registers
7476//-----------------------------------
7477
// Instruction that reads a special register into a 64-bit integer register.
//   regname - register name without the leading '%'; it is spliced into
//             "mov.u64 \t$d, %<regname>;".
//   intop   - zero-operand intrinsic that this instruction is selected for.
class PTX_READ_SREG_R64<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int64Regs:$d), (ins),
              "mov.u64 \t$d, %" # regname # ";",
              [(set Int64Regs:$d, (intop))]>;
7482
// Instruction that reads a special register into a 32-bit integer register.
//   regname - register name without the leading '%'; it is spliced into
//             "mov.u32 \t$d, %<regname>;".
//   intop   - zero-operand intrinsic that this instruction is selected for.
class PTX_READ_SREG_R32<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int32Regs:$d), (ins),
              "mov.u32 \t$d, %" # regname # ";",
              [(set Int32Regs:$d, (intop))]>;
7487
// TODO: Add vector versions of the special-register reads.
7489
// Special-register read instructions. Each def pairs the register name that
// is emitted after '%' in the mov instruction with the intrinsic it selects.
// All of them are 32-bit reads except clock64.

// %tid.{x,y,z,w}
def INT_PTX_SREG_TID_X
    : PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
def INT_PTX_SREG_TID_Y
    : PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
def INT_PTX_SREG_TID_Z
    : PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
def INT_PTX_SREG_TID_W
    : PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;

// %ntid.{x,y,z,w}
def INT_PTX_SREG_NTID_X
    : PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
def INT_PTX_SREG_NTID_Y
    : PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
def INT_PTX_SREG_NTID_Z
    : PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
def INT_PTX_SREG_NTID_W
    : PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;

// %laneid, %warpid, %nwarpid
def INT_PTX_SREG_LANEID
    : PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def INT_PTX_SREG_WARPID
    : PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID
    : PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;

// %ctaid.{x,y,z,w}
def INT_PTX_SREG_CTAID_X
    : PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
def INT_PTX_SREG_CTAID_Y
    : PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
def INT_PTX_SREG_CTAID_Z
    : PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
def INT_PTX_SREG_CTAID_W
    : PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;

// %nctaid.{x,y,z,w}
def INT_PTX_SREG_NCTAID_X
    : PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
def INT_PTX_SREG_NCTAID_Y
    : PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
def INT_PTX_SREG_NCTAID_Z
    : PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
def INT_PTX_SREG_NCTAID_W
    : PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;

// %smid, %nsmid, %gridid
def INT_PTX_SREG_SMID
    : PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID
    : PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
def INT_PTX_SREG_GRIDID
    : PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;

// %lanemask_{eq,le,lt,ge,gt}
def INT_PTX_SREG_LANEMASK_EQ
    : PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
def INT_PTX_SREG_LANEMASK_LE
    : PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
def INT_PTX_SREG_LANEMASK_LT
    : PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
def INT_PTX_SREG_LANEMASK_GE
    : PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
def INT_PTX_SREG_LANEMASK_GT
    : PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;

// %clock (32-bit) and %clock64 (64-bit)
def INT_PTX_SREG_CLOCK
    : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64
    : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;

// %pm0 .. %pm3
def INT_PTX_SREG_PM0
    : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1
    : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
def INT_PTX_SREG_PM2
    : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
def INT_PTX_SREG_PM3
    : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
7560
// WARP_SZ is emitted as a bare constant, whereas PTX_READ_SREG_R32 always
// prefixes the name with '%', so the instruction is spelled out by hand.
// TODO: It would be nice to use PTX_READ_SREG_R32 here once it can handle
// the constant.
def INT_PTX_SREG_WARPSIZE
    : NVPTXInst<(outs Int32Regs:$dst), (ins),
                "mov.u32 \t$dst, WARP_SZ;",
                [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
7566
// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
// In addition to the target-independent fields provided by WMMA_REGS (the ones
// used below are geom, frag, ptx_elt_type and regs), it adds the fields
// commonly used to implement a specific PTX instruction -- register types and
// names, constraints, parts of assembly, etc.
//
// Template arguments:
//   r  - the WMMA_REGS record whose geom/frag/ptx_elt_type are forwarded to
//        the base class.
//   op - name of the operation the fragment is used with. Only its equality
//        with "mma" is examined, when selecting Predicates.
class WMMA_REGINFO<WMMA_REGS r, string op>
      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
  // NVPTX register types used to carry fragment data. Apart from
  // f16/f32/f64, every listed element type is carried in Int32Regs.
  NVPTXRegClass regclass = !cond(
    !eq(ptx_elt_type, "f16") : Float16x2Regs,
    !eq(ptx_elt_type, "f32") : Float32Regs,
    !eq(ptx_elt_type, "f64") : Float64Regs,
    !eq(ptx_elt_type, "bf16") : Int32Regs,
    !eq(ptx_elt_type, "tf32") : Int32Regs,
    !eq(ptx_elt_type, "s32") : Int32Regs,
    !eq(ptx_elt_type, "s8") : Int32Regs,
    !eq(ptx_elt_type, "u8") : Int32Regs,
    !eq(ptx_elt_type, "s4") : Int32Regs,
    !eq(ptx_elt_type, "u4") : Int32Regs,
    !eq(ptx_elt_type, "b1") : Int32Regs);

  // Instruction input/output arguments for the fragment: one regclass entry
  // per element of regs.
  list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));

  // List of register names for the fragment -- ["ra0", "ra1",...]
  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;

  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
  string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";

  // Predicates for particular fragment variant. Technically those are
  // per-instruction predicates, but currently all fragments that can be used in
  // a given instruction are subject to the same constraints, so an instruction
  // can use predicates from any of its fragments. If/when this is no
  // longer the case, we can concat all per-fragment predicates to enforce that
  // all fragments of the instruction are viable.
  //
  // !cond yields the value of the first clause whose condition is true, so
  // each geometry/type combination needs to be listed only once.
  list<Predicate> Predicates = !cond(
    // fp16 -> fp16/fp32 @ m16n16k16
    !and(!eq(geom, "m16n16k16"),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],

    // f64 @ m8n8k4
    !and(!eq(geom,"m8n8k4"),
         !eq(ptx_elt_type, "f64")) : [hasSM80, hasPTX70],

    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
    !and(!or(!eq(geom, "m8n32k16"),
             !eq(geom, "m32n8k16")),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],

    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !or(!eq(ptx_elt_type, "u8"),
             !eq(ptx_elt_type, "s8"),
             !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],

    // bf16 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !eq(ptx_elt_type, "bf16")) : [hasSM80, hasPTX70],

    // tf32 and f32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "tf32")) : [hasSM80, hasPTX70],

    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "f32")) : [hasSM80, hasPTX70],

    // b1 -> s32 @ m8n8k128(b1)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k128")) : [hasSM75, hasPTX63],

    // u4/s4 -> s32 @ m8n8k32 (u4/s4)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63],

    !or(!eq(geom,"m16n8k8"),
        !eq(geom,"m8n8k16")) : [hasSM75, hasPTX65],

    // Everything but f64 @ m8n8k4 (f64 is handled by an earlier clause).
    !and(!ne(ptx_elt_type,"f64"),
         !eq(geom, "m8n8k4")) : [hasSM70, hasPTX64],

    // mma m8n8k32 requires higher PTX version
    !and(!eq(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX65],

    // Remaining geometries that are only matched for op == "mma".
    !and(!eq(op,"mma"),
         !or(!eq(geom, "m16n8k16"),
             !eq(geom, "m16n8k4"),
             !eq(geom, "m16n8k32"),
             !eq(geom, "m16n8k64"),
             !eq(geom, "m8n8k128"),
             !eq(geom, "m16n8k128"),
             !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70]);

  // template DAGs for instruction inputs/output: (outs regclass:$<name>, ...)
  // and (ins regclass:$<name>, ...) built from ptx_regs and reg_names.
  dag Outs = !dag(outs, ptx_regs, reg_names);
  dag Ins = !dag(ins, ptx_regs, reg_names);
}
7670
// Convert a dag of instruction arguments into a dag that matches the given
// intrinsic: the 'ins' operator is replaced with Intr, and the operand kinds
// imem, MEMri and MEMri64 are replaced with ADDRvar, ADDRri and ADDRri64.
class BuildPatternI<Intrinsic Intr, dag Ins> {
  // The four substitutions have distinct targets and replacements, so the
  // order in which they are nested does not affect the result.
  dag ret = !foreach(elt, Ins,
                     !subst(ins, Intr,
                     !subst(MEMri, ADDRri,
                     !subst(MEMri64, ADDRri64,
                     !subst(imem, ADDRvar, elt)))));
}
7680
// PatFrag flavour of BuildPatternI: converts a dag of instruction arguments
// into a dag whose operator is the PatFrag Intr, replacing the operand kinds
// imem, MEMri and MEMri64 with ADDRvar, ADDRri and ADDRri64.
class BuildPatternPF<PatFrag Intr, dag Ins> {
  // The four substitutions have distinct targets and replacements, so the
  // order in which they are nested does not affect the result.
  dag ret = !foreach(elt, Ins,
                     !subst(ins, Intr,
                     !subst(MEMri, ADDRri,
                     !subst(MEMri64, ADDRri64,
                     !subst(imem, ADDRvar, elt)))));
}
7690
// Base class for all MMA-related instructions. It is an NVPTXInst created with
// empty operand lists and a "?" asm string, and it carries the fields that the
// pattern-building code reads.
//
// Template arguments:
//   _Intr - name of the intrinsic record the instruction corresponds to.
//   _Args - list of (ins ...) dags that are joined, in order, to form Args.
class WMMA_INSTR<string _Intr, list<dag> _Args>
  : NVPTXInst<(outs), (ins), "?", []> {
  // Intrinsic record looked up by name.
  Intrinsic Intr = !cast<Intrinsic>(_Intr);
  // Single flat (ins ...) dag holding every argument from _Args.
  dag Args = !foldl((ins), _Args, acc, part, !con(acc, part));
  // Pre-built pattern of the form (intrinsic arg0, arg1, ...).
  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
}
7700
7701//
7702// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
7703//
7704
// Fragment load instruction.
//
// Template arguments:
//   Frag       - fragment being loaded; supplies the output registers, the
//                predicates and the frag/geom/type parts of the asm string.
//   Layout     - layout name, emitted verbatim after a '.'.
//   Space      - "", ".global" or ".shared"; emitted verbatim and used to pick
//                the address-space check (anything else falls back to generic).
//   WithStride - when set, an extra Int32Regs:$ldm operand is added.
//   SrcOp      - operand kind used for the $src address.
class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
                DAGOperand SrcOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
               [!con((ins SrcOp:$src),
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {
  // The load/store intrinsics are overloaded on the pointer's address space,
  // so matching the right one requires an address-space-constrained PatFrag.
  // PFOperands mirrors the shape of Args using (ops node:$name, ...);
  // PFOperandsIntr is the same dag with Intr as the operator.
  dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
  dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
  // PatFrag that only matches the requested address space.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             PFOperandsIntr,
                             !if(!eq(Space, ".shared"), AS_match.shared,
                             !if(!eq(Space, ".global"), AS_match.global,
                                 AS_match.generic))>;
  // Replace the default pattern with the address-space-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  // $ptx is an additional trailing input that is not part of Args; the asm
  // string prints it through the 'aligned' modifier.
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = Frag.Outs;
  let AsmString = "wmma.load." # Frag.frag
                  # ".sync${ptx:aligned}." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " \t" # Frag.regstring
                  # ", [$src]"
                  # !if(WithStride, ", $ldm", "")
                  # ";";
}
7740
7741//
7742// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
7743//
// Fragment store instruction.
//
// Template arguments:
//   Frag       - fragment being stored; supplies the data input registers, the
//                predicates and the geom/type parts of the asm string.
//   Layout     - layout name, emitted verbatim after a '.'.
//   Space      - "", ".global" or ".shared"; emitted verbatim and used to pick
//                the address-space check (anything else falls back to generic).
//   WithStride - when set, an extra Int32Regs:$ldm operand is added.
//   DstOp      - operand kind used for the $dst address.
class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
                   bit WithStride, DAGOperand DstOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
               [!con((ins DstOp:$dst),
                     Frag.Ins,
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {

  // The load/store intrinsics are overloaded on the pointer's address space,
  // so matching the right one requires an address-space-constrained PatFrag.
  // PFOperands mirrors the shape of Args using (ops node:$name, ...): the
  // destination, one node per fragment register, then the optional stride.
  dag PFOperands = !con((ops node:$dst),
                        !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
                        !if(WithStride, (ops node:$ldm), (ops)));
  // PatFrag that only matches the requested address space. Its body is
  // PFOperands with the 'ops' operator swapped for Intr.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             !foreach(opnd, PFOperands, !subst(ops, Intr, opnd)),
                             !if(!eq(Space, ".shared"), AS_match.shared,
                             !if(!eq(Space, ".global"), AS_match.global,
                                 AS_match.generic))>;
  // Replace the default pattern with the address-space-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  // A store produces no results. $ptx is an additional trailing input that is
  // not part of Args; the asm string prints it through the 'aligned' modifier.
  let OutOperandList = (outs);
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "wmma.store.d.sync${ptx:aligned}." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " \t[$dst]," # Frag.regstring
                  # !if(WithStride, ", $ldm", "")
                  # ";";
}
7780
// Create all load/store variants and collect them in MMA_LDSTs: one
// instruction for every layout x stride x address space x address operand
// kind x fragment combination accepted by NVVM_WMMA_LDST_SUPPORTED.
defset list<WMMA_INSTR> MMA_LDSTs  = {
  foreach layout = ["row", "col"] in
  foreach stride = [false, true] in
  foreach space = [".global", ".shared", ""] in
  foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
    // Loads first ...
    foreach frag = NVVM_MMA_OPS.all_ld_ops in {
      if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then {
        def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
      }
    } // frag (loads)
    // ... then stores, for the same layout/stride/space/addr.
    foreach frag = NVVM_MMA_OPS.all_st_ops in {
      if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then {
        def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
      }
    } // frag (stores)
  } // layout, stride, space, addr
} // defset
7798
// Computes the predicate list for an MMA instruction. B1 instruction variants
// need extra constraints: ret is FragA's predicate list, followed by hasSM80
// and hasPTX71 when b1op is ".and.popc".
class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  // The template arguments, kept as fields.
  string Op = b1op;
  WMMA_REGINFO Frag = FragA;
  list<Predicate> ret = !listconcat(
    FragA.Predicates,
    !if(!ne(b1op, ".and.popc"), [], [hasSM80,hasPTX71])
  );
}
// WMMA.MMA
//
// Template arguments:
//   FragA, FragB, FragC - input fragments; their registers form the inputs.
//   FragD               - result fragment; its registers form the outputs.
//   ALayout, BLayout    - layout names, emitted verbatim after a '.'.
//   Satfinite           - non-zero appends ".satfinite".
//   rnd                 - rounding suffix; emitted as ".<rnd>" unless empty.
//   b1op                - suffix emitted right after "wmma.mma"; also feeds
//                         MMA_OP_PREDICATES.
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
  : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
                         [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  // $ptx is an additional trailing input that is not part of Args; the asm
  // string prints it through the 'aligned' modifier.
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = FragD.Outs;
  // When A is f16 only the D and C types are spelled out; otherwise all four
  // types are listed in D, A, B, C order.
  string TypeList = !if(!eq(FragA.ptx_elt_type, "f16"),
                        "." # FragD.ptx_elt_type
                        # "." # FragC.ptx_elt_type,
                        "." # FragD.ptx_elt_type
                        # "." # FragA.ptx_elt_type
                        # "." # FragB.ptx_elt_type
                        # "." # FragC.ptx_elt_type);
  let AsmString = "wmma.mma" # b1op
                  # ".sync${ptx:aligned}." # ALayout
                  # "." # BLayout
                  # "." # FragA.geom
                  # !if(!eq(rnd, ""), "", "." # rnd)
                  # TypeList
                  # !if(Satfinite, ".satfinite", "")
                  # "\n\t\t" # FragD.regstring
                  # ",\n\t\t" # FragA.regstring
                  # ",\n\t\t" # FragB.regstring
                  # ",\n\t\t" # FragC.regstring
                  # ";";
}
7842
// Instantiate WMMA_MMA for every layout/satfinite/rounding/op/b1op combination
// accepted by NVVM_WMMA_SUPPORTED and collect the results in WMMAs.
// op[0..3] are passed as the A, B, C and D fragments respectively.
defset list<WMMA_INSTR> WMMAs  = {
  foreach layout_a = ["row", "col"] in
  foreach layout_b = ["row", "col"] in
  foreach satf = [0, 1] in
  foreach rnd = ["", "rn", "rz", "rm", "rp"] in
  foreach op = NVVM_MMA_OPS.all_wmma_ops in
  foreach b1op = NVVM_MMA_B1OPS<op>.ret in
    if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then
      def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
                     WMMA_REGINFO<op[1], "wmma.mma">,
                     WMMA_REGINFO<op[2], "wmma.mma">,
                     WMMA_REGINFO<op[3], "wmma.mma">,
                     layout_a, layout_b, satf, rnd, b1op>;
} // defset
7864
// MMA
//
// Template arguments:
//   FragA, FragB, FragC - input fragments; their registers form the inputs.
//   FragD               - result fragment; its registers form the outputs.
//   ALayout, BLayout    - layout names, emitted verbatim after a '.'.
//   Satfinite           - non-zero appends ".satfinite".
//   b1op                - suffix emitted after the type list; also feeds
//                         MMA_OP_PREDICATES.
class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string b1op>
  : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
                        [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
  Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  // $ptx is an additional trailing input that is not part of Args.
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = FragD.Outs;
  // ".<D>.<A>.<B>.<C>" element types.
  string TypeList = "." # !interleave([FragD.ptx_elt_type,
                                       FragA.ptx_elt_type,
                                       FragB.ptx_elt_type,
                                       FragC.ptx_elt_type], ".");
  let AsmString = "mma.sync.aligned." # FragA.geom
                  # "." # ALayout
                  # "." # BLayout
                  # !if(Satfinite, ".satfinite", "")
                  # TypeList
                  # b1op
                  # "\n\t\t" # FragD.regstring
                  # ",\n\t\t" # FragA.regstring
                  # ",\n\t\t" # FragB.regstring
                  # ",\n\t\t" # FragC.regstring
                  # ";";
}
7892
// Instantiate MMA for every layout/satfinite/op/b1op combination accepted by
// NVVM_MMA_SUPPORTED and collect the results in MMAs.
// op[0..3] are passed as the A, B, C and D fragments respectively.
defset list<WMMA_INSTR> MMAs  = {
  foreach layout_a = ["row", "col"] in
  foreach layout_b = ["row", "col"] in
  foreach satf = [0, 1] in
  foreach op = NVVM_MMA_OPS.all_mma_ops in
  foreach b1op = NVVM_MMA_B1OPS<op>.ret in
    if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then
      def : MMA<WMMA_REGINFO<op[0], "mma">,
                WMMA_REGINFO<op[1], "mma">,
                WMMA_REGINFO<op[2], "mma">,
                WMMA_REGINFO<op[3], "mma">,
                layout_a, layout_b, satf, b1op>;
} // defset
7912
7913
// Selection pattern for one MMA-related instruction: matches
// wi.IntrinsicPattern and produces (wi <wi.Args operands>, ptx.version),
// guarded by wi.Predicates.
//
// Constructing non-flat DAGs is still a pain. A dag node cannot be !subst'ed
// with a dag, so ptx.version has to be appended *after* the foreach has
// replaced 'ins' with the instruction record.
class MMA_PAT<WMMA_INSTR wi>
      : Pat<wi.IntrinsicPattern,
            !con(!foreach(arg, wi.Args, !subst(ins, wi, arg)),
                 (wi ptx.version))>,
        Requires<wi.Predicates>;
7922
// Build intrinsic->instruction patterns for all MMA instructions: one
// MMA_PAT per record collected in MMAs, WMMAs and MMA_LDSTs, in that order.
foreach instr = !listconcat(MMAs, WMMAs, MMA_LDSTs) in {
  def : MMA_PAT<instr>;
}
7926