1//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 0.0f.
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;
13
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 1.0f.
def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;
18
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 0.0.
def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;
23
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 1.0.
def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;
28
// Bundle of C++ predicate fragments, one per address space. Each fragment
// returns the result of ChkMemSDNodeAddressSpace for the node N and the
// corresponding llvm::ADDRESS_SPACE_* constant.
def AS_match {
  code generic = [{ return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC); }];
  code shared  = [{ return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED); }];
  code global  = [{ return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL); }];
}
40
// Provides `ptx.version`: a dag node that is rewritten to the PTX version
// currently in use.
class PTX {
  // Transform on an immediate node: the incoming value is ignored and an i32
  // immediate holding Subtarget->getPTXVersion() is produced instead.
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  }]>;

  // The (i32 0) operand is only a placeholder for PTXVerXform to replace.
  dag version = (PTXVerXform (i32 0));
}

def ptx : PTX;
50
51// Generates list of n sequential register names.
// E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2" ]
class RegSeq<int n, string prefix> {
  // Recursive definition: the list for n is the list for n-1 followed by
  // prefix pasted with n-1; n == 0 yields the empty list.
  list<string> ret = !if(!eq(n, 0),
                         [],
                         !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
                                     [prefix # !sub(n, 1)]));
}
58
// Values iterated for the `threadmask_imm` flag of the shfl definitions:
// both 0 and 1 when `sync` is set, otherwise only 0.
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync,
                      [0, 1],
                      [0]);
}
62
63//-----------------------------------
64// Synchronization and shuffle functions
65//-----------------------------------
66let isConvergent = true in {
// int_nvvm_barrier0: bar.sync with the literal barrier id 0.
def INT_BARRIER0
  : NVPTXInst<(outs), (ins),
              "bar.sync \t0;",
              [(int_nvvm_barrier0)]>;

// int_nvvm_barrier_n: bar.sync with a register operand.
def INT_BARRIERN
  : NVPTXInst<(outs), (ins Int32Regs:$src1),
              "bar.sync \t$src1;",
              [(int_nvvm_barrier_n Int32Regs:$src1)]>;

// int_nvvm_barrier: bar.sync with two register operands.
def INT_BARRIER
  : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
              "bar.sync \t$src1, $src2;",
              [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// int_nvvm_barrier0_popc: $pred is converted to a predicate register with
// setp.ne against 0 and fed to bar.red.popc.u32, which writes $dst. The
// temporary %p1 is declared inside the emitted { } scope.
def INT_BARRIER0_POPC
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
              "{{ \n\t"
              # ".reg .pred \t%p1; \n\t"
              # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
              # "bar.red.popc.u32 \t$dst, 0, %p1; \n\t"
              # "}}",
              [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
// int_nvvm_barrier0_and: $pred is converted to predicate %p1, reduced with
// bar.red.and.pred into %p2, and %p2 is turned back into 1/0 in $dst by selp.
def INT_BARRIER0_AND
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
              "{{ \n\t"
              # ".reg .pred \t%p1; \n\t"
              # ".reg .pred \t%p2; \n\t"
              # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
              # "bar.red.and.pred \t%p2, 0, %p1; \n\t"
              # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
              # "}}",
              [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
// int_nvvm_barrier0_or: $pred is converted to predicate %p1, reduced with
// bar.red.or.pred into %p2, and %p2 is turned back into 1/0 in $dst by selp.
def INT_BARRIER0_OR
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
              "{{ \n\t"
              # ".reg .pred \t%p1; \n\t"
              # ".reg .pred \t%p2; \n\t"
              # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
              # "bar.red.or.pred \t%p2, 0, %p1; \n\t"
              # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
              # "}}",
              [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
101
// int_nvvm_bar_sync with an immediate operand: bar.sync $i.
def INT_BAR_SYNC
  : NVPTXInst<(outs), (ins i32imm:$i),
              "bar.sync \t$i;",
              [(int_nvvm_bar_sync imm:$i)]>;
104
// int_nvvm_bar_warp_sync: immediate (_I) and register (_R) operand forms.
// Both are predicated on hasPTX60 and hasSM30.
def INT_BAR_WARP_SYNC_I
  : NVPTXInst<(outs), (ins i32imm:$i),
              "bar.warp.sync \t$i;",
              [(int_nvvm_bar_warp_sync imm:$i)]>,
    Requires<[hasPTX60, hasSM30]>;
def INT_BAR_WARP_SYNC_R
  : NVPTXInst<(outs), (ins Int32Regs:$i),
              "bar.warp.sync \t$i;",
              [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
    Requires<[hasPTX60, hasSM30]>;
111
// int_nvvm_barrier_sync: immediate (_I) and register (_R) operand forms.
// Both are predicated on hasPTX60 and hasSM30.
def INT_BARRIER_SYNC_I
  : NVPTXInst<(outs), (ins i32imm:$i),
              "barrier.sync \t$i;",
              [(int_nvvm_barrier_sync imm:$i)]>,
    Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_R
  : NVPTXInst<(outs), (ins Int32Regs:$i),
              "barrier.sync \t$i;",
              [(int_nvvm_barrier_sync Int32Regs:$i)]>,
    Requires<[hasPTX60, hasSM30]>;
118
// int_nvvm_barrier_sync_cnt: one definition per register/immediate
// combination of ($id, $cnt). The suffix letters give the operand kinds in
// that order (R = register, I = immediate).
def INT_BARRIER_SYNC_CNT_RR
  : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
              "barrier.sync \t$id, $cnt;",
              [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
    Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_RI
  : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
              "barrier.sync \t$id, $cnt;",
              [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
    Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_IR
  : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
              "barrier.sync \t$id, $cnt;",
              [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
    Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_II
  : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
              "barrier.sync \t$id, $cnt;",
              [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
    Requires<[hasPTX60, hasSM30]>;
135
// One shfl / shfl.sync instruction variant.
//   sync           - emit the ".sync" form and add a $threadmask operand.
//   mode           - shuffle mode string, pasted into the mnemonic and the
//                    intrinsic name.
//   reg            - "i32" or "f32"; selects the register class of $src/$dst
//                    and is pasted into the intrinsic name.
//   return_pred    - also produce an Int1Regs:$pred result ("$dst|$pred").
//   offset_imm, mask_imm, threadmask_imm
//                  - use an i32imm operand instead of Int32Regs for the
//                    corresponding input.
// The operand lists, asm string and selection pattern are all derived from
// these flags; the placeholders passed to NVPTXInst are overridden below.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
      : NVPTXInst<(outs), (ins), "?", []> {
  NVPTXRegClass rc = !cond(
    !eq(reg, "i32"): Int32Regs,
    !eq(reg, "f32"): Float32Regs);

  // int_nvvm_shfl_[sync_]<mode>_<reg>[p]
  string IntrName = !strconcat("int_nvvm_shfl_",
                               !if(sync, "sync_", ""),
                               mode,
                               "_", reg,
                               !if(return_pred, "p", ""));
  Intrinsic Intr = !cast<Intrinsic>(IntrName);

  // Inputs in intrinsic order: [threadmask,] src, offset, mask.
  let InOperandList = !con(
    !if(sync,
        !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
        (ins)),
    (ins rc:$src),
    !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
    !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"]));

  let OutOperandList = !if(return_pred,
                           (outs rc:$dst, Int1Regs:$pred),
                           (outs rc:$dst));

  // In the printed form the thread mask comes last, unlike the operand list.
  let AsmString = !strconcat("shfl.",
                             !if(sync, "sync.", ""),
                             mode, ".b32\t",
                             "$dst", !if(return_pred, "|$pred", ""),
                             ", $src, $offset, $mask",
                             !if(sync, ", $threadmask", ""),
                             ";");

  // Build (set <outs...>, (Intr <ins...>)) from the operand lists: the
  // `outs`/`ins` operators are swapped for `set`/Intr and every i32imm
  // operand is matched as `imm`.
  let Pattern = [!con(
      !foreach(op, OutOperandList,
               !subst(outs, set,
               !subst(i32imm, imm, op))),
      (set !foreach(op, InOperandList,
               !subst(ins, Intr,
               !subst(i32imm, imm, op))))
  )];
}
175
// Instantiate every shfl variant: {non-sync, sync} x mode x register type x
// optional predicate result x register/immediate form of each integer input.
// THREADMASK_INFO restricts the threadmask immediate form to sync variants.
//
// shfl.sync is a PTX ISA 6.0 instruction, so the sync variants are guarded by
// hasPTX60 in addition to hasSM30, matching the other *.sync instructions in
// this file. The non-sync variants keep the hasSHFL guard.
foreach sync = [false, true] in {
  foreach mode = ["up", "down", "bfly", "idx"] in {
    foreach regclass = ["i32", "f32"] in {
      foreach return_pred = [false, true] in {
        foreach offset_imm = [false, true] in {
          foreach mask_imm = [false, true] in {
            foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
              def : SHFL_INSTR<sync, mode, regclass, return_pred,
                               offset_imm, mask_imm, threadmask_imm>,
                    Requires<!if(sync,
                                 [hasSM30, hasPTX60],
                                 [hasSM30, hasSHFL])>;
            }
          }
        }
      }
    }
  }
}
193
// vote.{all,any,uni,ballot}
// One anonymous instruction per instantiation: takes a predicate and writes
// a `regclass` result. `mode` is the mnemonic suffix including the type.
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
                  !strconcat("vote.", mode, " \t$dest, $pred;"),
                  [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_ALL    : VOTE<Int1Regs,  "all.pred",   int_nvvm_vote_all>;
defm VOTE_ANY    : VOTE<Int1Regs,  "any.pred",   int_nvvm_vote_any>;
defm VOTE_UNI    : VOTE<Int1Regs,  "uni.pred",   int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
206
// vote.sync.{all,any,uni,ballot}
// Two instructions per instantiation, differing only in the $mask operand:
// `i` takes an immediate, `r` takes a register. The mask is the first
// intrinsic operand but is printed last.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
                    !strconcat("vote.sync.", mode, " \t$dest, $pred, $mask;"),
                    [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
  def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
                    !strconcat("vote.sync.", mode, " \t$dest, $pred, $mask;"),
                    [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_SYNC_ALL    : VOTE_SYNC<Int1Regs,  "all.pred",   int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY    : VOTE_SYNC<Int1Regs,  "any.pred",   int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI    : VOTE_SYNC<Int1Regs,  "uni.pred",   int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
223
// match.any.sync.<ptxtype>
// Four instructions per instantiation. The two-letter suffix gives the kind
// of $value then $mask (i = immediate, r = register). `ImmOp` is the
// immediate operand type used for $value.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value),
                     !strconcat("match.any.sync.", ptxtype, " \t$dest, $value, $mask;"),
                     [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value),
                     !strconcat("match.any.sync.", ptxtype, " \t$dest, $value, $mask;"),
                     [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value),
                     !strconcat("match.any.sync.", ptxtype, " \t$dest, $value, $mask;"),
                     [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value),
                     !strconcat("match.any.sync.", ptxtype, " \t$dest, $value, $mask;"),
                     [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ANY_SYNC_32
  : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32, i32imm>;
defm MATCH_ANY_SYNC_64
  : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64, i64imm>;
248
// match.all.sync.<ptxtype> with a predicate result ("$dest|$pred").
// Four instructions per instantiation. The two-letter suffix gives the kind
// of $value then $mask (i = immediate, r = register). `ImmOp` is the
// immediate operand type used for $value.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                           Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
                     !strconcat("match.all.sync.", ptxtype,
                                " \t$dest|$pred, $value, $mask;"),
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     !strconcat("match.all.sync.", ptxtype,
                                " \t$dest|$pred, $value, $mask;"),
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
                     !strconcat("match.all.sync.", ptxtype,
                                " \t$dest|$pred, $value, $mask;"),
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
                     !strconcat("match.all.sync.", ptxtype,
                                " \t$dest|$pred, $value, $mask;"),
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ALLP_SYNC_32
  : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, i32imm>;
defm MATCH_ALLP_SYNC_64
  : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p, i64imm>;
276
// redux.sync.<BinOp>.<PTXType>
// One anonymous instruction per instantiation, with register $src and $mask
// operands and a 32-bit register result.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
                  !strconcat("redux.sync.", BinOp, ".", PTXType,
                             " $dst, $src, $mask;"),
                  [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX70, hasSM80]>;
}

defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD  : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN  : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX  : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND  : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR  : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR   : REDUX_SYNC<"or",  "b32", int_nvvm_redux_sync_or>;
292
293} // isConvergent = true
294
295//-----------------------------------
296// Explicit Memory Fence Functions
297//-----------------------------------
// Operand-less instruction that prints `StrOp` verbatim and is selected for
// the operand-less intrinsic `IntOP`.
class MEMBAR<string StrOp, Intrinsic IntOP>
  : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
305
306
307//-----------------------------------
308// Async Copy Functions
309//-----------------------------------
310
// cp.async.mbarrier.arrive[.noinc][.shared].b64
// `NoInc` and `AddrSpace` are mnemonic fragments (possibly empty) pasted in
// that order. `_32` / `_64` select the register class of $addr.
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      "cp.async.mbarrier.arrive" # NoInc # AddrSpace # ".b64 [$addr];",
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      "cp.async.mbarrier.arrive" # NoInc # AddrSpace # ".b64 [$addr];",
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_MBARRIER_ARRIVE
  : CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED
  : CP_ASYNC_MBARRIER_ARRIVE<"", ".shared",
                             int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC
  : CP_ASYNC_MBARRIER_ARRIVE<".noinc", "",
                             int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED
  : CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared",
                             int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
330
// cp.async.ca.shared.global [$dst], [$src], <cpsize>
// `cpsize` is pasted into the asm string as a literal. `_32` / `_64` select
// the register class of both address operands.
multiclass CP_ASYNC_CA_SHARED_GLOBAL_I<string cpsize, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
                      "cp.async.ca.shared.global [$dst], [$src], " # cpsize # ";",
                      [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
                      "cp.async.ca.shared.global [$dst], [$src], " # cpsize # ";",
                      [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_CA_SHARED_GLOBAL_4
  : CP_ASYNC_CA_SHARED_GLOBAL_I<"4", int_nvvm_cp_async_ca_shared_global_4>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8
  : CP_ASYNC_CA_SHARED_GLOBAL_I<"8", int_nvvm_cp_async_ca_shared_global_8>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16
  : CP_ASYNC_CA_SHARED_GLOBAL_I<"16", int_nvvm_cp_async_ca_shared_global_16>;
350
// cp.async.cg.shared.global [$dst], [$src], <cpsize>
// `cpsize` is pasted into the asm string as a literal. `_32` / `_64` select
// the register class of both address operands.
multiclass CP_ASYNC_CG_SHARED_GLOBAL<string cpsize, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
                      "cp.async.cg.shared.global [$dst], [$src], " # cpsize # ";",
                      [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
                      "cp.async.cg.shared.global [$dst], [$src], " # cpsize # ";",
                      [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_CG_SHARED_GLOBAL_16
  : CP_ASYNC_CG_SHARED_GLOBAL<"16", int_nvvm_cp_async_cg_shared_global_16>;
364
// cp.async group management. All three are predicated on hasPTX70 and
// hasSM80.
def CP_ASYNC_COMMIT_GROUP
  : NVPTXInst<(outs), (ins),
              "cp.async.commit_group;",
              [(int_nvvm_cp_async_commit_group)]>,
    Requires<[hasPTX70, hasSM80]>;

// $n is matched as a target immediate (timm).
def CP_ASYNC_WAIT_GROUP
  : NVPTXInst<(outs), (ins i32imm:$n),
              "cp.async.wait_group $n;",
              [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
    Requires<[hasPTX70, hasSM80]>;

def CP_ASYNC_WAIT_ALL
  : NVPTXInst<(outs), (ins),
              "cp.async.wait_all;",
              [(int_nvvm_cp_async_wait_all)]>,
    Requires<[hasPTX70, hasSM80]>;
378
379//-----------------------------------
380// MBarrier Functions
381//-----------------------------------
382
// mbarrier.init[.shared].b64 [$addr], $count
// `AddrSpace` is the (possibly empty) mnemonic fragment. `_32` / `_64`
// select the register class of $addr; $count is always a 32-bit register.
multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
                      "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;",
                      [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
                      "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;",
                      [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_INIT        : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED : MBARRIER_INIT<".shared", int_nvvm_mbarrier_init_shared>;
397
// mbarrier.inval[.shared].b64 [$addr]
// `AddrSpace` is the (possibly empty) mnemonic fragment. `_32` / `_64`
// select the register class of $addr.
multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      "mbarrier.inval" # AddrSpace # ".b64 [$addr];",
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      "mbarrier.inval" # AddrSpace # ".b64 [$addr];",
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_INVAL        : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared", int_nvvm_mbarrier_inval_shared>;
412
// mbarrier.arrive[.shared].b64 $state, [$addr]
// Produces a 64-bit $state result. `_32` / `_64` select the register class
// of $addr.
multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE
  : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED
  : MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
427
// mbarrier.arrive.noComplete[.shared].b64 $state, [$addr], $count
// Produces a 64-bit $state result. `_32` / `_64` select the register class
// of $addr; $count is always a 32-bit register.
multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_NOCOMPLETE
  : MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED
  : MBARRIER_ARRIVE_NOCOMPLETE<".shared",
                               int_nvvm_mbarrier_arrive_noComplete_shared>;
447
// mbarrier.arrive_drop[.shared].b64 $state, [$addr]
// Produces a 64-bit $state result. `_32` / `_64` select the register class
// of $addr.
multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      "mbarrier.arrive_drop" # AddrSpace # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      "mbarrier.arrive_drop" # AddrSpace # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_DROP
  : MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED
  : MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
465
// mbarrier.arrive_drop.noComplete[.shared].b64 $state, [$addr], $count
// Produces a 64-bit $state result. `_32` / `_64` select the register class
// of $addr; $count is always a 32-bit register.
multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive_drop.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive_drop.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_DROP_NOCOMPLETE
  : MBARRIER_ARRIVE_DROP_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED
  : MBARRIER_ARRIVE_DROP_NOCOMPLETE<".shared",
                                    int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
486
// mbarrier.test_wait[.shared].b64 $res, [$addr], $state
// Produces a predicate result from an address and a 64-bit state value.
// `_32` / `_64` select the register class of $addr.
multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int32Regs:$addr, Int64Regs:$state),
                      "mbarrier.test_wait" # AddrSpace
                        # ".b64 $res, [$addr], $state;",
                      [(set Int1Regs:$res,
                            (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int64Regs:$addr, Int64Regs:$state),
                      "mbarrier.test_wait" # AddrSpace
                        # ".b64 $res, [$addr], $state;",
                      [(set Int1Regs:$res,
                            (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_TEST_WAIT
  : MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED
  : MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
502
// mbarrier.pending_count.b64 $res, $state
// Takes a 64-bit state value and produces a 32-bit result.
class MBARRIER_PENDING_COUNT<Intrinsic Intrin>
  : NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
              "mbarrier.pending_count.b64 $res, $state;",
              [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
    Requires<[hasPTX70, hasSM80]>;

def MBARRIER_PENDING_COUNT
  : MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
511
512//-----------------------------------
513// Math Functions
514//-----------------------------------
515
// Map min(1.0, max(0.0, x)) to sat(x).
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x): when x is NaN,
// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// The same applies to fmax and fmin.
521
// f32: fmin(1.0, fmax(0.0, a)) in each of its four operand orderings is
// selected as a CVT_f32_f32 of `a` with the CvtSAT modifier.
def : Pat<(int_nvvm_fmin_f immFloat1, (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1, (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
534
// f64: fmin(1.0, fmax(0.0, a)) in each of its four operand orderings is
// selected as a CVT_f64_f64 of `a` with the CvtSAT modifier.
def : Pat<(int_nvvm_fmin_d immDouble1, (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1, (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
547
548
// One-input instruction selected for the intrinsic IntOP.
// OpcStr is the complete asm string (mnemonic and operands) rather than just
// an opcode, so that cases like INT_PTX_RECIP can be expressed.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP>
  : NVPTXInst<(outs target_regclass:$dst),
              (ins src_regclass:$src0),
              OpcStr,
              [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
556
// Two-input instruction selected for the intrinsic IntOP.
// OpcStr is the complete asm string (mnemonic and operands) rather than just
// an opcode, so that cases like INT_PTX_NATIVE_POWR_F can be expressed.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1),
              OpcStr,
              [(set t_regclass:$dst,
                    (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
565
// Three-input instruction selected for the intrinsic IntOP. OpcStr is the
// complete asm string (mnemonic and operands).
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
              OpcStr,
              [(set t_regclass:$dst,
                    (IntOP s0_regclass:$src0, s1_regclass:$src1,
                           s2_regclass:$src2))]>;
574
575//
576// MISC
577//
578
// int_nvvm_prmt: three 32-bit register inputs, one 32-bit register result.
def INT_NVVM_PRMT
  : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
581
582//
583// Min Max
584//
585
// f32 min/max, plain and .ftz forms.
def INT_NVVM_FMIN_F
  : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
  : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

def INT_NVVM_FMAX_F
  : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
  : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

// f64 min/max.
def INT_NVVM_FMIN_D
  : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
  : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;
600
601
602//
603// Multiplication
604//
605
// mul.hi for signed/unsigned 32-bit and 64-bit integers.
def INT_NVVM_MULHI_I
  : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
  : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

def INT_NVVM_MULHI_LL
  : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
  : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 mul with an explicit rounding modifier (rn/rz/rm/rp), each in a .ftz
// and a plain form.
def INT_NVVM_MUL_RN_FTZ_F
  : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
  : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
  : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
  : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
  : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
  : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
  : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
  : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 mul with an explicit rounding modifier.
def INT_NVVM_MUL_RN_D
  : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
  : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
  : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
  : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

// mul24.lo for signed/unsigned 32-bit integers.
def INT_NVVM_MUL24_I
  : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
  : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
646
647//
648// Div
649//
650
651def INT_NVVM_DIV_APPROX_FTZ_F
652  : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
653    Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
654def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
655  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
656
// f32 divide for each rounding suffix (.rn/.rz/.rm/.rp), each in an .ftz and
// a non-.ftz flavour.
def INT_NVVM_DIV_RN_FTZ_F
  : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
  : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
  : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
  : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
  : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
  : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
  : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
  : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
673
// f64 divide, one record per rounding suffix.
def INT_NVVM_DIV_RN_D
  : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
  : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
  : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
  : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
682
683//
684// Sad
685//
686
// Three-source sad instruction on 32-bit registers, signed and unsigned.
def INT_NVVM_SAD_I
  : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
  : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
691
692//
693// Floor  Ceil
694//
695
// floor: selected as a same-type CVT using the CvtRMI / CvtRMI_FTZ mode.
def : Pat<
  (int_nvvm_floor_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_floor_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_floor_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
702
// ceil: selected as a same-type CVT using the CvtRPI / CvtRPI_FTZ mode.
def : Pat<
  (int_nvvm_ceil_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_ceil_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<
  (int_nvvm_ceil_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
709
710//
711// Abs
712//
713
// abs for f32 (.ftz and plain) and for f64.
def INT_NVVM_FABS_FTZ_F
  : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
  : F_MATH_1<"abs.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D
  : F_MATH_1<"abs.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_fabs_d>;
721
722//
723// Round
724//
725
// round: selected as a same-type CVT using the CvtRNI / CvtRNI_FTZ mode.
def : Pat<
  (int_nvvm_round_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_round_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_round_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
732
733//
734// Trunc
735//
736
// trunc: selected as a same-type CVT using the CvtRZI / CvtRZI_FTZ mode.
def : Pat<
  (int_nvvm_trunc_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_trunc_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_trunc_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
743
744//
745// Saturate
746//
747
// saturate: selected as a same-type CVT using the CvtSAT / CvtSAT_FTZ mode.
def : Pat<
  (int_nvvm_saturate_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<
  (int_nvvm_saturate_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<
  (int_nvvm_saturate_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
754
755//
756// Exp2  Log2
757//
758
// ex2.approx for f32 (.ftz and plain) and f64.
def INT_NVVM_EX2_APPROX_FTZ_F
  : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
  : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
  : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
765
// lg2.approx for f32 (.ftz and plain) and f64.
def INT_NVVM_LG2_APPROX_FTZ_F
  : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
  : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
  : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
772
773//
774// Sin  Cos
775//
776
// sin.approx / cos.approx on f32, each with and without .ftz.
def INT_NVVM_SIN_APPROX_FTZ_F
  : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
  : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

def INT_NVVM_COS_APPROX_FTZ_F
  : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
  : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
786
787//
788// Fma
789//
790
// f32 fused multiply-add for each rounding suffix, in .ftz and non-.ftz
// flavours.
def INT_NVVM_FMA_RN_FTZ_F
  : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F
  : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F
  : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F
  : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F
  : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F
  : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F
  : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F
  : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rp_f>;
811
// f64 fused multiply-add, one record per rounding suffix.
def INT_NVVM_FMA_RN_D
  : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D
  : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D
  : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D
  : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rp_d>;
820
821//
822// Rcp
823//
824
// f32 reciprocal for each rounding suffix, in .ftz and non-.ftz flavours.
def INT_NVVM_RCP_RN_FTZ_F
  : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F
  : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
  : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
  : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
  : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
  : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
  : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
  : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
841
// f64 reciprocal: the four rounding-suffix forms, followed by the
// rcp.approx.ftz.f64 form.
def INT_NVVM_RCP_RN_D
  : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
  : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
  : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
  : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

def INT_NVVM_RCP_APPROX_FTZ_D
  : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
853
854//
855// Sqrt
856//
857
// f32 square root: the four rounding-suffix forms plus .approx, each in an
// .ftz and a non-.ftz flavour.
def INT_NVVM_SQRT_RN_FTZ_F
  : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
  : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
  : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
  : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
  : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
  : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
  : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
  : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F
  : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
  : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
878
// f64 square root, one record per rounding suffix.
def INT_NVVM_SQRT_RN_D
  : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
  : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
  : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
  : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;
887
// The generic int_nvvm_sqrt_f intrinsic maps to one of four instructions
// depending on the doF32FTZ and do_SQRTF32_RN predicates:
//   doF32FTZ + do_SQRTF32_RN -> sqrt.rn.ftz.f32
//   do_SQRTF32_RN            -> sqrt.rn.f32
//   doF32FTZ                 -> sqrt.approx.ftz.f32
//   (no predicate)           -> sqrt.approx.f32
// The patterns are kept in this order, most-constrained first.
def : Pat<
  (int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
  Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<
  (int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
  Requires<[do_SQRTF32_RN]>;
def : Pat<
  (int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
  Requires<[doF32FTZ]>;
def : Pat<
  (int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
897
898//
899// Rsqrt
900//
901
// rsqrt.approx for f32 (.ftz and plain) and f64.
def INT_NVVM_RSQRT_APPROX_FTZ_F
  : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F
  : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
  : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
909
910//
911// Add
912//
913
// f32 add for each rounding suffix, in .ftz and non-.ftz flavours.
def INT_NVVM_ADD_RN_FTZ_F
  : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
  : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F
  : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
  : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F
  : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
  : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F
  : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
  : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
930
// f64 add, one record per rounding suffix.
def INT_NVVM_ADD_RN_D
  : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
  : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
  : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D
  : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
939
940//
941// Convert
942//
943
// f64 -> f32 conversions: each int_nvvm_d2f_* intrinsic selects CVT_f32_f64
// with the Cvt mode named by its suffix.
def : Pat<
  (int_nvvm_d2f_rn_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rn Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_d2f_rz_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_d2f_rm_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rm Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_d2f_rp_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rp Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
960
// f64 -> 32-bit integer conversions: d2i selects CVT_s32_f64 and d2ui selects
// CVT_u32_f64, each with the Cvt*I mode named by the intrinsic suffix.
def : Pat<
  (int_nvvm_d2i_rn Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2i_rz Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2i_rm Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2i_rp Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

def : Pat<
  (int_nvvm_d2ui_rn Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2ui_rz Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2ui_rm Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2ui_rp Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
978
// 32-bit integer -> f64 conversions: i2d selects CVT_f64_s32 and ui2d selects
// CVT_f64_u32, each with the Cvt mode named by the intrinsic suffix.
def : Pat<
  (int_nvvm_i2d_rn Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_i2d_rz Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_i2d_rm Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_i2d_rp Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

def : Pat<
  (int_nvvm_ui2d_rn Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ui2d_rz Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ui2d_rm Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ui2d_rp Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
996
// f32 -> 32-bit integer conversions: f2i selects CVT_s32_f32 and f2ui selects
// CVT_u32_f32, each with the Cvt*I / Cvt*I_FTZ mode named by the intrinsic
// suffix.
def : Pat<
  (int_nvvm_f2i_rn_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rn Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2i_rz_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2i_rm_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rm Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2i_rp_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rp Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

def : Pat<
  (int_nvvm_f2ui_rn_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rn Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2ui_rz_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2ui_rm_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rm Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2ui_rp_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rp Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
1030
// 32-bit integer -> f32 conversions: i2f selects CVT_f32_s32 and ui2f selects
// CVT_f32_u32, each with the Cvt mode named by the intrinsic suffix.
def : Pat<
  (int_nvvm_i2f_rn Int32Regs:$a),
  (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_i2f_rz Int32Regs:$a),
  (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_i2f_rm Int32Regs:$a),
  (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_i2f_rp Int32Regs:$a),
  (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

def : Pat<
  (int_nvvm_ui2f_rn Int32Regs:$a),
  (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ui2f_rz Int32Regs:$a),
  (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ui2f_rm Int32Regs:$a),
  (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ui2f_rp Int32Regs:$a),
  (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
1048
// Packs two 32-bit integer registers into one 64-bit float register with a
// single mov.b64 whose source is the brace-enclosed pair {$src0, $src1}.
def INT_NVVM_LOHI_I2D
  : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
             Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
1051
// Extract one 32-bit half of a 64-bit float register. Both records unpack
// $src0 with mov.b64 into a register pair inside a braced scope; the half
// that is not wanted lands in a scratch register %temp declared in that scope.
// D2I_LO takes the first element of the pair, D2I_HI the second.
def INT_NVVM_D2I_LO
  : F_MATH_1<"{{\n\t" #
             ".reg .b32 %temp; \n\t" #
             "mov.b64 \t{$dst, %temp}, $src0;\n\t" #
             "}}",
             Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
def INT_NVVM_D2I_HI
  : F_MATH_1<"{{\n\t" #
             ".reg .b32 %temp; \n\t" #
             "mov.b64 \t{%temp, $dst}, $src0;\n\t" #
             "}}",
             Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
1064
// f32 -> 64-bit integer conversions: f2ll selects CVT_s64_f32 and f2ull
// selects CVT_u64_f32, each with the Cvt*I / Cvt*I_FTZ mode named by the
// intrinsic suffix.
def : Pat<
  (int_nvvm_f2ll_rn_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rn Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2ll_rz_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2ll_rm_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rm Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2ll_rp_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rp Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;

def : Pat<
  (int_nvvm_f2ull_rn_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rn Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2ull_rz_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2ull_rm_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rm Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2ull_rp_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rp Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
1098
// f64 -> 64-bit integer conversions: d2ll selects CVT_s64_f64 and d2ull
// selects CVT_u64_f64, each with the Cvt*I mode named by the intrinsic suffix.
def : Pat<
  (int_nvvm_d2ll_rn Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2ll_rz Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2ll_rm Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2ll_rp Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;

def : Pat<
  (int_nvvm_d2ull_rn Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2ull_rz Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2ull_rm Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2ull_rp Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
1116
// 64-bit integer -> f32 conversions: ll2f selects CVT_f32_s64 and ull2f
// selects CVT_f32_u64, each with the Cvt mode named by the intrinsic suffix.
def : Pat<
  (int_nvvm_ll2f_rn Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ll2f_rz Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ll2f_rm Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ll2f_rp Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRP)>;

def : Pat<
  (int_nvvm_ull2f_rn Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ull2f_rz Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ull2f_rm Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ull2f_rp Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
1134
// 64-bit integer -> f64 conversions: ll2d selects CVT_f64_s64 and ull2d
// selects CVT_f64_u64, each with the Cvt mode named by the intrinsic suffix.
def : Pat<
  (int_nvvm_ll2d_rn Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ll2d_rz Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ll2d_rm Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ll2d_rp Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

def : Pat<
  (int_nvvm_ull2d_rn Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ull2d_rz Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ull2d_rm Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ull2d_rp Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
1152
1153
// f32 -> f16 conversions: the CVT_f16_f32 result is passed through
// BITCONVERT_16_F2I to produce the value the intrinsic returns.
def : Pat<
  (int_nvvm_f2h_rn_ftz Float32Regs:$a),
  (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
def : Pat<
  (int_nvvm_f2h_rn Float32Regs:$a),
  (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
1158
1159//
1160// Bitcast
1161//
1162
// Bit-preserving moves between same-width integer and float register classes:
// mov.b32 for the 32-bit pair, mov.b64 for the 64-bit pair.
def INT_NVVM_BITCAST_F2I
  : F_MATH_1<"mov.b32 \t$dst, $src0;",
             Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F
  : F_MATH_1<"mov.b32 \t$dst, $src0;",
             Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;

def INT_NVVM_BITCAST_LL2D
  : F_MATH_1<"mov.b64 \t$dst, $src0;",
             Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL
  : F_MATH_1<"mov.b64 \t$dst, $src0;",
             Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;
1172
1173//
1174// FNS
1175//
1176
// Base class for the fns.b32 instruction. `ins` is the input operand list
// (it must name $mask, $base and $offset, which the asm string prints) and
// `Operands` is the DAG whose result is assigned to $dst. Gated on
// hasPTX60 and hasSM30.
class INT_FNS_MBO<dag ins, dag Operands>
  : NVPTXInst<
      (outs Int32Regs:$dst),
      ins,
      "fns.b32 \t$dst, $mask, $base, $offset;",
      [(set Int32Regs:$dst, Operands)]>,
    Requires<[hasPTX60, hasSM30]>;
1182
// One fns record for every register/immediate combination of the three
// operands. The three-letter suffix gives the kind of (mask, base, offset) in
// that order: 'r' = Int32Regs register, 'i' = i32imm immediate.
def INT_FNS_rrr
  : INT_FNS_MBO<
      (ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
      (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_rri
  : INT_FNS_MBO<
      (ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
      (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_rir
  : INT_FNS_MBO<
      (ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
      (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_rii
  : INT_FNS_MBO<
      (ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
      (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr
  : INT_FNS_MBO<
      (ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
      (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_iri
  : INT_FNS_MBO<
      (ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
      (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_iir
  : INT_FNS_MBO<
      (ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
      (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii
  : INT_FNS_MBO<
      (ins i32imm:$mask, i32imm:$base, i32imm:$offset),
      (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
1199
1200//-----------------------------------
1201// Atomic Functions
1202//-----------------------------------
1203
// PatFrag wrappers that attach an address-space predicate from AS_match
// (global / shared / generic) to an atomic fragment.
class ATOMIC_GLOBAL_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.global>;

class ATOMIC_SHARED_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.shared>;

class ATOMIC_GENERIC_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.generic>;
1210
// Two-operand atomic (address plus one value) for a single pointer register
// class. Produces two records, both printed as
//   atom<SpaceStr><OpcStr><TypeStr> $dst, [$addr], $b;
// and both gated by Pred:
//   reg - $b is a regclass register
//   imm - $b is an IMMType operand matched with the IMM node
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  Operand IMMType, SDNode IMM, list<Predicate> Pred> {
  def reg
    : NVPTXInst<
        (outs regclass:$dst),
        (ins ptrclass:$addr, regclass:$b),
        "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;",
        [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
      Requires<Pred>;
  def imm
    : NVPTXInst<
        (outs regclass:$dst),
        (ins ptrclass:$addr, IMMType:$b),
        "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;",
        [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
      Requires<Pred>;
}
// Instantiates F_ATOMIC_2_imp twice: p32 with Int32Regs as the pointer
// register class and p64 with Int64Regs. Pred defaults to the empty list.
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
  string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
  list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, IMM, Pred>;
  defm p64
    : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, IMM, Pred>;
}
1231
// Two-operand atomic whose value operand is negated before use. The emitted
// text opens a braced scope, declares a scratch register `temp` of type
// .s<TypeStr>, writes neg.s<TypeStr> of $b into it, and then issues
//   atom<SpaceStr><OpcStr>.u<TypeStr> $dst, [$addr], temp;
// Only a register form is provided. Here TypeStr is the bare bit width
// (it is pasted after ".s" / ".u").
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  list<Predicate> Pred> {
  def reg
    : NVPTXInst<
        (outs regclass:$dst),
        (ins ptrclass:$addr, regclass:$b),
        "{{ \n\t" #
        ".reg \t.s" # TypeStr # " temp; \n\t" #
        "neg.s" # TypeStr # " \ttemp, $b; \n\t" #
        "atom" # SpaceStr # OpcStr # ".u" # TypeStr #
          " \t$dst, [$addr], temp; \n\t" #
        "}}",
        [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
      Requires<Pred>;
}
// Instantiates F_ATOMIC_2_NEG_imp twice: p32 with Int32Regs as the pointer
// register class and p64 with Int64Regs. Pred defaults to the empty list.
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
  string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                         IntOp, Pred>;
  defm p64
    : F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                         IntOp, Pred>;
}
1253
// Three-operand atomic (address plus two values) for a single pointer
// register class. All four records print
//   atom<SpaceStr><OpcStr><TypeStr> $dst, [$addr], $b, $c;
// and are gated by Pred. They differ only in which value operands are
// immediates:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  Operand IMMType, list<Predicate> Pred> {
  def reg
    : NVPTXInst<
        (outs regclass:$dst),
        (ins ptrclass:$addr, regclass:$b, regclass:$c),
        "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
        [(set regclass:$dst,
              (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
      Requires<Pred>;

  def imm1
    : NVPTXInst<
        (outs regclass:$dst),
        (ins ptrclass:$addr, IMMType:$b, regclass:$c),
        "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
        [(set regclass:$dst,
              (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
      Requires<Pred>;

  def imm2
    : NVPTXInst<
        (outs regclass:$dst),
        (ins ptrclass:$addr, regclass:$b, IMMType:$c),
        "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
        [(set regclass:$dst,
              (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
      Requires<Pred>;

  def imm3
    : NVPTXInst<
        (outs regclass:$dst),
        (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
        "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
        [(set regclass:$dst,
              (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
      Requires<Pred>;
}
// Instantiates F_ATOMIC_3_imp twice: p32 with Int32Regs as the pointer
// register class and p64 with Int64Regs. Pred defaults to the empty list.
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
  string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, Pred>;
  defm p64
    : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, Pred>;
}
1289
1290// atom_add
1291
// Address-space-qualified fragments for atomic add. Suffix _g / _s / _gen
// selects the global / shared / generic address-space check. The _32 and _64
// fragments wrap atomic_load_add_32 / atomic_load_add_64; the unsized
// atomic_load_add_{g,s,gen} fragments wrap atomic_load_fadd.
def atomic_load_add_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_fadd node:$a, node:$b)>;
1310
// Integer atomic add (.u32 / .u64) for the global, shared and generic address
// spaces. The *_GEN_*_USE_G variants match the generic-address-space fragment
// but print the ".global" space qualifier.
defm INT_PTX_ATOM_ADD_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
               atomic_load_add_32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm>;

defm INT_PTX_ATOM_ADD_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
               atomic_load_add_64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm>;
1328
// Floating-point atomic add for the global, shared and generic address
// spaces, built on the atomic_load_fadd-based fragments. The .f64 variants are
// additionally gated on hasAtomAddF64; the .f32 variants carry no predicate.
defm INT_PTX_ATOM_ADD_G_F32
  : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
               atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32
  : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
               atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32
  : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
               atomic_load_add_gen, f32imm, fpimm>;

defm INT_PTX_ATOM_ADD_G_F64
  : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
               atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64
  : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
               atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64
  : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
               atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
1342
1343// atom_sub
1344
// Address-space-checked wrappers around atomic_load_sub_{32,64}: one fragment
// per (width, check class) pair, operands forwarded in order.
def atomic_load_sub_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_32 node:$ptr, node:$val)>;
def atomic_load_sub_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_32 node:$ptr, node:$val)>;
def atomic_load_sub_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_sub_32 node:$ptr, node:$val)>;
def atomic_load_sub_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_64 node:$ptr, node:$val)>;
def atomic_load_sub_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_64 node:$ptr, node:$val)>;
def atomic_load_sub_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_sub_64 node:$ptr, node:$val)>;
1357
// Atomic subtract is instantiated through F_ATOMIC_2_NEG with the ".add"
// opcode string and a bare bit-width ("32"/"64") as the type string.
// NOTE(review): F_ATOMIC_2_NEG is defined outside this chunk; presumably it
// negates the operand before the add -- confirm against its definition.
defm INT_PTX_ATOM_SUB_G_32
  : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_32_g>;
defm INT_PTX_ATOM_SUB_G_64
  : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_64_g>;
defm INT_PTX_ATOM_SUB_GEN_32
  : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
                   atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G
  : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_S_32
  : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
                   atomic_load_sub_32_s>;
defm INT_PTX_ATOM_SUB_S_64
  : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
                   atomic_load_sub_64_s>;
defm INT_PTX_ATOM_SUB_GEN_64
  : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
                   atomic_load_sub_64_gen>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G
  : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_64_gen>;
1374
1375// atom_swap
1376
// Address-space-checked wrappers around atomic_swap_{32,64}.
def atomic_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_32 node:$ptr, node:$val)>;
def atomic_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_32 node:$ptr, node:$val)>;
def atomic_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_swap_32 node:$ptr, node:$val)>;
def atomic_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_64 node:$ptr, node:$val)>;
def atomic_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_64 node:$ptr, node:$val)>;
def atomic_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_swap_64 node:$ptr, node:$val)>;
1389
// Atomic exchange: ".exch" with untyped ".b32"/".b64" type strings.  The
// *_USE_G forms pair the generic-check fragment with the ".global" string.
defm INT_PTX_ATOM_SWAP_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
               atomic_swap_32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
               atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
               atomic_swap_64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
               atomic_swap_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_64_gen, i64imm, imm>;
1406
1407// atom_max
1408
// Address-space-checked wrappers around the signed max nodes
// (atomic_load_max_{32,64}).
def atomic_load_max_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_32 node:$ptr, node:$val)>;
def atomic_load_max_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_32 node:$ptr, node:$val)>;
def atomic_load_max_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_max_32 node:$ptr, node:$val)>;
def atomic_load_max_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_64 node:$ptr, node:$val)>;
def atomic_load_max_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_64 node:$ptr, node:$val)>;
def atomic_load_max_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_max_64 node:$ptr, node:$val)>;

// Same set for the unsigned max nodes (atomic_load_umax_{32,64}).
def atomic_load_umax_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_32 node:$ptr, node:$val)>;
def atomic_load_umax_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_32 node:$ptr, node:$val)>;
def atomic_load_umax_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_umax_32 node:$ptr, node:$val)>;
def atomic_load_umax_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_64 node:$ptr, node:$val)>;
def atomic_load_umax_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_64 node:$ptr, node:$val)>;
def atomic_load_umax_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_umax_64 node:$ptr, node:$val)>;
1433
// Signed max: ".max" with ".s32"/".s64" type strings.
defm INT_PTX_ATOM_LOAD_MAX_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".max",
               atomic_load_max_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
               atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
               atomic_load_max_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".max",
               atomic_load_max_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
               atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
               atomic_load_max_64_gen, i64imm, imm>;

// Unsigned max: ".max" with ".u32"/".u64" type strings.
defm INT_PTX_ATOM_LOAD_UMAX_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".max",
               atomic_load_umax_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
               atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
               atomic_load_umax_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".max",
               atomic_load_umax_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
               atomic_load_umax_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
               atomic_load_umax_64_gen, i64imm, imm>;
1466
1467// atom_min
1468
// Address-space-checked wrappers around the signed min nodes
// (atomic_load_min_{32,64}).
def atomic_load_min_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_min_32 node:$ptr, node:$val)>;
def atomic_load_min_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_min_32 node:$ptr, node:$val)>;
def atomic_load_min_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_min_32 node:$ptr, node:$val)>;
def atomic_load_min_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_min_64 node:$ptr, node:$val)>;
def atomic_load_min_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_min_64 node:$ptr, node:$val)>;
def atomic_load_min_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_min_64 node:$ptr, node:$val)>;

// Same set for the unsigned min nodes (atomic_load_umin_{32,64}).
def atomic_load_umin_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umin_32 node:$ptr, node:$val)>;
def atomic_load_umin_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umin_32 node:$ptr, node:$val)>;
def atomic_load_umin_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_umin_32 node:$ptr, node:$val)>;
def atomic_load_umin_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umin_64 node:$ptr, node:$val)>;
def atomic_load_umin_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umin_64 node:$ptr, node:$val)>;
def atomic_load_umin_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_umin_64 node:$ptr, node:$val)>;
1493
// Signed min: ".min" with ".s32"/".s64" type strings.
defm INT_PTX_ATOM_LOAD_MIN_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".min",
               atomic_load_min_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
               atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
               atomic_load_min_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".min",
               atomic_load_min_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
               atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
               atomic_load_min_64_gen, i64imm, imm>;

// Unsigned min: ".min" with ".u32"/".u64" type strings.
defm INT_PTX_ATOM_LOAD_UMIN_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".min",
               atomic_load_umin_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
               atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
               atomic_load_umin_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".min",
               atomic_load_umin_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
               atomic_load_umin_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
               atomic_load_umin_64_gen, i64imm, imm>;
1526
1527// atom_inc  atom_dec
1528
// Address-space-checked wrappers around the NVVM inc/dec intrinsics
// (int_nvvm_atomic_load_{inc,dec}_32).  Only 32-bit forms are defined here.
def atomic_load_inc_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_inc_32 node:$ptr, node:$val)>;
def atomic_load_inc_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_inc_32 node:$ptr, node:$val)>;
def atomic_load_inc_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (int_nvvm_atomic_load_inc_32 node:$ptr, node:$val)>;
def atomic_load_dec_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_dec_32 node:$ptr, node:$val)>;
def atomic_load_dec_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_dec_32 node:$ptr, node:$val)>;
def atomic_load_dec_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (int_nvvm_atomic_load_dec_32 node:$ptr, node:$val)>;
1541
// Increment: ".inc" with ".u32" over Int32Regs.
defm INT_PTX_ATOM_INC_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_g, i32imm, imm>;
defm INT_PTX_ATOM_INC_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
               atomic_load_inc_32_s, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm>;

// Decrement: ".dec" with ".u32" over Int32Regs.
defm INT_PTX_ATOM_DEC_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_g, i32imm, imm>;
defm INT_PTX_ATOM_DEC_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
               atomic_load_dec_32_s, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm>;
1558
1559// atom_and
1560
// Address-space-checked wrappers around atomic_load_and_{32,64}.
def atomic_load_and_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_and_32 node:$ptr, node:$val)>;
def atomic_load_and_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_and_32 node:$ptr, node:$val)>;
def atomic_load_and_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_and_32 node:$ptr, node:$val)>;
def atomic_load_and_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_and_64 node:$ptr, node:$val)>;
def atomic_load_and_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_and_64 node:$ptr, node:$val)>;
def atomic_load_and_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_and_64 node:$ptr, node:$val)>;
1573
// Bitwise and: ".and" with untyped ".b32"/".b64" type strings.
defm INT_PTX_ATOM_AND_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_g, i32imm, imm>;
defm INT_PTX_ATOM_AND_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
               atomic_load_and_32_s, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_64_g, i64imm, imm>;
defm INT_PTX_ATOM_AND_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
               atomic_load_and_64_s, i64imm, imm>;
defm INT_PTX_ATOM_AND_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
               atomic_load_and_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_64_gen, i64imm, imm>;
1590
1591// atom_or
1592
// Address-space-checked wrappers around atomic_load_or_{32,64}.
def atomic_load_or_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_or_32 node:$ptr, node:$val)>;
def atomic_load_or_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_or_32 node:$ptr, node:$val)>;
def atomic_load_or_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_or_32 node:$ptr, node:$val)>;
def atomic_load_or_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_or_64 node:$ptr, node:$val)>;
def atomic_load_or_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_or_64 node:$ptr, node:$val)>;
def atomic_load_or_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_or_64 node:$ptr, node:$val)>;
1605
// Bitwise or: ".or" with untyped ".b32"/".b64" type strings.  Definition
// order is kept as-is (global, generic, generic-as-global, shared).
defm INT_PTX_ATOM_OR_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_32_g, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
               atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
               atomic_load_or_32_s, i32imm, imm>;
defm INT_PTX_ATOM_OR_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_64_g, i64imm, imm>;
defm INT_PTX_ATOM_OR_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
               atomic_load_or_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_OR_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
               atomic_load_or_64_s, i64imm, imm>;
1622
1623// atom_xor
1624
// Address-space-checked wrappers around atomic_load_xor_{32,64}.
def atomic_load_xor_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_xor_32 node:$ptr, node:$val)>;
def atomic_load_xor_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_xor_32 node:$ptr, node:$val)>;
def atomic_load_xor_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_xor_32 node:$ptr, node:$val)>;
def atomic_load_xor_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_xor_64 node:$ptr, node:$val)>;
def atomic_load_xor_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_xor_64 node:$ptr, node:$val)>;
def atomic_load_xor_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_xor_64 node:$ptr, node:$val)>;
1637
// Bitwise xor: ".xor" with untyped ".b32"/".b64" type strings.
defm INT_PTX_ATOM_XOR_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_32_g, i32imm, imm>;
defm INT_PTX_ATOM_XOR_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
               atomic_load_xor_32_s, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
               atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_64_g, i64imm, imm>;
defm INT_PTX_ATOM_XOR_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
               atomic_load_xor_64_s, i64imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
               atomic_load_xor_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_64_gen, i64imm, imm>;
1654
1655// atom_cas
1656
// Address-space-checked wrappers around atomic_cmp_swap_{32,64}.  These take
// three operands, forwarded to the underlying node in the same order.
def atomic_cmp_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$cmp, node:$swap),
                      (atomic_cmp_swap_32 node:$ptr, node:$cmp, node:$swap)>;
def atomic_cmp_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$cmp, node:$swap),
                      (atomic_cmp_swap_32 node:$ptr, node:$cmp, node:$swap)>;
def atomic_cmp_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$cmp, node:$swap),
                       (atomic_cmp_swap_32 node:$ptr, node:$cmp, node:$swap)>;
def atomic_cmp_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$cmp, node:$swap),
                      (atomic_cmp_swap_64 node:$ptr, node:$cmp, node:$swap)>;
def atomic_cmp_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$cmp, node:$swap),
                      (atomic_cmp_swap_64 node:$ptr, node:$cmp, node:$swap)>;
def atomic_cmp_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$cmp, node:$swap),
                       (atomic_cmp_swap_64 node:$ptr, node:$cmp, node:$swap)>;
1669
// Compare-and-swap: instantiated through the three-operand F_ATOMIC_3 with
// ".cas" and untyped ".b32"/".b64" type strings.
defm INT_PTX_ATOM_CAS_G_32
  : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_32_g, i32imm>;
defm INT_PTX_ATOM_CAS_S_32
  : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
               atomic_cmp_swap_32_s, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32
  : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
               atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G
  : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_G_64
  : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_64_g, i64imm>;
defm INT_PTX_ATOM_CAS_S_64
  : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
               atomic_cmp_swap_64_s, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64
  : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
               atomic_cmp_swap_64_gen, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G
  : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_64_gen, i64imm>;
1686
1687// Support for scoped atomic operations.  Matches
1688// int_nvvm_atomic_{op}_{space}_{type}_{scope}
1689// and converts it into the appropriate instruction.
1690// NOTE: not all possible combinations are implemented
1691//  'space' is limited to generic as it's the only one needed to support CUDA.
1692//  'scope' = 'gpu' is default and is handled by regular atomic instructions.
// Common instruction shape shared by the two- and three-operand scoped
// atomics.
//   AsmStr   - assembly string of the instruction.
//   regclass - register class of the single output operand, $result.
//   Preds    - predicates attached via Requires<>.
//   ins      - input operand dag.
//   Operands - dag whose value the selection pattern sets into $result.
class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
                  dag ins, dag Operands>
  : NVPTXInst<(outs regclass:$result), ins, AsmStr,
              [(set regclass:$result, Operands)]>,
    Requires<Preds>;
1699
1700// Define instruction variants for all addressing modes.
multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Register form of $b, with $src taken from Int32Regs and then Int64Regs.
  // Both definitions carry AddedComplexity = 1.
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, regclass:$b),
                    (Intr Int32Regs:$src, regclass:$b)>;
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, regclass:$b),
                    (Intr Int64Regs:$src, regclass:$b)>;

  // Immediate form of $b.  Operand types cannot be inferred from an
  // Intrinsic (unlike from an Instruction), so the immediate is pinned to
  // ImmTy with an explicit cast.
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
}
1723
// Three-operand counterpart of ATOM2P_impl: emits every register/immediate
// combination of $b and $c, each for an Int32Regs and an Int64Regs $src.
multiclass ATOM3P_impl<string AsmStr,  Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // $b and $c both in registers: AddedComplexity = 2.
  let AddedComplexity = 2 in
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, regclass:$b, regclass:$c),
                    (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
  let AddedComplexity = 2 in
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, regclass:$b, regclass:$c),
                    (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;

  // Exactly one of $b / $c immediate: AddedComplexity = 1.
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b, regclass:$c),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b, regclass:$c),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, regclass:$b, ImmType:$c),
                    (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, regclass:$b, ImmType:$c),
                    (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;

  // Both $b and $c immediate: no AddedComplexity override.
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}
1758
// Constructs intrinsic name and instruction asm strings.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // Optional ".space" and ".scope" qualifiers of the mnemonic: a "gen" space
  // and a "gpu" scope contribute nothing to the assembly text.
  defvar SpaceSuffix = !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr);
  defvar ScopeSuffix = !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr);
  defvar AsmText = "atom" # SpaceSuffix # ScopeSuffix
                   # "." # OpStr # "." # TypeStr
                   # " \t$result, [$src], $b;";

  // Intrinsic record name: int_nvvm_atomic_{op}_{space}_{type}[_{scope}].
  // Here the scope part is dropped only when ScopeStr is empty (not when it
  // is "gpu", unlike the assembly text above).
  defvar IntrName = "int_nvvm_atomic_" # OpStr
                    # "_" # SpaceStr # "_" # IntTypeStr
                    # !if(!empty(ScopeStr), "", "_" # ScopeStr);

  defm : ATOM2P_impl<AsmText, !cast<Intrinsic>(IntrName),
                     regclass, ImmType, Imm, ImmTy, Preds>;
}
// Three-operand variant: builds the asm string (with a trailing $c operand)
// and looks up the matching intrinsic record, then defers to ATOM3P_impl.
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // A "gen" space and a "gpu" scope add no qualifier to the mnemonic.
  defvar SpaceSuffix = !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr);
  defvar ScopeSuffix = !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr);
  defvar AsmText = "atom" # SpaceSuffix # ScopeSuffix
                   # "." # OpStr # "." # TypeStr
                   # " \t$result, [$src], $b, $c;";

  // int_nvvm_atomic_{op}_{space}_{type}[_{scope}]; the scope part is omitted
  // only for an empty ScopeStr.
  defvar IntrName = "int_nvvm_atomic_" # OpStr
                    # "_" # SpaceStr # "_" # IntTypeStr
                    # !if(!empty(ScopeStr), "", "_" # ScopeStr);

  defm : ATOM3P_impl<AsmText, !cast<Intrinsic>(IntrName),
                     regclass, ImmType, Imm, ImmTy, Preds>;
}
1788
1789// Constructs variants for different address spaces.
1790// For now we only need variants for generic space pointers.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
  // Single instantiation: the "gen" space string, with every other argument
  // passed straight through.
  defm _gen_
    : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                  regclass, ImmType, Imm, ImmTy, Preds>;
}
// Three-operand counterpart of ATOM2A_impl; likewise only the "gen" space is
// instantiated.
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
  defm _gen_
    : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                  regclass, ImmType, Imm, ImmTy, Preds>;
}
1803
1804// Constructs variants for different scopes of atomic op.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // Every scoped variant requires hasAtomScope on top of the caller's
  // predicates.
  defvar ScopedPreds = !listconcat(Preds, [hasAtomScope]);

  // Only "cta" and "sys" are generated.  The default .gpu scope is currently
  // covered by the existing atomics that carry no explicit scope.
  defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regclass, ImmType, Imm, ImmTy, ScopedPreds>;
  defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regclass, ImmType, Imm, ImmTy, ScopedPreds>;
}
// Three-operand counterpart of ATOM2S_impl: generates the "cta" and "sys"
// scoped variants.
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
           NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
           list<Predicate> Preds> {
  // hasAtomScope is appended to the caller's predicates for both scopes.
  defvar ScopedPreds = !listconcat(Preds, [hasAtomScope]);

  // ".gpu"-scoped atomics are not defined here: they do the same thing as
  // the regular, non-scoped atomics defined elsewhere.
  defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regclass, ImmType, Imm, ImmTy, ScopedPreds>;
  defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regclass, ImmType, Imm, ImmTy, ScopedPreds>;
}
1829
1830// atom.add
multiclass ATOM2_add_impl<string OpStr> {
  // Integer types use the "i" intrinsic type tag.
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64",
                          Int64Regs, i64imm, imm, i64, []>;
  // Floating-point types use the "f" tag; f64 additionally requires
  // hasAtomAddF64.
  defm _f32 : ATOM2S_impl<OpStr, "f", "f32",
                          Float32Regs, f32imm, fpimm, f32, []>;
  defm _f64 : ATOM2S_impl<OpStr, "f", "f64",
                          Float64Regs, f64imm, fpimm, f64, [hasAtomAddF64]>;
}
1840
1841// atom.{and,or,xor}
multiclass ATOM2_bitwise_impl<string OpStr> {
  // 32-bit form has no extra predicate; the 64-bit form requires
  // hasAtomBitwise64.
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, [hasAtomBitwise64]>;
}
1847
1848// atom.exch
multiclass ATOM2_exch_impl<string OpStr> {
  // Untyped 32- and 64-bit forms; neither adds a predicate of its own.
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, []>;
}
1853
1854// atom.{min,max}
multiclass ATOM2_minmax_impl<string OpStr> {
  // 32-bit signed and unsigned forms.
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
  // 64-bit signed and unsigned forms, both requiring hasAtomMinMax64.
  defm _s64 : ATOM2S_impl<OpStr, "i", "s64",
                          Int64Regs, i64imm, imm, i64, [hasAtomMinMax64]>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64",
                          Int64Regs, i64imm, imm, i64, [hasAtomMinMax64]>;
}
1863
1864// atom.{inc,dec}
multiclass ATOM2_incdec_impl<string OpStr> {
  // Only the u32 form is instantiated.
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
}
1868
1869// atom.cas
multiclass ATOM3_cas_impl<string OpStr> {
  // Three-operand (ATOM3S_impl) untyped 32- and 64-bit forms.
  defm _b32 : ATOM3S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM3S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, []>;
}
1874
// Scoped atomics: one instantiation per operation.  The string argument is
// the OpStr used in both the mnemonic and the intrinsic name.
defm INT_PTX_SATOM_ADD  : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND  : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS  : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC  : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH : ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC  : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX  : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN  : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR   : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR  : ATOM2_bitwise_impl<"xor">;
1885
1886//-----------------------------------
1887// Support for ldu on sm_20 or later
1888//-----------------------------------
1889
1890// Don't annotate ldu instructions as mayLoad, as they load from memory that is
1891// read-only in a kernel.
1892
1893// Scalar
1894
// Scalar ldu.global variants, one per form of the $src operand.  All of them
// share the same assembly text, carry no selection pattern, and require
// hasLDU.
//   TyStr    - text appended after "ldu.global." (type suffix plus operands).
//   regclass - register class of the loaded $result.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  defvar AsmText = "ldu.global." # TyStr;

  // $src in a 32-bit / 64-bit register.
  def areg   : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                         AsmText, []>,
               Requires<[hasLDU]>;
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         AsmText, []>,
               Requires<[hasLDU]>;
  // $src as an imemAny operand.
  def avar   : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                         AsmText, []>,
               Requires<[hasLDU]>;
  // $src as a MEMri / MEMri64 operand.
  def ari    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                         AsmText, []>,
               Requires<[hasLDU]>;
  def ari64  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                         AsmText, []>,
               Requires<[hasLDU]>;
}
1912
// Scalar ldu instantiations.  Note that i8 is loaded into Int16Regs, and the
// f16 / f16x2 forms use the untyped b16 / b32 suffixes.
defm INT_PTX_LDU_GLOBAL_i8
  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16
  : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f16
  : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDU_GLOBAL_f16x2
  : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_GLOBAL_f32
  : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64
  : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
// Pointer results use the same text and register classes as i32 / i64.
defm INT_PTX_LDU_GLOBAL_p32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_p64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1923
1924// vector
1925
1926// Elementized vector ldu
// Two-element vector ldu.global instructions. Each def produces two results
// ($dst1, $dst2) of class regclass; TyStr is appended to "ldu.global.". One
// def per address operand kind; all pattern lists are empty.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int32Regs:$src),
                "ldu.global." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int64Regs:$src),
                "ldu.global." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins MEMri:$src),
                "ldu.global." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins MEMri64:$src),
                "ldu.global." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins imemAny:$src),
                "ldu.global." # TyStr, []>;
}
1944
// Four-element vector ldu.global instructions. Each def produces four results
// ($dst1..$dst4) of class regclass; TyStr is appended to "ldu.global.". One
// def per address operand kind; all pattern lists are empty.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int32Regs:$src),
                "ldu.global." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int64Regs:$src),
                "ldu.global." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri:$src),
                "ldu.global." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri64:$src),
                "ldu.global." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins imemAny:$src),
                "ldu.global." # TyStr, []>;
}
1962
// Two-element vector ldu.global instantiations.
defm INT_PTX_LDU_G_v2i8_ELE
  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f16_ELE
  : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDU_G_v2f16x2_ELE
  : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// Four-element vector ldu.global instantiations.
defm INT_PTX_LDU_G_v4i8_ELE
  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE
  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE
  : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE
  : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;
1996
1997
1998//-----------------------------------
1999// Support for ldg on sm_35 or later
2000//-----------------------------------
2001
2002// Don't annotate ld.global.nc as mayLoad, because these loads go through the
2003// non-coherent texture cache, and therefore the values read must be read-only
2004// during the lifetime of the kernel.
2005
// Scalar ld.global.nc instructions.
//   TyStr    - text appended to "ld.global.nc." (type suffix plus operands).
//   regclass - register class of the single result.
// One def is produced per address operand kind: Int32Regs (areg), Int64Regs
// (areg64), imemAny (avar), MEMri (ari) and MEMri64 (ari64). Every def has an
// empty pattern list and is predicated on hasLDG.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  def areg
    : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def areg64
    : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def avar
    : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def ari
    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def ari64
    : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
}
2023
// Scalar ld.global.nc instantiations. The i8 form uses the u8 type suffix with
// a 16-bit result register class; the p32/p64 forms reuse the u32/u64
// suffixes with the integer register classes.
defm INT_PTX_LDG_GLOBAL_i8  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f16 : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDG_GLOBAL_f16x2 : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDG_GLOBAL_p32 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_p64 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
2044
2045// vector
2046
2047// Elementized vector ldg
// Two-element vector ld.global.nc instructions. Each def produces two results
// ($dst1, $dst2) of class regclass; TyStr is appended to "ld.global.nc.". One
// def per address operand kind; all pattern lists are empty.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>;
}
2065
// Four-element vector ld.global.nc instructions. Each def produces four
// results ($dst1..$dst4) of class regclass; TyStr is appended to
// "ld.global.nc.". One def per address operand kind; all pattern lists are
// empty.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>;
}
2083
2084// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Two-element vector ld.global.nc instantiations.
defm INT_PTX_LDG_G_v2i8_ELE
  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f16_ELE
  : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDG_G_v2f16x2_ELE
  : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// Four-element vector ld.global.nc instantiations.
defm INT_PTX_LDG_G_v4i8_ELE
  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v4f16_ELE
  : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDG_G_v4f16x2_ELE
  : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;
2113
2114
// cvta.<Str> instructions selected for intrinsic Intrin.
//   _yes      - 32-bit source, 32-bit result.
//   _yes_64   - 64-bit source, 64-bit result.
//   _yes_6432 - 32-bit source, 64-bit result: the source is first widened
//               with cvt.u64.u32 into a scoped temporary, then converted.
//               Only available under useShortPtr.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_6432
    : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
                "{{ .reg .b64 %tmp;\n\t"
                # "  cvt.u64.u32 \t%tmp, $src;\n\t"
                # "  cvta." # Str # ".u64 \t$result, %tmp; }}",
                [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
      Requires<[useShortPtr]>;
}
2129
// cvta.to.<Str> instructions selected for intrinsic Intrin.
//   _yes      - 32-bit source, 32-bit result.
//   _yes_64   - 64-bit source, 64-bit result.
//   _yes_3264 - 64-bit source, 32-bit result: the conversion is done at 64
//               bits into a scoped temporary, then narrowed with
//               cvt.u32.u64. Only available under useShortPtr.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta.to." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta.to." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_3264
    : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
                "{{ .reg .b64 %tmp;\n\t"
                # "  cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
                # "  cvt.u32.u64 \t$result, %tmp; }}",
                [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
      Requires<[useShortPtr]>;
}
2144
// Conversions from a specific address space to generic (cvta.<space>).
defm cvta_local  : NG_TO_G<"local",  int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const  : NG_TO_G<"const",  int_nvvm_ptr_constant_to_gen>;

// Conversions from generic to a specific address space (cvta.to.<space>).
defm cvta_to_local  : G_TO_NG<"local",  int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const  : G_TO_NG<"const",  int_nvvm_ptr_gen_to_constant>;
2154
2155
2156// nvvm.ptr.gen.to.param
// Emitted as a plain register move; 32-bit form.
def nvvm_ptr_gen_to_param
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              "mov.u32 \t$result, $src;",
              [(set Int32Regs:$result,
                 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
// 64-bit form.
def nvvm_ptr_gen_to_param_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              "mov.u64 \t$result, $src;",
              [(set Int64Regs:$result,
                 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
2167
2168
// nvvm.move intrinsics
// Each nvvm.move variant is selected to a single register-to-register mov of
// the matching width/type: .b16/.b32/.b64 for integers, .f32/.f64 for floating
// point, and .u32/.u64 for pointers.
def nvvm_move_i16
  : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
              "mov.b16 \t$r, $s;",
              [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.b32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.b64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float
  : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
              "mov.f32 \t$r, $s;",
              [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double
  : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
              "mov.f64 \t$r, $s;",
              [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;
def nvvm_move_ptr32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.u32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.u64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;
2198
2199// @TODO: Are these actually needed, or will we always just see symbols
2200// copied to registers first?
2201/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2202                             "mov.u32 \t$r, $s;",
2203                             [(set Int32Regs:$r,
2204                             (int_nvvm_move_ptr texternalsym:$s))]>;
2205def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2206                             "mov.u64 \t$r, $s;",
2207                             [(set Int64Regs:$r,
2208                             (int_nvvm_move_ptr texternalsym:$s))]>;*/
2209
2210
2211// MoveParam        %r1, param
2212// ptr_local_to_gen %r2, %r1
2213// ptr_gen_to_local %r3, %r2
2214// ->
2215// mov %r1, param
2216
2217// @TODO: Revisit this.  There is a type
2218// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
2219// instructions are not currently defined. However, we can use the ptr
2220// variants and the asm printer will do the right thing.
// Fold gen_to_local(local_to_gen(MoveParam sym)) into a single pointer move
// of the symbol, for 64-bit and 32-bit results respectively.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;
2227
// Moves an imem operand into a 64-bit register with mov.u64. The pattern list
// is empty, so this def is not matched by any pattern declared here.
def texsurf_handles
  : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
              "mov.u64 \t$result, $src;", []>;
2231
2232//-----------------------------------
2233// Compiler Error Warn
2234// - Just ignore them in codegen
2235//-----------------------------------
2236
// The warn/error intrinsics are matched for 32-bit and 64-bit operands and
// emit only a comment line in the assembly output.
def INT_NVVM_COMPILER_WARN_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int64Regs:$a)]>;
2249
2250
2251// isspacep
2252
// isspacep.<space> instructions: each takes a 32-bit or 64-bit register
// operand $a and writes a predicate (Int1Regs) result $d, matching the
// corresponding int_nvvm_isspacep_<space> intrinsic.

// Only the const variants carry a predicate (hasPTX31).
def ISSPACEP_CONST_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.const \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
    Requires<[hasPTX31]>;
def ISSPACEP_CONST_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.const \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
    Requires<[hasPTX31]>;
// global
def ISSPACEP_GLOBAL_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.global \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
def ISSPACEP_GLOBAL_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.global \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
// local
def ISSPACEP_LOCAL_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.local \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
def ISSPACEP_LOCAL_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.local \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
// shared
def ISSPACEP_SHARED_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.shared \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
def ISSPACEP_SHARED_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.shared \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
2287
2288
2289// Special register reads
// Copies a SpecialRegs operand into a 32-bit register with mov.b32. The
// instruction itself has no pattern; the Pat records below select it.
def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
                            (ins SpecialRegs:$r),
                            "mov.b32 \t$d, $r;", []>;

// One pattern per environment register: int_nvvm_read_ptx_sreg_envregN is
// selected to MOV_SPECIAL with ENVREGN as its operand, for N = 0..31.
def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2326
2327
2328// rotate builtin support
2329
// 32-bit rotate with hardware support (hasHWROT32): shf.l.wrap.b32 with $src
// supplied as both data operands. Immediate and register amount forms.
def ROTATE_B32_HW_IMM
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
              [(set Int32Regs:$dst,
                 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
    Requires<[hasHWROT32]>;

def ROTATE_B32_HW_REG
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
              [(set Int32Regs:$dst,
                 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
    Requires<[hasHWROT32]>;

// 32-bit rotate without hardware support (noHWROT32): defer to the software
// rotate instructions. The immediate form passes both $amt and
// SUB_FRM_32($amt).
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
      Requires<[noHWROT32]>;

def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
          (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
2353
// Extract one 32-bit half of a 64-bit register by unpacking it with mov.b64
// into a two-element vector; the unwanted element goes to a scoped %dummy
// register declared inside the emitted braces.
let hasSideEffects = false in {
  // Keeps the first vector element in $dst.
  def GET_LO_INT64
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
                "{{\n\t"
                # ".reg .b32 %dummy;\n\t"
                # "mov.b64 \t{$dst,%dummy}, $src;\n\t"
                # "}}",
                []>;

  // Keeps the second vector element in $dst.
  def GET_HI_INT64
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
                "{{\n\t"
                # ".reg .b32 %dummy;\n\t"
                # "mov.b64 \t{%dummy,$dst}, $src;\n\t"
                # "}}",
                []>;
}
2369
// Packs two 32-bit registers into one 64-bit register with mov.b64; $lo is
// the first vector element and $hi the second.
let hasSideEffects = false in
def PACK_TWO_INT32
  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
              "mov.b64 \t$dst, {{$lo, $hi}};", []>;
2375
// swap_lo_hi: repack the source with its two halves exchanged (the high half
// becomes $lo of the pack, the low half becomes $hi).
def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
          (PACK_TWO_INT32
             (GET_HI_INT64 Int64Regs:$src),
             (GET_LO_INT64 Int64Regs:$src))>;
2379
2380// Funnel shift, requires >= sm_32.  Does not trap if amt is out of range, so
2381// no side effects.
// shf.{l,r}.wrap.b32 with separate $lo/$hi data operands and either an
// immediate or a register shift amount. No selection patterns; all four defs
// are predicated on hasHWROT32.
let hasSideEffects = false in {
  def SHF_L_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;

  def SHF_L_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;
}
2407
2408// HW version of rotate 64
// 64-bit rotate left under hasHWROT32: split the source into halves, run two
// shf.l.wrap.b32 operations with the halves in opposite roles, and repack.
// Immediate amount form.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
             (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                 (GET_LO_INT64 Int64Regs:$src),
                                 imm:$amt),
             (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                 (GET_HI_INT64 Int64Regs:$src),
                                 imm:$amt))>,
      Requires<[hasHWROT32]>;

// Register amount form.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
             (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                 (GET_LO_INT64 Int64Regs:$src),
                                 Int32Regs:$amt),
             (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                 (GET_HI_INT64 Int64Regs:$src),
                                 Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;

// 64-bit rotate right under hasHWROT32: same scheme using shf.r.wrap.b32.
// Immediate amount form.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
             (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                 (GET_HI_INT64 Int64Regs:$src),
                                 imm:$amt),
             (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                 (GET_LO_INT64 Int64Regs:$src),
                                 imm:$amt))>,
      Requires<[hasHWROT32]>;

// Register amount form.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
             (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                 (GET_HI_INT64 Int64Regs:$src),
                                 Int32Regs:$amt),
             (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                 (GET_LO_INT64 Int64Regs:$src),
                                 Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;
2441
2442// SW version of rotate 64
// 64-bit rotates under noHWROT32 defer to the software rotate instructions.
//
// The immediate forms hand ROT64imm_sw the requested amount together with its
// complement. Because the value being rotated is 64 bits wide, the complement
// must come from SUB_FRM_64 in both directions; using SUB_FRM_32 for the
// left-rotate form would pair the amount with the 32-bit complement.
// Rotate left, immediate amount: ($amt, complement).
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
      Requires<[noHWROT32]>;
// Rotate left, register amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
// Rotate right, immediate amount: same instruction with the two amounts
// swapped, i.e. (complement, $amt).
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;
// Rotate right, register amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
2455
2456
2457//-----------------------------------
2458// Texture Intrinsics
2459//-----------------------------------
2460
2461// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2462// also defined in NVPTXReplaceImageHandles.cpp
2463
2464// texmode_independent
2465let IsTex = true, IsTexModeUnified = false in {
2466// Texture fetch instructions using handles
// 1D texture fetches. Naming is TEX_1D_<result type>_<coordinate type>, which
// mirrors the two type suffixes in each asm string. Every def returns four
// components ($r, $g, $b, $a) and takes $t and $s as 64-bit register operands
// followed by the coordinate $x. The _LEVEL forms add a $lod operand and emit
// tex.level; the _GRAD forms add $gradx/$grady and emit tex.grad. All pattern
// lists are empty.

// Float32 results.
def TEX_1D_F32_S32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
              "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
              []>;
def TEX_1D_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
              "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
              []>;
def TEX_1D_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
              "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x\\}], $lod;",
              []>;
def TEX_1D_F32_F32_GRAD
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;
// Results in Int32Regs, emitted with the .s32 result type.
def TEX_1D_S32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
              "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
              []>;
def TEX_1D_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
              "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
              []>;
def TEX_1D_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
                   Float32Regs:$lod),
              "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x\\}], $lod;",
              []>;
def TEX_1D_S32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;
// Results in Int32Regs, emitted with the .u32 result type.
def TEX_1D_U32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
              "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
              []>;
def TEX_1D_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
              "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
              []>;
def TEX_1D_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
                   Float32Regs:$lod),
              "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x\\}], $lod;",
              []>;
def TEX_1D_U32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;
2550
// Prints "tex.a1d.v4.f32.s32": four Float32Regs results; the Int32Regs
// operands $l and $x are printed together in one braced group, $l first.
def TEX_1D_ARRAY_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.f32.s32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
    []>;
// Prints "tex.a1d.v4.f32.f32": four Float32Regs results; Int32Regs $l and
// Float32Regs $x share one braced group, $l first.
def TEX_1D_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
    []>;
// Prints "tex.level.a1d.v4.f32.f32": Float32Regs results; the Float32Regs
// operand $lod follows the bracketed [$t, $s, {$l, $x}] group.
def TEX_1D_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$lod),
    "tex.level.a1d.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}], $lod;",
    []>;
// Prints "tex.grad.a1d.v4.f32.f32": Float32Regs results; $gradx and $grady
// (Float32Regs) are printed as two trailing braced groups.
def TEX_1D_ARRAY_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}], "
    "\\{$gradx\\}, \\{$grady\\};",
    []>;
// Prints "tex.a1d.v4.s32.s32": four Int32Regs results; the Int32Regs
// operands $l and $x are printed together in one braced group, $l first.
def TEX_1D_ARRAY_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.s32.s32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
    []>;
// Prints "tex.a1d.v4.s32.f32": four Int32Regs results; Int32Regs $l and
// Float32Regs $x share one braced group, $l first.
def TEX_1D_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.s32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
    []>;
// Prints "tex.level.a1d.v4.s32.f32": Int32Regs results; the Float32Regs
// operand $lod follows the bracketed [$t, $s, {$l, $x}] group.
def TEX_1D_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$lod),
    "tex.level.a1d.v4.s32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}], $lod;",
    []>;
// Prints "tex.grad.a1d.v4.s32.f32": Int32Regs results; $gradx and $grady
// (Float32Regs) are printed as two trailing braced groups.
def TEX_1D_ARRAY_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.s32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}], "
    "\\{$gradx\\}, \\{$grady\\};",
    []>;
// Prints "tex.a1d.v4.u32.s32": four Int32Regs results; the Int32Regs
// operands $l and $x are printed together in one braced group, $l first.
def TEX_1D_ARRAY_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.u32.s32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
    []>;
// Prints "tex.a1d.v4.u32.f32": four Int32Regs results; Int32Regs $l and
// Float32Regs $x share one braced group, $l first.
def TEX_1D_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.u32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
    []>;
// Prints "tex.level.a1d.v4.u32.f32": Int32Regs results; the Float32Regs
// operand $lod follows the bracketed [$t, $s, {$l, $x}] group.
def TEX_1D_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$lod),
    "tex.level.a1d.v4.u32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}], $lod;",
    []>;
// Prints "tex.grad.a1d.v4.u32.f32": Int32Regs results; $gradx and $grady
// (Float32Regs) are printed as two trailing braced groups.
def TEX_1D_ARRAY_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.u32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}], "
    "\\{$gradx\\}, \\{$grady\\};",
    []>;
2641
// Prints "tex.2d.v4.f32.s32": four Float32Regs results; the Int32Regs
// operands $x and $y are printed as one braced pair.
def TEX_2D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "tex.2d.v4.f32.s32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
    []>;
// Prints "tex.2d.v4.f32.f32": four Float32Regs results; the Float32Regs
// operands $x and $y are printed as one braced pair.
def TEX_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tex.2d.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
    []>;
// Prints "tex.level.2d.v4.f32.f32": Float32Regs results; the Float32Regs
// operand $lod follows the bracketed [$t, $s, {$x, $y}] group.
def TEX_2D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$lod),
    "tex.level.2d.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}], $lod;",
    []>;
// Prints "tex.grad.2d.v4.f32.f32": Float32Regs results; the four Float32Regs
// gradient operands are printed as {$gradx0, $gradx1} and {$grady0, $grady1}.
def TEX_2D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.2d.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}], "
    "\\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
    []>;
// Prints "tex.2d.v4.s32.s32": four Int32Regs results; the Int32Regs
// operands $x and $y are printed as one braced pair.
def TEX_2D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "tex.2d.v4.s32.s32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
    []>;
// Prints "tex.2d.v4.s32.f32": four Int32Regs results; the Float32Regs
// operands $x and $y are printed as one braced pair.
def TEX_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tex.2d.v4.s32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
    []>;
// Prints "tex.level.2d.v4.s32.f32": Int32Regs results; the Float32Regs
// operand $lod follows the bracketed [$t, $s, {$x, $y}] group.
def TEX_2D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$lod),
    "tex.level.2d.v4.s32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}], $lod;",
    []>;
// Prints "tex.grad.2d.v4.s32.f32": Int32Regs results; the four Float32Regs
// gradient operands are printed as {$gradx0, $gradx1} and {$grady0, $grady1}.
def TEX_2D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.2d.v4.s32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}], "
    "\\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
    []>;
// Prints "tex.2d.v4.u32.s32": four Int32Regs results; the Int32Regs
// operands $x and $y are printed as one braced pair.
def TEX_2D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "tex.2d.v4.u32.s32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
    []>;
// Prints "tex.2d.v4.u32.f32": four Int32Regs results; the Float32Regs
// operands $x and $y are printed as one braced pair.
def TEX_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tex.2d.v4.u32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
    []>;
// Prints "tex.level.2d.v4.u32.f32": Int32Regs results; the Float32Regs
// operand $lod follows the bracketed [$t, $s, {$x, $y}] group.
def TEX_2D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$lod),
    "tex.level.2d.v4.u32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}], $lod;",
    []>;
// Prints "tex.grad.2d.v4.u32.f32": Int32Regs results; the four Float32Regs
// gradient operands are printed as {$gradx0, $gradx1} and {$grady0, $grady1}.
def TEX_2D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.2d.v4.u32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}], "
    "\\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
    []>;
2738
// Prints "tex.a2d.v4.f32.s32": four Float32Regs results; Int32Regs operands
// $l, $x and $y are printed in one braced group.
// NOTE(review): $y is printed twice so the group has four entries; presumably
// padding required by PTX - confirm against the PTX ISA before changing.
def TEX_2D_ARRAY_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "tex.a2d.v4.f32.s32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $y\\}];",
    []>;
// Prints "tex.a2d.v4.f32.f32": four Float32Regs results; Int32Regs $l and
// Float32Regs $x, $y are printed in one braced group.
// NOTE(review): $y is printed twice so the group has four entries; presumably
// padding required by PTX - confirm against the PTX ISA before changing.
def TEX_2D_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Int32Regs:$l, Float32Regs:$x, Float32Regs:$y),
    "tex.a2d.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $y\\}];",
    []>;
// Prints "tex.level.a2d.v4.f32.f32": Float32Regs results; the Float32Regs
// operand $lod follows the bracketed group.
// NOTE(review): $y is printed twice so the braced group has four entries;
// presumably padding required by PTX - confirm before changing.
def TEX_2D_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$lod),
    "tex.level.a2d.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
    []>;
// Prints "tex.grad.a2d.v4.f32.f32": Float32Regs results; gradient operands
// are printed as {$gradx0, $gradx1} and {$grady0, $grady1}.
// NOTE(review): $y is printed twice so the braced group has four entries;
// presumably padding required by PTX - confirm before changing.
def TEX_2D_ARRAY_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $y\\}], "
    "\\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
    []>;
// Prints "tex.a2d.v4.s32.s32": four Int32Regs results; Int32Regs operands
// $l, $x and $y are printed in one braced group.
// NOTE(review): $y is printed twice so the group has four entries; presumably
// padding required by PTX - confirm against the PTX ISA before changing.
def TEX_2D_ARRAY_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "tex.a2d.v4.s32.s32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $y\\}];",
    []>;
// Prints "tex.a2d.v4.s32.f32": four Int32Regs results; Int32Regs $l and
// Float32Regs $x, $y are printed in one braced group.
// NOTE(review): $y is printed twice so the group has four entries; presumably
// padding required by PTX - confirm against the PTX ISA before changing.
def TEX_2D_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Int32Regs:$l, Float32Regs:$x, Float32Regs:$y),
    "tex.a2d.v4.s32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $y\\}];",
    []>;
// Prints "tex.level.a2d.v4.s32.f32": Int32Regs results; the Float32Regs
// operand $lod follows the bracketed group.
// NOTE(review): $y is printed twice so the braced group has four entries;
// presumably padding required by PTX - confirm before changing.
def TEX_2D_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$lod),
    "tex.level.a2d.v4.s32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
    []>;
// Prints "tex.grad.a2d.v4.s32.f32": Int32Regs results; gradient operands
// are printed as {$gradx0, $gradx1} and {$grady0, $grady1}.
// NOTE(review): $y is printed twice so the braced group has four entries;
// presumably padding required by PTX - confirm before changing.
def TEX_2D_ARRAY_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.s32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $y\\}], "
    "\\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
    []>;
// Prints "tex.a2d.v4.u32.s32": four Int32Regs results; Int32Regs operands
// $l, $x and $y are printed in one braced group.
// NOTE(review): $y is printed twice so the group has four entries; presumably
// padding required by PTX - confirm against the PTX ISA before changing.
def TEX_2D_ARRAY_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "tex.a2d.v4.u32.s32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $y\\}];",
    []>;
// Prints "tex.a2d.v4.u32.f32": four Int32Regs results; Int32Regs $l and
// Float32Regs $x, $y are printed in one braced group.
// NOTE(review): $y is printed twice so the group has four entries; presumably
// padding required by PTX - confirm against the PTX ISA before changing.
def TEX_2D_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Int32Regs:$l, Float32Regs:$x, Float32Regs:$y),
    "tex.a2d.v4.u32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $y\\}];",
    []>;
// Prints "tex.level.a2d.v4.u32.f32": Int32Regs results; the Float32Regs
// operand $lod follows the bracketed group.
// NOTE(review): $y is printed twice so the braced group has four entries;
// presumably padding required by PTX - confirm before changing.
def TEX_2D_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$lod),
    "tex.level.a2d.v4.u32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
    []>;
// Prints "tex.grad.a2d.v4.u32.f32": Int32Regs results; gradient operands
// are printed as {$gradx0, $gradx1} and {$grady0, $grady1}.
// NOTE(review): $y is printed twice so the braced group has four entries;
// presumably padding required by PTX - confirm before changing.
def TEX_2D_ARRAY_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.u32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $y\\}], "
    "\\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
    []>;
2843
// Prints "tex.3d.v4.f32.s32": four Float32Regs results; Int32Regs operands
// $x, $y and $z are printed in one braced group.
// NOTE(review): $z is printed twice so the group has four entries; presumably
// padding required by PTX - confirm against the PTX ISA before changing.
def TEX_3D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "tex.3d.v4.f32.s32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;
// Prints "tex.3d.v4.f32.f32": four Float32Regs results; Float32Regs operands
// $x, $y and $z are printed in one braced group.
// NOTE(review): $z is printed twice so the group has four entries; presumably
// padding required by PTX - confirm against the PTX ISA before changing.
def TEX_3D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.3d.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;
// Prints "tex.level.3d.v4.f32.f32": Float32Regs results; the Float32Regs
// operand $lod follows the bracketed group.
// NOTE(review): $z is printed twice so the braced group has four entries;
// presumably padding required by PTX - confirm before changing.
def TEX_3D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.3d.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
// Prints "tex.grad.3d.v4.f32.f32": Float32Regs results; six Float32Regs
// gradient operands are printed as two trailing braced groups.
// NOTE(review): $z, $gradx2 and $grady2 are each printed twice so every
// braced group has four entries; presumably PTX padding - confirm.
def TEX_3D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
         Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;
// Prints "tex.3d.v4.s32.s32": four Int32Regs results; Int32Regs operands
// $x, $y and $z are printed in one braced group.
// NOTE(review): $z is printed twice so the group has four entries; presumably
// padding required by PTX - confirm against the PTX ISA before changing.
def TEX_3D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "tex.3d.v4.s32.s32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;
// Prints "tex.3d.v4.s32.f32": four Int32Regs results; Float32Regs operands
// $x, $y and $z are printed in one braced group.
// NOTE(review): $z is printed twice so the group has four entries; presumably
// padding required by PTX - confirm against the PTX ISA before changing.
def TEX_3D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.3d.v4.s32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;
// Prints "tex.level.3d.v4.s32.f32": Int32Regs results; the Float32Regs
// operand $lod follows the bracketed group.
// NOTE(review): $z is printed twice so the braced group has four entries;
// presumably padding required by PTX - confirm before changing.
def TEX_3D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.3d.v4.s32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
// Prints "tex.grad.3d.v4.s32.f32": Int32Regs results; six Float32Regs
// gradient operands are printed as two trailing braced groups.
// NOTE(review): $z, $gradx2 and $grady2 are each printed twice so every
// braced group has four entries; presumably PTX padding - confirm.
def TEX_3D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
         Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.s32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;
// Prints "tex.3d.v4.u32.s32": four Int32Regs results; Int32Regs operands
// $x, $y and $z are printed in one braced group.
// NOTE(review): $z is printed twice so the group has four entries; presumably
// padding required by PTX - confirm against the PTX ISA before changing.
def TEX_3D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "tex.3d.v4.u32.s32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;
// Prints "tex.3d.v4.u32.f32": four Int32Regs results; Float32Regs operands
// $x, $y and $z are printed in one braced group.
// NOTE(review): $z is printed twice so the group has four entries; presumably
// padding required by PTX - confirm against the PTX ISA before changing.
def TEX_3D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.3d.v4.u32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;
// Prints "tex.level.3d.v4.u32.f32": Int32Regs results; the Float32Regs
// operand $lod follows the bracketed group.
// NOTE(review): $z is printed twice so the braced group has four entries;
// presumably padding required by PTX - confirm before changing.
def TEX_3D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.3d.v4.u32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
// Prints "tex.grad.3d.v4.u32.f32": Int32Regs results; six Float32Regs
// gradient operands are printed as two trailing braced groups.
// NOTE(review): $z, $gradx2 and $grady2 are each printed twice so every
// braced group has four entries; presumably PTX padding - confirm.
def TEX_3D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
         Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.u32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;
2955
// Prints "tex.cube.v4.f32.f32": four Float32Regs results; Float32Regs
// operands $x, $y and $z are printed in one braced group.
// NOTE(review): $z is printed twice so the group has four entries; presumably
// padding required by PTX - confirm against the PTX ISA before changing.
def TEX_CUBE_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;
// Prints "tex.level.cube.v4.f32.f32": Float32Regs results; the Float32Regs
// operand $lod follows the bracketed group.
// NOTE(review): $z is printed twice so the braced group has four entries;
// presumably padding required by PTX - confirm before changing.
def TEX_CUBE_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
    "tex.level.cube.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
// Prints "tex.cube.v4.s32.f32": four Int32Regs results; Float32Regs
// operands $x, $y and $z are printed in one braced group.
// NOTE(review): $z is printed twice so the group has four entries; presumably
// padding required by PTX - confirm against the PTX ISA before changing.
def TEX_CUBE_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.s32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;
// Prints "tex.level.cube.v4.s32.f32": Int32Regs results; the Float32Regs
// operand $lod follows the bracketed group.
// NOTE(review): $z is printed twice so the braced group has four entries;
// presumably padding required by PTX - confirm before changing.
def TEX_CUBE_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
    "tex.level.cube.v4.s32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
// Prints "tex.cube.v4.u32.f32": four Int32Regs results; Float32Regs
// operands $x, $y and $z are printed in one braced group.
// NOTE(review): $z is printed twice so the group has four entries; presumably
// padding required by PTX - confirm against the PTX ISA before changing.
def TEX_CUBE_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.u32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;
// Prints "tex.level.cube.v4.u32.f32": Int32Regs results; the Float32Regs
// operand $lod follows the bracketed group.
// NOTE(review): $z is printed twice so the braced group has four entries;
// presumably padding required by PTX - confirm before changing.
def TEX_CUBE_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
    "tex.level.cube.v4.u32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
3007
// Prints "tex.acube.v4.f32.f32": four Float32Regs results; Int32Regs $l and
// Float32Regs $x, $y, $z are printed in one braced group, $l first.
def TEX_CUBE_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $z\\}];",
    []>;
// Prints "tex.level.acube.v4.f32.f32": Float32Regs results; the Float32Regs
// operand $lod follows the bracketed [$t, $s, {$l, $x, $y, $z}] group.
def TEX_CUBE_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
    "tex.level.acube.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
    []>;
// Prints "tex.acube.v4.s32.f32": four Int32Regs results; Int32Regs $l and
// Float32Regs $x, $y, $z are printed in one braced group, $l first.
def TEX_CUBE_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.s32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $z\\}];",
    []>;
// Prints "tex.level.acube.v4.s32.f32": Int32Regs results; the Float32Regs
// operand $lod follows the bracketed [$t, $s, {$l, $x, $y, $z}] group.
def TEX_CUBE_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
    "tex.level.acube.v4.s32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
    []>;
// Prints "tex.acube.v4.u32.f32": four Int32Regs results; Int32Regs $l and
// Float32Regs $x, $y, $z are printed in one braced group, $l first.
def TEX_CUBE_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.u32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $z\\}];",
    []>;
// Prints "tex.level.acube.v4.u32.f32": Int32Regs results; the Float32Regs
// operand $lod follows the bracketed [$t, $s, {$l, $x, $y, $z}] group.
def TEX_CUBE_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
    "tex.level.acube.v4.u32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
    []>;
3059
// Prints "tld4.r.2d.v4.f32.f32": four Float32Regs results ($v0..$v3) from
// Int64Regs $t and $s and the braced Float32Regs pair {$x, $y}.
def TLD4_R_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.f32.f32 \t"
    "\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
    []>;
// Prints "tld4.g.2d.v4.f32.f32": four Float32Regs results ($v0..$v3) from
// Int64Regs $t and $s and the braced Float32Regs pair {$x, $y}.
def TLD4_G_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.f32.f32 \t"
    "\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
    []>;
// Prints "tld4.b.2d.v4.f32.f32": four Float32Regs results ($v0..$v3) from
// Int64Regs $t and $s and the braced Float32Regs pair {$x, $y}.
def TLD4_B_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.f32.f32 \t"
    "\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
    []>;
// Prints "tld4.a.2d.v4.f32.f32": four Float32Regs results ($v0..$v3) from
// Int64Regs $t and $s and the braced Float32Regs pair {$x, $y}.
def TLD4_A_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.f32.f32 \t"
    "\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
    []>;
// Prints "tld4.r.2d.v4.s32.f32": four Int32Regs results ($v0..$v3) from
// Int64Regs $t and $s and the braced Float32Regs pair {$x, $y}.
def TLD4_R_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.s32.f32 \t"
    "\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
    []>;
// Prints "tld4.g.2d.v4.s32.f32": four Int32Regs results ($v0..$v3) from
// Int64Regs $t and $s and the braced Float32Regs pair {$x, $y}.
def TLD4_G_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.s32.f32 \t"
    "\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
    []>;
// Prints "tld4.b.2d.v4.s32.f32": four Int32Regs results ($v0..$v3) from
// Int64Regs $t and $s and the braced Float32Regs pair {$x, $y}.
def TLD4_B_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.s32.f32 \t"
    "\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
    []>;
// Prints "tld4.a.2d.v4.s32.f32": four Int32Regs results ($v0..$v3) from
// Int64Regs $t and $s and the braced Float32Regs pair {$x, $y}.
def TLD4_A_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.s32.f32 \t"
    "\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
    []>;
// Prints "tld4.r.2d.v4.u32.f32": four Int32Regs results ($v0..$v3) from
// Int64Regs $t and $s and the braced Float32Regs pair {$x, $y}.
def TLD4_R_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.u32.f32 \t"
    "\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
    []>;
// Prints "tld4.g.2d.v4.u32.f32": four Int32Regs results ($v0..$v3) from
// Int64Regs $t and $s and the braced Float32Regs pair {$x, $y}.
def TLD4_G_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.u32.f32 \t"
    "\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
    []>;
// Prints "tld4.b.2d.v4.u32.f32": four Int32Regs results ($v0..$v3) from
// Int64Regs $t and $s and the braced Float32Regs pair {$x, $y}.
def TLD4_B_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.u32.f32 \t"
    "\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
    []>;
// Prints "tld4.a.2d.v4.u32.f32": four Int32Regs results ($v0..$v3) from
// Int64Regs $t and $s and the braced Float32Regs pair {$x, $y}.
def TLD4_A_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.u32.f32 \t"
    "\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
    []>;
3144}
3145
3146
3147// texmode_unified
3148let IsTex = true, IsTexModeUnified = true in {
3149// Texture fetch instructions using handles
// Prints "tex.1d.v4.f32.s32" with a single Int64Regs operand $t (no $s) and
// the braced Int32Regs operand $x; yields four Float32Regs results.
def TEX_UNIFIED_1D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x),
    "tex.1d.v4.f32.s32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
// Prints "tex.1d.v4.f32.f32" with a single Int64Regs operand $t (no $s) and
// the braced Float32Regs operand $x; yields four Float32Regs results.
def TEX_UNIFIED_1D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x),
    "tex.1d.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
// Prints "tex.level.1d.v4.f32.f32" with a single Int64Regs operand $t;
// the Float32Regs operand $lod follows the bracketed [$t, {$x}] group.
def TEX_UNIFIED_1D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
    []>;
// Prints "tex.grad.1d.v4.f32.f32" with a single Int64Regs operand $t;
// Float32Regs $gradx and $grady are printed as two trailing braced groups.
def TEX_UNIFIED_1D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.f32.f32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
// Prints "tex.1d.v4.s32.s32" with a single Int64Regs operand $t (no $s) and
// the braced Int32Regs operand $x; yields four Int32Regs results.
def TEX_UNIFIED_1D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x),
    "tex.1d.v4.s32.s32 \t"
    "\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
3183def TEX_UNIFIED_1D_S32_F32
3184  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3185                    Int32Regs:$b, Int32Regs:$a),
3186              (ins Int64Regs:$t, Float32Regs:$x),
3187              "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3188              []>;
3189def TEX_UNIFIED_1D_S32_F32_LEVEL
3190  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3191                    Int32Regs:$b, Int32Regs:$a),
3192              (ins Int64Regs:$t, Float32Regs:$x,
3193                   Float32Regs:$lod),
3194              "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3195              "[$t, \\{$x\\}], $lod;",
3196              []>;
3197def TEX_UNIFIED_1D_S32_F32_GRAD
3198  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3199                    Int32Regs:$b, Int32Regs:$a),
3200              (ins Int64Regs:$t, Float32Regs:$x,
3201                   Float32Regs:$gradx, Float32Regs:$grady),
3202              "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3203              "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
3204              []>;
3205def TEX_UNIFIED_1D_U32_S32
3206  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3207                    Int32Regs:$b, Int32Regs:$a),
3208              (ins Int64Regs:$t, Int32Regs:$x),
3209              "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3210              []>;
3211def TEX_UNIFIED_1D_U32_F32
3212  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3213                    Int32Regs:$b, Int32Regs:$a),
3214              (ins Int64Regs:$t, Float32Regs:$x),
3215              "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3216              []>;
3217def TEX_UNIFIED_1D_U32_F32_LEVEL
3218  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3219                    Int32Regs:$b, Int32Regs:$a),
3220              (ins Int64Regs:$t, Float32Regs:$x,
3221                   Float32Regs:$lod),
3222              "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3223              "[$t, \\{$x\\}], $lod;",
3224              []>;
3225def TEX_UNIFIED_1D_U32_F32_GRAD
3226  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3227                    Int32Regs:$b, Int32Regs:$a),
3228              (ins Int64Regs:$t, Float32Regs:$x,
3229                   Float32Regs:$gradx, Float32Regs:$grady),
3230              "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3231              "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
3232              []>;
3233
// Unified-mode 1D array texture fetches (tex.a1d / tex.level.a1d /
// tex.grad.a1d).
//
// Every instruction in this group returns four 32-bit results ($r, $g, $b,
// $a) and reads the 64-bit texture operand $t, a 32-bit integer operand $l
// (emitted first in the coordinate vector) and one coordinate $x.  The
// helper classes hold the shared operand lists and assembly operand syntax:
//   inst    - PTX mnemonic, including the result/coordinate type suffixes
//   outtype - register class of the four results
//   intype  - register class of $x (plain fetch only; the level/grad forms
//             defined here always use Float32Regs)
// No selection patterns are attached (empty pattern list).

// Plain fetch: [$t, {$l, $x}].
class TEX_UNIFIED_1D_ARRAY_base<string inst, RegisterClass outtype,
                                RegisterClass intype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Int32Regs:$l, intype:$x),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$l, $x\\}];",
              []>;

// Fetch with an explicit level-of-detail operand $lod.
class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$lod),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$l, $x\\}], $lod;",
              []>;

// Fetch with explicit gradient operands $gradx / $grady.
class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;

def TEX_UNIFIED_1D_ARRAY_F32_S32
  : TEX_UNIFIED_1D_ARRAY_base<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
def TEX_UNIFIED_1D_ARRAY_F32_F32
  : TEX_UNIFIED_1D_ARRAY_base<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL_base<"tex.level.a1d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD_base<"tex.grad.a1d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_1D_ARRAY_S32_S32
  : TEX_UNIFIED_1D_ARRAY_base<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_1D_ARRAY_S32_F32
  : TEX_UNIFIED_1D_ARRAY_base<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL_base<"tex.level.a1d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD_base<"tex.grad.a1d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_1D_ARRAY_U32_S32
  : TEX_UNIFIED_1D_ARRAY_base<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_1D_ARRAY_U32_F32
  : TEX_UNIFIED_1D_ARRAY_base<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL_base<"tex.level.a1d.v4.u32.f32", Int32Regs>;
def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD_base<"tex.grad.a1d.v4.u32.f32", Int32Regs>;
3324
// Unified-mode 2D texture fetches (tex.2d / tex.level.2d / tex.grad.2d).
//
// Every instruction in this group returns four 32-bit results ($r, $g, $b,
// $a) and reads the 64-bit texture operand $t plus two coordinates $x, $y.
// The helper classes hold the shared operand lists and assembly operand
// syntax, so each def only supplies what actually varies:
//   inst    - PTX mnemonic, including the result/coordinate type suffixes
//   outtype - register class of the four results
//   intype  - register class of the coordinates (plain fetch only; the
//             level/grad forms defined here always use Float32Regs)
// No selection patterns are attached (empty pattern list).

// Plain fetch: [$t, {$x, $y}].
class TEX_UNIFIED_2D_base<string inst, RegisterClass outtype,
                          RegisterClass intype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, intype:$x, intype:$y),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$x, $y\\}];",
              []>;

// Fetch with an explicit level-of-detail operand $lod.
class TEX_UNIFIED_2D_LEVEL_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$lod),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$x, $y\\}], $lod;",
              []>;

// Fetch with explicit two-component gradient vectors.
class TEX_UNIFIED_2D_GRAD_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
                     "\\{$grady0, $grady1\\};",
              []>;

def TEX_UNIFIED_2D_F32_S32
  : TEX_UNIFIED_2D_base<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
def TEX_UNIFIED_2D_F32_F32
  : TEX_UNIFIED_2D_base<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
def TEX_UNIFIED_2D_F32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL_base<"tex.level.2d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_2D_F32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD_base<"tex.grad.2d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_2D_S32_S32
  : TEX_UNIFIED_2D_base<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_2D_S32_F32
  : TEX_UNIFIED_2D_base<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_2D_S32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL_base<"tex.level.2d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_2D_S32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD_base<"tex.grad.2d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_2D_U32_S32
  : TEX_UNIFIED_2D_base<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_2D_U32_F32
  : TEX_UNIFIED_2D_base<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_2D_U32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL_base<"tex.level.2d.v4.u32.f32", Int32Regs>;
def TEX_UNIFIED_2D_U32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD_base<"tex.grad.2d.v4.u32.f32", Int32Regs>;
3421
// Unified-mode 2D array texture fetches (tex.a2d / tex.level.a2d /
// tex.grad.a2d).
//
// Every instruction in this group returns four 32-bit results ($r, $g, $b,
// $a) and reads the 64-bit texture operand $t, a 32-bit integer operand $l
// (emitted first in the coordinate vector) and two coordinates $x, $y.
// The coordinate vector is printed with four elements, {$l, $x, $y, $y}:
// $y is deliberately repeated to fill the fourth slot (PTX takes a
// four-element vector here; see the PTX ISA description of `tex`).
// Helper-class parameters:
//   inst    - PTX mnemonic, including the result/coordinate type suffixes
//   outtype - register class of the four results
//   intype  - register class of $x/$y (plain fetch only; the level/grad
//             forms defined here always use Float32Regs)
// No selection patterns are attached (empty pattern list).

// Plain fetch: [$t, {$l, $x, $y, $y}].
class TEX_UNIFIED_2D_ARRAY_base<string inst, RegisterClass outtype,
                                RegisterClass intype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Int32Regs:$l, intype:$x, intype:$y),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$l, $x, $y, $y\\}];",
              []>;

// Fetch with an explicit level-of-detail operand $lod.
class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y, Float32Regs:$lod),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$l, $x, $y, $y\\}], $lod;",
              []>;

// Fetch with explicit two-component gradient vectors.
class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
                     "\\{$grady0, $grady1\\};",
              []>;

def TEX_UNIFIED_2D_ARRAY_F32_S32
  : TEX_UNIFIED_2D_ARRAY_base<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
def TEX_UNIFIED_2D_ARRAY_F32_F32
  : TEX_UNIFIED_2D_ARRAY_base<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL_base<"tex.level.a2d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD_base<"tex.grad.a2d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_2D_ARRAY_S32_S32
  : TEX_UNIFIED_2D_ARRAY_base<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_2D_ARRAY_S32_F32
  : TEX_UNIFIED_2D_ARRAY_base<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL_base<"tex.level.a2d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD_base<"tex.grad.a2d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_2D_ARRAY_U32_S32
  : TEX_UNIFIED_2D_ARRAY_base<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_2D_ARRAY_U32_F32
  : TEX_UNIFIED_2D_ARRAY_base<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL_base<"tex.level.a2d.v4.u32.f32", Int32Regs>;
def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD_base<"tex.grad.a2d.v4.u32.f32", Int32Regs>;
3526
// Unified-mode 3D texture fetches (tex.3d / tex.level.3d / tex.grad.3d).
//
// Every instruction in this group returns four 32-bit results ($r, $g, $b,
// $a) and reads the 64-bit texture operand $t plus three coordinates $x,
// $y, $z.  Coordinate and gradient vectors are printed with four elements;
// the last real component ($z, $gradx2, $grady2) is deliberately repeated
// to fill the fourth slot (PTX takes four-element vectors here; see the PTX
// ISA description of `tex`).
// Helper-class parameters:
//   inst    - PTX mnemonic, including the result/coordinate type suffixes
//   outtype - register class of the four results
//   intype  - register class of the coordinates (plain fetch only; the
//             level/grad forms defined here always use Float32Regs)
// No selection patterns are attached (empty pattern list).

// Plain fetch: [$t, {$x, $y, $z, $z}].
class TEX_UNIFIED_3D_base<string inst, RegisterClass outtype,
                          RegisterClass intype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, intype:$x, intype:$y, intype:$z),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$x, $y, $z, $z\\}];",
              []>;

// Fetch with an explicit level-of-detail operand $lod.
class TEX_UNIFIED_3D_LEVEL_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z, Float32Regs:$lod),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$x, $y, $z, $z\\}], $lod;",
              []>;

// Fetch with explicit three-component gradient vectors.
class TEX_UNIFIED_3D_GRAD_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$gradx2, Float32Regs:$grady0,
                   Float32Regs:$grady1, Float32Regs:$grady2),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$x, $y, $z, $z\\}], "
                     "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
                     "\\{$grady0, $grady1, $grady2, $grady2\\};",
              []>;

def TEX_UNIFIED_3D_F32_S32
  : TEX_UNIFIED_3D_base<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
def TEX_UNIFIED_3D_F32_F32
  : TEX_UNIFIED_3D_base<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
def TEX_UNIFIED_3D_F32_F32_LEVEL
  : TEX_UNIFIED_3D_LEVEL_base<"tex.level.3d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_3D_F32_F32_GRAD
  : TEX_UNIFIED_3D_GRAD_base<"tex.grad.3d.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_3D_S32_S32
  : TEX_UNIFIED_3D_base<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_3D_S32_F32
  : TEX_UNIFIED_3D_base<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_3D_S32_F32_LEVEL
  : TEX_UNIFIED_3D_LEVEL_base<"tex.level.3d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_3D_S32_F32_GRAD
  : TEX_UNIFIED_3D_GRAD_base<"tex.grad.3d.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_3D_U32_S32
  : TEX_UNIFIED_3D_base<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
def TEX_UNIFIED_3D_U32_F32
  : TEX_UNIFIED_3D_base<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
def TEX_UNIFIED_3D_U32_F32_LEVEL
  : TEX_UNIFIED_3D_LEVEL_base<"tex.level.3d.v4.u32.f32", Int32Regs>;
def TEX_UNIFIED_3D_U32_F32_GRAD
  : TEX_UNIFIED_3D_GRAD_base<"tex.grad.3d.v4.u32.f32", Int32Regs>;
3638
// Unified-mode cubemap texture fetches (tex.cube / tex.level.cube).
//
// Every instruction in this group returns four 32-bit results ($r, $g, $b,
// $a) and reads the 64-bit texture operand $t plus three f32 coordinates
// $x, $y, $z.  The coordinate vector is printed with four elements,
// {$x, $y, $z, $z}: $z is deliberately repeated to fill the fourth slot
// (PTX takes a four-element vector here; see the PTX ISA description of
// `tex`).
// Helper-class parameters:
//   inst    - PTX mnemonic, including the result/coordinate type suffixes
//   outtype - register class of the four results
// No selection patterns are attached (empty pattern list).

// Plain fetch: [$t, {$x, $y, $z, $z}].
class TEX_UNIFIED_CUBE_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$x, $y, $z, $z\\}];",
              []>;

// Fetch with an explicit level-of-detail operand $lod.
class TEX_UNIFIED_CUBE_LEVEL_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$x, $y, $z, $z\\}], $lod;",
              []>;

def TEX_UNIFIED_CUBE_F32_F32
  : TEX_UNIFIED_CUBE_base<"tex.cube.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_CUBE_F32_F32_LEVEL
  : TEX_UNIFIED_CUBE_LEVEL_base<"tex.level.cube.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_CUBE_S32_F32
  : TEX_UNIFIED_CUBE_base<"tex.cube.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_CUBE_S32_F32_LEVEL
  : TEX_UNIFIED_CUBE_LEVEL_base<"tex.level.cube.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_CUBE_U32_F32
  : TEX_UNIFIED_CUBE_base<"tex.cube.v4.u32.f32", Int32Regs>;
def TEX_UNIFIED_CUBE_U32_F32_LEVEL
  : TEX_UNIFIED_CUBE_LEVEL_base<"tex.level.cube.v4.u32.f32", Int32Regs>;
3690
// Unified-mode cubemap array texture fetches (tex.acube / tex.level.acube).
//
// Every instruction in this group returns four 32-bit results ($r, $g, $b,
// $a) and reads the 64-bit texture operand $t, a 32-bit integer operand $l
// (emitted first in the coordinate vector) and three f32 coordinates $x,
// $y, $z.  The coordinate vector {$l, $x, $y, $z} already has four
// elements, so no operand is repeated here.
// Helper-class parameters:
//   inst    - PTX mnemonic, including the result/coordinate type suffixes
//   outtype - register class of the four results
// No selection patterns are attached (empty pattern list).

// Plain fetch: [$t, {$l, $x, $y, $z}].
class TEX_UNIFIED_CUBE_ARRAY_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$l, $x, $y, $z\\}];",
              []>;

// Fetch with an explicit level-of-detail operand $lod.
class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              (ins Int64Regs:$t, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              inst # " \t\\{$r, $g, $b, $a\\}, "
                     "[$t, \\{$l, $x, $y, $z\\}], $lod;",
              []>;

def TEX_UNIFIED_CUBE_ARRAY_F32_F32
  : TEX_UNIFIED_CUBE_ARRAY_base<"tex.acube.v4.f32.f32", Float32Regs>;
def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
  : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<"tex.level.acube.v4.f32.f32",
                                      Float32Regs>;
def TEX_UNIFIED_CUBE_ARRAY_S32_F32
  : TEX_UNIFIED_CUBE_ARRAY_base<"tex.acube.v4.s32.f32", Int32Regs>;
def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
  : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<"tex.level.acube.v4.s32.f32",
                                      Int32Regs>;
def TEX_UNIFIED_CUBE_ARRAY_U32_F32
  : TEX_UNIFIED_CUBE_ARRAY_base<"tex.acube.v4.u32.f32", Int32Regs>;
def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
  : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<"tex.level.acube.v4.u32.f32",
                                      Int32Regs>;
3742
// Unified-mode tld4 instructions on 2D textures.
//
// Every instruction in this group returns four 32-bit results ($v0..$v3)
// and reads the 64-bit texture operand $t plus two f32 coordinates $x, $y.
// The defs differ only in the component selector in the mnemonic
// (.r/.g/.b/.a), the result type suffix and the result register class, so
// the shared operand lists and assembly operand syntax live in one class:
//   inst    - PTX mnemonic, including selector and type suffixes
//   outtype - register class of the four results
// No selection patterns are attached (empty pattern list).
class TLD4_UNIFIED_2D_base<string inst, RegisterClass outtype>
  : NVPTXInst<(outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              inst # " \t\\{$v0, $v1, $v2, $v3\\}, "
                     "[$t, \\{$x, $y\\}];",
              []>;

// f32 results.
def TLD4_UNIFIED_R_2D_F32_F32
  : TLD4_UNIFIED_2D_base<"tld4.r.2d.v4.f32.f32", Float32Regs>;
def TLD4_UNIFIED_G_2D_F32_F32
  : TLD4_UNIFIED_2D_base<"tld4.g.2d.v4.f32.f32", Float32Regs>;
def TLD4_UNIFIED_B_2D_F32_F32
  : TLD4_UNIFIED_2D_base<"tld4.b.2d.v4.f32.f32", Float32Regs>;
def TLD4_UNIFIED_A_2D_F32_F32
  : TLD4_UNIFIED_2D_base<"tld4.a.2d.v4.f32.f32", Float32Regs>;
// s32 results.
def TLD4_UNIFIED_R_2D_S32_F32
  : TLD4_UNIFIED_2D_base<"tld4.r.2d.v4.s32.f32", Int32Regs>;
def TLD4_UNIFIED_G_2D_S32_F32
  : TLD4_UNIFIED_2D_base<"tld4.g.2d.v4.s32.f32", Int32Regs>;
def TLD4_UNIFIED_B_2D_S32_F32
  : TLD4_UNIFIED_2D_base<"tld4.b.2d.v4.s32.f32", Int32Regs>;
def TLD4_UNIFIED_A_2D_S32_F32
  : TLD4_UNIFIED_2D_base<"tld4.a.2d.v4.s32.f32", Int32Regs>;
// u32 results.
def TLD4_UNIFIED_R_2D_U32_F32
  : TLD4_UNIFIED_2D_base<"tld4.r.2d.v4.u32.f32", Int32Regs>;
def TLD4_UNIFIED_G_2D_U32_F32
  : TLD4_UNIFIED_2D_base<"tld4.g.2d.v4.u32.f32", Int32Regs>;
def TLD4_UNIFIED_B_2D_U32_F32
  : TLD4_UNIFIED_2D_base<"tld4.b.2d.v4.u32.f32", Int32Regs>;
def TLD4_UNIFIED_A_2D_U32_F32
  : TLD4_UNIFIED_2D_base<"tld4.a.2d.v4.u32.f32", Int32Regs>;
3827}
3828
3829
3830
3831//=== Surface load instructions
3832// .clamp variant
// Single-result surface loads, .clamp variant.  Each loop below expands to
// the records SULD_<geom>_I{8,16,32,64}_CLAMP, in that order, with the same
// operands and assembly text as a hand-written def per width.
// NOTE(review): IsSuld looks like a tag for the number of results (true here;
// 2 and 3 are used for the .v2/.v4 forms) -- confirm in the format class.
let IsSuld = true in {
  // 1D: coordinate is $x.
  foreach width = ["8", "16", "32", "64"] in {
    // b8 results are produced in Int16Regs; wider loads use the register
    // class of the same size.
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_1D_I" # width # "_CLAMP"
      : NVPTXInst<(outs dstRC:$r),
                  (ins Int64Regs:$s, Int32Regs:$x),
                  "suld.b.1d.b" # width # ".clamp \\{$r\\}, [$s, \\{$x\\}];",
                  []>;
  }

  // 1D array: $l precedes $x in the coordinate vector.
  foreach width = ["8", "16", "32", "64"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_1D_ARRAY_I" # width # "_CLAMP"
      : NVPTXInst<(outs dstRC:$r),
                  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                  "suld.b.a1d.b" # width
                    # ".clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
                  []>;
  }

  // 2D: coordinates are ($x, $y).
  foreach width = ["8", "16", "32", "64"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_2D_I" # width # "_CLAMP"
      : NVPTXInst<(outs dstRC:$r),
                  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                  "suld.b.2d.b" # width
                    # ".clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
                  []>;
  }

  // 2D array: the coordinate vector is printed with four elements, so $y is
  // repeated to fill the last slot.
  foreach width = ["8", "16", "32", "64"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_2D_ARRAY_I" # width # "_CLAMP"
      : NVPTXInst<(outs dstRC:$r),
                  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                       Int32Regs:$y),
                  "suld.b.a2d.b" # width
                    # ".clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
                  []>;
  }

  // 3D: the coordinate vector is printed with four elements, so $z is
  // repeated to fill the last slot.
  foreach width = ["8", "16", "32", "64"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_3D_I" # width # "_CLAMP"
      : NVPTXInst<(outs dstRC:$r),
                  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                       Int32Regs:$z),
                  "suld.b.3d.b" # width
                    # ".clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
                  []>;
  }
}
3939
// Two-result (.v2) surface loads, .clamp variant.  Each loop expands to
// SULD_<geom>_V2I{8,16,32,64}_CLAMP, in that order; both results ($r, $g)
// share one register class.
let IsSuld = 2 in {
  // 1D: coordinate is $x.
  foreach width = ["8", "16", "32", "64"] in {
    // b8 results are produced in Int16Regs; wider loads use the register
    // class of the same size.
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_1D_V2I" # width # "_CLAMP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g),
                  (ins Int64Regs:$s, Int32Regs:$x),
                  "suld.b.1d.v2.b" # width
                    # ".clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
                  []>;
  }

  // 1D array: $l precedes $x in the coordinate vector.
  foreach width = ["8", "16", "32", "64"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_1D_ARRAY_V2I" # width # "_CLAMP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g),
                  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                  "suld.b.a1d.v2.b" # width
                    # ".clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
                  []>;
  }

  // 2D: coordinates are ($x, $y).
  foreach width = ["8", "16", "32", "64"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_2D_V2I" # width # "_CLAMP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g),
                  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                  "suld.b.2d.v2.b" # width
                    # ".clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
                  []>;
  }

  // 2D array: four-element coordinate vector, $y repeated in the last slot.
  foreach width = ["8", "16", "32", "64"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_2D_ARRAY_V2I" # width # "_CLAMP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g),
                  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                       Int32Regs:$y),
                  "suld.b.a2d.v2.b" # width
                    # ".clamp \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
                  []>;
  }

  // 3D: four-element coordinate vector, $z repeated in the last slot.
  foreach width = ["8", "16", "32", "64"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_3D_V2I" # width # "_CLAMP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g),
                  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                       Int32Regs:$z),
                  "suld.b.3d.v2.b" # width
                    # ".clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
                  []>;
  }
}
4050
// Four-result (.v4) surface loads, .clamp variant.  Each loop expands to
// SULD_<geom>_V4I{8,16,32}_CLAMP, in that order (no 64-bit form is defined
// here); the results ($r, $g, $b, $a) share one register class.
let IsSuld = 3 in {
  // 1D: coordinate is $x.
  foreach width = ["8", "16", "32"] in {
    // b8 results are produced in Int16Regs; wider loads use the register
    // class of the same size.
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_1D_V4I" # width # "_CLAMP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g, dstRC:$b, dstRC:$a),
                  (ins Int64Regs:$s, Int32Regs:$x),
                  "suld.b.1d.v4.b" # width
                    # ".clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
                  []>;
  }

  // 1D array: $l precedes $x in the coordinate vector.
  foreach width = ["8", "16", "32"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_1D_ARRAY_V4I" # width # "_CLAMP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g, dstRC:$b, dstRC:$a),
                  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                  "suld.b.a1d.v4.b" # width
                    # ".clamp \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
                  []>;
  }

  // 2D: coordinates are ($x, $y).
  foreach width = ["8", "16", "32"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_2D_V4I" # width # "_CLAMP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g, dstRC:$b, dstRC:$a),
                  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                  "suld.b.2d.v4.b" # width
                    # ".clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
                  []>;
  }

  // 2D array: four-element coordinate vector, $y repeated in the last slot.
  foreach width = ["8", "16", "32"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_2D_ARRAY_V4I" # width # "_CLAMP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g, dstRC:$b, dstRC:$a),
                  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                       Int32Regs:$y),
                  "suld.b.a2d.v4.b" # width
                    # ".clamp \\{$r, $g, $b, $a\\}, "
                    # "[$s, \\{$l, $x, $y, $y\\}];",
                  []>;
  }

  // 3D: four-element coordinate vector, $z repeated in the last slot.
  foreach width = ["8", "16", "32"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_3D_V4I" # width # "_CLAMP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g, dstRC:$b, dstRC:$a),
                  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                       Int32Regs:$z),
                  "suld.b.3d.v4.b" # width
                    # ".clamp \\{$r, $g, $b, $a\\}, "
                    # "[$s, \\{$x, $y, $z, $z\\}];",
                  []>;
  }
}
4142
4143
4144// .trap variant
// Single-result surface loads, .trap variant.  Each loop below expands to
// the records SULD_<geom>_I{8,16,32,64}_TRAP, in that order, with the same
// operands and assembly text as a hand-written def per width.
// NOTE(review): IsSuld looks like a tag for the number of results (true here;
// 2 and 3 are used for the .v2/.v4 forms) -- confirm in the format class.
let IsSuld = true in {
  // 1D: coordinate is $x.
  foreach width = ["8", "16", "32", "64"] in {
    // b8 results are produced in Int16Regs; wider loads use the register
    // class of the same size.
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_1D_I" # width # "_TRAP"
      : NVPTXInst<(outs dstRC:$r),
                  (ins Int64Regs:$s, Int32Regs:$x),
                  "suld.b.1d.b" # width # ".trap \\{$r\\}, [$s, \\{$x\\}];",
                  []>;
  }

  // 1D array: $l precedes $x in the coordinate vector.
  foreach width = ["8", "16", "32", "64"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_1D_ARRAY_I" # width # "_TRAP"
      : NVPTXInst<(outs dstRC:$r),
                  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                  "suld.b.a1d.b" # width
                    # ".trap \\{$r\\}, [$s, \\{$l, $x\\}];",
                  []>;
  }

  // 2D: coordinates are ($x, $y).
  foreach width = ["8", "16", "32", "64"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_2D_I" # width # "_TRAP"
      : NVPTXInst<(outs dstRC:$r),
                  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                  "suld.b.2d.b" # width
                    # ".trap \\{$r\\}, [$s, \\{$x, $y\\}];",
                  []>;
  }

  // 2D array: four-element coordinate vector, $y repeated in the last slot.
  foreach width = ["8", "16", "32", "64"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_2D_ARRAY_I" # width # "_TRAP"
      : NVPTXInst<(outs dstRC:$r),
                  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                       Int32Regs:$y),
                  "suld.b.a2d.b" # width
                    # ".trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
                  []>;
  }

  // 3D: four-element coordinate vector, $z repeated in the last slot.
  foreach width = ["8", "16", "32", "64"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_3D_I" # width # "_TRAP"
      : NVPTXInst<(outs dstRC:$r),
                  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                       Int32Regs:$z),
                  "suld.b.3d.b" # width
                    # ".trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
                  []>;
  }
}
4251
// Two-result (.v2) surface loads, .trap variant.  Each loop expands to
// SULD_<geom>_V2I{8,16,32,64}_TRAP, in that order; both results ($r, $g)
// share one register class.
let IsSuld = 2 in {
  // 1D: coordinate is $x.
  foreach width = ["8", "16", "32", "64"] in {
    // b8 results are produced in Int16Regs; wider loads use the register
    // class of the same size.
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_1D_V2I" # width # "_TRAP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g),
                  (ins Int64Regs:$s, Int32Regs:$x),
                  "suld.b.1d.v2.b" # width
                    # ".trap \\{$r, $g\\}, [$s, \\{$x\\}];",
                  []>;
  }

  // 1D array: $l precedes $x in the coordinate vector.
  foreach width = ["8", "16", "32", "64"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_1D_ARRAY_V2I" # width # "_TRAP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g),
                  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                  "suld.b.a1d.v2.b" # width
                    # ".trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
                  []>;
  }

  // 2D: coordinates are ($x, $y).
  foreach width = ["8", "16", "32", "64"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_2D_V2I" # width # "_TRAP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g),
                  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                  "suld.b.2d.v2.b" # width
                    # ".trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
                  []>;
  }

  // 2D array: four-element coordinate vector, $y repeated in the last slot.
  foreach width = ["8", "16", "32", "64"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_2D_ARRAY_V2I" # width # "_TRAP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g),
                  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                       Int32Regs:$y),
                  "suld.b.a2d.v2.b" # width
                    # ".trap \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
                  []>;
  }

  // 3D: four-element coordinate vector, $z repeated in the last slot.
  foreach width = ["8", "16", "32", "64"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_3D_V2I" # width # "_TRAP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g),
                  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                       Int32Regs:$z),
                  "suld.b.3d.v2.b" # width
                    # ".trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
                  []>;
  }
}
4362
// Four-result (.v4) surface loads, .trap variant.  Each loop expands to
// SULD_<geom>_V4I{8,16,32}_TRAP, in that order (no 64-bit form is defined
// here); the results ($r, $g, $b, $a) share one register class.
let IsSuld = 3 in {
  // 1D: coordinate is $x.
  foreach width = ["8", "16", "32"] in {
    // b8 results are produced in Int16Regs; wider loads use the register
    // class of the same size.
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_1D_V4I" # width # "_TRAP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g, dstRC:$b, dstRC:$a),
                  (ins Int64Regs:$s, Int32Regs:$x),
                  "suld.b.1d.v4.b" # width
                    # ".trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
                  []>;
  }

  // 1D array: $l precedes $x in the coordinate vector.
  foreach width = ["8", "16", "32"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_1D_ARRAY_V4I" # width # "_TRAP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g, dstRC:$b, dstRC:$a),
                  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                  "suld.b.a1d.v4.b" # width
                    # ".trap \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
                  []>;
  }

  // 2D: coordinates are ($x, $y).
  foreach width = ["8", "16", "32"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_2D_V4I" # width # "_TRAP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g, dstRC:$b, dstRC:$a),
                  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                  "suld.b.2d.v4.b" # width
                    # ".trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
                  []>;
  }

  // 2D array: four-element coordinate vector, $y repeated in the last slot.
  foreach width = ["8", "16", "32"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_2D_ARRAY_V4I" # width # "_TRAP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g, dstRC:$b, dstRC:$a),
                  (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                       Int32Regs:$y),
                  "suld.b.a2d.v4.b" # width
                    # ".trap \\{$r, $g, $b, $a\\}, "
                    # "[$s, \\{$l, $x, $y, $y\\}];",
                  []>;
  }

  // 3D: four-element coordinate vector, $z repeated in the last slot.
  foreach width = ["8", "16", "32"] in {
    defvar dstRC = !cast<RegisterClass>(
        "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
    def "SULD_3D_V4I" # width # "_TRAP"
      : NVPTXInst<(outs dstRC:$r, dstRC:$g, dstRC:$b, dstRC:$a),
                  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                       Int32Regs:$z),
                  "suld.b.3d.v4.b" # width
                    # ".trap \\{$r, $g, $b, $a\\}, "
                    # "[$s, \\{$x, $y, $z, $z\\}];",
                  []>;
  }
}
4454
4455// .zero variant
4456let IsSuld = true in {
// 1D single-result surface loads, .zero variant: expands to
// SULD_1D_I{8,16,32,64}_ZERO, in that order.  The coordinate is $x.
foreach width = ["8", "16", "32", "64"] in {
  // b8 results are produced in Int16Regs; wider loads use the register
  // class of the same size.
  defvar dstRC = !cast<RegisterClass>(
      "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
  def "SULD_1D_I" # width # "_ZERO"
    : NVPTXInst<(outs dstRC:$r),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.b" # width # ".zero \\{$r\\}, [$s, \\{$x\\}];",
                []>;
}
4477
// 1D-array single-result surface loads, .zero variant: expands to
// SULD_1D_ARRAY_I{8,16,32,64}_ZERO, in that order.  $l precedes $x in the
// coordinate vector.
foreach width = ["8", "16", "32", "64"] in {
  // b8 results are produced in Int16Regs; wider loads use the register
  // class of the same size.
  defvar dstRC = !cast<RegisterClass>(
      "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
  def "SULD_1D_ARRAY_I" # width # "_ZERO"
    : NVPTXInst<(outs dstRC:$r),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.b" # width
                  # ".zero \\{$r\\}, [$s, \\{$l, $x\\}];",
                []>;
}
4498
// 2D single-result surface loads, .zero variant: expands to
// SULD_2D_I{8,16,32,64}_ZERO, in that order.  Coordinates are ($x, $y).
foreach width = ["8", "16", "32", "64"] in {
  // b8 results are produced in Int16Regs; wider loads use the register
  // class of the same size.
  defvar dstRC = !cast<RegisterClass>(
      "Int" # !if(!eq(width, "8"), "16", width) # "Regs");
  def "SULD_2D_I" # width # "_ZERO"
    : NVPTXInst<(outs dstRC:$r),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.b" # width
                  # ".zero \\{$r\\}, [$s, \\{$x, $y\\}];",
                []>;
}
4519
// 2D-array surface loads, scalar result, `.zero` variant. Each record prints
// `suld.b.a2d.<bN>.zero`. The coordinate vector is printed with four
// elements, {$l, $x, $y, $y}: $y is deliberately repeated to fill the last
// slot (PTX takes a four-element vector for this geometry; per the PTX ISA
// the final element is ignored). No selection pattern is attached here.
def SULD_2D_ARRAY_I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_I64_ZERO : NVPTXInst<
    (outs Int64Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
4540
// 3D surface loads, scalar result, `.zero` variant. Each record prints
// `suld.b.3d.<bN>.zero`. The coordinate vector is printed with four
// elements, {$x, $y, $z, $z}: $z is deliberately repeated to fill the last
// slot (PTX takes a four-element vector for 3D geometry; per the PTX ISA the
// final element is ignored). No selection pattern is attached here.
def SULD_3D_I8_ZERO : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_I16_ZERO : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_I32_ZERO : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_I64_ZERO : NVPTXInst<
    (outs Int64Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
4561}
4562
// Two-element (`.v2`) surface loads, `.zero` variant. Every record in this
// scope gets IsSuld = 2 and defines two results, $r and $g. None of the
// records carries a selection pattern.
// NOTE(review): IsSuld = 2 presumably tags the two-result form; confirm
// against the instruction format definitions.
let IsSuld = 2 in {
  // 1D: single coordinate {$x}.
  def SULD_1D_V2I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_V2I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_V2I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_V2I64_ZERO : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
      []>;

  // 1D array: coordinates {$l, $x}.
  def SULD_1D_ARRAY_V2I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_V2I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_V2I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_V2I64_ZERO : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
      []>;

  // 2D: coordinates {$x, $y}.
  def SULD_2D_V2I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_V2I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_V2I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_V2I64_ZERO : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
      []>;

  // 2D array: four-element coordinate vector {$l, $x, $y, $y}; $y is
  // deliberately repeated to fill the last slot.
  def SULD_2D_ARRAY_V2I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b8.zero "
      "\\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_V2I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b16.zero "
      "\\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_V2I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b32.zero "
      "\\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_V2I64_ZERO : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b64.zero "
      "\\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;

  // 3D: four-element coordinate vector {$x, $y, $z, $z}; $z is deliberately
  // repeated to fill the last slot.
  def SULD_3D_V2I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_V2I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_V2I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_V2I64_ZERO : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
}
4673
// Four-element (`.v4`) surface loads, `.zero` variant. Every record in this
// scope gets IsSuld = 3 and defines four results, $r, $g, $b and $a. Only
// b8, b16 and b32 element widths are defined here. None of the records
// carries a selection pattern.
// NOTE(review): IsSuld = 3 presumably tags the four-result form; confirm
// against the instruction format definitions.
let IsSuld = 3 in {
  // 1D: single coordinate {$x}.
  def SULD_1D_V4I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_V4I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_V4I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
      []>;

  // 1D array: coordinates {$l, $x}.
  def SULD_1D_ARRAY_V4I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v4.b8.zero "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_V4I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v4.b16.zero "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_V4I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v4.b32.zero "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
      []>;

  // 2D: coordinates {$x, $y}.
  def SULD_2D_V4I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_V4I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_V4I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
      []>;

  // 2D array: four-element coordinate vector {$l, $x, $y, $y}; $y is
  // deliberately repeated to fill the last slot.
  def SULD_2D_ARRAY_V4I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v4.b8.zero "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_V4I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v4.b16.zero "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_V4I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v4.b32.zero "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;

  // 3D: four-element coordinate vector {$x, $y, $z, $z}; $z is deliberately
  // repeated to fill the last slot.
  def SULD_3D_V4I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v4.b8.zero "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_V4I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v4.b16.zero "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_V4I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v4.b32.zero "
      "\\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
}
4765
4766//-----------------------------------
4767// Texture Query Intrinsics
4768//-----------------------------------
4769
// Texture query instructions. Every record in this scope gets
// IsSurfTexQuery = true, takes a 64-bit operand $a and produces a 32-bit
// result $d, printing `txq.<attribute>.b32 $d, [$a];`. The records carry no
// inline selection pattern.
let IsSurfTexQuery = true in {
  def TXQ_CHANNEL_ORDER : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "txq.channel_order.b32 \t$d, [$a];",
      []>;
  def TXQ_CHANNEL_DATA_TYPE : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "txq.channel_data_type.b32 \t$d, [$a];",
      []>;
  def TXQ_WIDTH : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "txq.width.b32 \t$d, [$a];",
      []>;
  def TXQ_HEIGHT : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "txq.height.b32 \t$d, [$a];",
      []>;
  def TXQ_DEPTH : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "txq.depth.b32 \t$d, [$a];",
      []>;
  def TXQ_ARRAY_SIZE : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "txq.array_size.b32 \t$d, [$a];",
      []>;
  def TXQ_NUM_SAMPLES : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "txq.num_samples.b32 \t$d, [$a];",
      []>;
  def TXQ_NUM_MIPMAP_LEVELS : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "txq.num_mipmap_levels.b32 \t$d, [$a];",
      []>;
}
4804
// Selection patterns for the texture queries: each int_nvvm_txq_* intrinsic
// applied to a 64-bit register operand selects the TXQ_* instruction of the
// same name, passing the operand through unchanged.
def : Pat<
    (int_nvvm_txq_channel_order Int64Regs:$a),
    (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_channel_data_type Int64Regs:$a),
    (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_width Int64Regs:$a),
    (TXQ_WIDTH Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_height Int64Regs:$a),
    (TXQ_HEIGHT Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_depth Int64Regs:$a),
    (TXQ_DEPTH Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_array_size Int64Regs:$a),
    (TXQ_ARRAY_SIZE Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_num_samples Int64Regs:$a),
    (TXQ_NUM_SAMPLES Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
    (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
4821
4822
4823//-----------------------------------
4824// Surface Query Intrinsics
4825//-----------------------------------
4826
// Surface query instructions. Every record in this scope gets
// IsSurfTexQuery = true, takes a 64-bit operand $a and produces a 32-bit
// result $d, printing `suq.<attribute>.b32 $d, [$a];`. The records carry no
// inline selection pattern.
let IsSurfTexQuery = true in {
  def SUQ_CHANNEL_ORDER : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "suq.channel_order.b32 \t$d, [$a];",
      []>;
  def SUQ_CHANNEL_DATA_TYPE : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "suq.channel_data_type.b32 \t$d, [$a];",
      []>;
  def SUQ_WIDTH : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "suq.width.b32 \t$d, [$a];",
      []>;
  def SUQ_HEIGHT : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "suq.height.b32 \t$d, [$a];",
      []>;
  def SUQ_DEPTH : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "suq.depth.b32 \t$d, [$a];",
      []>;
  def SUQ_ARRAY_SIZE : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "suq.array_size.b32 \t$d, [$a];",
      []>;
}
4853
// Selection patterns for the surface queries: each int_nvvm_suq_* intrinsic
// applied to a 64-bit register operand selects the SUQ_* instruction of the
// same name, passing the operand through unchanged.
def : Pat<
    (int_nvvm_suq_channel_order Int64Regs:$a),
    (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_channel_data_type Int64Regs:$a),
    (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_width Int64Regs:$a),
    (SUQ_WIDTH Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_height Int64Regs:$a),
    (SUQ_HEIGHT Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_depth Int64Regs:$a),
    (SUQ_DEPTH Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_array_size Int64Regs:$a),
    (SUQ_ARRAY_SIZE Int64Regs:$a)>;
4866
4867
4868//===- Handle Query -------------------------------------------------------===//
4869
4870// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Handle-type predicates. Each record takes a 64-bit operand $a, produces a
// 1-bit result $d, and is selected directly from the matching
// int_nvvm_istypep_* intrinsic through its inline pattern. Unlike the query
// instructions, $a is printed bare, without surrounding brackets.
def ISTYPEP_SAMPLER : NVPTXInst<
    (outs Int1Regs:$d),
    (ins Int64Regs:$a),
    "istypep.samplerref \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
def ISTYPEP_SURFACE : NVPTXInst<
    (outs Int1Regs:$d),
    (ins Int64Regs:$a),
    "istypep.surfref \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
def ISTYPEP_TEXTURE : NVPTXInst<
    (outs Int1Regs:$d),
    (ins Int64Regs:$a),
    "istypep.texref \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4883
4884//===- Surface Stores -----------------------------------------------------===//
4885
4886let IsSust = true in {
4887// Unformatted
4888// .clamp variant
// Unformatted 1D surface stores, `.clamp` variant. The records define no
// results; the inputs are operand $s, the coordinate $x, and then one, two or
// four data registers ($r[, $g[, $b, $a]]). b8 and b16 data both use a 16-bit
// register class. The v4 form is defined for b8/b16/b32 only. No selection
// pattern is attached to any of these records.
def SUST_B_1D_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r),
    "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int64Regs:$r),
    "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
4947
4948
// Unformatted 1D-array surface stores, `.clamp` variant. The records define
// no results; the inputs are operand $s, the coordinate pair {$idx, $x}, and
// then one, two or four data registers ($r[, $g[, $b, $a]]). b8 and b16 data
// both use a 16-bit register class. The v4 form is defined for b8/b16/b32
// only. No selection pattern is attached to any of these records.
def SUST_B_1D_ARRAY_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_ARRAY_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_ARRAY_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r),
    "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_ARRAY_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r),
    "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_ARRAY_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_ARRAY_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_ARRAY_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_ARRAY_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_ARRAY_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b8.clamp \t"
    "[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_ARRAY_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b16.clamp \t"
    "[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_ARRAY_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.a1d.v4.b32.clamp \t"
    "[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
    []>;
5014
5015
// Unformatted 2D surface stores, `.clamp` variant. The records define no
// results; the inputs are operand $s, the coordinate pair {$x, $y}, and then
// one, two or four data registers ($r[, $g[, $b, $a]]). b8 and b16 data both
// use a 16-bit register class. The v4 form is defined for b8/b16/b32 only.
// No selection pattern is attached to any of these records.
def SUST_B_2D_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r),
    "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.2d.v4.b8.clamp \t"
    "[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.2d.v4.b16.clamp \t"
    "[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.2d.v4.b32.clamp \t"
    "[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
    []>;
5081
5082
// Unformatted 2D-array surface stores, `.clamp` variant. The records define
// no results; the inputs are operand $s, the coordinates $idx, $x, $y, and
// then one, two or four data registers ($r[, $g[, $b, $a]]). The coordinate
// vector is printed with four elements, {$idx, $x, $y, $y}: $y is
// deliberately repeated to fill the last slot (per the PTX ISA the final
// element is ignored). b8 and b16 data both use a 16-bit register class. The
// v4 form is defined for b8/b16/b32 only. No selection pattern is attached.
def SUST_B_2D_ARRAY_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_ARRAY_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_ARRAY_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_ARRAY_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r),
    "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_ARRAY_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a2d.v2.b8.clamp \t"
    "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_ARRAY_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a2d.v2.b16.clamp \t"
    "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_ARRAY_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.a2d.v2.b32.clamp \t"
    "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_ARRAY_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.a2d.v2.b64.clamp \t"
    "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_ARRAY_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a2d.v4.b8.clamp \t"
    "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_ARRAY_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a2d.v4.b16.clamp \t"
    "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_ARRAY_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.a2d.v4.b32.clamp \t"
    "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
    []>;
5156
5157
// Unformatted 3D surface stores, `.clamp` variant. The records define no
// results; the inputs are operand $s, the coordinates $x, $y, $z, and then
// one, two or four data registers ($r[, $g[, $b, $a]]). The coordinate vector
// is printed with four elements, {$x, $y, $z, $z}: $z is deliberately
// repeated to fill the last slot (per the PTX ISA the final element is
// ignored). b8 and b16 data both use a 16-bit register class. The v4 form is
// defined for b8/b16/b32 only. No selection pattern is attached.
def SUST_B_3D_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_B_3D_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_B_3D_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r),
    "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_B_3D_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r),
    "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_B_3D_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.3d.v2.b8.clamp \t"
    "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
    []>;
def SUST_B_3D_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.3d.v2.b16.clamp \t"
    "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
    []>;
def SUST_B_3D_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.3d.v2.b32.clamp \t"
    "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
    []>;
def SUST_B_3D_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.3d.v2.b64.clamp \t"
    "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
    []>;
def SUST_B_3D_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.3d.v4.b8.clamp \t"
    "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_3D_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.3d.v4.b16.clamp \t"
    "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_3D_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.3d.v4.b32.clamp \t"
    "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
    []>;
5231
5232
// Unformatted surface stores (sust.b), .trap variant
// sust.b.1d stores, .trap variant: scalar, .v2 and .v4 data forms.
//
// Each record has no outputs and an empty selection-pattern list.
// Operands: $s (Int64Regs) is the surface operand, $x (Int32Regs) is the
// single coordinate, and $r[/$g[/$b/$a]] are the data registers to store.
// 8-bit data is carried in Int16Regs, the same class used for 16-bit data.
// There is no .v4.b64 form in this family.
def SUST_B_1D_B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
              "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
              []>;
def SUST_B_1D_B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
              "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
              []>;
def SUST_B_1D_B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
              "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
              []>;
def SUST_B_1D_B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
              "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
              []>;
def SUST_B_1D_V2B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
              "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;
def SUST_B_1D_V2B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
              "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;
def SUST_B_1D_V2B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
              "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;
def SUST_B_1D_V2B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
              "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;
def SUST_B_1D_V4B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
                   Int16Regs:$b, Int16Regs:$a),
              "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_1D_V4B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
                   Int16Regs:$b, Int16Regs:$a),
              "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_1D_V4B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
                   Int32Regs:$b, Int32Regs:$a),
              "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
              []>;
5292
5293
// sust.b.a1d (1D array) stores, .trap variant: scalar, .v2 and .v4 data forms.
//
// Each record has no outputs and an empty selection-pattern list.
// Operands: $s (Int64Regs) is the surface operand, $idx (Int32Regs) is
// printed first in the coordinate vector (presumably the array layer index),
// $x (Int32Regs) is the coordinate, and $r[/$g[/$b/$a]] are the data
// registers to store. 8-bit data is carried in Int16Regs.
def SUST_B_1D_ARRAY_B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
              "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
              []>;
def SUST_B_1D_ARRAY_B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
              "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
              []>;
def SUST_B_1D_ARRAY_B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
              "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
              []>;
def SUST_B_1D_ARRAY_B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
              "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
              []>;
def SUST_B_1D_ARRAY_V2B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
                   Int16Regs:$g),
              "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
              []>;
def SUST_B_1D_ARRAY_V2B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
                   Int16Regs:$g),
              "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
              []>;
def SUST_B_1D_ARRAY_V2B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
                   Int32Regs:$g),
              "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
              []>;
def SUST_B_1D_ARRAY_V2B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
                   Int64Regs:$g),
              "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
              []>;
def SUST_B_1D_ARRAY_V4B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
              "\\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_1D_ARRAY_V4B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
             "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
             "\\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_1D_ARRAY_V4B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
             "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
             "\\{$r, $g, $b, $a\\};",
              []>;
5359
5360
// sust.b.2d stores, .trap variant: scalar, .v2 and .v4 data forms.
//
// Each record has no outputs and an empty selection-pattern list.
// Operands: $s (Int64Regs) is the surface operand, $x/$y (Int32Regs) are the
// coordinates, and $r[/$g[/$b/$a]] are the data registers to store.
// 8-bit data is carried in Int16Regs, the same class used for 16-bit data.
def SUST_B_2D_B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
              "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
              []>;
def SUST_B_2D_B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
              "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
              []>;
def SUST_B_2D_B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
              "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
              []>;
def SUST_B_2D_B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
              "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
              []>;
def SUST_B_2D_V2B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
                   Int16Regs:$g),
              "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
              []>;
def SUST_B_2D_V2B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
                   Int16Regs:$g),
              "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
              []>;
def SUST_B_2D_V2B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
                   Int32Regs:$g),
              "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
              []>;
def SUST_B_2D_V2B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
                   Int64Regs:$g),
              "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
              []>;
def SUST_B_2D_V4B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
              "\\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_2D_V4B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
             "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
             "\\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_2D_V4B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
             "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
             "\\{$r, $g, $b, $a\\};",
              []>;
5426
5427
// sust.b.a2d (2D array) stores, .trap variant: scalar, .v2 and .v4 data forms.
//
// Each record has no outputs and an empty selection-pattern list.
// Operands: $s (Int64Regs) is the surface operand, $idx (Int32Regs) is
// printed first in the coordinate vector (presumably the array layer index),
// $x/$y (Int32Regs) are the coordinates, and $r[/$g[/$b/$a]] are the data
// registers to store. 8-bit data is carried in Int16Regs.
// The coordinate vector is printed with four elements and $y is repeated in
// the last slot -- presumably because PTX requires a 4-element coordinate
// vector for a2d surfaces and ignores the last element; confirm against the
// PTX ISA before changing it.
def SUST_B_2D_ARRAY_B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r),
              "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
              []>;
def SUST_B_2D_ARRAY_B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r),
              "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
              []>;
def SUST_B_2D_ARRAY_B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$r),
              "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
              []>;
def SUST_B_2D_ARRAY_B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int64Regs:$r),
              "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
              []>;
def SUST_B_2D_ARRAY_V2B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
              "\\{$r, $g\\};",
              []>;
def SUST_B_2D_ARRAY_V2B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g),
             "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
             "\\{$r, $g\\};",
              []>;
def SUST_B_2D_ARRAY_V2B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$r, Int32Regs:$g),
             "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
             "\\{$r, $g\\};",
              []>;
def SUST_B_2D_ARRAY_V2B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int64Regs:$r, Int64Regs:$g),
             "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
             "\\{$r, $g\\};",
              []>;
def SUST_B_2D_ARRAY_V4B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
      "\\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_2D_ARRAY_V4B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
     "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
     "\\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_2D_ARRAY_V4B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
     "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
     "\\{$r, $g, $b, $a\\};",
              []>;
5501
5502
// sust.b.3d stores, .trap variant: scalar, .v2 and .v4 data forms.
//
// Each record has no outputs and an empty selection-pattern list.
// Operands: $s (Int64Regs) is the surface operand, $x/$y/$z (Int32Regs) are
// the coordinates, and $r[/$g[/$b/$a]] are the data registers to store.
// 8-bit data is carried in Int16Regs, the same class used for 16-bit data.
// The coordinate vector is printed with four elements and $z is repeated in
// the last slot -- presumably because PTX requires a 4-element coordinate
// vector for 3d surfaces and ignores the last element; confirm against the
// PTX ISA before changing it.
def SUST_B_3D_B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r),
              "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
              []>;
def SUST_B_3D_B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r),
              "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
              []>;
def SUST_B_3D_B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int32Regs:$r),
              "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
              []>;
def SUST_B_3D_B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int64Regs:$r),
              "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
              []>;
def SUST_B_3D_V2B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
              "\\{$r, $g\\};",
              []>;
def SUST_B_3D_V2B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
              "\\{$r, $g\\};",
              []>;
def SUST_B_3D_V2B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int32Regs:$r, Int32Regs:$g),
              "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
              "\\{$r, $g\\};",
              []>;
def SUST_B_3D_V2B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int64Regs:$r, Int64Regs:$g),
              "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
              "\\{$r, $g\\};",
              []>;
def SUST_B_3D_V4B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
         "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
         "\\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_3D_V4B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
        "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
        "\\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_3D_V4B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
        "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
        "\\{$r, $g, $b, $a\\};",
              []>;
5576
5577
// Unformatted surface stores (sust.b), .zero variant
// sust.b.1d stores, .zero variant: scalar, .v2 and .v4 data forms.
//
// Each record has no outputs and an empty selection-pattern list.
// Operands: $s (Int64Regs) is the surface operand, $x (Int32Regs) is the
// single coordinate, and $r[/$g[/$b/$a]] are the data registers to store.
// 8-bit data is carried in Int16Regs, the same class used for 16-bit data.
// There is no .v4.b64 form in this family.
def SUST_B_1D_B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
              "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
              []>;
def SUST_B_1D_B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
              "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
              []>;
def SUST_B_1D_B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
              "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
              []>;
def SUST_B_1D_B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
              "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
              []>;
def SUST_B_1D_V2B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
              "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;
def SUST_B_1D_V2B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
              "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;
def SUST_B_1D_V2B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
              "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;
def SUST_B_1D_V2B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
              "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;
def SUST_B_1D_V4B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
                   Int16Regs:$b, Int16Regs:$a),
              "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_1D_V4B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
                   Int16Regs:$b, Int16Regs:$a),
              "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_1D_V4B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
                   Int32Regs:$b, Int32Regs:$a),
              "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
              []>;
5637
5638
// sust.b.a1d (1D array) stores, .zero variant: scalar, .v2 and .v4 data forms.
//
// Each record has no outputs and an empty selection-pattern list.
// Operands: $s (Int64Regs) is the surface operand, $idx (Int32Regs) is
// printed first in the coordinate vector (presumably the array layer index),
// $x (Int32Regs) is the coordinate, and $r[/$g[/$b/$a]] are the data
// registers to store. 8-bit data is carried in Int16Regs.
def SUST_B_1D_ARRAY_B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
              "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
              []>;
def SUST_B_1D_ARRAY_B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
              "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
              []>;
def SUST_B_1D_ARRAY_B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
              "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
              []>;
def SUST_B_1D_ARRAY_B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
              "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
              []>;
def SUST_B_1D_ARRAY_V2B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
                   Int16Regs:$g),
              "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
              []>;
def SUST_B_1D_ARRAY_V2B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
                   Int16Regs:$g),
              "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
              []>;
def SUST_B_1D_ARRAY_V2B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
                   Int32Regs:$g),
              "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
              []>;
def SUST_B_1D_ARRAY_V2B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
                   Int64Regs:$g),
              "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
              []>;
def SUST_B_1D_ARRAY_V4B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
              "\\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_1D_ARRAY_V4B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
             "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
             "\\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_1D_ARRAY_V4B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
             "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
             "\\{$r, $g, $b, $a\\};",
              []>;
5704
5705
// sust.b.2d stores, .zero variant: scalar, .v2 and .v4 data forms.
//
// Each record has no outputs and an empty selection-pattern list.
// Operands: $s (Int64Regs) is the surface operand, $x/$y (Int32Regs) are the
// coordinates, and $r[/$g[/$b/$a]] are the data registers to store.
// 8-bit data is carried in Int16Regs, the same class used for 16-bit data.
def SUST_B_2D_B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
              "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
              []>;
def SUST_B_2D_B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
              "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
              []>;
def SUST_B_2D_B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
              "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
              []>;
def SUST_B_2D_B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
              "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
              []>;
def SUST_B_2D_V2B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
                   Int16Regs:$g),
              "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
              []>;
def SUST_B_2D_V2B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
                   Int16Regs:$g),
              "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
              []>;
def SUST_B_2D_V2B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
                   Int32Regs:$g),
              "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
              []>;
def SUST_B_2D_V2B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
                   Int64Regs:$g),
              "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
              []>;
def SUST_B_2D_V4B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
              "\\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_2D_V4B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
             "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
             "\\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_2D_V4B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
             "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
             "\\{$r, $g, $b, $a\\};",
              []>;
5771
5772
// sust.b.a2d (2D array) stores, .zero variant: scalar, .v2 and .v4 data forms.
//
// Each record has no outputs and an empty selection-pattern list.
// Operands: $s (Int64Regs) is the surface operand, $idx (Int32Regs) is
// printed first in the coordinate vector (presumably the array layer index),
// $x/$y (Int32Regs) are the coordinates, and $r[/$g[/$b/$a]] are the data
// registers to store. 8-bit data is carried in Int16Regs.
// The coordinate vector is printed with four elements and $y is repeated in
// the last slot -- presumably because PTX requires a 4-element coordinate
// vector for a2d surfaces and ignores the last element; confirm against the
// PTX ISA before changing it.
def SUST_B_2D_ARRAY_B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r),
              "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
              []>;
def SUST_B_2D_ARRAY_B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r),
              "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
              []>;
def SUST_B_2D_ARRAY_B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$r),
              "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
              []>;
def SUST_B_2D_ARRAY_B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int64Regs:$r),
              "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
              []>;
def SUST_B_2D_ARRAY_V2B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
              "\\{$r, $g\\};",
              []>;
def SUST_B_2D_ARRAY_V2B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g),
             "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
             "\\{$r, $g\\};",
              []>;
def SUST_B_2D_ARRAY_V2B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$r, Int32Regs:$g),
             "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
             "\\{$r, $g\\};",
              []>;
def SUST_B_2D_ARRAY_V2B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int64Regs:$r, Int64Regs:$g),
             "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
             "\\{$r, $g\\};",
              []>;
def SUST_B_2D_ARRAY_V4B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
      "\\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_2D_ARRAY_V4B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
     "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
     "\\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_2D_ARRAY_V4B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
     "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
     "\\{$r, $g, $b, $a\\};",
              []>;
5846
5847
// sust.b.3d stores, .zero variant: scalar, .v2 and .v4 data forms.
//
// Each record has no outputs and an empty selection-pattern list.
// Operands: $s (Int64Regs) is the surface operand, $x/$y/$z (Int32Regs) are
// the coordinates, and $r[/$g[/$b/$a]] are the data registers to store.
// 8-bit data is carried in Int16Regs, the same class used for 16-bit data.
// The coordinate vector is printed with four elements and $z is repeated in
// the last slot -- presumably because PTX requires a 4-element coordinate
// vector for 3d surfaces and ignores the last element; confirm against the
// PTX ISA before changing it.
def SUST_B_3D_B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r),
              "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
              []>;
def SUST_B_3D_B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r),
              "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
              []>;
def SUST_B_3D_B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int32Regs:$r),
              "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
              []>;
def SUST_B_3D_B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int64Regs:$r),
              "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
              []>;
def SUST_B_3D_V2B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
              "\\{$r, $g\\};",
              []>;
def SUST_B_3D_V2B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
              "\\{$r, $g\\};",
              []>;
def SUST_B_3D_V2B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int32Regs:$r, Int32Regs:$g),
              "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
              "\\{$r, $g\\};",
              []>;
def SUST_B_3D_V2B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int64Regs:$r, Int64Regs:$g),
              "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
              "\\{$r, $g\\};",
              []>;
def SUST_B_3D_V4B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
         "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
         "\\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_3D_V4B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
        "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
        "\\{$r, $g, $b, $a\\};",
              []>;
def SUST_B_3D_V4B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
        "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
        "\\{$r, $g, $b, $a\\};",
              []>;
5921
5922
5923
// Formatted surface stores (sust.p)
5925
// Emits "sust.p.1d.b8.trap": one data register ($r) stored to surface $s at
// coordinate $x. No outputs; the pattern list is left empty.
def SUST_P_1D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r),
    "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
// Emits "sust.p.1d.b16.trap": one data register ($r) stored to surface $s at
// coordinate $x. No outputs; the pattern list is left empty.
def SUST_P_1D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r),
    "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
// Emits "sust.p.1d.b32.trap": one Int32Regs data register ($r) stored to
// surface $s at coordinate $x. No outputs; the pattern list is left empty.
def SUST_P_1D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r),
    "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
// Emits "sust.p.1d.v2.b8.trap": two data registers ($r, $g) stored to
// surface $s at coordinate $x. No outputs; the pattern list is left empty.
def SUST_P_1D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
// Emits "sust.p.1d.v2.b16.trap": two data registers ($r, $g) stored to
// surface $s at coordinate $x. No outputs; the pattern list is left empty.
def SUST_P_1D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
// Emits "sust.p.1d.v2.b32.trap": two Int32Regs data registers ($r, $g)
// stored to surface $s at coordinate $x. The pattern list is left empty.
def SUST_P_1D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
// Emits "sust.p.1d.v4.b8.trap": four data registers ($r, $g, $b, $a) stored
// to surface $s at coordinate $x. The pattern list is left empty.
def SUST_P_1D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
// Emits "sust.p.1d.v4.b16.trap": four data registers ($r, $g, $b, $a)
// stored to surface $s at coordinate $x. The pattern list is left empty.
def SUST_P_1D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
// Emits "sust.p.1d.v4.b32.trap": four Int32Regs data registers
// ($r, $g, $b, $a) stored to surface $s at coordinate $x. The pattern list
// is left empty.
def SUST_P_1D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
5974
5975
// Emits "sust.p.a1d.b8.trap": one data register ($r) stored to surface $s;
// the coordinate vector is printed as {$idx, $x}. Pattern list is empty.
def SUST_P_1D_ARRAY_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
// Emits "sust.p.a1d.b16.trap": one data register ($r) stored to surface $s;
// the coordinate vector is printed as {$idx, $x}. Pattern list is empty.
def SUST_P_1D_ARRAY_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
// Emits "sust.p.a1d.b32.trap": one Int32Regs data register ($r) stored to
// surface $s; the coordinate vector is printed as {$idx, $x}. Pattern list
// is empty.
def SUST_P_1D_ARRAY_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r),
    "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
// Emits "sust.p.a1d.v2.b8.trap": two data registers ($r, $g) stored to
// surface $s; the coordinate vector is printed as {$idx, $x}. Pattern list
// is empty.
def SUST_P_1D_ARRAY_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
// Emits "sust.p.a1d.v2.b16.trap": two data registers ($r, $g) stored to
// surface $s; the coordinate vector is printed as {$idx, $x}. Pattern list
// is empty.
def SUST_P_1D_ARRAY_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
// Emits "sust.p.a1d.v2.b32.trap": two Int32Regs data registers ($r, $g)
// stored to surface $s; the coordinate vector is printed as {$idx, $x}.
// Pattern list is empty.
def SUST_P_1D_ARRAY_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
// Emits "sust.p.a1d.v4.b8.trap": four data registers ($r, $g, $b, $a)
// stored to surface $s; the coordinate vector is printed as {$idx, $x}.
// Pattern list is empty.
def SUST_P_1D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Emits "sust.p.a1d.v4.b16.trap": four data registers ($r, $g, $b, $a)
// stored to surface $s; the coordinate vector is printed as {$idx, $x}.
// Pattern list is empty.
def SUST_P_1D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Emits "sust.p.a1d.v4.b32.trap": four Int32Regs data registers
// ($r, $g, $b, $a) stored to surface $s; the coordinate vector is printed as
// {$idx, $x}. Pattern list is empty.
def SUST_P_1D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
6030
6031
// Emits "sust.p.2d.b8.trap": one data register ($r) stored to surface $s at
// ($x, $y). No outputs; the pattern list is left empty.
def SUST_P_2D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
// Emits "sust.p.2d.b16.trap": one data register ($r) stored to surface $s at
// ($x, $y). No outputs; the pattern list is left empty.
def SUST_P_2D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
// Emits "sust.p.2d.b32.trap": one Int32Regs data register ($r) stored to
// surface $s at ($x, $y). No outputs; the pattern list is left empty.
def SUST_P_2D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
// Emits "sust.p.2d.v2.b8.trap": two data registers ($r, $g) stored to
// surface $s at ($x, $y). No outputs; the pattern list is left empty.
def SUST_P_2D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
// Emits "sust.p.2d.v2.b16.trap": two data registers ($r, $g) stored to
// surface $s at ($x, $y). No outputs; the pattern list is left empty.
def SUST_P_2D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
// Emits "sust.p.2d.v2.b32.trap": two Int32Regs data registers ($r, $g)
// stored to surface $s at ($x, $y). The pattern list is left empty.
def SUST_P_2D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
// Emits "sust.p.2d.v4.b8.trap": four data registers ($r, $g, $b, $a) stored
// to surface $s at ($x, $y). The pattern list is left empty.
def SUST_P_2D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Emits "sust.p.2d.v4.b16.trap": four data registers ($r, $g, $b, $a)
// stored to surface $s at ($x, $y). The pattern list is left empty.
def SUST_P_2D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Emits "sust.p.2d.v4.b32.trap": four Int32Regs data registers
// ($r, $g, $b, $a) stored to surface $s at ($x, $y). The pattern list is
// left empty.
def SUST_P_2D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
6086
6087
// Emits "sust.p.a2d.b8.trap": one data register ($r) stored to surface $s.
// The coordinate vector is printed as {$idx, $x, $y, $y}; $y appears twice
// so that the vector has four elements. The pattern list is left empty.
def SUST_P_2D_ARRAY_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
// Emits "sust.p.a2d.b16.trap": one data register ($r) stored to surface $s.
// The coordinate vector is printed as {$idx, $x, $y, $y}; $y appears twice
// so that the vector has four elements. The pattern list is left empty.
def SUST_P_2D_ARRAY_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
// Emits "sust.p.a2d.b32.trap": one Int32Regs data register ($r) stored to
// surface $s. The coordinate vector is printed as {$idx, $x, $y, $y}; $y
// appears twice so that the vector has four elements. Pattern list is empty.
def SUST_P_2D_ARRAY_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
// Emits "sust.p.a2d.v2.b8.trap": two data registers ($r, $g) stored to
// surface $s. The coordinate vector is printed as {$idx, $x, $y, $y}; $y
// appears twice so that the vector has four elements. Pattern list is empty.
def SUST_P_2D_ARRAY_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
// Emits "sust.p.a2d.v2.b16.trap": two data registers ($r, $g) stored to
// surface $s. The coordinate vector is printed as {$idx, $x, $y, $y}; $y
// appears twice so that the vector has four elements. Pattern list is empty.
def SUST_P_2D_ARRAY_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
// Emits "sust.p.a2d.v2.b32.trap": two Int32Regs data registers ($r, $g)
// stored to surface $s. The coordinate vector is printed as
// {$idx, $x, $y, $y}; $y appears twice so that the vector has four elements.
// The pattern list is left empty.
def SUST_P_2D_ARRAY_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
// Emits "sust.p.a2d.v4.b8.trap": four data registers ($r, $g, $b, $a)
// stored to surface $s. The coordinate vector is printed as
// {$idx, $x, $y, $y}; $y appears twice so that the vector has four elements.
// The pattern list is left empty.
def SUST_P_2D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Emits "sust.p.a2d.v4.b16.trap": four data registers ($r, $g, $b, $a)
// stored to surface $s. The coordinate vector is printed as
// {$idx, $x, $y, $y}; $y appears twice so that the vector has four elements.
// The pattern list is left empty.
def SUST_P_2D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Emits "sust.p.a2d.v4.b32.trap": four Int32Regs data registers
// ($r, $g, $b, $a) stored to surface $s. The coordinate vector is printed as
// {$idx, $x, $y, $y}; $y appears twice so that the vector has four elements.
// The pattern list is left empty.
def SUST_P_2D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
6148
6149
// Emits "sust.p.3d.b8.trap": one data register ($r) stored to surface $s at
// ($x, $y, $z). The asm string prints $z twice so that the coordinate vector
// has four elements. The pattern list is left empty.
def SUST_P_3D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
// Emits "sust.p.3d.b16.trap": one data register ($r) stored to surface $s at
// ($x, $y, $z). The asm string prints $z twice so that the coordinate vector
// has four elements. The pattern list is left empty.
def SUST_P_3D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
// Emits "sust.p.3d.b32.trap": one Int32Regs data register ($r) stored to
// surface $s at ($x, $y, $z). The asm string prints $z twice so that the
// coordinate vector has four elements. The pattern list is left empty.
def SUST_P_3D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r),
    "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
// Emits "sust.p.3d.v2.b8.trap": two data registers ($r, $g) stored to
// surface $s at ($x, $y, $z). The asm string prints $z twice so that the
// coordinate vector has four elements. The pattern list is left empty.
def SUST_P_3D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
// Emits "sust.p.3d.v2.b16.trap": two data registers ($r, $g) stored to
// surface $s at ($x, $y, $z). The asm string prints $z twice so that the
// coordinate vector has four elements. The pattern list is left empty.
def SUST_P_3D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
// Emits "sust.p.3d.v2.b32.trap": two Int32Regs data registers ($r, $g)
// stored to surface $s at ($x, $y, $z). The asm string prints $z twice so
// that the coordinate vector has four elements. Pattern list is empty.
def SUST_P_3D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
// Emits "sust.p.3d.v4.b8.trap": four data registers ($r, $g, $b, $a) stored
// to surface $s at ($x, $y, $z). The asm string prints $z twice so that the
// coordinate vector has four elements. The pattern list is left empty.
def SUST_P_3D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Emits "sust.p.3d.v4.b16.trap": four data registers ($r, $g, $b, $a)
// stored to surface $s at ($x, $y, $z). The asm string prints $z twice so
// that the coordinate vector has four elements. Pattern list is empty.
def SUST_P_3D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
// Emits "sust.p.3d.v4.b32.trap": four Int32Regs data registers
// ($r, $g, $b, $a) stored to surface $s at ($x, $y, $z). The asm string
// prints $z twice so that the coordinate vector has four elements. The
// pattern list is left empty.
def SUST_P_3D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
6210}
6211
// Surface store instruction patterns
// These are written as standalone Pat records rather than being attached to
// the instruction definitions: including them there makes TableGen report
// type errors, and the underlying cause has not been determined.
6215
6216// .clamp variant
// 1D surfaces, .clamp: each int_nvvm_sust_b_1d_*_clamp intrinsic selects the
// SUST_B_1D_*_CLAMP instruction of the matching width. Operands ($s, $x, then
// the data registers) are forwarded unchanged and in the same order.

// One data register.
def : Pat<
    (int_nvvm_sust_b_1d_i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
    (SUST_B_1D_B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_i64_clamp
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
    (SUST_B_1D_B64_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;

// Two data registers.
def : Pat<
    (int_nvvm_sust_b_1d_v2i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_V2B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_V2B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
    (SUST_B_1D_V2B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i64_clamp
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
    (SUST_B_1D_V2B64_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;

// Four data registers.
def : Pat<
    (int_nvvm_sust_b_1d_v4i8_clamp
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_V4B8_CLAMP
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_1d_v4i16_clamp
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_V4B16_CLAMP
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_1d_v4i32_clamp
        Int64Regs:$s, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (SUST_B_1D_V4B32_CLAMP
        Int64Regs:$s, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6270
6271
6272
// 1D array surfaces, .clamp: each int_nvvm_sust_b_1d_array_*_clamp intrinsic
// selects the SUST_B_1D_ARRAY_*_CLAMP instruction of the matching width.
// Operands ($s, $l, $x, then the data registers) are forwarded unchanged and
// in the same order.

// One data register.
def : Pat<
    (int_nvvm_sust_b_1d_array_i8_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_ARRAY_B8_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_i16_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_ARRAY_B16_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_i32_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
    (SUST_B_1D_ARRAY_B32_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_i64_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
    (SUST_B_1D_ARRAY_B64_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>;

// Two data registers.
def : Pat<
    (int_nvvm_sust_b_1d_array_v2i8_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_ARRAY_V2B8_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v2i16_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_ARRAY_V2B16_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v2i32_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
    (SUST_B_1D_ARRAY_V2B32_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v2i64_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
    (SUST_B_1D_ARRAY_V2B64_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;

// Four data registers.
def : Pat<
    (int_nvvm_sust_b_1d_array_v4i8_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_ARRAY_V4B8_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v4i16_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_ARRAY_V4B16_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v4i32_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (SUST_B_1D_ARRAY_V4B32_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6330
6331
6332
// 2D surfaces, .clamp: each int_nvvm_sust_b_2d_*_clamp intrinsic selects the
// SUST_B_2D_*_CLAMP instruction of the matching width. Operands ($s, $x, $y,
// then the data registers) are forwarded unchanged and in the same order.

// One data register.
def : Pat<
    (int_nvvm_sust_b_2d_i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    (SUST_B_2D_B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    (SUST_B_2D_B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
    (SUST_B_2D_B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_i64_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
    (SUST_B_2D_B64_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;

// Two data registers.
def : Pat<
    (int_nvvm_sust_b_2d_v2i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_2D_V2B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_v2i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_2D_V2B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_v2i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
    (SUST_B_2D_V2B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_v2i64_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
    (SUST_B_2D_V2B64_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;

// Four data registers.
def : Pat<
    (int_nvvm_sust_b_2d_v4i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_2D_V4B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_2d_v4i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_2D_V4B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_2d_v4i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (SUST_B_2D_V4B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6390
6391
6392
// 2D array surfaces, .clamp: each int_nvvm_sust_b_2d_array_*_clamp intrinsic
// selects the SUST_B_2D_ARRAY_*_CLAMP instruction of the matching width.
// Operands ($s, $l, $x, $y, then the data registers) are forwarded unchanged
// and in the same order.

// One data register.
def : Pat<
    (int_nvvm_sust_b_2d_array_i8_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    (SUST_B_2D_ARRAY_B8_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_i16_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    (SUST_B_2D_ARRAY_B16_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_i32_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
    (SUST_B_2D_ARRAY_B32_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_i64_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
    (SUST_B_2D_ARRAY_B64_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;

// Two data registers.
def : Pat<
    (int_nvvm_sust_b_2d_array_v2i8_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g),
    (SUST_B_2D_ARRAY_V2B8_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v2i16_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g),
    (SUST_B_2D_ARRAY_V2B16_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v2i32_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g),
    (SUST_B_2D_ARRAY_V2B32_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v2i64_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int64Regs:$r, Int64Regs:$g),
    (SUST_B_2D_ARRAY_V2B64_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int64Regs:$r, Int64Regs:$g)>;

// Four data registers.
def : Pat<
    (int_nvvm_sust_b_2d_array_v4i8_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_2D_ARRAY_V4B8_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v4i16_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_2D_ARRAY_V4B16_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v4i32_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (SUST_B_2D_ARRAY_V4B32_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6463
6464
6465
// 3D surfaces, .clamp: each int_nvvm_sust_b_3d_*_clamp intrinsic selects the
// SUST_B_3D_*_CLAMP instruction of the matching width. Operands ($s, $x, $y,
// $z, then the data registers) are forwarded unchanged and in the same order.

// One data register.
def : Pat<
    (int_nvvm_sust_b_3d_i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
    (SUST_B_3D_B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_3d_i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
    (SUST_B_3D_B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_3d_i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r),
    (SUST_B_3D_B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_3d_i64_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r),
    (SUST_B_3D_B64_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r)>;

// Two data registers.
def : Pat<
    (int_nvvm_sust_b_3d_v2i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g),
    (SUST_B_3D_V2B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_3d_v2i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g),
    (SUST_B_3D_V2B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_3d_v2i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int32Regs:$r, Int32Regs:$g),
    (SUST_B_3D_V2B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_3d_v2i64_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int64Regs:$r, Int64Regs:$g),
    (SUST_B_3D_V2B64_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int64Regs:$r, Int64Regs:$g)>;

// Four data registers.
def : Pat<
    (int_nvvm_sust_b_3d_v4i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_3D_V4B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_3d_v4i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_3D_V4B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_3d_v4i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (SUST_B_3D_V4B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6542
6543
6544// .trap variant
// 1D surfaces, .trap: each int_nvvm_sust_b_1d_*_trap intrinsic selects the
// SUST_B_1D_*_TRAP instruction of the matching width. Operands ($s, $x, then
// the data registers) are forwarded unchanged and in the same order.

// One data register.
def : Pat<
    (int_nvvm_sust_b_1d_i8_trap
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_B8_TRAP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_i16_trap
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_B16_TRAP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_i32_trap
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
    (SUST_B_1D_B32_TRAP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_i64_trap
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
    (SUST_B_1D_B64_TRAP
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;

// Two data registers.
def : Pat<
    (int_nvvm_sust_b_1d_v2i8_trap
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_V2B8_TRAP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i16_trap
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_V2B16_TRAP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i32_trap
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
    (SUST_B_1D_V2B32_TRAP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i64_trap
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
    (SUST_B_1D_V2B64_TRAP
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;

// Four data registers.
def : Pat<
    (int_nvvm_sust_b_1d_v4i8_trap
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_V4B8_TRAP
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_1d_v4i16_trap
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_V4B16_TRAP
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_1d_v4i32_trap
        Int64Regs:$s, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (SUST_B_1D_V4B32_TRAP
        Int64Regs:$s, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6598
6599
6600
// int_nvvm_sust_b_1d_array_*_trap selection patterns.
// Each Pat rewrites the intrinsic into the identically named
// SUST_B_1D_ARRAY_*_TRAP instruction, forwarding every operand unchanged and in
// the same order: $s (Int64Regs), $l (Int32Regs), $x (Int32Regs), then the data
// operands. i8 and i16 data are both matched in Int16Regs; i32 uses Int32Regs
// and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle, $l the array layer and $x
// the coordinate -- confirm against the intrinsic definitions.

// One data operand ($r).
6601def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
6602           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6603          (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6604           Int16Regs:$r)>;
6605
6606def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
6607           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6608          (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6609           Int16Regs:$r)>;
6610
6611def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
6612           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6613          (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6614           Int32Regs:$r)>;
6615
6616def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
6617           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6618          (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6619           Int64Regs:$r)>;
6620
// Two data operands ($r, $g).
6621def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
6622          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6623          (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6624           Int16Regs:$r, Int16Regs:$g)>;
6625
6626def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
6627          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6628          (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6629           Int16Regs:$r, Int16Regs:$g)>;
6630
6631def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
6632          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6633          (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6634           Int32Regs:$r, Int32Regs:$g)>;
6635
6636def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
6637          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6638          (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6639           Int64Regs:$r, Int64Regs:$g)>;
6640
// Four data operands ($r, $g, $b, $a); no v4i64 pattern appears in this group.
6641def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
6642           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6643           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6644          (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6645           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6646
6647def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
6648           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6649           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6650          (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6651           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6652
6653def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
6654           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6655           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6656          (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6657           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6658
6659
6660
// int_nvvm_sust_b_2d_*_trap selection patterns.
// Each Pat rewrites the intrinsic into the identically named SUST_B_2D_*_TRAP
// instruction, forwarding every operand unchanged and in the same order:
// $s (Int64Regs), $x and $y (Int32Regs), then the data operands. i8 and i16
// data are both matched in Int16Regs; i32 uses Int32Regs and i64 Int64Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y the coordinates
// -- confirm against the intrinsic definitions.

// One data operand ($r).
6661def : Pat<(int_nvvm_sust_b_2d_i8_trap
6662           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6663          (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6664           Int16Regs:$r)>;
6665
6666def : Pat<(int_nvvm_sust_b_2d_i16_trap
6667           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6668          (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6669           Int16Regs:$r)>;
6670
6671def : Pat<(int_nvvm_sust_b_2d_i32_trap
6672           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6673          (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6674           Int32Regs:$r)>;
6675
6676def : Pat<(int_nvvm_sust_b_2d_i64_trap
6677           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6678          (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6679           Int64Regs:$r)>;
6680
// Two data operands ($r, $g).
6681def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
6682          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6683          (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6684           Int16Regs:$r, Int16Regs:$g)>;
6685
6686def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
6687          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6688          (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6689           Int16Regs:$r, Int16Regs:$g)>;
6690
6691def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
6692          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6693          (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6694           Int32Regs:$r, Int32Regs:$g)>;
6695
6696def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
6697          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6698          (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6699           Int64Regs:$r, Int64Regs:$g)>;
6700
// Four data operands ($r, $g, $b, $a); no v4i64 pattern appears in this group.
6701def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
6702           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6703           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6704          (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6705           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6706
6707def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
6708           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6709           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6710          (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6711           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6712
6713def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
6714           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6715           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6716          (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6717           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6718
6719
6720
// int_nvvm_sust_b_2d_array_*_trap selection patterns.
// Each Pat rewrites the intrinsic into the identically named
// SUST_B_2D_ARRAY_*_TRAP instruction, forwarding every operand unchanged and in
// the same order: $s (Int64Regs), $l, $x and $y (Int32Regs), then the data
// operands. i8 and i16 data are both matched in Int16Regs; i32 uses Int32Regs
// and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle, $l the array layer and
// $x/$y the coordinates -- confirm against the intrinsic definitions.

// One data operand ($r).
6721def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
6722          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6723          (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
6724           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6725           Int16Regs:$r)>;
6726
6727def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
6728          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6729          (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
6730           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6731           Int16Regs:$r)>;
6732
6733def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
6734          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6735          (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
6736           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6737           Int32Regs:$r)>;
6738
6739def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
6740          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6741          (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
6742           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6743           Int64Regs:$r)>;
6744
// Two data operands ($r, $g).
6745def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
6746           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6747           Int16Regs:$r, Int16Regs:$g),
6748          (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6749           Int32Regs:$x, Int32Regs:$y,
6750           Int16Regs:$r, Int16Regs:$g)>;
6751
6752def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
6753           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6754           Int16Regs:$r, Int16Regs:$g),
6755          (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6756           Int32Regs:$x, Int32Regs:$y,
6757           Int16Regs:$r, Int16Regs:$g)>;
6758
6759def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
6760           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6761           Int32Regs:$g),
6762          (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6763           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6764
6765def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
6766           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6767           Int64Regs:$g),
6768          (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
6769           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6770
// Four data operands ($r, $g, $b, $a); no v4i64 pattern appears in this group.
6771def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
6772           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6773           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6774          (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
6775           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6776           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6777
6778def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
6779           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6780           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6781          (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
6782           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6783           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6784
6785def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
6786           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6787           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6788          (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
6789           Int32Regs:$x, Int32Regs:$y,
6790           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6791
6792
6793
// int_nvvm_sust_b_3d_*_trap selection patterns.
// Each Pat rewrites the intrinsic into the identically named SUST_B_3D_*_TRAP
// instruction, forwarding every operand unchanged and in the same order:
// $s (Int64Regs), $x, $y and $z (Int32Regs), then the data operands. i8 and
// i16 data are both matched in Int16Regs; i32 uses Int32Regs and i64 Int64Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the intrinsic definitions.

// One data operand ($r).
6794def : Pat<(int_nvvm_sust_b_3d_i8_trap
6795           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6796           Int16Regs:$r),
6797          (SUST_B_3D_B8_TRAP Int64Regs:$s,
6798           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6799           Int16Regs:$r)>;
6800
6801def : Pat<(int_nvvm_sust_b_3d_i16_trap
6802           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6803           Int16Regs:$r),
6804          (SUST_B_3D_B16_TRAP Int64Regs:$s,
6805           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6806           Int16Regs:$r)>;
6807
6808def : Pat<(int_nvvm_sust_b_3d_i32_trap
6809           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6810           Int32Regs:$r),
6811          (SUST_B_3D_B32_TRAP Int64Regs:$s,
6812           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6813           Int32Regs:$r)>;
6814
6815def : Pat<(int_nvvm_sust_b_3d_i64_trap
6816           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6817           Int64Regs:$r),
6818          (SUST_B_3D_B64_TRAP Int64Regs:$s,
6819           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6820           Int64Regs:$r)>;
6821
// Two data operands ($r, $g).
6822def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
6823           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6824           Int16Regs:$r, Int16Regs:$g),
6825          (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
6826           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6827           Int16Regs:$r, Int16Regs:$g)>;
6828
6829def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
6830           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6831           Int16Regs:$r, Int16Regs:$g),
6832          (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
6833           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6834           Int16Regs:$r, Int16Regs:$g)>;
6835
6836def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
6837           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6838           Int32Regs:$r, Int32Regs:$g),
6839          (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
6840           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6841           Int32Regs:$r, Int32Regs:$g)>;
6842
6843def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
6844           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6845           Int64Regs:$r, Int64Regs:$g),
6846          (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
6847           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6848           Int64Regs:$r, Int64Regs:$g)>;
6849
// Four data operands ($r, $g, $b, $a); no v4i64 pattern appears in this group.
6850def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
6851           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6852           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6853          (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
6854           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6855           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6856
6857def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
6858           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6859           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6860          (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
6861           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6862           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6863
6864def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
6865           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6866           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6867          (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
6868           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6869           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6870
6871
6872// .zero variant
// int_nvvm_sust_b_1d_*_zero selection patterns.
// Each Pat rewrites the intrinsic into the identically named SUST_B_1D_*_ZERO
// instruction, forwarding every operand unchanged and in the same order:
// $s (Int64Regs), $x (Int32Regs), then the data operands. The operand shapes
// mirror the `_trap` patterns; only the intrinsic/instruction suffix differs.
// i8 and i16 data are both matched in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// confirm against the intrinsic definitions.

// One data operand ($r).
6873def : Pat<(int_nvvm_sust_b_1d_i8_zero
6874           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6875          (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6876
6877def : Pat<(int_nvvm_sust_b_1d_i16_zero
6878           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6879          (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6880
6881def : Pat<(int_nvvm_sust_b_1d_i32_zero
6882           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6883          (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6884
6885def : Pat<(int_nvvm_sust_b_1d_i64_zero
6886           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6887          (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6888
// Two data operands ($r, $g).
6889def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
6890           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6891          (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
6892           Int16Regs:$r, Int16Regs:$g)>;
6893
6894def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
6895           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6896          (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
6897           Int16Regs:$r, Int16Regs:$g)>;
6898
6899def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
6900           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6901          (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
6902           Int32Regs:$r, Int32Regs:$g)>;
6903
6904def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
6905           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6906          (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
6907           Int64Regs:$r, Int64Regs:$g)>;
6908
// Four data operands ($r, $g, $b, $a); no v4i64 pattern appears in this group.
6909def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
6910           Int64Regs:$s, Int32Regs:$x,
6911           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6912          (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
6913           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6914
6915def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
6916           Int64Regs:$s, Int32Regs:$x,
6917           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6918          (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
6919           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6920
6921def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
6922           Int64Regs:$s, Int32Regs:$x,
6923           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6924          (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
6925           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6926
6927
6928
// int_nvvm_sust_b_1d_array_*_zero selection patterns.
// Each Pat rewrites the intrinsic into the identically named
// SUST_B_1D_ARRAY_*_ZERO instruction, forwarding every operand unchanged and in
// the same order: $s (Int64Regs), $l (Int32Regs), $x (Int32Regs), then the data
// operands. i8 and i16 data are both matched in Int16Regs; i32 uses Int32Regs
// and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle, $l the array layer and $x
// the coordinate -- confirm against the intrinsic definitions.

// One data operand ($r).
6929def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
6930           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6931          (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6932           Int16Regs:$r)>;
6933
6934def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
6935           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6936          (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6937           Int16Regs:$r)>;
6938
6939def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
6940           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6941          (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6942           Int32Regs:$r)>;
6943
6944def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
6945           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6946          (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6947           Int64Regs:$r)>;
6948
// Two data operands ($r, $g).
6949def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
6950          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6951          (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6952           Int16Regs:$r, Int16Regs:$g)>;
6953
6954def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
6955          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6956          (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6957           Int16Regs:$r, Int16Regs:$g)>;
6958
6959def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
6960          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6961          (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6962           Int32Regs:$r, Int32Regs:$g)>;
6963
6964def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
6965          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6966          (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6967           Int64Regs:$r, Int64Regs:$g)>;
6968
// Four data operands ($r, $g, $b, $a); no v4i64 pattern appears in this group.
6969def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
6970           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6971           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6972          (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6973           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6974
6975def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
6976           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6977           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6978          (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6979           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6980
6981def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
6982           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6983           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6984          (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6985           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6986
6987
6988
// int_nvvm_sust_b_2d_*_zero selection patterns.
// Each Pat rewrites the intrinsic into the identically named SUST_B_2D_*_ZERO
// instruction, forwarding every operand unchanged and in the same order:
// $s (Int64Regs), $x and $y (Int32Regs), then the data operands. i8 and i16
// data are both matched in Int16Regs; i32 uses Int32Regs and i64 Int64Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y the coordinates
// -- confirm against the intrinsic definitions.

// One data operand ($r).
6989def : Pat<(int_nvvm_sust_b_2d_i8_zero
6990           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6991          (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6992           Int16Regs:$r)>;
6993
6994def : Pat<(int_nvvm_sust_b_2d_i16_zero
6995           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6996          (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6997           Int16Regs:$r)>;
6998
6999def : Pat<(int_nvvm_sust_b_2d_i32_zero
7000           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7001          (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7002           Int32Regs:$r)>;
7003
7004def : Pat<(int_nvvm_sust_b_2d_i64_zero
7005           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
7006          (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7007           Int64Regs:$r)>;
7008
// Two data operands ($r, $g).
7009def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
7010          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7011          (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7012           Int16Regs:$r, Int16Regs:$g)>;
7013
7014def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
7015          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7016          (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7017           Int16Regs:$r, Int16Regs:$g)>;
7018
7019def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
7020          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
7021          (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7022           Int32Regs:$r, Int32Regs:$g)>;
7023
7024def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
7025          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
7026          (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7027           Int64Regs:$r, Int64Regs:$g)>;
7028
// Four data operands ($r, $g, $b, $a); no v4i64 pattern appears in this group.
7029def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
7030           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7031           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7032          (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7033           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7034
7035def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
7036           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7037           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7038          (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7039           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7040
7041def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
7042           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7043           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7044          (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7045           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7046
7047
7048
// int_nvvm_sust_b_2d_array_*_zero selection patterns.
// Each Pat rewrites the intrinsic into the identically named
// SUST_B_2D_ARRAY_*_ZERO instruction, forwarding every operand unchanged and in
// the same order: $s (Int64Regs), $l, $x and $y (Int32Regs), then the data
// operands. i8 and i16 data are both matched in Int16Regs; i32 uses Int32Regs
// and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle, $l the array layer and
// $x/$y the coordinates -- confirm against the intrinsic definitions.

// One data operand ($r).
7049def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
7050          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7051          (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
7052           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7053           Int16Regs:$r)>;
7054
7055def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
7056          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7057          (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
7058           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7059           Int16Regs:$r)>;
7060
7061def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
7062          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7063          (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
7064           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7065           Int32Regs:$r)>;
7066
7067def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
7068          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
7069          (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
7070           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7071           Int64Regs:$r)>;
7072
// Two data operands ($r, $g).
7073def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
7074           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7075           Int16Regs:$r, Int16Regs:$g),
7076          (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
7077           Int32Regs:$x, Int32Regs:$y,
7078           Int16Regs:$r, Int16Regs:$g)>;
7079
7080def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
7081           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7082           Int16Regs:$r, Int16Regs:$g),
7083          (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
7084           Int32Regs:$x, Int32Regs:$y,
7085           Int16Regs:$r, Int16Regs:$g)>;
7086
7087def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
7088           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
7089           Int32Regs:$g),
7090          (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
7091           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
7092
7093def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
7094           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
7095           Int64Regs:$g),
7096          (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
7097           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
7098
// Four data operands ($r, $g, $b, $a); no v4i64 pattern appears in this group.
7099def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
7100           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7101           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7102          (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
7103           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7104           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7105
7106def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
7107           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7108           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7109          (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
7110           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7111           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7112
7113def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
7114           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7115           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7116          (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
7117           Int32Regs:$x, Int32Regs:$y,
7118           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7119
7120
7121
// int_nvvm_sust_b_3d_*_zero selection patterns.
// Each Pat rewrites the intrinsic into the identically named SUST_B_3D_*_ZERO
// instruction, forwarding every operand unchanged and in the same order:
// $s (Int64Regs), $x, $y and $z (Int32Regs), then the data operands. i8 and
// i16 data are both matched in Int16Regs; i32 uses Int32Regs and i64 Int64Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the intrinsic definitions.

// One data operand ($r).
7122def : Pat<(int_nvvm_sust_b_3d_i8_zero
7123           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7124           Int16Regs:$r),
7125          (SUST_B_3D_B8_ZERO Int64Regs:$s,
7126           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7127           Int16Regs:$r)>;
7128
7129def : Pat<(int_nvvm_sust_b_3d_i16_zero
7130           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7131           Int16Regs:$r),
7132          (SUST_B_3D_B16_ZERO Int64Regs:$s,
7133           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7134           Int16Regs:$r)>;
7135
7136def : Pat<(int_nvvm_sust_b_3d_i32_zero
7137           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7138           Int32Regs:$r),
7139          (SUST_B_3D_B32_ZERO Int64Regs:$s,
7140           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7141           Int32Regs:$r)>;
7142
7143def : Pat<(int_nvvm_sust_b_3d_i64_zero
7144           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7145           Int64Regs:$r),
7146          (SUST_B_3D_B64_ZERO Int64Regs:$s,
7147           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7148           Int64Regs:$r)>;
7149
// Two data operands ($r, $g).
7150def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
7151           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7152           Int16Regs:$r, Int16Regs:$g),
7153          (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
7154           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7155           Int16Regs:$r, Int16Regs:$g)>;
7156
7157def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
7158           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7159           Int16Regs:$r, Int16Regs:$g),
7160          (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
7161           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7162           Int16Regs:$r, Int16Regs:$g)>;
7163
7164def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
7165           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7166           Int32Regs:$r, Int32Regs:$g),
7167          (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
7168           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7169           Int32Regs:$r, Int32Regs:$g)>;
7170
7171def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
7172           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7173           Int64Regs:$r, Int64Regs:$g),
7174          (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
7175           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7176           Int64Regs:$r, Int64Regs:$g)>;
7177
// Four data operands ($r, $g, $b, $a); no v4i64 pattern appears in this group.
7178def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
7179           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7180           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7181          (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
7182           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7183           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7184
7185def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
7186           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7187           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7188          (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
7189           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7190           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7191
7192def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
7193           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7194           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7195          (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
7196           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7197           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7198
7199
7200
7201
// int_nvvm_sust_p_1d_*_trap selection patterns.
// Each Pat rewrites the intrinsic into the identically named SUST_P_1D_*_TRAP
// instruction, forwarding every operand unchanged and in the same order:
// $s (Int64Regs), $x (Int32Regs), then the data operands. i8 and i16 data are
// both matched in Int16Regs; i32 uses Int32Regs. Unlike the sust_b groups, no
// i64 or v2i64 pattern appears in this group.
// NOTE(review): `sust_p` presumably corresponds to the formatted PTX `sust.p`
// store (vs. unformatted `sust.b`) -- confirm against the PTX ISA.

// One data operand ($r).
7202def : Pat<(int_nvvm_sust_p_1d_i8_trap
7203           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
7204          (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
7205
7206def : Pat<(int_nvvm_sust_p_1d_i16_trap
7207           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
7208          (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
7209
7210def : Pat<(int_nvvm_sust_p_1d_i32_trap
7211           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
7212          (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
7213
// Two data operands ($r, $g).
7214def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
7215           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7216          (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
7217           Int16Regs:$r, Int16Regs:$g)>;
7218
7219def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
7220           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7221          (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
7222           Int16Regs:$r, Int16Regs:$g)>;
7223
7224def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
7225           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7226          (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
7227           Int32Regs:$r, Int32Regs:$g)>;
7228
// Four data operands ($r, $g, $b, $a).
7229def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
7230           Int64Regs:$s, Int32Regs:$x,
7231           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7232          (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
7233           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7234
7235def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
7236           Int64Regs:$s, Int32Regs:$x,
7237           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7238          (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
7239           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7240
7241def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
7242           Int64Regs:$s, Int32Regs:$x,
7243           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7244          (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
7245           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7246
7247
7248
// int_nvvm_sust_p_1d_array_*_trap selection patterns.
// Each Pat rewrites the intrinsic into the identically named
// SUST_P_1D_ARRAY_*_TRAP instruction, forwarding every operand unchanged and in
// the same order: $s (Int64Regs), $l (Int32Regs), $x (Int32Regs), then the data
// operands. i8 and i16 data are both matched in Int16Regs; i32 uses Int32Regs.
// No i64 or v2i64 pattern appears in this group.
// NOTE(review): $s is presumably the surface handle, $l the array layer and $x
// the coordinate -- confirm against the intrinsic definitions.

// One data operand ($r).
7249def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
7250           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7251          (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7252           Int16Regs:$r)>;
7253
7254def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
7255           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7256          (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7257           Int16Regs:$r)>;
7258
7259def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
7260           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
7261          (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7262           Int32Regs:$r)>;
7263
// Two data operands ($r, $g).
7264def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
7265          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7266          (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7267           Int16Regs:$r, Int16Regs:$g)>;
7268
7269def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
7270          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7271          (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7272           Int16Regs:$r, Int16Regs:$g)>;
7273
7274def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
7275          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7276          (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7277           Int32Regs:$r, Int32Regs:$g)>;
7278
// Four data operands ($r, $g, $b, $a).
7279def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
7280           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7281           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7282          (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7283           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7284
7285def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
7286           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7287           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7288          (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7289           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7290
7291def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
7292           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7293           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7294          (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7295           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7296
7297
7298
// Selection patterns for the int_nvvm_sust_p_2d_*_trap intrinsics.
// Each one forwards its operands, in the same order, to the matching
// SUST_P_2D_*_TRAP instruction: $s (64-bit), then $x and $y (32-bit), then
// one, two or four data values ($r, $g, $b, $a). The i8/i16 variants take
// their data in Int16Regs, the i32 variants in Int32Regs.

def : Pat<(int_nvvm_sust_p_2d_i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_P_2D_B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_P_2D_B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_P_2D_B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7346
7347
7348
// Selection patterns for the int_nvvm_sust_p_2d_array_*_trap intrinsics.
// Each one forwards its operands, in the same order, to the matching
// SUST_P_2D_ARRAY_*_TRAP instruction: $s (64-bit), then $l, $x and $y
// (32-bit), then one, two or four data values ($r, $g, $b, $a). The i8/i16
// variants take their data in Int16Regs, the i32 variants in Int32Regs.

def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_P_2D_ARRAY_B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_P_2D_ARRAY_B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_P_2D_ARRAY_B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_ARRAY_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_ARRAY_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_ARRAY_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_ARRAY_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_ARRAY_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_ARRAY_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7407
7408
7409
// Selection patterns for the int_nvvm_sust_p_3d_*_trap intrinsics.
// Each one forwards its operands, in the same order, to the matching
// SUST_P_3D_*_TRAP instruction: $s (64-bit), then $x, $y and $z (32-bit),
// then one, two or four data values ($r, $g, $b, $a). The i8/i16 variants
// take their data in Int16Regs, the i32 variants in Int32Regs.

def : Pat<(int_nvvm_sust_p_3d_i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_P_3D_B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_P_3D_B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r),
          (SUST_P_3D_B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_P_3D_V2B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B8_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B16_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_3D_V4B32_TRAP
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7472
7473//-----------------------------------
7474// Read Special Registers
7475//-----------------------------------
7476
// Instruction template that reads a special register into a 64-bit register.
//   regname - register name without the leading '%' (the template adds it).
//   intop   - argument-less intrinsic that this instruction is selected for.
class PTX_READ_SREG_R64<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int64Regs:$d), (ins),
              "mov.u64 \t$d, %" # regname # ";",
              [(set Int64Regs:$d, (intop))]>;
7481
// Instruction template that reads a special register into a 32-bit register.
//   regname - register name without the leading '%' (the template adds it).
//   intop   - argument-less intrinsic that this instruction is selected for.
class PTX_READ_SREG_R32<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int32Regs:$d), (ins),
              "mov.u32 \t$d, %" # regname # ";",
              [(set Int32Regs:$d, (intop))]>;
7486
// TODO: Add vector versions of the special-register reads.
7488
// One instruction per special register. Each def pairs the PTX register name
// with the int_nvvm_read_ptx_sreg_* intrinsic that selects it.

// %tid.{x,y,z,w}
def INT_PTX_SREG_TID_X
  : PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
def INT_PTX_SREG_TID_Y
  : PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
def INT_PTX_SREG_TID_Z
  : PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
def INT_PTX_SREG_TID_W
  : PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;

// %ntid.{x,y,z,w}
def INT_PTX_SREG_NTID_X
  : PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
def INT_PTX_SREG_NTID_Y
  : PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
def INT_PTX_SREG_NTID_Z
  : PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
def INT_PTX_SREG_NTID_W
  : PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;

// %laneid, %warpid, %nwarpid
def INT_PTX_SREG_LANEID
  : PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def INT_PTX_SREG_WARPID
  : PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID
  : PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;

// %ctaid.{x,y,z,w}
def INT_PTX_SREG_CTAID_X
  : PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
def INT_PTX_SREG_CTAID_Y
  : PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
def INT_PTX_SREG_CTAID_Z
  : PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
def INT_PTX_SREG_CTAID_W
  : PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;

// %nctaid.{x,y,z,w}
def INT_PTX_SREG_NCTAID_X
  : PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
def INT_PTX_SREG_NCTAID_Y
  : PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
def INT_PTX_SREG_NCTAID_Z
  : PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
def INT_PTX_SREG_NCTAID_W
  : PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;

// %smid, %nsmid, %gridid
def INT_PTX_SREG_SMID
  : PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID
  : PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
def INT_PTX_SREG_GRIDID
  : PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;

// %lanemask_{eq,le,lt,ge,gt}
def INT_PTX_SREG_LANEMASK_EQ
  : PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
def INT_PTX_SREG_LANEMASK_LE
  : PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
def INT_PTX_SREG_LANEMASK_LT
  : PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
def INT_PTX_SREG_LANEMASK_GE
  : PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
def INT_PTX_SREG_LANEMASK_GT
  : PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;

// %clock (32-bit read) and %clock64 (64-bit read)
def INT_PTX_SREG_CLOCK
  : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64
  : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;

// %pm0 .. %pm3
def INT_PTX_SREG_PM0
  : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1
  : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
def INT_PTX_SREG_PM2
  : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
def INT_PTX_SREG_PM3
  : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
7559
// TODO: It would be nice to reuse the PTX_READ_SREG_* templates here, but
// they always emit a '%'-prefixed register name, whereas this instruction
// moves the bare constant WARP_SZ.
def INT_PTX_SREG_WARPSIZE
  : NVPTXInst<(outs Int32Regs:$dst), (ins),
              "mov.u32 \t$dst, WARP_SZ;",
              [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
7565
// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
// In addition to the target-independent fields provided by WMMA_REGS, it adds
// the fields commonly used to implement a specific PTX instruction -- register
// types and names, constraints, parts of assembly, etc.
//
// Template arguments:
//   r  - WMMA_REGS record describing the fragment; its geom, frag and
//        ptx_elt_type are forwarded unchanged to the WMMA_REGS base.
//   op - name of the operation the fragment is used with. Instantiations in
//        this file pass "load", "store", "wmma.mma", "mma" or "ldmatrix". It
//        is only consulted when selecting Predicates.
class WMMA_REGINFO<WMMA_REGS r, string op>
      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
  // NVPTX register types used to carry fragment data. Only f16/f32/f64 get a
  // floating-point register class; every other element type uses Int32Regs.
  NVPTXRegClass regclass = !cond(
    !eq(ptx_elt_type, "f16") : Float16x2Regs,
    !eq(ptx_elt_type, "f32") : Float32Regs,
    !eq(ptx_elt_type, "f64") : Float64Regs,
    !eq(ptx_elt_type, "bf16") : Int32Regs,
    !eq(ptx_elt_type, "tf32") : Int32Regs,
    !eq(ptx_elt_type, "s32") : Int32Regs,
    !eq(ptx_elt_type, "b16") : Int32Regs,
    !eq(ptx_elt_type, "s8") : Int32Regs,
    !eq(ptx_elt_type, "u8") : Int32Regs,
    !eq(ptx_elt_type, "s4") : Int32Regs,
    !eq(ptx_elt_type, "u4") : Int32Regs,
    !eq(ptx_elt_type, "b1") : Int32Regs);

  // Instruction input/output arguments for the fragment: one `regclass`
  // entry per element of the inherited `regs` list.
  list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));

  // List of register names for the fragment -- ["ra0", "ra1",...]
  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;

  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
  string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";

  // Predicates for particular fragment variant. Technically those are
  // per-instruction predicates, but currently all fragments that can be used in
  // a given instruction are subject to the same constraints, so an instruction
  // can use predicates from any of its fragments. If/when this is no
  // longer the case, we can concat all per-fragment predicates to enforce that
  // all fragments of the instruction are viable.
  //
  // The clauses below are order-sensitive: !cond yields the value of the first
  // condition that holds. In particular the f64 @ m8n8k4 clause must stay ahead
  // of the "non-f64 @ m8n8k4" clause, and the "op != mma" clauses for
  // m8n8k128/m8n8k32 must stay ahead of the "op == mma" clauses for the same
  // geometries.
  list<Predicate> Predicates = !cond(
    // fp16 -> fp16/fp32 @ m16n16k16
    !and(!eq(geom, "m16n16k16"),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],

    // f64 @ m8n8k4
    !and(!eq(geom,"m8n8k4"),
         !eq(ptx_elt_type, "f64")) : [hasSM80, hasPTX70],

    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
    !and(!or(!eq(geom, "m8n32k16"),
             !eq(geom, "m32n8k16")),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],

    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !or(!eq(ptx_elt_type, "u8"),
             !eq(ptx_elt_type, "s8"),
             !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],

    // bf16 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !eq(ptx_elt_type, "bf16")) : [hasSM80, hasPTX70],

    // tf32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "tf32")) : [hasSM80, hasPTX70],

    // f32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "f32")) : [hasSM80, hasPTX70],

    // b1 -> s32 @ m8n8k128(b1)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k128")) : [hasSM75, hasPTX63],

    // u4/s4 -> s32 @ m8n8k32 (u4/s4)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63],

    // any element type @ m16n8k8/m8n8k16
    !or(!eq(geom,"m16n8k8"),
        !eq(geom,"m8n8k16")) : [hasSM75, hasPTX65],

    // non-f64 @ m8n8k4 (the f64 case was matched above)
    !and(!ne(ptx_elt_type,"f64"),
         !eq(geom, "m8n8k4")) : [hasSM70, hasPTX64],

    // mma m8n8k32 requires higher PTX version
    !and(!eq(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX65],

    // mma-only geometries (m8n8k128 reaches this clause only when op == "mma")
    !and(!eq(op,"mma"),
         !or(!eq(geom, "m16n8k16"),
             !eq(geom, "m16n8k4"),
             !eq(geom, "m16n8k32"),
             !eq(geom, "m16n8k64"),
             !eq(geom, "m8n8k128"),
             !eq(geom, "m16n8k128"),
             !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70],

    // ldmatrix b16 @ m8n8
    !and(!eq(op,"ldmatrix"),
         !eq(ptx_elt_type,"b16"),
         !eq(geom, "m8n8")) : [hasSM75, hasPTX65]);

  // template DAGs for instruction inputs/output: the same registers and names,
  // rooted at `outs` and `ins` respectively.
  dag Outs = !dag(outs, ptx_regs, reg_names);
  dag Ins = !dag(ins, ptx_regs, reg_names);
}
7674
// Turns an instruction-operand dag into the dag that matches a call to the
// intrinsic `Intr`. `ret` is `Ins` with these substitutions applied to each
// element: ins -> Intr, MEMri -> ADDRri, MEMri64 -> ADDRri64, imem -> ADDRvar.
class BuildPatternI<Intrinsic Intr, dag Ins> {
  dag ret = !foreach(elt, Ins,
                     !subst(imem,    ADDRvar,
                     !subst(MEMri64, ADDRri64,
                     !subst(MEMri,   ADDRri,
                     !subst(ins,     Intr, elt)))));
}
7684
// PatFrag flavour of the intrinsic-pattern builder. `ret` is `Ins` with these
// substitutions applied to each element: ins -> Intr (a PatFrag),
// MEMri -> ADDRri, MEMri64 -> ADDRri64, imem -> ADDRvar.
class BuildPatternPF<PatFrag Intr, dag Ins> {
  dag ret = !foreach(elt, Ins,
                     !subst(imem,    ADDRvar,
                     !subst(MEMri64, ADDRri64,
                     !subst(MEMri,   ADDRri,
                     !subst(ins,     Intr, elt)))));
}
7694
// Base record shared by all MMA-family instructions in this file. It carries
// the fields that MMA_PAT later reads to build the selection pattern.
//   _Intr - name of the intrinsic record the instruction is selected for.
//   _Args - list of (ins ...) dags; they are concatenated in list order.
// The asm string and operand lists are placeholders ("?", empty dags) that
// derived classes override with `let`.
class WMMA_INSTR<string _Intr, list<dag> _Args>
  : NVPTXInst<(outs), (ins), "?", []> {
  // Intrinsic record looked up by name.
  Intrinsic Intr = !cast<Intrinsic>(_Intr);
  // Every argument dag folded into a single (ins ...) dag.
  dag Args = !foldl((ins), _Args, acc, next, !con(acc, next));
  // Default pattern: (Intr arg0, arg1, ...). Derived classes may replace it.
  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
}
7704
7705//
7706// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
7707//
7708
7709class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
7710                DAGOperand SrcOp>
7711  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
7712                              [!con((ins SrcOp:$src),
7713                                    !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
7714    Requires<Frag.Predicates> {
7715  // Load/store intrinsics are overloaded on pointer's address space.
7716  // To match the right intrinsic, we need to build AS-constrained PatFrag.
7717  // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
7718  dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
7719  dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
7720  // Build PatFrag that only matches particular address space.
7721  PatFrag IntrFrag = PatFrag<PFOperands,
7722                             PFOperandsIntr,
7723                             !cond(!eq(Space, ".shared"): AS_match.shared,
7724                                   !eq(Space, ".global"): AS_match.global,
7725                                   true: AS_match.generic)>;
7726  // Build AS-constrained pattern.
7727  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
7728
7729  let OutOperandList = Frag.Outs;
7730  let InOperandList = !con(Args, (ins MmaCode:$ptx));
7731  let AsmString = "wmma.load."
7732                  # Frag.frag
7733                  # ".sync"
7734                  # "${ptx:aligned}"
7735                  # "." # Layout
7736                  # "." # Frag.geom
7737                  # Space
7738                  # "." # Frag.ptx_elt_type # " \t"
7739                  # Frag.regstring
7740                  # ", [$src]"
7741                  # !if(WithStride, ", $ldm", "")
7742                  # ";";
7743}
7744
7745//
7746// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
7747//
7748class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
7749                   bit WithStride, DAGOperand DstOp>
7750  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
7751               [!con((ins DstOp:$dst),
7752                     Frag.Ins,
7753                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
7754    Requires<Frag.Predicates> {
7755
7756  // Load/store intrinsics are overloaded on pointer's address space.
7757  // To match the right intrinsic, we need to build AS-constrained PatFrag.
7758  // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
7759  dag PFOperands = !con((ops node:$dst),
7760                        !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
7761                        !if(WithStride, (ops node:$ldm), (ops)));
7762  // Build PatFrag that only matches particular address space.
7763  PatFrag IntrFrag = PatFrag<PFOperands,
7764                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
7765                             !cond(!eq(Space, ".shared"): AS_match.shared,
7766                                   !eq(Space, ".global"): AS_match.global,
7767                                   true: AS_match.generic)>;
7768  // Build AS-constrained pattern.
7769  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
7770
7771  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
7772  let OutOperandList = (outs);
7773  let AsmString = "wmma.store.d.sync"
7774                  # "${ptx:aligned}"
7775                  # "." # Layout
7776                  # "." # Frag.geom
7777                  # Space
7778                  # "." # Frag.ptx_elt_type
7779                  # " \t[$dst],"
7780                  # Frag.regstring
7781                  # !if(WithStride, ", $ldm", "")
7782                  # ";";
7783}
7784
// Instantiate every supported wmma load/store variant and collect the records
// in MMA_LDSTs. The variants are the cross product of layout, stride, address
// space and address operand class, filtered by NVVM_WMMA_LDST_SUPPORTED.
defset list<WMMA_INSTR> MMA_LDSTs = {
  foreach layout = ["row", "col"] in
  foreach stride = [false, true] in
  foreach space = [".global", ".shared", ""] in
  foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
    // Loads first ...
    foreach frag = NVVM_MMA_OPS.all_ld_ops in
      if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
        def : WMMA_LOAD<WMMA_REGINFO<frag, "load">,
                        layout, space, stride, addr>;
    // ... then stores, for the same layout/stride/space/addr combination.
    foreach frag = NVVM_MMA_OPS.all_st_ops in
      if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
        def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">,
                           layout, space, stride, addr>;
  } // layout, stride, space, addr
} // defset
7802
// Predicate list for an MMA instruction: the predicates of fragment A, plus
// [hasSM80, hasPTX71] when the b1 operation is ".and.popc".
//   FragA - fragment whose Predicates form the base list.
//   b1op  - b1 operation suffix (compared against ".and.popc").
class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  // The template arguments, re-exposed as record fields.
  string Op = b1op;
  WMMA_REGINFO Frag = FragA;
  list<Predicate> ret =
      !listconcat(FragA.Predicates,
                  !if(!eq(b1op, ".and.popc"), [hasSM80,hasPTX71],[]));
}
// wmma.mma instruction record.
//   FragA/FragB/FragC - input fragments; their Ins dags become the operands.
//   FragD             - result fragment; its Outs dag becomes the outputs.
//   ALayout/BLayout   - layout suffixes placed in the asm string.
//   Satfinite         - non-zero appends ".satfinite".
//   rnd               - rounding-mode suffix without the dot, or "" for none.
//   b1op              - b1 operation suffix, pasted right after "wmma.mma".
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
  : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
               [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires<> does not seem to take effect on an Instruction without
    // Patterns. It is set here anyway so that it can be propagated to the
    // Pat<> built from this record.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = FragD.Outs;
  // Type suffix: ".<D>.<C>" when A is f16, otherwise ".<D>.<A>.<B>.<C>".
  string TypeList = !cond(
    !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type
                                     # "." # FragC.ptx_elt_type,
    true : "." # FragD.ptx_elt_type
           # "." # FragA.ptx_elt_type
           # "." # FragB.ptx_elt_type
           # "." # FragC.ptx_elt_type);
  // Mnemonic with all suffixes, then the D, A, B and C register lists, one
  // per line.
  let AsmString = "wmma.mma" # b1op # ".sync" # "${ptx:aligned}"
                  # "." # ALayout # "." # BLayout # "." # FragA.geom
                  # !if(!eq(rnd, ""), "", "." # rnd)
                  # TypeList
                  # !if(Satfinite, ".satfinite", "") # "\n\t\t"
                  # FragD.regstring # ",\n\t\t"
                  # FragA.regstring # ",\n\t\t"
                  # FragB.regstring # ",\n\t\t"
                  # FragC.regstring # ";";
}
7846
// Instantiate every supported wmma.mma variant and collect the records in
// WMMAs. NVVM_WMMA_SUPPORTED filters the cross product of layouts, satfinite,
// rounding mode and op. The b1op values come from NVVM_MMA_B1OPS<op>.
defset list<WMMA_INSTR> WMMAs = {
  foreach layout_a = ["row", "col"] in
  foreach layout_b = ["row", "col"] in
  foreach satf = [0, 1] in
  foreach rnd = ["", "rn", "rz", "rm", "rp"] in
  foreach op = NVVM_MMA_OPS.all_wmma_ops in
  foreach b1op = NVVM_MMA_B1OPS<op>.ret in
    if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then
      // op[0..3] are passed as the A, B, C and D fragments, in that order.
      def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
                     WMMA_REGINFO<op[1], "wmma.mma">,
                     WMMA_REGINFO<op[2], "wmma.mma">,
                     WMMA_REGINFO<op[3], "wmma.mma">,
                     layout_a, layout_b, satf, rnd, b1op>;
} // defset
7868
// mma.sync.aligned instruction record.
//   FragA/FragB/FragC - input fragments; their Ins dags become the operands.
//   FragD             - result fragment; its Outs dag becomes the outputs.
//   ALayout/BLayout   - layout suffixes placed in the asm string.
//   Satfinite         - non-zero appends ".satfinite".
//   b1op              - b1 operation suffix, pasted after the type list.
class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string b1op>
  : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
               [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires<> does not seem to take effect on an Instruction without
    // Patterns. It is set here anyway so that it can be propagated to the
    // Pat<> built from this record.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = FragD.Outs;
  // Type suffix: always ".<D>.<A>.<B>.<C>".
  string TypeList = "." # FragD.ptx_elt_type # "." # FragA.ptx_elt_type
                    # "." # FragB.ptx_elt_type # "." # FragC.ptx_elt_type;
  // Mnemonic with all suffixes, then the D, A, B and C register lists, one
  // per line.
  let AsmString = "mma.sync.aligned." # FragA.geom
                  # "." # ALayout # "." # BLayout
                  # !if(Satfinite, ".satfinite", "")
                  # TypeList # b1op # "\n\t\t"
                  # FragD.regstring # ",\n\t\t"
                  # FragA.regstring # ",\n\t\t"
                  # FragB.regstring # ",\n\t\t"
                  # FragC.regstring # ";";
}
7896
// Instantiate every supported mma variant and collect the records in MMAs.
// NVVM_MMA_SUPPORTED filters the cross product of layouts, satfinite and op.
// The b1op values come from NVVM_MMA_B1OPS<op>.
defset list<WMMA_INSTR> MMAs = {
  foreach layout_a = ["row", "col"] in
  foreach layout_b = ["row", "col"] in
  foreach satf = [0, 1] in
  foreach op = NVVM_MMA_OPS.all_mma_ops in
  foreach b1op = NVVM_MMA_B1OPS<op>.ret in
    if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then
      // op[0..3] are passed as the A, B, C and D fragments, in that order.
      def : MMA<WMMA_REGINFO<op[0], "mma">,
                WMMA_REGINFO<op[1], "mma">,
                WMMA_REGINFO<op[2], "mma">,
                WMMA_REGINFO<op[3], "mma">,
                layout_a, layout_b, satf, b1op>;
} // defset
7916
7917//
7918// ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16
7919//
7920class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
7921               DAGOperand SrcOp>
7922  : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
7923    Requires<Frag.Predicates> {
7924  // Build PatFrag that only matches particular address space.
7925  PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
7926                             !cond(!eq(Space, ".shared"): AS_match.shared,
7927                                   true: AS_match.generic)>;
7928  // Build AS-constrained pattern.
7929  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
7930
7931  let OutOperandList = Frag.Outs;
7932  let InOperandList = !con(Args, (ins MmaCode:$ptx));
7933  let AsmString = "ldmatrix.sync.aligned."
7934                  # Frag.geom
7935                  # "." # Frag.frag
7936                  # !if(Transposed, ".trans", "")
7937                  # Space
7938                  # "." # Frag.ptx_elt_type
7939                  # " " # Frag.regstring # ", [$src];";
7940}
7941
// Instantiate every supported ldmatrix variant and collect the records in
// LDMATRIXs. The variants are the cross product of transposition, address
// space and address operand class, filtered by NVVM_LDMATRIX_SUPPORTED.
defset list<WMMA_INSTR> LDMATRIXs = {
  foreach transposed = [false, true] in
  foreach space = [".shared", ""] in
  foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in
  foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
    if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
      def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">,
                     transposed, space, addr>;
} // defset
7955
// Selection pattern for one WMMA_INSTR record: matches wi.IntrinsicPattern
// and emits `wi` with the operands of wi.Args followed by the PTX version.
//
// Constructing non-flat DAGs is still a pain: a dag node cannot be !subst'ed
// with a dag, so ptx.version has to be appended *after* !foreach has replaced
// 'ins' with the instruction record.
class MMA_PAT<WMMA_INSTR wi>
  : Pat<wi.IntrinsicPattern,
        !con(!foreach(elt, wi.Args, !subst(ins, wi, elt)),
             (wi ptx.version))>,
    Requires<wi.Predicates>;
7964
// Emit one intrinsic->instruction pattern for every record collected in the
// MMAs, WMMAs, MMA_LDSTs and LDMATRIXs defsets, in that order.
foreach inst = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
  def : MMA_PAT<inst>;
7968