1//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 0.0f (the C++ comparison also accepts -0.0f).
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;
13
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 1.0f.
def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;
18
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 0.0 (the C++ comparison also accepts -0.0).
def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;
23
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 1.0.
def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;
28
// C++ predicate bodies, one per address space. Each one forwards the matched
// node N to ChkMemSDNodeAddressSpace with the corresponding address-space
// constant.
def AS_match {
  code generic = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];

  code shared = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];

  code global = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}
40
// Provides `ptx.version`, a dag that is replaced with the PTX version
// currently in use.
class PTX {
  // Transform that discards the matched immediate and produces an i32
  // immediate holding Subtarget->getPTXVersion().
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  }]>;

  // The (i32 0) operand is only a placeholder; PTXVerXform rewrites it to the
  // PTX version.
  dag version = (PTXVerXform (i32 0));
}

def ptx : PTX;
50
// Generates a list of n sequential register names.
// E.g. RegSeq<3, "r">.ret -> ["r0", "r1", "r2"]
//
// The list is built recursively: the names for n-1 registers followed by
// prefix # (n-1). Any n <= 0 yields the empty list; the explicit !gt test
// (rather than using n itself as the condition) keeps a negative n from
// recursing without end.
class RegSeq<int n, string prefix> {
  list<string> ret = !if(!gt(n, 0),
                         !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
                                     [prefix # !sub(n, 1)]),
                         []);
}
58
// Lists the values the shfl generation loop iterates over for its
// `threadmask_imm` flag: both 0 and 1 when `sync` is set, otherwise only 0.
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync,
                      [0, 1],
                      [0]);
}
62
63//-----------------------------------
64// Synchronization and shuffle functions
65//-----------------------------------
66let isConvergent = true in {
// bar.sync with a literal barrier id of 0, selected for int_nvvm_barrier0.
def INT_BARRIER0
    : NVPTXInst<(outs), (ins),
                "bar.sync \t0;",
                [(int_nvvm_barrier0)]>;

// bar.sync with one register operand, selected for int_nvvm_barrier_n.
def INT_BARRIERN
    : NVPTXInst<(outs), (ins Int32Regs:$src1),
                "bar.sync \t$src1;",
                [(int_nvvm_barrier_n Int32Regs:$src1)]>;

// bar.sync with two register operands, selected for int_nvvm_barrier.
def INT_BARRIER
    : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                "bar.sync \t$src1, $src2;",
                [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// int_nvvm_barrier0_popc: the emitted sequence turns $pred into a predicate
// register (setp.ne against 0) and feeds it to bar.red.popc.u32 on barrier 0,
// which writes $dst.
def INT_BARRIER0_POPC
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t"
                # ".reg .pred \t%p1; \n\t"
                # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
                # "bar.red.popc.u32 \t$dst, 0, %p1; \n\t"
                # "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
// int_nvvm_barrier0_and: the emitted sequence turns $pred into a predicate
// register, runs bar.red.and.pred on barrier 0, and converts the resulting
// predicate back to 1/0 in $dst with selp.
def INT_BARRIER0_AND
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t"
                # ".reg .pred \t%p1; \n\t"
                # ".reg .pred \t%p2; \n\t"
                # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
                # "bar.red.and.pred \t%p2, 0, %p1; \n\t"
                # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
                # "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_and Int32Regs:$pred))]>;
// int_nvvm_barrier0_or: the emitted sequence turns $pred into a predicate
// register, runs bar.red.or.pred on barrier 0, and converts the resulting
// predicate back to 1/0 in $dst with selp.
def INT_BARRIER0_OR
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t"
                # ".reg .pred \t%p1; \n\t"
                # ".reg .pred \t%p2; \n\t"
                # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
                # "bar.red.or.pred \t%p2, 0, %p1; \n\t"
                # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
                # "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_or Int32Regs:$pred))]>;
101
// bar.sync with an immediate operand, selected for int_nvvm_bar_sync.
def INT_BAR_SYNC
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.sync \t$i;",
                [(int_nvvm_bar_sync imm:$i)]>;
104
// bar.warp.sync for int_nvvm_bar_warp_sync, with the operand given as an
// immediate (_I) or in a register (_R).
def INT_BAR_WARP_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync imm:$i)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BAR_WARP_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
111
// barrier.sync for int_nvvm_barrier_sync, with the operand given as an
// immediate (_I) or in a register (_R).
def INT_BARRIER_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync imm:$i)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BARRIER_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync Int32Regs:$i)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
118
// barrier.sync with an id and a count, for int_nvvm_barrier_sync_cnt.
// The two-letter suffix gives the operand kinds in ($id, $cnt) order:
// R = Int32Regs register, I = i32 immediate.
def INT_BARRIER_SYNC_CNT_RR
    : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BARRIER_SYNC_CNT_RI
    : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BARRIER_SYNC_CNT_IR
    : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BARRIER_SYNC_CNT_II
    : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
// Operand-less "barrier.cluster.<variant>;" instruction selected for Intr.
// Preds defaults to [hasPTX<78>, hasSM<90>] and can be overridden per variant.
class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
                          list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>
    : NVPTXInst<(outs), (ins),
                !strconcat("barrier.cluster.", variant, ";"),
                [(Intr)]>,
      Requires<Preds>;
139
// barrier.cluster.{arrive, arrive.relaxed, wait}. The relaxed form overrides
// the default predicates with [hasPTX<80>, hasSM<90>].
def barrier_cluster_arrive
    : INT_BARRIER_CLUSTER<"arrive", int_nvvm_barrier_cluster_arrive>;

def barrier_cluster_arrive_relaxed
    : INT_BARRIER_CLUSTER<"arrive.relaxed",
                          int_nvvm_barrier_cluster_arrive_relaxed,
                          [hasPTX<80>, hasSM<90>]>;

def barrier_cluster_wait
    : INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;
147
// One shfl[.sync].<mode>.b32 instruction.
//
//   sync           - use the .sync form; adds a trailing $threadmask operand.
//   mode           - shuffle mode string spliced into the mnemonic and the
//                    intrinsic name.
//   reg            - "i32" or "f32"; selects Int32Regs or Float32Regs.
//   return_pred    - also produce an Int1Regs $pred result ("$dst|$pred").
//   offset_imm, mask_imm, threadmask_imm
//                  - whether the corresponding operand is an i32 immediate
//                    rather than an Int32Regs register.
//
// The operand lists, asm string and pattern passed to NVPTXInst are
// placeholders; the real values are computed from the flags and installed
// with `let` below.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
    : NVPTXInst<(outs), (ins), "?", []> {
  // Register class of the shuffled value.
  NVPTXRegClass rc = !cond(!eq(reg, "i32"): Int32Regs,
                           !eq(reg, "f32"): Float32Regs);

  // Intrinsic record name: int_nvvm_shfl_[sync_]<mode>_<reg>[p].
  string IntrName = !strconcat("int_nvvm_shfl_",
                               !if(sync, "sync_", ""),
                               mode,
                               "_", reg,
                               !if(return_pred, "p", ""));
  Intrinsic Intr = !cast<Intrinsic>(IntrName);

  // Inputs in order: [threadmask (sync only)], src, offset, mask.
  let InOperandList = !con(
      !if(sync,
          !if(threadmask_imm, (ins i32imm:$threadmask),
                              (ins Int32Regs:$threadmask)),
          (ins)),
      (ins rc:$src),
      !if(offset_imm, (ins i32imm:$offset), (ins Int32Regs:$offset)),
      !if(mask_imm, (ins i32imm:$mask), (ins Int32Regs:$mask)));

  let OutOperandList = !con((outs rc:$dst),
                            !if(return_pred, (outs Int1Regs:$pred), (outs)));

  let AsmString = !strconcat("shfl.",
                             !if(sync, "sync.", ""),
                             mode, ".b32\t",
                             "$dst",
                             !if(return_pred, "|$pred", ""), ", ",
                             "$src, $offset, $mask",
                             !if(sync, ", $threadmask", ""),
                             ";");

  // Pattern: (set <outs...>, (Intr <ins...>)), derived from the operand lists
  // by swapping the dag operators and replacing i32imm operands with imm.
  let Pattern = [!con(
      !foreach(tmp, OutOperandList,
               !subst(outs, set, !subst(i32imm, imm, tmp))),
      (set !foreach(tmp, InOperandList,
                    !subst(ins, Intr, !subst(i32imm, imm, tmp)))))];
}
187
// Instantiate SHFL_INSTR for every combination of sync, mode, register class,
// predicate result, and immediate/register operand kinds. The threadmask_imm
// values come from THREADMASK_INFO<sync>.ret. The sync forms require
// hasSM<30> and hasPTX<60>; the non-sync forms require hasSM<30> and hasSHFL.
foreach sync = [false, true] in
  foreach mode = ["up", "down", "bfly", "idx"] in
    foreach regclass = ["i32", "f32"] in
      foreach return_pred = [false, true] in
        foreach offset_imm = [false, true] in
          foreach mask_imm = [false, true] in
            foreach threadmask_imm = THREADMASK_INFO<sync>.ret in
              def : SHFL_INSTR<sync, mode, regclass, return_pred,
                               offset_imm, mask_imm, threadmask_imm>,
                    Requires<!if(sync,
                                 [hasSM<30>, hasPTX<60>],
                                 [hasSM<30>, hasSHFL])>;
205
// vote.{all,any,uni,ballot}
// One anonymous instruction per instantiation: takes an Int1Regs predicate
// and writes the result into `regclass`.
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
                  !strconcat("vote.", mode, " \t$dest, $pred;"),
                  [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_ALL    : VOTE<Int1Regs,  "all.pred",   int_nvvm_vote_all>;
defm VOTE_ANY    : VOTE<Int1Regs,  "any.pred",   int_nvvm_vote_any>;
defm VOTE_UNI    : VOTE<Int1Regs,  "uni.pred",   int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
218
// vote.sync.{all,any,uni,ballot}
// Two variants per instantiation: `i` takes the mask as an i32 immediate,
// `r` takes it in an Int32Regs register. Both share one asm string.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  defvar Asm = !strconcat("vote.sync.", mode, " \t$dest, $pred, $mask;");

  def i : NVPTXInst<(outs regclass:$dest),
                    (ins i32imm:$mask, Int1Regs:$pred),
                    Asm,
                    [(set regclass:$dest,
                          (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;

  def r : NVPTXInst<(outs regclass:$dest),
                    (ins Int32Regs:$mask, Int1Regs:$pred),
                    Asm,
                    [(set regclass:$dest,
                          (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_SYNC_ALL
    : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY
    : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI
    : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT
    : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
235
// match.any.sync.<ptxtype>
// Four variants covering immediate/register forms of ($mask, $value). The
// suffix letters are as defined below: ii and ir take $value as an ImmOp
// immediate, ri and rr take it in `regclass`; ii and ri take $mask as an i32
// immediate, ir and rr take it in Int32Regs.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  defvar Asm = !strconcat("match.any.sync.", ptxtype,
                          " \t$dest, $value, $mask;");

  def ii : NVPTXInst<(outs Int32Regs:$dest),
                     (ins i32imm:$mask, ImmOp:$value),
                     Asm,
                     [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;

  def ir : NVPTXInst<(outs Int32Regs:$dest),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     Asm,
                     [(set Int32Regs:$dest,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;

  def ri : NVPTXInst<(outs Int32Regs:$dest),
                     (ins i32imm:$mask, regclass:$value),
                     Asm,
                     [(set Int32Regs:$dest,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;

  def rr : NVPTXInst<(outs Int32Regs:$dest),
                     (ins Int32Regs:$mask, regclass:$value),
                     Asm,
                     [(set Int32Regs:$dest,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}

defm MATCH_ANY_SYNC_32
    : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32, i32imm>;
defm MATCH_ANY_SYNC_64
    : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64, i64imm>;
260
// match.all.sync.<ptxtype> with two results, printed as "$dest|$pred".
// The four variants cover the same immediate/register combinations of
// ($mask, $value) as their suffixes ii, ir, ri, rr spell out below.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  defvar Asm = !strconcat("match.all.sync.", ptxtype,
                          " \t$dest|$pred, $value, $mask;");

  def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
                     Asm,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;

  def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     Asm,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;

  def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
                     Asm,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;

  def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
                     Asm,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}

defm MATCH_ALLP_SYNC_32
    : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, i32imm>;
defm MATCH_ALLP_SYNC_64
    : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p, i64imm>;
288
// redux.sync.<BinOp>.<PTXType> on Int32Regs operands ($src, $mask), producing
// an Int32Regs result. One anonymous instruction per instantiation.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst),
                  (ins Int32Regs:$src, Int32Regs:$mask),
                  !strconcat("redux.sync.", BinOp, ".", PTXType,
                             " $dst, $src, $mask;"),
                  [(set Int32Regs:$dst,
                        (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX<70>, hasSM<80>]>;
}

defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD  : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN  : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX  : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND  : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR  : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR   : REDUX_SYNC<"or",  "b32", int_nvvm_redux_sync_or>;
304
305} // isConvergent = true
306
307//-----------------------------------
308// Explicit Memory Fence Functions
309//-----------------------------------
// Operand-less fence instruction: StrOp is the complete asm text and IntOP
// the intrinsic it is selected for.
class MEMBAR<string StrOp, Intrinsic IntOP>
    : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;

// fence.sc.cluster is the only one of these gated on predicates.
def INT_FENCE_SC_CLUSTER
    : MEMBAR<"fence.sc.cluster;", int_nvvm_fence_sc_cluster>,
      Requires<[hasPTX<78>, hasSM<90>]>;
321
322//-----------------------------------
323// Async Copy Functions
324//-----------------------------------
325
// cp.async.mbarrier.arrive[<NoInc>][<AddrSpace>].b64 [$addr];
// _32 takes the address in Int32Regs, _64 in Int64Regs; the asm text is the
// same for both.
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
  defvar Asm = "cp.async.mbarrier.arrive" # NoInc # AddrSpace
               # ".b64 [$addr];";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr), Asm,
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr), Asm,
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm CP_ASYNC_MBARRIER_ARRIVE
    : CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED
    : CP_ASYNC_MBARRIER_ARRIVE<"", ".shared",
                               int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC
    : CP_ASYNC_MBARRIER_ARRIVE<".noinc", "",
                               int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED
    : CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared",
                               int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
345
// cp.async.<cc>.shared.global [$dst], [$src], <cpsize>[, $src_size];
//
//   _32 / _64     - two-operand form (Intrin), addresses in Int32Regs /
//                   Int64Regs.
//   _32s / _64s   - form with a trailing $src_size (IntrinS), size in
//                   Int32Regs.
//   _32si / _64si - same, with $src_size as an i32 immediate.
multiclass CP_ASYNC_SHARED_GLOBAL_I<string cc, string cpsize, Intrinsic Intrin, Intrinsic IntrinS> {
  defvar Asm  = "cp.async." # cc # ".shared.global [$dst], [$src], "
                # cpsize # ";";
  defvar AsmS = "cp.async." # cc # ".shared.global [$dst], [$src], "
                # cpsize # ", $src_size;";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
                      Asm,
                      [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
                      Asm,
                      [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  // Variants with the src_size parameter.
  def _32s : NVPTXInst<(outs),
                       (ins Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size),
                       AsmS,
                       [(IntrinS Int32Regs:$dst, Int32Regs:$src,
                                 Int32Regs:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;

  def _32si : NVPTXInst<(outs),
                        (ins Int32Regs:$dst, Int32Regs:$src, i32imm:$src_size),
                        AsmS,
                        [(IntrinS Int32Regs:$dst, Int32Regs:$src,
                                  imm:$src_size)]>,
              Requires<[hasPTX<70>, hasSM<80>]>;

  def _64s : NVPTXInst<(outs),
                       (ins Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size),
                       AsmS,
                       [(IntrinS Int64Regs:$dst, Int64Regs:$src,
                                 Int32Regs:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;

  def _64si : NVPTXInst<(outs),
                        (ins Int64Regs:$dst, Int64Regs:$src, i32imm:$src_size),
                        AsmS,
                        [(IntrinS Int64Regs:$dst, Int64Regs:$src,
                                  imm:$src_size)]>,
              Requires<[hasPTX<70>, hasSM<80>]>;
}

defm CP_ASYNC_CA_SHARED_GLOBAL_4
    : CP_ASYNC_SHARED_GLOBAL_I<"ca", "4",
                               int_nvvm_cp_async_ca_shared_global_4,
                               int_nvvm_cp_async_ca_shared_global_4_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8
    : CP_ASYNC_SHARED_GLOBAL_I<"ca", "8",
                               int_nvvm_cp_async_ca_shared_global_8,
                               int_nvvm_cp_async_ca_shared_global_8_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16
    : CP_ASYNC_SHARED_GLOBAL_I<"ca", "16",
                               int_nvvm_cp_async_ca_shared_global_16,
                               int_nvvm_cp_async_ca_shared_global_16_s>;

defm CP_ASYNC_CG_SHARED_GLOBAL_16
    : CP_ASYNC_SHARED_GLOBAL_I<"cg", "16",
                               int_nvvm_cp_async_cg_shared_global_16,
                               int_nvvm_cp_async_cg_shared_global_16_s>;
389
// cp.async.commit_group; takes no operands.
def CP_ASYNC_COMMIT_GROUP
    : NVPTXInst<(outs), (ins),
                "cp.async.commit_group;",
                [(int_nvvm_cp_async_commit_group)]>,
      Requires<[hasPTX<70>, hasSM<80>]>;

// cp.async.wait_group $n; the pattern matches $n as an i32 timm.
def CP_ASYNC_WAIT_GROUP
    : NVPTXInst<(outs), (ins i32imm:$n),
                "cp.async.wait_group $n;",
                [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
      Requires<[hasPTX<70>, hasSM<80>]>;

// cp.async.wait_all; takes no operands.
def CP_ASYNC_WAIT_ALL
    : NVPTXInst<(outs), (ins),
                "cp.async.wait_all;",
                [(int_nvvm_cp_async_wait_all)]>,
      Requires<[hasPTX<70>, hasSM<80>]>;
403
404//-----------------------------------
405// MBarrier Functions
406//-----------------------------------
407
// mbarrier.init[<AddrSpace>].b64 [$addr], $count;
// _32 / _64 differ only in the register class holding $addr.
multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  defvar Asm = "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
                      Asm,
                      [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
                      Asm,
                      [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INIT
    : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED
    : MBARRIER_INIT<".shared", int_nvvm_mbarrier_init_shared>;
422
// mbarrier.inval[<AddrSpace>].b64 [$addr];
// _32 / _64 differ only in the register class holding $addr.
multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  defvar Asm = "mbarrier.inval" # AddrSpace # ".b64 [$addr];";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      Asm,
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      Asm,
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INVAL
    : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED
    : MBARRIER_INVAL<".shared", int_nvvm_mbarrier_inval_shared>;
437
// mbarrier.arrive[<AddrSpace>].b64 $state, [$addr];
// Produces an Int64Regs $state; _32 / _64 differ only in the register class
// holding $addr.
multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  defvar Asm = "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];";

  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      Asm,
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      Asm,
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE
    : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED
    : MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
452
// mbarrier.arrive.noComplete[<AddrSpace>].b64 $state, [$addr], $count;
// Produces an Int64Regs $state; _32 / _64 differ only in the register class
// holding $addr.
multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  defvar Asm = "mbarrier.arrive.noComplete" # AddrSpace
               # ".b64 $state, [$addr], $count;";

  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      Asm,
                      [(set Int64Regs:$state,
                            (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      Asm,
                      [(set Int64Regs:$state,
                            (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_NOCOMPLETE
    : MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED
    : MBARRIER_ARRIVE_NOCOMPLETE<".shared",
                                 int_nvvm_mbarrier_arrive_noComplete_shared>;
472
// mbarrier.arrive_drop[<AddrSpace>].b64 $state, [$addr];
// Produces an Int64Regs $state; _32 / _64 differ only in the register class
// holding $addr.
multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  defvar Asm = "mbarrier.arrive_drop" # AddrSpace # ".b64 $state, [$addr];";

  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      Asm,
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      Asm,
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_DROP
    : MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED
    : MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
490
// mbarrier.arrive_drop.noComplete[<AddrSpace>].b64 $state, [$addr], $count;
// Produces an Int64Regs $state; _32 / _64 differ only in the register class
// holding $addr.
multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  defvar Asm = "mbarrier.arrive_drop.noComplete" # AddrSpace
               # ".b64 $state, [$addr], $count;";

  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      Asm,
                      [(set Int64Regs:$state,
                            (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      Asm,
                      [(set Int64Regs:$state,
                            (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_DROP_NOCOMPLETE
    : MBARRIER_ARRIVE_DROP_NOCOMPLETE<"",
                                      int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED
    : MBARRIER_ARRIVE_DROP_NOCOMPLETE<".shared",
                                      int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
511
// mbarrier.test_wait[<AddrSpace>].b64 $res, [$addr], $state;
// Takes an Int64Regs $state and produces an Int1Regs $res; _32 / _64 differ
// only in the register class holding $addr.
multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  defvar Asm = "mbarrier.test_wait" # AddrSpace
               # ".b64 $res, [$addr], $state;";

  def _32 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int32Regs:$addr, Int64Regs:$state),
                      Asm,
                      [(set Int1Regs:$res,
                            (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  def _64 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int64Regs:$addr, Int64Regs:$state),
                      Asm,
                      [(set Int1Regs:$res,
                            (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_TEST_WAIT
    : MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED
    : MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
527
// mbarrier.pending_count.b64 $res, $state;
// Takes an Int64Regs $state and produces an Int32Regs $res.
class MBARRIER_PENDING_COUNT<Intrinsic Intrin>
    : NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
                "mbarrier.pending_count.b64 $res, $state;",
                [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
      Requires<[hasPTX<70>, hasSM<80>]>;

def MBARRIER_PENDING_COUNT
    : MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
536
537//-----------------------------------
538// Math Functions
539//-----------------------------------
540
// Map min(1.0, max(0.0, x)) to sat(x).
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x): when x is NaN,
// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// The same holds for fmax and fmin.
546
// f32: fmin(1.0, fmax(0.0, a)) in all four operand orders becomes a
// saturating CVT_f32_f32.

// fmin(1.0, fmax(0.0, a))
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(1.0, fmax(a, 0.0))
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(fmax(0.0, a), 1.0)
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, Float32Regs:$a),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(fmax(a, 0.0), 1.0)
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f Float32Regs:$a, immFloat0),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
559
// f64: fmin(1.0, fmax(0.0, a)) in all four operand orders becomes a
// saturating CVT_f64_f64.

// fmin(1.0, fmax(0.0, a))
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(1.0, fmax(a, 0.0))
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(fmax(0.0, a), 1.0)
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, Float64Regs:$a),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(fmax(a, 0.0), 1.0)
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d Float64Regs:$a, immDouble0),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
572
573
// One-source intrinsic instruction: $dst = IntOP($src0).
// OpcStr must be the complete asm string (operands included) rather than
// just a mnemonic, because cases like INT_PTX_RECIP need to be handled.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP,
               list<Predicate> Preds = []>
    : NVPTXInst<(outs target_regclass:$dst),
                (ins src_regclass:$src0),
                OpcStr,
                [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>,
      Requires<Preds>;
582
// Two-source intrinsic instruction: $dst = IntOP($src0, $src1).
// OpcStr must be the complete asm string (operands included) rather than
// just a mnemonic, because cases like INT_PTX_NATIVE_POWR_F need to be
// handled.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP, list<Predicate> Preds = []>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1))]>,
      Requires<Preds>;
593
// Three-source intrinsic instruction: $dst = IntOP($src0, $src1, $src2).
// OpcStr is used verbatim as the asm string.
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP,
               list<Predicate> Preds = []>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1,
                             s2_regclass:$src2))]>,
      Requires<Preds>;
603
604//
605// MISC
606//
607
// prmt.b32 on three Int32Regs sources, selected for int_nvvm_prmt.
def INT_NVVM_PRMT
    : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs,
               int_nvvm_prmt>;
610
611//
612// Min Max
613//
614
// f32 min variants. The plain and .ftz forms carry no predicates; the .NaN
// forms require hasPTX<70>/hasSM<80>; the .xorsign.abs forms require
// hasPTX<72>/hasSM<86>.
def INT_NVVM_FMIN_F
    : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_f>;

def INT_NVVM_FMIN_FTZ_F
    : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_f>;

def INT_NVVM_FMIN_NAN_F
    : F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_nan_f,
               [hasPTX<70>, hasSM<80>]>;

def INT_NVVM_FMIN_FTZ_NAN_F
    : F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_nan_f,
               [hasPTX<70>, hasSM<80>]>;

def INT_NVVM_FMIN_XORSIGN_ABS_F
    : F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;

def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F
    : F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;

def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F
    : F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_nan_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;

def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F
    : F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_nan_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
641
// f32 max variants. The plain and .ftz forms carry no predicates; the .NaN
// forms require hasPTX<70>/hasSM<80>; the .xorsign.abs forms require
// hasPTX<72>/hasSM<86>.
def INT_NVVM_FMAX_F
    : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_f>;

def INT_NVVM_FMAX_FTZ_F
    : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_ftz_f>;

def INT_NVVM_FMAX_NAN_F
    : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_nan_f,
               [hasPTX<70>, hasSM<80>]>;

def INT_NVVM_FMAX_FTZ_NAN_F
    : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_ftz_nan_f,
               [hasPTX<70>, hasSM<80>]>;

def INT_NVVM_FMAX_XORSIGN_ABS_F
    : F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;

def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F
    : F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_ftz_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;

def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F
    : F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_nan_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;

def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F
    : F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_ftz_nan_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
668
// f64 min / max: all three operand classes are Float64Regs.
def INT_NVVM_FMIN_D
  : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
  : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;
673
674//
675// Min Max f16, f16x2, bf16, bf16x2
676//
677
// One row of the MIN_MAX table.
//   Variant    - suffix appended to the defm prefix to name the record; MIN_MAX
//                also turns it into the PTX modifier string ("_" -> ".").
//   Intr       - intrinsic the generated instruction matches.
//   RegClass   - register class MIN_MAX passes for all three operand slots.
//   Predicates - guards for the record; [hasPTX<70>, hasSM<80>] when the
//                fourth template argument is omitted.
class MIN_MAX_TUPLE<string VariantSuffix, Intrinsic VariantIntr,
                    NVPTXRegClass OperandRC,
                    list<Predicate> Guards = [hasPTX<70>, hasSM<80>]> {
  list<Predicate> Predicates = Guards;
  NVPTXRegClass RegClass = OperandRC;
  Intrinsic Intr = VariantIntr;
  string Variant = VariantSuffix;
}
685
// Emits one F_MATH_2 record per f16/f16x2/bf16/bf16x2 min-or-max variant.
// IntName is the PTX mnemonic ("min" or "max"): "min" selects the
// int_nvvm_fmin_* intrinsic of each row, any other value selects the
// int_nvvm_fmax_* one. The assembly string is IntName followed by the row's
// Variant with every "_" replaced by ".".
multiclass MIN_MAX<string IntName> {
  foreach P = [
    // f16 (Int16Regs)
    MIN_MAX_TUPLE<"_f16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_f16, int_nvvm_fmax_f16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_ftz_f16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16, int_nvvm_fmax_ftz_f16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_NaN_f16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16, int_nvvm_fmax_nan_f16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_ftz_NaN_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
          int_nvvm_fmax_ftz_nan_xorsign_abs_f16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,

    // f16x2 (Int32Regs)
    MIN_MAX_TUPLE<"_f16x2",
      !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2, int_nvvm_fmax_f16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_ftz_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_NaN_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_ftz_NaN_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_xorsign_abs_f16x2,
          int_nvvm_fmax_ftz_xorsign_abs_f16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_xorsign_abs_f16x2,
          int_nvvm_fmax_nan_xorsign_abs_f16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
          int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,

    // bf16 (Int16Regs)
    MIN_MAX_TUPLE<"_bf16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_bf16, int_nvvm_fmax_bf16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_NaN_bf16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16, int_nvvm_fmax_nan_bf16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_bf16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_xorsign_abs_bf16,
          int_nvvm_fmax_nan_xorsign_abs_bf16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,

    // bf16x2 (Int32Regs)
    MIN_MAX_TUPLE<"_bf16x2",
      !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2, int_nvvm_fmax_bf16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_NaN_bf16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_bf16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_xorsign_abs_bf16x2,
          int_nvvm_fmax_nan_xorsign_abs_bf16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>
  ] in {
    // e.g. IntName "min" with Variant "_ftz_NaN_f16" yields the mnemonic
    // "min.ftz.NaN.f16".
    def P.Variant
      : F_MATH_2<IntName # !subst("_", ".", P.Variant)
                         # " \t$dst, $src0, $src1;",
                 P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
  }
}
755
// Instantiate the f16/f16x2/bf16/bf16x2 min and max records. The defm name is
// the prefix of every generated record (e.g. INT_NVVM_FMIN_f16,
// INT_NVVM_FMAX_ftz_NaN_f16x2); the lower-case variant suffixes keep these
// distinct from the hand-written INT_NVVM_FMIN_* / INT_NVVM_FMAX_* f32/f64
// records.
defm INT_NVVM_FMIN : MIN_MAX<"min">;
// Was spelled INT_NVVM_FMAN, which gave every max record a misspelled name.
defm INT_NVVM_FMAX : MIN_MAX<"max">;
758
759//
760// Multiplication
761//
762
// mul.hi on 32-bit registers (s32 / u32).
def INT_NVVM_MULHI_I
  : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
  : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

// mul.hi on 64-bit registers (s64 / u64).
def INT_NVVM_MULHI_LL
  : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
  : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 multiply with an explicit rounding modifier (rn/rz/rm/rp), each with
// and without .ftz.
def INT_NVVM_MUL_RN_FTZ_F
  : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
  : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
  : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
  : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
  : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
  : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
  : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
  : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 multiply with an explicit rounding modifier.
def INT_NVVM_MUL_RN_D
  : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
  : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
  : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
  : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

// mul24.lo on 32-bit registers (s32 / u32).
def INT_NVVM_MUL24_I
  : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
  : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
803
804//
805// Div
806//
807
// f32 div.approx, with and without .ftz.
def INT_NVVM_DIV_APPROX_FTZ_F
  : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
  : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// f32 divide with an explicit rounding modifier (rn/rz/rm/rp), each with and
// without .ftz.
def INT_NVVM_DIV_RN_FTZ_F
  : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
  : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
  : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
  : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
  : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
  : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
  : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
  : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

// f64 divide with an explicit rounding modifier.
def INT_NVVM_DIV_RN_D
  : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
  : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
  : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
  : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
839
840//
841// Sad
842//
843
// Three-source sad on 32-bit registers (s32 / u32), built with F_MATH_3.
def INT_NVVM_SAD_I
  : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
  : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
848
849//
850// Floor  Ceil
851//
852
// floor: no dedicated instruction record is defined here; each
// int_nvvm_floor_* intrinsic is selected to a same-type CVT (f32->f32 or
// f64->f64) carrying the CvtRMI / CvtRMI_FTZ mode operand.
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

// ceil: same scheme with the CvtRPI / CvtRPI_FTZ mode operand.
def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
866
867//
868// Abs
869//
870
// Single-source abs for f32 (with and without .ftz) and f64, built with
// F_MATH_1.
def INT_NVVM_FABS_FTZ_F
  : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
  : F_MATH_1<"abs.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D
  : F_MATH_1<"abs.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_fabs_d>;
878
879//
880// Abs, Neg bf16, bf16x2
881//
882
// bf16 values use Int16Regs and bf16x2 values use Int32Regs; all four
// records are gated on hasPTX<70>/hasSM<80>.
def INT_NVVM_ABS_BF16
  : F_MATH_1<"abs.bf16 \t$dst, $src0;",
             Int16Regs, Int16Regs, int_nvvm_abs_bf16,
             [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_ABS_BF16X2
  : F_MATH_1<"abs.bf16x2 \t$dst, $src0;",
             Int32Regs, Int32Regs, int_nvvm_abs_bf16x2,
             [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_NEG_BF16
  : F_MATH_1<"neg.bf16 \t$dst, $src0;",
             Int16Regs, Int16Regs, int_nvvm_neg_bf16,
             [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_NEG_BF16X2
  : F_MATH_1<"neg.bf16x2 \t$dst, $src0;",
             Int32Regs, Int32Regs, int_nvvm_neg_bf16x2,
             [hasPTX<70>, hasSM<80>]>;
891
892//
893// Round
894//
895
// round: each int_nvvm_round_* intrinsic is selected to a same-type CVT with
// the CvtRNI / CvtRNI_FTZ mode operand.
def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_round_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
902
903//
904// Trunc
905//
906
// trunc: each int_nvvm_trunc_* intrinsic is selected to a same-type CVT with
// the CvtRZI / CvtRZI_FTZ mode operand.
def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
913
914//
915// Saturate
916//
917
// saturate: each int_nvvm_saturate_* intrinsic is selected to a same-type
// CVT with the CvtSAT / CvtSAT_FTZ mode operand.
def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
924
925//
926// Exp2  Log2
927//
928
// ex2.approx for f32 (with and without .ftz), f64, f16 and f16x2. Only the
// f16 / f16x2 records carry predicates (hasPTX<70>, hasSM<75>).
def INT_NVVM_EX2_APPROX_FTZ_F
  : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
  : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
  : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
def INT_NVVM_EX2_APPROX_F16
  : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
             Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16,
             [hasPTX<70>, hasSM<75>]>;
def INT_NVVM_EX2_APPROX_F16X2
  : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
             Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2,
             [hasPTX<70>, hasSM<75>]>;

// lg2.approx for f32 (with and without .ftz) and f64.
def INT_NVVM_LG2_APPROX_FTZ_F
  : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
  : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
  : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
946
947//
948// Sin  Cos
949//
950
// f32 sin.approx / cos.approx, each with and without .ftz.
def INT_NVVM_SIN_APPROX_FTZ_F
  : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
  : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

def INT_NVVM_COS_APPROX_FTZ_F
  : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
  : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
960
961//
962// Fma
963//
964
// One row of the FMA_INST table.
//   Variant    - suffix appended to the defm prefix to name the record;
//                FMA_INST also turns it into the PTX modifier string
//                ("_" -> ".").
//   Intr       - intrinsic the generated instruction matches.
//   RegClass   - register class FMA_INST passes for all four operand slots.
//   Predicates - guards for the record; empty when the fourth template
//                argument is omitted.
class FMA_TUPLE<string VariantSuffix, Intrinsic VariantIntr,
                NVPTXRegClass OperandRC, list<Predicate> Guards = []> {
  list<Predicate> Predicates = Guards;
  NVPTXRegClass RegClass = OperandRC;
  Intrinsic Intr = VariantIntr;
  string Variant = VariantSuffix;
}
972
// Emits one F_MATH_3 record per fma variant listed below. The assembly
// string is "fma" followed by the row's Variant with every "_" replaced by
// "." (e.g. "_rn_ftz_f32" -> "fma.rn.ftz.f32").
multiclass FMA_INST {
  foreach P = [
    // f64
    FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>,
    FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>,
    FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>,
    FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>,

    // f32
    FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>,
    FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>,
    FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>,
    FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>,
    FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,

    // f16 (Int16Regs)
    FMA_TUPLE<"_rn_f16",
      int_nvvm_fma_rn_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_f16",
      int_nvvm_fma_rn_ftz_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_sat_f16",
      int_nvvm_fma_rn_sat_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_sat_f16",
      int_nvvm_fma_rn_ftz_sat_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_relu_f16",
      int_nvvm_fma_rn_relu_f16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_f16",
      int_nvvm_fma_rn_ftz_relu_f16, Int16Regs, [hasPTX<70>, hasSM<80>]>,

    // bf16 (Int16Regs)
    FMA_TUPLE<"_rn_bf16",
      int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_bf16",
      int_nvvm_fma_rn_ftz_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_sat_bf16",
      int_nvvm_fma_rn_sat_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_sat_bf16",
      int_nvvm_fma_rn_ftz_sat_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_relu_bf16",
      int_nvvm_fma_rn_relu_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_bf16",
      int_nvvm_fma_rn_ftz_relu_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,

    // f16x2 (Int32Regs)
    FMA_TUPLE<"_rn_f16x2",
      int_nvvm_fma_rn_f16x2, Int32Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_f16x2",
      int_nvvm_fma_rn_ftz_f16x2, Int32Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_sat_f16x2",
      int_nvvm_fma_rn_sat_f16x2, Int32Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_sat_f16x2",
      int_nvvm_fma_rn_ftz_sat_f16x2, Int32Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_relu_f16x2",
      int_nvvm_fma_rn_relu_f16x2, Int32Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_f16x2",
      int_nvvm_fma_rn_ftz_relu_f16x2, Int32Regs, [hasPTX<70>, hasSM<80>]>,

    // bf16x2 (Int32Regs)
    FMA_TUPLE<"_rn_bf16x2",
      int_nvvm_fma_rn_bf16x2, Int32Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_relu_bf16x2",
      int_nvvm_fma_rn_relu_bf16x2, Int32Regs, [hasPTX<70>, hasSM<80>]>
  ] in {
    def P.Variant
      : F_MATH_3<"fma" # !subst("_", ".", P.Variant)
                       # " \t$dst, $src0, $src1, $src2;",
                 P.RegClass, P.RegClass, P.RegClass, P.RegClass,
                 P.Intr, P.Predicates>;
  }
}
1036
// Instantiate every FMA_INST row; each record is named INT_NVVM_FMA followed
// by the row's Variant suffix (e.g. INT_NVVM_FMA_rn_f64).
defm INT_NVVM_FMA : FMA_INST;
1038
1039//
1040// Rcp
1041//
1042
// f32 rcp with an explicit rounding modifier (rn/rz/rm/rp), each with and
// without .ftz.
def INT_NVVM_RCP_RN_FTZ_F
  : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F
  : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
  : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
  : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
  : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
  : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
  : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
  : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

// f64 rcp with an explicit rounding modifier.
def INT_NVVM_RCP_RN_D
  : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
  : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
  : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
  : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

// rcp.approx.ftz for f32 and f64.
def INT_NVVM_RCP_APPROX_FTZ_F
  : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_approx_ftz_f>;
def INT_NVVM_RCP_APPROX_FTZ_D
  : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
1073
1074//
1075// Sqrt
1076//
1077
// f32 sqrt with an explicit rounding modifier (rn/rz/rm/rp) or .approx, each
// with and without .ftz.
def INT_NVVM_SQRT_RN_FTZ_F
  : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
  : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
  : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
  : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
  : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
  : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
  : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
  : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F
  : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
  : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

// f64 sqrt with an explicit rounding modifier.
def INT_NVVM_SQRT_RN_D
  : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
  : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
  : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
  : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;
1107
// nvvm_sqrt intrinsic: int_nvvm_sqrt_f has no instruction of its own and is
// mapped onto one of the f32 sqrt records above according to the Requires
// predicates:
//   doF32FTZ and do_SQRTF32_RN -> INT_NVVM_SQRT_RN_FTZ_F
//   do_SQRTF32_RN              -> INT_NVVM_SQRT_RN_F
//   doF32FTZ                   -> INT_NVVM_SQRT_APPROX_FTZ_F
//   (no predicate)             -> INT_NVVM_SQRT_APPROX_F
// NOTE(review): more than one of these can be eligible at once (the last is
// unconditional); the choice presumably relies on how the selector ranks
// patterns - confirm before reordering or changing the predicates.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
1117
1118//
1119// Rsqrt
1120//
1121
// rsqrt.approx for f32 (with and without .ftz) and f64.
def INT_NVVM_RSQRT_APPROX_FTZ_F
  : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F
  : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
  : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
1129
1130//
1131// Add
1132//
1133
// f32 add with an explicit rounding modifier (rn/rz/rm/rp), each with and
// without .ftz.
def INT_NVVM_ADD_RN_FTZ_F
  : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
  : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F
  : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
  : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F
  : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
  : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F
  : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
  : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// f64 add with an explicit rounding modifier.
def INT_NVVM_ADD_RN_D
  : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
  : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
  : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D
  : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
1159
1160//
1161// Convert
1162//
1163
// d2f: f64 -> f32. Each int_nvvm_d2f_<mode>[_ftz] intrinsic is selected to
// CVT_f32_f64 with the matching CvtRN/CvtRZ/CvtRM/CvtRP (or *_FTZ) mode
// operand.
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
1180
// d2i: f64 -> s32 via CVT_s32_f64; the float-to-integer patterns use the
// CvtR?I mode operands.
def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

// d2ui: f64 -> u32 via CVT_u32_f64.
def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
1198
// i2d: s32 -> f64 via CVT_f64_s32; the integer-to-float patterns use the
// CvtR? mode operands (no "I" suffix).
def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

// ui2d: u32 -> f64 via CVT_f64_u32.
def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
1216
// f2i: f32 -> s32 via CVT_s32_f32, with CvtR?I / CvtR?I_FTZ mode operands.
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

// f2ui: f32 -> u32 via CVT_u32_f32.
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
1250
// i2f: s32 -> f32 via CVT_f32_s32, with CvtR? mode operands.
def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

// ui2f: u32 -> f32 via CVT_f32_u32.
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
1268
// ff2bf16x2: two f32 sources ($a, $b) -> CVT_bf16x2_f32, with CvtRN / CvtRZ
// and their *_RELU forms as the mode operand.
def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;

// ff2f16x2: two f32 sources -> CVT_f16x2_f32, same four modes.
def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;

// f2bf16: one f32 source -> CVT_bf16_f32, same four modes.
def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>;
def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>;
1295
// f32 -> tf32 conversion. Declared directly as an NVPTXInst: one Float32Regs
// input, an Int32Regs result, selected from int_nvvm_f2tf32_rna.
def CVT_tf32_f32
  : NVPTXInst<(outs Int32Regs:$dest),
              (ins Float32Regs:$a),
              "cvt.rna.tf32.f32 \t$dest, $a;",
              [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>;
1300
// lohi_i2d: one mov.b64 whose source is the brace-packed pair
// {$src0, $src1} of Int32Regs; the result is a Float64Regs.
def INT_NVVM_LOHI_I2D
  : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
             Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

// d2i_lo / d2i_hi: unpack a Float64Regs source with mov.b64 into a pair of
// 32-bit registers inside a braced scope. $dst is the first element of the
// pair for _LO and the second for _HI; the other element goes to the
// scope-local scratch register %temp.
def INT_NVVM_D2I_LO
  : F_MATH_1<"{{\n\t"
             # ".reg .b32 %temp; \n\t"
             # "mov.b64 \t{$dst, %temp}, $src0;\n\t"
             # "}}",
             Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
def INT_NVVM_D2I_HI
  : F_MATH_1<"{{\n\t"
             # ".reg .b32 %temp; \n\t"
             # "mov.b64 \t{%temp, $dst}, $src0;\n\t"
             # "}}",
             Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
1316
// int_nvvm_f2ll_*: selected to CVT_s64_f32. The rn/rz/rm/rp suffix picks
// CvtRNI/CvtRZI/CvtRMI/CvtRPI, and a trailing _ftz picks the matching
// *_FTZ mode operand.
def : Pat<
  (int_nvvm_f2ll_rn_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rn Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2ll_rz_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2ll_rm_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rm Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2ll_rp_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rp Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
1333
// int_nvvm_f2ull_*: selected to CVT_u64_f32. The rn/rz/rm/rp suffix picks
// CvtRNI/CvtRZI/CvtRMI/CvtRPI, and a trailing _ftz picks the matching
// *_FTZ mode operand.
def : Pat<
  (int_nvvm_f2ull_rn_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rn Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2ull_rz_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2ull_rm_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rm Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2ull_rp_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rp Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
1350
// int_nvvm_d2ll_*: selected to CVT_s64_f64; the rn/rz/rm/rp suffix picks
// CvtRNI/CvtRZI/CvtRMI/CvtRPI.
def : Pat<
  (int_nvvm_d2ll_rn Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2ll_rz Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2ll_rm Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2ll_rp Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
1359
// int_nvvm_d2ull_*: selected to CVT_u64_f64; the rn/rz/rm/rp suffix picks
// CvtRNI/CvtRZI/CvtRMI/CvtRPI.
def : Pat<
  (int_nvvm_d2ull_rn Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2ull_rz Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2ull_rm Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2ull_rp Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
1368
// int_nvvm_ll2f_*: selected to CVT_f32_s64; the rn/rz/rm/rp suffix picks
// CvtRN/CvtRZ/CvtRM/CvtRP.
def : Pat<
  (int_nvvm_ll2f_rn Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ll2f_rz Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ll2f_rm Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ll2f_rp Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
1377
// int_nvvm_ull2f_*: selected to CVT_f32_u64; the rn/rz/rm/rp suffix picks
// CvtRN/CvtRZ/CvtRM/CvtRP.
def : Pat<
  (int_nvvm_ull2f_rn Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ull2f_rz Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ull2f_rm Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ull2f_rp Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
1386
// int_nvvm_ll2d_*: selected to CVT_f64_s64; the rn/rz/rm/rp suffix picks
// CvtRN/CvtRZ/CvtRM/CvtRP.
def : Pat<
  (int_nvvm_ll2d_rn Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ll2d_rz Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ll2d_rm Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ll2d_rp Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
1395
// int_nvvm_ull2d_*: selected to CVT_f64_u64; the rn/rz/rm/rp suffix picks
// CvtRN/CvtRZ/CvtRM/CvtRP.
def : Pat<
  (int_nvvm_ull2d_rn Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ull2d_rz Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ull2d_rm Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ull2d_rp Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
1404
1405
// int_nvvm_f2h_rn[_ftz]: selected to CVT_f16_f32 with CvtRN, or CvtRN_FTZ
// for the _ftz form.
def : Pat<
  (int_nvvm_f2h_rn_ftz Float32Regs:$a),
  (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
def : Pat<
  (int_nvvm_f2h_rn Float32Regs:$a),
  (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
1410
1411//
1412// Bitcast
1413//
1414
// 32-bit bitcast intrinsics, each printed as a plain mov.b32 between an
// Int32Regs and a Float32Regs register.
def INT_NVVM_BITCAST_F2I
  : F_MATH_1<"mov.b32 \t$dst, $src0;",
             Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F
  : F_MATH_1<"mov.b32 \t$dst, $src0;",
             Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;

// 64-bit bitcast intrinsics, each printed as a plain mov.b64 between an
// Int64Regs and a Float64Regs register.
def INT_NVVM_BITCAST_LL2D
  : F_MATH_1<"mov.b64 \t$dst, $src0;",
             Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL
  : F_MATH_1<"mov.b64 \t$dst, $src0;",
             Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;
1424
1425//
1426// FNS
1427//
1428
// Common shape of every fns.b32 variant.
//   ins      - input operand dag; it must name $mask, $base and $offset,
//              because the assembly string refers to them.
//   Operands - the dag matched as the value assigned to $dst.
// Every variant requires hasPTX<60> and hasSM<30>.
class INT_FNS_MBO<dag ins, dag Operands>
  : NVPTXInst<(outs Int32Regs:$dst),
              ins,
              "fns.b32 \t$dst, $mask, $base, $offset;",
              [(set Int32Regs:$dst, Operands)]>,
    Requires<[hasPTX<60>, hasSM<30>]>;
1434
// int_nvvm_fns in all eight register/immediate operand combinations. The
// three-letter suffix gives the kind of (mask, base, offset) in that order:
// 'r' = Int32Regs register, 'i' = i32imm immediate.
def INT_FNS_rrr
  : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
                (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_rri
  : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
                (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_rir
  : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
                (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_rii
  : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
                (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr
  : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
                (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_iri
  : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
                (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_iir
  : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
                (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii
  : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
                (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
1451
1452//-----------------------------------
1453// Atomic Functions
1454//-----------------------------------
1455
// PatFrag wrappers that attach one of the AS_match predicates (defined near
// the top of this file) to a fragment. Each predicate calls
// ChkMemSDNodeAddressSpace on the node with the global, shared or generic
// address space respectively.
class ATOMIC_GLOBAL_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.global>;

class ATOMIC_SHARED_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.shared>;

class ATOMIC_GENERIC_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.generic>;
1462
// Two-operand atomic for a single pointer width.
//   ptrT / ptrclass  - value type and register class of the $addr operand.
//   regT / regclass  - value type and register class of $dst and of a
//                      register $b.
//   SpaceStr, OpcStr, TypeStr
//                    - concatenated in that order after "atom" to form the
//                      mnemonic.
//   IntOp            - fragment matched as (IntOp addr, b).
//   IMMType / IMM    - operand type and pattern node used for an immediate $b.
//   Pred             - predicates attached to both variants via Requires.
// Defines `reg` ($b in a register) and `imm` ($b an immediate).
multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                          ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          list<Predicate> Pred> {
  // Both variants print identical text, so build the string once. The imm
  // variant previously passed an extra empty string to !strconcat.
  defvar Atom2Asm =
      "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;";

  def reg : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, regclass:$b),
                      Atom2Asm,
                      [(set (regT regclass:$dst),
                            (IntOp (ptrT ptrclass:$addr),
                                   (regT regclass:$b)))]>,
            Requires<Pred>;

  def imm : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, IMMType:$b),
                      Atom2Asm,
                      [(set (regT regclass:$dst),
                            (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
            Requires<Pred>;
}
// Instantiates F_ATOMIC_2_imp twice: `p32` with an (i32, Int32Regs) address
// operand and `p64` with an (i64, Int64Regs) one. All other arguments are
// forwarded unchanged; Pred defaults to the empty list.
multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass,
                      string SpaceStr, string TypeStr, string OpcStr,
                      PatFrag IntOp, Operand IMMType, SDNode IMM,
                      list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_2_imp<i32, Int32Regs, regT, regclass,
                     SpaceStr, TypeStr, OpcStr, IntOp, IMMType, IMM, Pred>;
  defm p64
    : F_ATOMIC_2_imp<i64, Int64Regs, regT, regclass,
                     SpaceStr, TypeStr, OpcStr, IntOp, IMMType, IMM, Pred>;
}
1484
// Two-operand atomic whose second operand is negated before use.
// The emitted text is a braced block that declares a scratch register `temp`
// of type .s<TypeStr>, writes neg.s<TypeStr> of $b into it, and then issues
// atom<SpaceStr><OpcStr>.u<TypeStr> with `temp` as the operand.
// Only a register form (`reg`) is defined.
multiclass F_ATOMIC_2_NEG_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                              ValueType regT, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, list<Predicate> Pred> {
  defvar NegAtomAsm =
      "{{ \n\t" #
      ".reg \t.s" # TypeStr # " temp; \n\t" #
      "neg.s" # TypeStr # " \ttemp, $b; \n\t" #
      "atom" # SpaceStr # OpcStr # ".u" # TypeStr #
          " \t$dst, [$addr], temp; \n\t" #
      "}}";

  def reg : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, regclass:$b),
                      NegAtomAsm,
                      [(set (regT regclass:$dst),
                            (IntOp (ptrT ptrclass:$addr),
                                   (regT regclass:$b)))]>,
            Requires<Pred>;
}
// Instantiates F_ATOMIC_2_NEG_imp twice: `p32` with an (i32, Int32Regs)
// address operand and `p64` with an (i64, Int64Regs) one. Pred defaults to
// the empty list.
multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_2_NEG_imp<i32, Int32Regs, regT, regclass,
                         SpaceStr, TypeStr, OpcStr, IntOp, Pred>;
  defm p64
    : F_ATOMIC_2_NEG_imp<i64, Int64Regs, regT, regclass,
                         SpaceStr, TypeStr, OpcStr, IntOp, Pred>;
}
1507
// Three-operand atomic (address plus $b and $c) for a single pointer width.
//   ptrT / ptrclass  - value type and register class of the $addr operand.
//   regT / regclass  - value type and register class of $dst and of register
//                      $b / $c operands.
//   SpaceStr, OpcStr, TypeStr
//                    - concatenated in that order after "atom" to form the
//                      mnemonic.
//   IntOp            - fragment matched as (IntOp addr, b, c).
//   IMMType          - operand type used when $b or $c is an immediate; the
//                      pattern side always matches such operands with `imm`.
//   Pred             - predicates attached to every variant via Requires.
// Variants: reg ($b, $c registers), imm1 ($b immediate), imm2 ($c immediate),
// imm3 (both immediate).
multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                          ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType,
                          list<Predicate> Pred> {
  // All four variants print identical text, so build the string once. The
  // imm2 variant previously passed an extra empty string to !strconcat.
  defvar Atom3Asm =
      "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;";

  def reg : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, regclass:$b, regclass:$c),
                      Atom3Asm,
                      [(set (regT regclass:$dst),
                            (IntOp (ptrT ptrclass:$addr),
                                   (regT regclass:$b),
                                   (regT regclass:$c)))]>,
            Requires<Pred>;

  def imm1 : NVPTXInst<(outs regclass:$dst),
                       (ins ptrclass:$addr, IMMType:$b, regclass:$c),
                       Atom3Asm,
                       [(set (regT regclass:$dst),
                             (IntOp (ptrT ptrclass:$addr),
                                    imm:$b,
                                    (regT regclass:$c)))]>,
             Requires<Pred>;

  def imm2 : NVPTXInst<(outs regclass:$dst),
                       (ins ptrclass:$addr, regclass:$b, IMMType:$c),
                       Atom3Asm,
                       [(set (regT regclass:$dst),
                             (IntOp (ptrT ptrclass:$addr),
                                    (regT regclass:$b),
                                    imm:$c))]>,
             Requires<Pred>;

  def imm3 : NVPTXInst<(outs regclass:$dst),
                       (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
                       Atom3Asm,
                       [(set (regT regclass:$dst),
                             (IntOp (ptrT ptrclass:$addr),
                                    imm:$b,
                                    imm:$c))]>,
             Requires<Pred>;
}
// Instantiates F_ATOMIC_3_imp twice: `p32` with an (i32, Int32Regs) address
// operand and `p64` with an (i64, Int64Regs) one. Pred defaults to the
// empty list.
multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass,
                      string SpaceStr, string TypeStr, string OpcStr,
                      PatFrag IntOp, Operand IMMType,
                      list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass,
                     SpaceStr, TypeStr, OpcStr, IntOp, IMMType, Pred>;
  defm p64
    : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass,
                     SpaceStr, TypeStr, OpcStr, IntOp, IMMType, Pred>;
}
1544
1545// atom_add
1546
// Address-space-checked fragments for atomic add. The _g / _s / _gen suffix
// selects the global / shared / generic check class.
// 32-bit integer add.
def atomic_load_add_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_add_32 node:$a, node:$b)>;
// 64-bit integer add.
def atomic_load_add_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_add_64 node:$a, node:$b)>;
// The size-less names wrap atomic_load_fadd rather than an integer add.
def atomic_load_add_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_fadd node:$a, node:$b)>;
1565
// atom.add instantiations.
// NOTE(review): each *_GEN_*_USE_G entry matches the same generic-space
// fragment as the *_GEN_* entry before it, prints ".global", and passes no
// predicate -- confirm which of the two is meant to be selected.

// u32
defm INT_PTX_ATOM_ADD_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".add",
               atomic_load_add_32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm>;

// u64
defm INT_PTX_ATOM_ADD_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64", ".add",
               atomic_load_add_64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm>;

// f32
defm INT_PTX_ATOM_ADD_G_F32
  : F_ATOMIC_2<f32, Float32Regs, ".global", ".f32", ".add",
               atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32
  : F_ATOMIC_2<f32, Float32Regs, ".shared", ".f32", ".add",
               atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32
  : F_ATOMIC_2<f32, Float32Regs, "", ".f32", ".add",
               atomic_load_add_gen, f32imm, fpimm>;

// f64, guarded by the hasAtomAddF64 predicate
defm INT_PTX_ATOM_ADD_G_F64
  : F_ATOMIC_2<f64, Float64Regs, ".global", ".f64", ".add",
               atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64
  : F_ATOMIC_2<f64, Float64Regs, ".shared", ".f64", ".add",
               atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64
  : F_ATOMIC_2<f64, Float64Regs, "", ".f64", ".add",
               atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
1597
1598// atom_sub
1599
// Address-space-checked fragments for atomic_load_sub_32 / _64. The
// _g / _s / _gen suffix selects the global / shared / generic check class.
def atomic_load_sub_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_sub_64 node:$a, node:$b)>;
1612
// Atomic subtract, built from F_ATOMIC_2_NEG with the ".add" opcode string:
// the operand is negated into a scratch register and then added.
// Definition order is kept exactly as before.
// NOTE(review): each *_GEN_*_USE_G entry matches the same generic-space
// fragment as the *_GEN_* entry before it and passes no predicate -- confirm
// which of the two is meant to be selected.
defm INT_PTX_ATOM_SUB_G_32
  : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_32_g>;
defm INT_PTX_ATOM_SUB_G_64
  : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_64_g>;
defm INT_PTX_ATOM_SUB_GEN_32
  : F_ATOMIC_2_NEG<i32, Int32Regs, "", "32", ".add",
                   atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G
  : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_S_32
  : F_ATOMIC_2_NEG<i32, Int32Regs, ".shared", "32", ".add",
                   atomic_load_sub_32_s>;
defm INT_PTX_ATOM_SUB_S_64
  : F_ATOMIC_2_NEG<i64, Int64Regs, ".shared", "64", ".add",
                   atomic_load_sub_64_s>;
defm INT_PTX_ATOM_SUB_GEN_64
  : F_ATOMIC_2_NEG<i64, Int64Regs, "", "64", ".add",
                   atomic_load_sub_64_gen>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G
  : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_64_gen>;
1629
1630// atom_swap
1631
// Address-space-checked fragments for atomic_swap_32 / _64. The
// _g / _s / _gen suffix selects the global / shared / generic check class.
def atomic_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_swap_64 node:$a, node:$b)>;
1644
// atom.exch instantiations (.b32 and .b64).
// NOTE(review): each *_GEN_*_USE_G entry matches the same generic-space
// fragment as the *_GEN_* entry before it and passes no predicate -- confirm
// which of the two is meant to be selected.
defm INT_PTX_ATOM_SWAP_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".exch",
               atomic_swap_32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".exch",
               atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".exch",
               atomic_swap_64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".exch",
               atomic_swap_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_64_gen, i64imm, imm>;
1661
1662// atom_max
1663
// Address-space-checked fragments for atomic_load_max_* and
// atomic_load_umax_*. The _g / _s / _gen suffix selects the global / shared /
// generic check class.
def atomic_load_max_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_umax_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umax_64 node:$a, node:$b)>;
1688
// atom.max instantiations. The 64-bit forms carry the hasSM<32> predicate.
// NOTE(review): each *_GEN_*_USE_G entry matches the same generic-space
// fragment as the *_GEN_* entry before it under the same predicates --
// confirm which of the two is meant to be selected.

// .s32
defm INT_PTX_ATOM_LOAD_MAX_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32", ".max",
               atomic_load_max_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".max",
               atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_32_gen, i32imm, imm>;

// .s64
defm INT_PTX_ATOM_LOAD_MAX_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64", ".max",
               atomic_load_max_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64", ".max",
               atomic_load_max_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".max",
               atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64", ".max",
               atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;

// .u32
defm INT_PTX_ATOM_LOAD_UMAX_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".max",
               atomic_load_umax_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".max",
               atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_32_gen, i32imm, imm>;

// .u64
defm INT_PTX_ATOM_LOAD_UMAX_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".max",
               atomic_load_umax_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64", ".max",
               atomic_load_umax_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".max",
               atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".max",
               atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;
1721
1722// atom_min
1723
// Address-space-checked fragments for atomic_load_min_* and
// atomic_load_umin_*. The _g / _s / _gen suffix selects the global / shared /
// generic check class.
def atomic_load_min_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umin_64 node:$a, node:$b)>;
1748
// atom.min instantiations. The 64-bit forms carry the hasSM<32> predicate.
// NOTE(review): each *_GEN_*_USE_G entry matches the same generic-space
// fragment as the *_GEN_* entry before it under the same predicates --
// confirm which of the two is meant to be selected.

// .s32
defm INT_PTX_ATOM_LOAD_MIN_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32", ".min",
               atomic_load_min_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".min",
               atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_32_gen, i32imm, imm>;

// .s64
defm INT_PTX_ATOM_LOAD_MIN_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64", ".min",
               atomic_load_min_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64", ".min",
               atomic_load_min_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".min",
               atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64", ".min",
               atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;

// .u32
defm INT_PTX_ATOM_LOAD_UMIN_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".min",
               atomic_load_umin_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".min",
               atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_32_gen, i32imm, imm>;

// .u64
defm INT_PTX_ATOM_LOAD_UMIN_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".min",
               atomic_load_umin_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64", ".min",
               atomic_load_umin_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".min",
               atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".min",
               atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;
1781
1782// atom_inc  atom_dec
1783
// Address-space-checked fragments for the int_nvvm_atomic_load_inc_32 and
// int_nvvm_atomic_load_dec_32 intrinsics. The _g / _s / _gen suffix selects
// the global / shared / generic check class.
def atomic_load_inc_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1796
// atom.inc / atom.dec instantiations (.u32 only).
// NOTE(review): each *_GEN_32_USE_G entry matches the same generic-space
// fragment as the *_GEN_32 entry before it and passes no predicate --
// confirm which of the two is meant to be selected.

// .inc
defm INT_PTX_ATOM_INC_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_g, i32imm, imm>;
defm INT_PTX_ATOM_INC_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".inc",
               atomic_load_inc_32_s, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm>;

// .dec
defm INT_PTX_ATOM_DEC_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_g, i32imm, imm>;
defm INT_PTX_ATOM_DEC_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".dec",
               atomic_load_dec_32_s, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm>;
1813
1814// atom_and
1815
// Address-space-checked fragments for atomic_load_and_32 / _64. The
// _g / _s / _gen suffix selects the global / shared / generic check class.
def atomic_load_and_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_and_64 node:$a, node:$b)>;
1828
// atom.and instantiations. The .b64 forms carry the hasSM<32> predicate.
// NOTE(review): each *_GEN_*_USE_G entry matches the same generic-space
// fragment as the *_GEN_* entry before it under the same predicates --
// confirm which of the two is meant to be selected.
defm INT_PTX_ATOM_AND_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_g, i32imm, imm>;
defm INT_PTX_ATOM_AND_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".and",
               atomic_load_and_32_s, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_AND_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".and",
               atomic_load_and_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_AND_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".and",
               atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;
1845
1846// atom_or
1847
// Address-space-checked fragments for atomic_load_or_32 / _64. The
// _g / _s / _gen suffix selects the global / shared / generic check class.
def atomic_load_or_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_or_64 node:$a, node:$b)>;
1860
// atom.or instantiations. The .b64 forms carry the hasSM<32> predicate.
// Definition order is kept exactly as before.
// NOTE(review): each *_GEN_*_USE_G entry matches the same generic-space
// fragment as the *_GEN_* entry before it under the same predicates --
// confirm which of the two is meant to be selected.
defm INT_PTX_ATOM_OR_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_32_g, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".or",
               atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".or",
               atomic_load_or_32_s, i32imm, imm>;
defm INT_PTX_ATOM_OR_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_OR_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".or",
               atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_OR_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".or",
               atomic_load_or_64_s, i64imm, imm, [hasSM<32>]>;
1877
1878// atom_xor
1879
// Address-space-checked fragments for atomic_load_xor_32 / _64. The
// _g / _s / _gen suffix selects the global / shared / generic check class.
def atomic_load_xor_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_xor_64 node:$a, node:$b)>;
1892
// atom.xor instantiations. The .b64 forms carry the hasSM<32> predicate.
// NOTE(review): each *_GEN_*_USE_G entry matches the same generic-space
// fragment as the *_GEN_* entry before it under the same predicates --
// confirm which of the two is meant to be selected.
defm INT_PTX_ATOM_XOR_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_32_g, i32imm, imm>;
defm INT_PTX_ATOM_XOR_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".xor",
               atomic_load_xor_32_s, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".xor",
               atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".xor",
               atomic_load_xor_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".xor",
               atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
1909
1910// atom_cas
1911
// Address-space-checked fragments for compare-and-swap.  Each one wraps the
// three-operand atomic_cmp_swap_{32,64} node in the global, shared, or
// generic checker class.
def atomic_cmp_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1924
// atom.cas instruction families for each address-space fragment, in 32- and
// 64-bit widths.  No predicate list is passed for any of them.  The *_USE_G
// families pair the generic-space fragment with the ".global" space string.
defm INT_PTX_ATOM_CAS_G_32
  : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_32_g, i32imm>;
defm INT_PTX_ATOM_CAS_S_32
  : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", ".cas",
               atomic_cmp_swap_32_s, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32
  : F_ATOMIC_3<i32, Int32Regs, "", ".b32", ".cas",
               atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G
  : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_G_64
  : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_64_g, i64imm>;
defm INT_PTX_ATOM_CAS_S_64
  : F_ATOMIC_3<i64, Int64Regs, ".shared", ".b64", ".cas",
               atomic_cmp_swap_64_s, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64
  : F_ATOMIC_3<i64, Int64Regs, "", ".b64", ".cas",
               atomic_cmp_swap_64_gen, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G
  : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_64_gen, i64imm>;
1941
1942// Support for scoped atomic operations.  Matches
1943// int_nvvm_atomic_{op}_{space}_{type}_{scope}
1944// and converts it into the appropriate instruction.
1945// NOTE: not all possible combinations are implemented
1946//  'space' is limited to generic as it's the only one needed to support CUDA.
1947//  'scope' = 'gpu' is default and is handled by regular atomic instructions.
// Shared base of every scoped two- and three-operand atomic instruction.
//   AsmStr   - assembly text to print.
//   regT     - value type of the result.
//   regclass - register class holding $result.
//   Preds    - predicates attached through Requires.
//   ins      - input operand dag supplied by the caller.
//   Operands - selection dag whose value is assigned to $result.
class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass,
                  list<Predicate> Preds, dag ins, dag Operands>
  : NVPTXInst<(outs regclass:$result), ins, AsmStr,
              [(set (regT regclass:$result), Operands)]>,
    Requires<Preds>;
1954
// Expands one scoped two-operand atomic into its four selection variants:
// the address $src may be a 32- or 64-bit register, and the value $b may be
// a register or an immediate.
multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Register-valued $b; these two variants carry AddedComplexity = 1.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<
            AsmStr, regT, regclass, Preds,
            (ins Int32Regs:$src, regclass:$b),
            (Intr (i32 Int32Regs:$src), (regT regclass:$b))>;
    def : ATOM23_impl<
            AsmStr, regT, regclass, Preds,
            (ins Int64Regs:$src, regclass:$b),
            (Intr (i64 Int64Regs:$src), (regT regclass:$b))>;
  }

  // Immediate-valued $b.  TableGen cannot deduce operand types from an
  // Intrinsic (it can from an Instruction), so the immediate is pinned to
  // ImmTy with an explicit cast.
  def : ATOM23_impl<
          AsmStr, regT, regclass, Preds,
          (ins Int32Regs:$src, ImmType:$b),
          (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>;
  def : ATOM23_impl<
          AsmStr, regT, regclass, Preds,
          (ins Int64Regs:$src, ImmType:$b),
          (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b))>;
}
1978
// Expands one scoped three-operand atomic into every combination of
// 32/64-bit address register and register/immediate choice for $b and $c
// (eight anonymous instructions in total).
multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Both $b and $c in registers: AddedComplexity = 2.
  let AddedComplexity = 2 in {
    def : ATOM23_impl<
            AsmStr, regT, regclass, Preds,
            (ins Int32Regs:$src, regclass:$b, regclass:$c),
            (Intr (i32 Int32Regs:$src), (regT regclass:$b),
                  (regT regclass:$c))>;
    def : ATOM23_impl<
            AsmStr, regT, regclass, Preds,
            (ins Int64Regs:$src, regclass:$b, regclass:$c),
            (Intr (i64 Int64Regs:$src), (regT regclass:$b),
                  (regT regclass:$c))>;
  }

  // Exactly one of $b / $c is an immediate: AddedComplexity = 1.
  let AddedComplexity = 1 in {
    // $b immediate, $c register.
    def : ATOM23_impl<
            AsmStr, regT, regclass, Preds,
            (ins Int32Regs:$src, ImmType:$b, regclass:$c),
            (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b),
                  (regT regclass:$c))>;
    def : ATOM23_impl<
            AsmStr, regT, regclass, Preds,
            (ins Int64Regs:$src, ImmType:$b, regclass:$c),
            (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b),
                  (regT regclass:$c))>;
    // $b register, $c immediate.
    def : ATOM23_impl<
            AsmStr, regT, regclass, Preds,
            (ins Int32Regs:$src, regclass:$b, ImmType:$c),
            (Intr (i32 Int32Regs:$src), (regT regclass:$b),
                  (ImmTy Imm:$c))>;
    def : ATOM23_impl<
            AsmStr, regT, regclass, Preds,
            (ins Int64Regs:$src, regclass:$b, ImmType:$c),
            (Intr (i64 Int64Regs:$src), (regT regclass:$b),
                  (ImmTy Imm:$c))>;
  }

  // Both $b and $c immediates: default complexity.
  def : ATOM23_impl<
          AsmStr, regT, regclass, Preds,
          (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
          (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<
          AsmStr, regT, regclass, Preds,
          (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
          (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}
2013
// Builds the asm string and looks up the intrinsic record for a scoped
// two-operand atomic, then forwards both to ATOM2P_impl.
//   asm:       atom[.<space>][.<scope>].<op>.<type> \t$result, [$src], $b;
//   intrinsic: int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>]
// The space qualifier is left out of the asm when SpaceStr is "gen", and the
// scope qualifier when ScopeStr is "gpu".
// NOTE(review): the intrinsic name drops its scope suffix only when ScopeStr
// is empty, which is a different condition from the asm string's "gpu" test.
// The visible instantiations pass "cta" or "sys", where both agree.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defvar AsmSpacePart  = !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr);
  defvar AsmScopePart  = !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr);
  defvar IntrScopePart = !if(!empty(ScopeStr), "", "_" # ScopeStr);

  defm : ATOM2P_impl<"atom" # AsmSpacePart # AsmScopePart
                            # "." # OpStr # "." # TypeStr
                            # " \t$result, [$src], $b;",
                     !cast<Intrinsic>("int_nvvm_atomic_" # OpStr
                                      # "_" # SpaceStr # "_" # IntTypeStr
                                      # IntrScopePart),
                     regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
// Three-operand counterpart of the two-operand name builder: assembles
//   asm:       atom[.<space>][.<scope>].<op>.<type> \t$result, [$src], $b, $c;
//   intrinsic: int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>]
// and forwards both to ATOM3P_impl.  The space qualifier is left out of the
// asm when SpaceStr is "gen", and the scope qualifier when ScopeStr is "gpu";
// the intrinsic name drops its scope suffix only when ScopeStr is empty.
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defvar AsmSpacePart  = !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr);
  defvar AsmScopePart  = !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr);
  defvar IntrScopePart = !if(!empty(ScopeStr), "", "_" # ScopeStr);

  defm : ATOM3P_impl<"atom" # AsmSpacePart # AsmScopePart
                            # "." # OpStr # "." # TypeStr
                            # " \t$result, [$src], $b, $c;",
                     !cast<Intrinsic>("int_nvvm_atomic_" # OpStr
                                      # "_" # SpaceStr # "_" # IntTypeStr
                                      # IntrScopePart),
                     regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
2043
// Address-space layer for scoped two-operand atomics.  Only the generic
// space ("gen") is instantiated at present.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, ValueType regT,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  defm _gen_
    : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                  regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
// Address-space layer for scoped three-operand atomics.  Only the generic
// space ("gen") is instantiated.
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, ValueType regT,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  defm _gen_
    : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                  regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
2058
// Scope layer for two-operand atomics: instantiates the "cta" and "sys"
// scopes, each with hasAtomScope appended to the caller's predicate list.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // The ".gpu" scope is the default and is already provided by the existing
  // atomics that carry no explicit scope, so it is not instantiated here.
  defvar ScopedPreds = !listconcat(Preds, [hasAtomScope]);

  defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regT, regclass, ImmType, Imm, ImmTy, ScopedPreds>;
  defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regT, regclass, ImmType, Imm, ImmTy, ScopedPreds>;
}
// Scope layer for three-operand atomics: instantiates the "cta" and "sys"
// scopes, each with hasAtomScope appended to the caller's predicate list.
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // ".gpu"-scoped atomics are not defined here: they behave the same as the
  // regular, non-scoped atomics defined elsewhere.
  defvar ScopedPreds = !listconcat(Preds, [hasAtomScope]);

  defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regT, regclass, ImmType, Imm, ImmTy, ScopedPreds>;
  defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regT, regclass, ImmType, Imm, ImmTy, ScopedPreds>;
}
2084
// atom.add: type table for the scoped add atomics.  Integer types use the
// "i" intrinsic type tag, floating-point types use "f"; only the f64 entry
// carries an extra predicate (hasAtomAddF64).
multiclass ATOM2_add_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs,
                          i64imm, imm, i64, []>;
  defm _f32 : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs,
                          f32imm, fpimm, f32, []>;
  defm _f64 : ATOM2S_impl<OpStr, "f", "f64", f64, Float64Regs,
                          f64imm, fpimm, f64, [hasAtomAddF64]>;
}
2095
// atom.{and,or,xor}: type table for the scoped bitwise atomics.  The 64-bit
// entry is gated on hasAtomBitwise64.
multiclass ATOM2_bitwise_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs,
                          i64imm, imm, i64, [hasAtomBitwise64]>;
}
2102
// atom.exch: type table for the scoped exchange atomics (32- and 64-bit,
// no extra predicates).
multiclass ATOM2_exch_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs,
                          i64imm, imm, i64, []>;
}
2108
// atom.{min,max}: type table for the scoped min/max atomics.  Both 64-bit
// entries are gated on hasAtomMinMax64.
multiclass ATOM2_minmax_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _s64 : ATOM2S_impl<OpStr, "i", "s64", i64, Int64Regs,
                          i64imm, imm, i64, [hasAtomMinMax64]>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs,
                          i64imm, imm, i64, [hasAtomMinMax64]>;
}
2118
// atom.{inc,dec}: the scoped increment/decrement atomics are defined for
// u32 only.
multiclass ATOM2_incdec_impl<string OpStr> {
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
}
2123
// atom.cas: type table for the scoped compare-and-swap atomics (32- and
// 64-bit, no extra predicates).
multiclass ATOM3_cas_impl<string OpStr> {
  defm _b32 : ATOM3S_impl<OpStr, "i", "b32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _b64 : ATOM3S_impl<OpStr, "i", "b64", i64, Int64Regs,
                          i64imm, imm, i64, []>;
}
2129
// Instantiate the scoped atomics, one family per operation.  The string
// argument is the operation name handed down to the per-type multiclasses.
defm INT_PTX_SATOM_ADD
  : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND
  : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS
  : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC
  : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH
  : ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC
  : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX
  : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN
  : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR
  : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR
  : ATOM2_bitwise_impl<"xor">;
2140
2141//-----------------------------------
2142// Support for ldu on sm_20 or later
2143//-----------------------------------
2144
2145// Don't annotate ldu instructions as mayLoad, as they load from memory that is
2146// read-only in a kernel.
2147
2148// Scalar
2149
// Scalar ldu.global.  Defines one pattern-less instruction per addressing
// form of $src (32/64-bit register, symbol, 32/64-bit reg+imm), all gated on
// hasLDU.  TyStr supplies the type suffix together with the operand text.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  defvar LduAsm = "ldu.global." # TyStr;

  def areg
    : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src), LduAsm, []>,
      Requires<[hasLDU]>;
  def areg64
    : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src), LduAsm, []>,
      Requires<[hasLDU]>;
  def avar
    : NVPTXInst<(outs regclass:$result), (ins imemAny:$src), LduAsm, []>,
      Requires<[hasLDU]>;
  def ari
    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), LduAsm, []>,
      Requires<[hasLDU]>;
  def ari64
    : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), LduAsm, []>,
      Requires<[hasLDU]>;
}
2167
// Scalar ldu.global instantiations.  The 8-bit load produces its result in
// Int16Regs, the same register class as the 16-bit load.
defm INT_PTX_LDU_GLOBAL_i8
  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16
  : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f32
  : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64
  : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
2174
2175// vector
2176
// Elementized two-element vector ldu.global.  Defines one pattern-less
// instruction per addressing form of $src; both results ($dst1, $dst2) live
// in `regclass`.  No predicate is attached.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  defvar LduAsm = "ldu.global." # TyStr;

  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int32Regs:$src), LduAsm, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int64Regs:$src), LduAsm, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri:$src), LduAsm, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri64:$src), LduAsm, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins imemAny:$src), LduAsm, []>;
}
2195
// Elementized four-element vector ldu.global.  Defines one pattern-less
// instruction per addressing form of $src; the four results ($dst1..$dst4)
// live in `regclass`.  No predicate is attached.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  defvar LduAsm = "ldu.global." # TyStr;

  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int32Regs:$src), LduAsm, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int64Regs:$src), LduAsm, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri:$src), LduAsm, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri64:$src), LduAsm, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins imemAny:$src), LduAsm, []>;
}
2213
// Vector ldu.global instantiations: v2 forms first, then v4 forms.  8-bit
// elements use Int16Regs; the f16 and f16x2 v4 forms are emitted as .b16 in
// Int16Regs and .b32 in Int32Regs respectively.
defm INT_PTX_LDU_G_v2i8_ELE
  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];",
                  Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];",
                  Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];",
                  Float64Regs>;
defm INT_PTX_LDU_G_v4i8_ELE
  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE
  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE
  : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE
  : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;
2243
2244
2245//-----------------------------------
2246// Support for ldg on sm_35 or later
2247//-----------------------------------
2248
2249// Don't annotate ld.global.nc as mayLoad, because these loads go through the
2250// non-coherent texture cache, and therefore the values read must be read-only
2251// during the lifetime of the kernel.
2252
// Scalar ld.global.nc.  Defines one pattern-less instruction per addressing
// form of $src (32/64-bit register, symbol, 32/64-bit reg+imm), all gated on
// hasLDG.  TyStr supplies the type suffix together with the operand text.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  defvar LdgAsm = "ld.global.nc." # TyStr;

  def areg
    : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src), LdgAsm, []>,
      Requires<[hasLDG]>;
  def areg64
    : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src), LdgAsm, []>,
      Requires<[hasLDG]>;
  def avar
    : NVPTXInst<(outs regclass:$result), (ins imemAny:$src), LdgAsm, []>,
      Requires<[hasLDG]>;
  def ari
    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), LdgAsm, []>,
      Requires<[hasLDG]>;
  def ari64
    : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), LdgAsm, []>,
      Requires<[hasLDG]>;
}
2270
// Scalar ld.global.nc instantiations.  The 8-bit load produces its result in
// Int16Regs, the same register class as the 16-bit load.
defm INT_PTX_LDG_GLOBAL_i8  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
2283
2284// vector
2285
// Elementized two-element vector ld.global.nc.  Defines one pattern-less
// instruction per addressing form of $src; both results ($dst1, $dst2) live
// in `regclass`.  No predicate is attached.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  defvar LdgAsm = "ld.global.nc." # TyStr;

  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int32Regs:$src), LdgAsm, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int64Regs:$src), LdgAsm, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri:$src), LdgAsm, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri64:$src), LdgAsm, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins imemAny:$src), LdgAsm, []>;
}
2304
// Elementized four-element vector ld.global.nc.  Defines one pattern-less
// instruction per addressing form of $src; the four results ($dst1..$dst4)
// live in `regclass`.  No predicate is attached.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  defvar LdgAsm = "ld.global.nc." # TyStr;

  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int32Regs:$src), LdgAsm, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int64Regs:$src), LdgAsm, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri:$src), LdgAsm, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri64:$src), LdgAsm, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins imemAny:$src), LdgAsm, []>;
}
2322
2323// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Vector ld.global.nc instantiations: v2 forms first, then v4 forms.  8-bit
// elements use Int16Regs.
defm INT_PTX_LDG_G_v2i8_ELE
  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];",
                  Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];",
                  Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];",
                  Float64Regs>;
defm INT_PTX_LDG_G_v4i8_ELE
  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;
2344
2345
// cvta.<space>: selects the given intrinsic for a pointer conversion.
//   _yes      - 32-bit source and result.
//   _yes_64   - 64-bit source and result.
//   _yes_6432 - 32-bit source, 64-bit result; requires useShortPtr.  The asm
//               widens $src into a scratch .b64 register with cvt.u64.u32 and
//               then applies the 64-bit cvta.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_6432
    : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
                !strconcat("{{ .reg .b64 %tmp;\n\t",
                           "  cvt.u64.u32 \t%tmp, $src;\n\t",
                           "  cvta.", Str, ".u64 \t$result, %tmp; }}"),
                [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
      Requires<[useShortPtr]>;
}
2360
// cvta.to.<space>: selects the given intrinsic for a pointer conversion.
//   _yes      - 32-bit source and result.
//   _yes_64   - 64-bit source and result.
//   _yes_3264 - 64-bit source, 32-bit result; requires useShortPtr.  The asm
//               applies the 64-bit cvta.to into a scratch .b64 register and
//               then narrows it with cvt.u32.u64.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta.to." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta.to." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_3264
    : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
                !strconcat("{{ .reg .b64 %tmp;\n\t",
                           "  cvta.to.", Str, ".u64 \t%tmp, $src;\n\t",
                           "  cvt.u32.u64 \t$result, %tmp; }}"),
                [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
      Requires<[useShortPtr]>;
}
2375
// <space> -> generic conversions.
defm cvta_local
  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared
  : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global
  : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const
  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;

// generic -> <space> conversions.
defm cvta_to_local
  : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared
  : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global
  : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const
  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
2385
2386
// nvvm.ptr.gen.to.param: selected as a plain register move, in 32- and
// 64-bit pointer widths.
def nvvm_ptr_gen_to_param
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              "mov.u32 \t$result, $src;",
              [(set Int32Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              "mov.u64 \t$result, $src;",
              [(set Int64Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
2398
2399
// nvvm.move intrinsics
// Each int_nvvm_move_* intrinsic is selected as a single mov of the matching
// width: .b16/.b32/.b64 for integers, .f32/.f64 for floating point, and
// .u32/.u64 for the two pointer widths of int_nvvm_move_ptr.
def nvvm_move_i16
  : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
              "mov.b16 \t$r, $s;",
              [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.b32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.b64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float
  : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
              "mov.f32 \t$r, $s;",
              [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double
  : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
              "mov.f64 \t$r, $s;",
              [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;
def nvvm_move_ptr32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.u32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.u64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;
2429
2430// @TODO: Are these actually needed, or will we always just see symbols
2431// copied to registers first?
2432/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2433                             "mov.u32 \t$r, $s;",
2434                             [(set Int32Regs:$r,
2435                             (int_nvvm_move_ptr texternalsym:$s))]>;
2436def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2437                             "mov.u64 \t$r, $s;",
2438                             [(set Int64Regs:$r,
2439                             (int_nvvm_move_ptr texternalsym:$s))]>;*/
2440
2441
2442// MoveParam        %r1, param
2443// ptr_local_to_gen %r2, %r1
2444// ptr_gen_to_local %r3, %r2
2445// ->
2446// mov %r1, param
2447
2448// @TODO: Revisit this.  There is a type
2449// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
2450// instructions are not currently defined. However, we can use the ptr
2451// variants and the asm printer will do the right thing.
// Fold a local->generic->local round trip applied to a MoveParam of an
// external symbol into one pointer move of the symbol (64-bit result first,
// then 32-bit).
def : Pat<(i64 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen
                   (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen
                   (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;
2458
// Pattern-less 64-bit move of an `imem` operand into a register.
// NOTE(review): judging by the name this materializes texture/surface
// handles -- confirm at the use sites.
def texsurf_handles
  : NVPTXInst<(outs Int64Regs:$result),
              (ins imem:$src),
              "mov.u64 \t$result, $src;",
              []>;
2462
2463//-----------------------------------
// Compiler error / warning intrinsics
2465// - Just ignore them in codegen
2466//-----------------------------------
2467
// int_nvvm_compiler_warn / int_nvvm_compiler_error take one 32- or 64-bit
// register operand and produce no result.  The emitted text is only a
// "//" comment line, so nothing executable is generated for them.
def INT_NVVM_COMPILER_WARN_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int64Regs:$a)]>;
2480
2481
2482// isspacep
2483
// isspacep.<suffix>: writes a predicate (Int1Regs) result for a 32- or 64-bit
// address operand, selecting the given intrinsic.  Preds (empty by default)
// is attached to both variants.
multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
  defvar IsSpaceAsm = "isspacep." # suffix # "\t$d, $a;";

  def _32
    : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), IsSpaceAsm,
                [(set Int1Regs:$d, (Intr Int32Regs:$a))]>,
      Requires<Preds>;
  def _64
    : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), IsSpaceAsm,
                [(set Int1Regs:$d, (Intr Int64Regs:$a))]>,
      Requires<Preds>;
}
2494
// One ISSPACEP instantiation per queried state space.  The const variant is
// gated on PTX 3.1; shared::cluster on PTX 7.8 together with sm_90.
defm isspace_const
  : ISSPACEP<"const", int_nvvm_isspacep_const, [hasPTX<31>]>;
defm isspace_global
  : ISSPACEP<"global", int_nvvm_isspacep_global>;
defm isspace_local
  : ISSPACEP<"local", int_nvvm_isspacep_local>;
defm isspace_shared
  : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
defm isspace_shared_cluster
  : ISSPACEP<"shared::cluster", int_nvvm_isspacep_shared_cluster,
             [hasPTX<78>, hasSM<90>]>;
2502
// Special register reads: mov.b32 from a SpecialRegs operand into a 32-bit
// register.  The instruction carries no pattern of its own.
def MOV_SPECIAL
  : NVPTXInst<(outs Int32Regs:$d), (ins SpecialRegs:$r),
              "mov.b32 \t$d, $r;", []>;
2507
// For every N in 0..31, int_nvvm_read_ptx_sreg_envreg<N> is selected to a
// MOV_SPECIAL that reads register ENVREG<N>.  The records are generated in
// ascending order of N.
foreach i = 0...31 in
  def : Pat<(!cast<Intrinsic>("int_nvvm_read_ptx_sreg_envreg" # i)),
            (MOV_SPECIAL !cast<Register>("ENVREG" # i))>;
2540
2541
2542// rotate builtin support
2543
// 32-bit rotate when hasHWROT32 holds: one shf.l.wrap.b32 with $src passed as
// both data operands.  This form takes the amount as an immediate.
def ROTATE_B32_HW_IMM
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
              [(set Int32Regs:$dst,
                    (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
    Requires<[hasHWROT32]>;

// Same instruction with the rotate amount held in a register.
def ROTATE_B32_HW_REG
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
              [(set Int32Regs:$dst,
                    (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
    Requires<[hasHWROT32]>;
2559
// Under noHWROT32 the 32-bit rotate intrinsic is selected to the *_sw
// instructions.  The immediate form passes both the amount and SUB_FRM_32 of
// the amount.
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
      Requires<[noHWROT32]>;

// Register-amount form.
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
          (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
2567
// Extract one 32-bit half of a 64-bit register.  Each instruction opens a PTX
// scope, declares a scratch %dummy register, and unpacks $src with mov.b64
// into a two-element vector holding $dst and %dummy.
let hasSideEffects = false in {
  // $dst takes the first vector element.
  def GET_LO_INT64
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
                "{{\n\t" #
                ".reg .b32 %dummy;\n\t" #
                "mov.b64 \t{$dst,%dummy}, $src;\n\t" #
                "}}",
                []>;

  // $dst takes the second vector element.
  def GET_HI_INT64
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
                "{{\n\t" #
                ".reg .b32 %dummy;\n\t" #
                "mov.b64 \t{%dummy,$dst}, $src;\n\t" #
                "}}",
                []>;
}
2583
// Build a 64-bit register from two 32-bit registers with mov.b64; $lo is the
// first vector element and $hi the second.
let hasSideEffects = false in
def PACK_TWO_INT32
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins Int32Regs:$lo, Int32Regs:$hi),
              "mov.b64 \t$dst, {{$lo, $hi}};",
              []>;
2589
// swap_lo_hi_b64: repack $src with its halves exchanged, i.e. the old high
// half becomes PACK_TWO_INT32's $lo operand and the old low half its $hi.
def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
          (PACK_TWO_INT32
            (GET_HI_INT64 Int64Regs:$src),
            (GET_LO_INT64 Int64Regs:$src))>;
2593
// Funnel shifts (shf.l.wrap.b32 / shf.r.wrap.b32); these require >= sm_32.
// An out-of-range amount does not trap, hence no side effects.  Every
// direction comes in an immediate-amount and a register-amount flavour, and
// none of them has a selection pattern of its own.
let hasSideEffects = false in {
  // Left funnel shift.
  def SHF_L_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",
                []>,
      Requires<[hasHWROT32]>;
  def SHF_L_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",
                []>,
      Requires<[hasHWROT32]>;

  // Right funnel shift.
  def SHF_R_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",
                []>,
      Requires<[hasHWROT32]>;
  def SHF_R_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",
                []>,
      Requires<[hasHWROT32]>;
}
2621
// HW version of rotate 64.
// With hasHWROT32, a 64-bit rotate is assembled from two 32-bit funnel shifts
// over the halves of $src, repacked with PACK_TWO_INT32.  The first operand
// of PACK_TWO_INT32 supplies the low half of the result, the second the high.

// Rotate left, immediate amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_IMM
              (GET_HI_INT64 Int64Regs:$src), (GET_LO_INT64 Int64Regs:$src),
              imm:$amt),
            (SHF_L_WRAP_B32_IMM
              (GET_LO_INT64 Int64Regs:$src), (GET_HI_INT64 Int64Regs:$src),
              imm:$amt))>,
      Requires<[hasHWROT32]>;

// Rotate left, register amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_REG
              (GET_HI_INT64 Int64Regs:$src), (GET_LO_INT64 Int64Regs:$src),
              Int32Regs:$amt),
            (SHF_L_WRAP_B32_REG
              (GET_LO_INT64 Int64Regs:$src), (GET_HI_INT64 Int64Regs:$src),
              Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;

// Rotate right, immediate amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_IMM
              (GET_LO_INT64 Int64Regs:$src), (GET_HI_INT64 Int64Regs:$src),
              imm:$amt),
            (SHF_R_WRAP_B32_IMM
              (GET_HI_INT64 Int64Regs:$src), (GET_LO_INT64 Int64Regs:$src),
              imm:$amt))>,
      Requires<[hasHWROT32]>;

// Rotate right, register amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_REG
              (GET_LO_INT64 Int64Regs:$src), (GET_HI_INT64 Int64Regs:$src),
              Int32Regs:$amt),
            (SHF_R_WRAP_B32_REG
              (GET_HI_INT64 Int64Regs:$src), (GET_LO_INT64 Int64Regs:$src),
              Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;
2655
// SW version of rotate 64, used under noHWROT32.

// Rotate left by an immediate: ROT64imm_sw receives the amount followed by
// SUB_FRM_64 of the amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
      Requires<[noHWROT32]>;

// Rotate left by a register amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;

// Rotate right by an immediate: the same ROT64imm_sw instruction with its two
// amount operands given in the opposite order.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;

// Rotate right by a register amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
2669
2670
2671//-----------------------------------
2672// Texture Intrinsics
2673//-----------------------------------
2674
// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must
// also be defined in NVPTXReplaceImageHandles.cpp
2677
2678// texmode_independent
2679let IsTex = true, IsTexModeUnified = false in {
2680// Texture fetch instructions using handles
2681
// 1D fetch: four results ($r, $g, $b, $a) of class outtype, read at coordinate
// $x (class intype).  texsamp supplies the leading $t and $s handle operands.
class TEX_1D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x)),
                !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                                 " [$t, $s, \\{$x\\}];"),
                []>;

// Four variants per instruction.  The suffix letters give the kind of $t and
// then $s: R is an Int64Regs register, I is an i64imm immediate.
multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR
    : TEX_1D_base<inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_1D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_1D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_1D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_F32_S32
  : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_1D_F32_F32
  : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_S32
  : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_S32_F32
  : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_S32
  : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_U32_F32
  : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2707
// 1D fetch with an explicit $lod operand appended after the coordinate.
// texsamp supplies the leading $t and $s handle operands.
class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$lod)),
                !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                                 " [$t, $s, \\{$x\\}], $lod;"),
                []>;

// Suffix letters give the kind of $t and then $s: R is an Int64Regs register,
// I is an i64imm immediate.
multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_1D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_F32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2734
// 1D fetch with explicit gradients: $gradx and $grady follow the coordinate,
// each printed as a one-element vector.  texsamp supplies $t and $s.
class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
                !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                                 " [$t, $s, \\{$x\\}],",
                                 " \\{$gradx\\}, \\{$grady\\};"),
                []>;

// Suffix letters give the kind of $t and then $s: R is an Int64Regs register,
// I is an i64imm immediate.
multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR : TEX_1D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_F32_F32_GRAD :
  TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_F32_GRAD :
  TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_F32_GRAD :
  TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2762
// 1D array fetch: the coordinate vector is {$l, $x}, where $l is always an
// Int32Regs operand regardless of intype.  texsamp supplies $t and $s.
class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x)),
                !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                                 " [$t, $s, \\{$l, $x\\}];"),
                []>;

// Suffix letters give the kind of $t and then $s: R is an Int64Regs register,
// I is an i64imm immediate.
multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_1D_ARRAY_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_ARRAY_F32_F32 :
  TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_F32_S32 :
  TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_1D_ARRAY_S32_S32 :
  TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_ARRAY_S32_F32 :
  TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_S32 :
  TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_ARRAY_U32_F32 :
  TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2795
// 1D array fetch with an explicit $lod operand.  The coordinate vector is
// {$l, $x}, with $l always an Int32Regs operand.  texsamp supplies $t and $s.
class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
                !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                                 " [$t, $s, \\{$l, $x\\}], $lod;"),
                []>;

// Suffix letters give the kind of $t and then $s: R is an Int64Regs register,
// I is an i64imm immediate.
multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype> {
  def _RR
    : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_ARRAY_F32_F32_LEVEL :
  TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_S32_F32_LEVEL :
  TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_F32_LEVEL :
  TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2823
// 1D array fetch with explicit gradients.  The coordinate vector is {$l, $x},
// with $l always an Int32Regs operand; $gradx and $grady are each printed as
// a one-element vector.  texsamp supplies $t and $s.
class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins Int32Regs:$l, intype:$x,
                          intype:$gradx, intype:$grady)),
                !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                                 " [$t, $s, \\{$l, $x\\}],",
                                 " \\{$gradx\\}, \\{$grady\\};"),
                []>;

// Suffix letters give the kind of $t and then $s: R is an Int64Regs register,
// I is an i64imm immediate.
multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
  def _RR
    : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_ARRAY_F32_F32_GRAD :
  TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_S32_F32_GRAD :
  TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_F32_GRAD :
  TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2852
// 2D fetch: four results ($r, $g, $b, $a) of class outtype, read at
// coordinate {$x, $y} (class intype).  texsamp supplies $t and $s.
class TEX_2D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y)),
                !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                                 " [$t, $s, \\{$x, $y\\}];"),
                []>;

// Suffix letters give the kind of $t and then $s: R is an Int64Regs register,
// I is an i64imm immediate.
multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR
    : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32
  : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_F32_S32
  : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_S32_S32
  : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_S32_F32
  : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_S32
  : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_U32_F32
  : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
2875
// 2D fetch with an explicit $lod operand appended after the {$x, $y}
// coordinate.  texsamp supplies the leading $t and $s handle operands.
class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
                !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                                 " [$t, $s, \\{$x, $y\\}], $lod;"),
                []>;

// Suffix letters give the kind of $t and then $s: R is an Int64Regs register,
// I is an i64imm immediate.
multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
2903
// 2D fetch with explicit gradients: after the {$x, $y} coordinate come two
// two-element vectors, {$gradx0, $gradx1} and {$grady0, $grady1}.  texsamp
// supplies the leading $t and $s handle operands.
class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins intype:$x, intype:$y,
                          intype:$gradx0, intype:$gradx1,
                          intype:$grady0, intype:$grady1)),
                !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                                 " [$t, $s, \\{$x, $y\\}],",
                                 " \\{$gradx0, $gradx1\\},",
                                 " \\{$grady0, $grady1\\};"),
                []>;

// Suffix letters give the kind of $t and then $s: R is an Int64Regs register,
// I is an i64imm immediate.
multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
2933
// 2D array fetch.  The coordinate vector is printed with four elements,
// {$l, $x, $y, $y}: $l is always an Int32Regs operand and $y is repeated in
// the last slot.  texsamp supplies the leading $t and $s handle operands.
class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
                !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                                 " [$t, $s, \\{$l, $x, $y, $y\\}];"),
                []>;

// Suffix letters give the kind of $t and then $s: R is an Int64Regs register,
// I is an i64imm immediate.
multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_F32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_U32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
2967
// 2D array fetch with an explicit $lod operand.  The coordinate vector is
// printed with four elements, {$l, $x, $y, $y}: $l is always an Int32Regs
// operand and $y is repeated in the last slot.  texsamp supplies $t and $s.
class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins Int32Regs:$l, intype:$x, intype:$y, intype:$lod)),
                !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                                 " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;"),
                []>;

// Suffix letters give the kind of $t and then $s: R is an Int64Regs register,
// I is an i64imm immediate.
multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype> {
  def _RR
    : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
2996
// 2D array fetch with explicit gradients.  The coordinate vector is printed
// with four elements, {$l, $x, $y, $y}: $l is always an Int32Regs operand and
// $y is repeated in the last slot.  The gradients follow as two two-element
// vectors.  texsamp supplies the leading $t and $s handle operands.
class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins Int32Regs:$l, intype:$x, intype:$y,
                          intype:$gradx0, intype:$gradx1,
                          intype:$grady0, intype:$grady1)),
                !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                                 " [$t, $s, \\{$l, $x, $y, $y\\}],",
                                 " \\{$gradx0, $gradx1\\},",
                                 " \\{$grady0, $grady1\\};"),
                []>;

// Suffix letters give the kind of $t and then $s: R is an Int64Regs register,
// I is an i64imm immediate.
multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
  def _RR
    : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32_GRAD :
  TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_GRAD :
  TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_GRAD :
  TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3027
// 3D fetch.  The coordinate vector is printed with four elements,
// {$x, $y, $z, $z}, i.e. $z is repeated in the last slot.  texsamp supplies
// the leading $t and $s handle operands.
class TEX_3D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
                !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                                 " [$t, $s, \\{$x, $y, $z, $z\\}];"),
                []>;

// Suffix letters give the kind of $t and then $s: R is an Int64Regs register,
// I is an i64imm immediate.
multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR
    : TEX_3D_base<inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_3D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_3D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_3D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_3D_F32_F32
  : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_F32_S32
  : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_3D_S32_S32
  : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_3D_S32_F32
  : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_S32
  : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_3D_U32_F32
  : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3054
// 3D fetch with an explicit $lod operand.  The coordinate vector is printed
// with four elements, {$x, $y, $z, $z}, i.e. $z is repeated in the last slot.
// texsamp supplies the leading $t and $s handle operands.
class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
                !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                                 " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;"),
                []>;

// Suffix letters give the kind of $t and then $s: R is an Int64Regs register,
// I is an i64imm immediate.
multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR
    : TEX_3D_LEVEL_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_3D_LEVEL_base<inst, outtype, intype,
                        (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_3D_LEVEL_base<inst, outtype, intype,
                        (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_3D_LEVEL_base<inst, outtype, intype,
                        (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_3D_F32_F32_LEVEL :
  TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_LEVEL :
  TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_LEVEL :
  TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3083
// 3D fetch with explicit gradients.  The coordinate and both gradient vectors
// are printed with four elements, each repeating its third component in the
// last slot ($z, $gradx2, $grady2).  texsamp supplies the leading $t and $s
// handle operands.
class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins intype:$x, intype:$y, intype:$z,
                          intype:$gradx0, intype:$gradx1, intype:$gradx2,
                          intype:$grady0, intype:$grady1, intype:$grady2)),
                !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                                 " [$t, $s, \\{$x, $y, $z, $z\\}],",
                                 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},",
                                 " \\{$grady0, $grady1, $grady2, $grady2\\};"),
                []>;

// Suffix letters give the kind of $t and then $s: R is an Int64Regs register,
// I is an i64imm immediate.
multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR
    : TEX_3D_GRAD_base<inst, outtype, intype,
                       (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_3D_GRAD_base<inst, outtype, intype,
                       (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_3D_GRAD_base<inst, outtype, intype,
                       (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_3D_GRAD_base<inst, outtype, intype,
                       (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_3D_F32_F32_GRAD :
  TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_GRAD :
  TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_GRAD :
  TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3116
// Base record for cube-map tex fetches with separate texture ($t) and
// sampler ($s) operands taken from `texsamp`.  Writes four `outtype` results
// and reads three `intype` coordinates; $z is printed twice so that the
// coordinate vector has four elements.
class TEX_CUBE_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, $s, \\{$x, $y, $z, $z\\}];",
      []>;
3125
// Expands TEX_CUBE_base into its four operand-kind variants.  The suffix
// names the kind of $t followed by the kind of $s: R is an Int64Regs
// register, I is an i64imm immediate.
multiclass TEX_CUBE<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_CUBE_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3136
// Cube-map tex instantiations: f32, s32 and u32 results with f32
// coordinates.  Both integer result flavours are held in Int32Regs.
defm TEX_CUBE_F32_F32 :
  TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32 :
  TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32 :
  TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3143
// Base record for cube-map tex.level fetches with separate texture ($t) and
// sampler ($s) operands taken from `texsamp`.  On top of the three `intype`
// coordinates it takes one more `intype` operand, $lod, printed after the
// address bracket.  $z is printed twice to make a four-element vector.
class TEX_CUBE_LEVEL_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
      []>;
3153
// Expands TEX_CUBE_LEVEL_base into its four operand-kind variants.  The
// suffix names the kind of $t followed by the kind of $s: R is an Int64Regs
// register, I is an i64imm immediate.
multiclass TEX_CUBE_LEVEL<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3165
// Cube-map tex.level instantiations: f32, s32 and u32 results with f32
// inputs.  Both integer result flavours are held in Int32Regs.
defm TEX_CUBE_F32_F32_LEVEL :
  TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32_LEVEL :
  TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32_LEVEL :
  TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3172
// Base record for cube-map-array tex fetches with separate texture ($t) and
// sampler ($s) operands taken from `texsamp`.  The coordinate vector is
// {$l, $x, $y, $z}: $l is always an Int32Regs operand, while the remaining
// three coordinates use `intype`.
class TEX_CUBE_ARRAY_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, $s, \\{$l, $x, $y, $z\\}];",
      []>;
3182
// Expands TEX_CUBE_ARRAY_base into its four operand-kind variants.  The
// suffix names the kind of $t followed by the kind of $s: R is an Int64Regs
// register, I is an i64imm immediate.
multiclass TEX_CUBE_ARRAY<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3194
// Cube-map-array tex instantiations: f32, s32 and u32 results with f32
// coordinates.  Both integer result flavours are held in Int32Regs.
defm TEX_CUBE_ARRAY_F32_F32 :
  TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32 :
  TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32 :
  TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3201
// Base record for cube-map-array tex.level fetches with separate texture
// ($t) and sampler ($s) operands taken from `texsamp`.  The coordinate
// vector is {$l, $x, $y, $z} with $l fixed to Int32Regs; a trailing `intype`
// operand, $lod, is printed after the address bracket.
class TEX_CUBE_ARRAY_LEVEL_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
      []>;
3211
// Expands TEX_CUBE_ARRAY_LEVEL_base into its four operand-kind variants.
// The suffix names the kind of $t followed by the kind of $s: R is an
// Int64Regs register, I is an i64imm immediate.
multiclass TEX_CUBE_ARRAY_LEVEL<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3223
// Cube-map-array tex.level instantiations: f32, s32 and u32 results with
// f32 inputs.  Both integer result flavours are held in Int32Regs.
defm TEX_CUBE_ARRAY_F32_F32_LEVEL :
  TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32_LEVEL :
  TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32_LEVEL :
  TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3233
// Base record for 2D tld4 instructions with separate texture ($t) and
// sampler ($s) operands taken from `texsamp`.  Writes four `outtype` results
// named $v0..$v3 and reads a two-element `intype` coordinate vector.
class TLD4_2D_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
      !con(texsamp, (ins intype:$x, intype:$y)),
      inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
      []>;
3241
// Expands TLD4_2D_base into its four operand-kind variants.  The suffix
// names the kind of $t followed by the kind of $s: R is an Int64Regs
// register, I is an i64imm immediate.
multiclass TLD4_2D<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TLD4_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TLD4_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TLD4_2D_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TLD4_2D_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3252
// 2D tld4 instantiations.  The R/G/B/A part of each record name matches the
// .r/.g/.b/.a selector in the mnemonic; coordinates are always f32.

// f32 results.
defm TLD4_R_2D_F32_F32 :
  TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_G_2D_F32_F32 :
  TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_B_2D_F32_F32 :
  TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_A_2D_F32_F32 :
  TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

// s32 results (held in Int32Regs).
defm TLD4_R_2D_S32_F32 :
  TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_G_2D_S32_F32 :
  TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_B_2D_S32_F32 :
  TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_A_2D_S32_F32 :
  TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

// u32 results (held in Int32Regs).
defm TLD4_R_2D_U32_F32 :
  TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_G_2D_U32_F32 :
  TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_B_2D_U32_F32 :
  TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_A_2D_U32_F32 :
  TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3279
3280}
3281
3282
3283// texmode_unified
3284let IsTex = true, IsTexModeUnified = true in {
3285// Texture fetch instructions using handles
3286
// Base record for 1D tex fetches that address the texture through a single
// $t operand supplied by `tex` (no sampler operand).  Writes four `outtype`
// results and reads one `intype` coordinate.
class TEX_UNIFIED_1D_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
      []>;
3294
// Declares both variants of TEX_UNIFIED_1D_base: _R reads $t from an
// Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_1D<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3300
// Unified-mode 1D tex instantiations: every pairing of an f32/s32/u32 result
// with an s32 or f32 coordinate.  s32 and u32 values live in Int32Regs.
defm TEX_UNIFIED_1D_F32_S32 :
  TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_F32_F32 :
  TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_S32 :
  TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_S32_F32 :
  TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_S32 :
  TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_U32_F32 :
  TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3313
// Base record for 1D tex.level fetches that address the texture through a
// single $t operand supplied by `tex`.  Reads one `intype` coordinate plus an
// `intype` $lod operand printed after the address bracket.
class TEX_UNIFIED_1D_LEVEL_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
      []>;
3321
// Declares both variants of TEX_UNIFIED_1D_LEVEL_base: _R reads $t from an
// Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_1D_LEVEL<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3327
// Unified-mode 1D tex.level instantiations: f32, s32 and u32 results with
// f32 inputs.  Both integer result flavours are held in Int32Regs.
defm TEX_UNIFIED_1D_F32_F32_LEVEL :
  TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_F32_LEVEL :
  TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_F32_LEVEL :
  TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3334
// Base record for 1D tex.grad fetches that address the texture through a
// single $t operand supplied by `tex`.  Reads one `intype` coordinate and two
// single-element `intype` gradient vectors, $gradx and $grady.
class TEX_UNIFIED_1D_GRAD_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
      []>;
3343
// Declares both variants of TEX_UNIFIED_1D_GRAD_base: _R reads $t from an
// Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_1D_GRAD<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3349
// Unified-mode 1D tex.grad instantiations: f32, s32 and u32 results with
// f32 inputs.  Both integer result flavours are held in Int32Regs.
defm TEX_UNIFIED_1D_F32_F32_GRAD :
  TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_F32_GRAD :
  TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_F32_GRAD :
  TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3356
// Base record for 1D-array tex fetches that address the texture through a
// single $t operand supplied by `tex`.  The coordinate vector is {$l, $x}:
// $l is always an Int32Regs operand and $x uses `intype`.
class TEX_UNIFIED_1D_ARRAY_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins Int32Regs:$l, intype:$x)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];",
      []>;
3364
// Declares both variants of TEX_UNIFIED_1D_ARRAY_base: _R reads $t from an
// Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_1D_ARRAY<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3370
// Unified-mode 1D-array tex instantiations: every pairing of an f32/s32/u32
// result with an s32 or f32 coordinate.  s32 and u32 values live in
// Int32Regs.
defm TEX_UNIFIED_1D_ARRAY_F32_S32 :
  TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_F32_F32 :
  TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_S32 :
  TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32 :
  TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_S32 :
  TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32 :
  TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
3383
// Base record for 1D-array tex.level fetches that address the texture
// through a single $t operand supplied by `tex`.  The coordinate vector is
// {$l, $x} with $l fixed to Int32Regs; an `intype` $lod operand is printed
// after the address bracket.
class TEX_UNIFIED_1D_ARRAY_LEVEL_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
      []>;
3391
// Declares both variants of TEX_UNIFIED_1D_ARRAY_LEVEL_base: _R reads $t
// from an Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3399
// Unified-mode 1D-array tex.level instantiations: f32, s32 and u32 results
// with f32 inputs.  Both integer result flavours are held in Int32Regs.
defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL :
  TEX_UNIFIED_1D_ARRAY_LEVEL<
    "tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL :
  TEX_UNIFIED_1D_ARRAY_LEVEL<
    "tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL :
  TEX_UNIFIED_1D_ARRAY_LEVEL<
    "tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
3409
// Base record for 1D-array tex.grad fetches that address the texture through
// a single $t operand supplied by `tex`.  The coordinate vector is {$l, $x}
// with $l fixed to Int32Regs; two single-element `intype` gradient vectors,
// $gradx and $grady, follow the address bracket.
class TEX_UNIFIED_1D_ARRAY_GRAD_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins Int32Regs:$l, intype:$x, intype:$gradx, intype:$grady)),
      // Exactly one space separates the result vector from the address
      // bracket, matching the other texture records in this file.
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
      []>;
3419
// Declares both variants of TEX_UNIFIED_1D_ARRAY_GRAD_base: _R reads $t from
// an Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_1D_ARRAY_GRAD<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3427
// Unified-mode 1D-array tex.grad instantiations: f32, s32 and u32 results
// with f32 inputs.  Both integer result flavours are held in Int32Regs.
defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD :
  TEX_UNIFIED_1D_ARRAY_GRAD<
    "tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD :
  TEX_UNIFIED_1D_ARRAY_GRAD<
    "tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD :
  TEX_UNIFIED_1D_ARRAY_GRAD<
    "tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
3437
// Base record for 2D tex fetches that address the texture through a single
// $t operand supplied by `tex`.  Writes four `outtype` results and reads a
// two-element `intype` coordinate vector.
class TEX_UNIFIED_2D_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$y)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
      []>;
3445
// Declares both variants of TEX_UNIFIED_2D_base: _R reads $t from an
// Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_2D<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3451
// Unified-mode 2D tex instantiations: every pairing of an f32/s32/u32 result
// with s32 or f32 coordinates.  s32 and u32 values live in Int32Regs.
defm TEX_UNIFIED_2D_F32_S32 :
  TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_F32_F32 :
  TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_S32 :
  TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_S32_F32 :
  TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_S32 :
  TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_U32_F32 :
  TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3464
// Base record for 2D tex.level fetches that address the texture through a
// single $t operand supplied by `tex`.  Reads a two-element `intype`
// coordinate vector plus an `intype` $lod operand printed after the address
// bracket.
class TEX_UNIFIED_2D_LEVEL_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
      []>;
3472
// Declares both variants of TEX_UNIFIED_2D_LEVEL_base: _R reads $t from an
// Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_2D_LEVEL<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3478
// Unified-mode 2D tex.level instantiations: f32, s32 and u32 results with
// f32 inputs.  Both integer result flavours are held in Int32Regs.
defm TEX_UNIFIED_2D_F32_F32_LEVEL :
  TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_F32_LEVEL :
  TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_F32_LEVEL :
  TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3485
// Base record for 2D tex.grad fetches that address the texture through a
// single $t operand supplied by `tex`.  Reads a two-element `intype`
// coordinate vector and two two-element `intype` gradient vectors.
class TEX_UNIFIED_2D_GRAD_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex,
           (ins intype:$x, intype:$y,
                intype:$gradx0, intype:$gradx1,
                intype:$grady0, intype:$grady1)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
             " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
      []>;
// Declares both variants of TEX_UNIFIED_2D_GRAD_base: _R reads $t from an
// Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_2D_GRAD<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3501
// Unified-mode 2D tex.grad instantiations: f32, s32 and u32 results with
// f32 inputs.  Both integer result flavours are held in Int32Regs.
defm TEX_UNIFIED_2D_F32_F32_GRAD :
  TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_F32_GRAD :
  TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_F32_GRAD :
  TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3508
// Base record for 2D-array tex fetches that address the texture through a
// single $t operand supplied by `tex`.  The coordinate vector is printed as
// {$l, $x, $y, $y}: $l is always an Int32Regs operand, $x and $y use
// `intype`, and $y is repeated to fill the fourth slot.
class TEX_UNIFIED_2D_ARRAY_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
      []>;
// Declares both variants of TEX_UNIFIED_2D_ARRAY_base: _R reads $t from an
// Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_2D_ARRAY<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3521
// Unified-mode 2D-array tex instantiations: every pairing of an f32/s32/u32
// result with s32 or f32 coordinates.  s32 and u32 values live in Int32Regs.
defm TEX_UNIFIED_2D_ARRAY_F32_S32 :
  TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_F32_F32 :
  TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_S32 :
  TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32 :
  TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_S32 :
  TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32 :
  TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3534
// Base record for 2D-array tex.level fetches that address the texture
// through a single $t operand supplied by `tex`.  The coordinate vector is
// printed as {$l, $x, $y, $y} with $l fixed to Int32Regs and $y repeated to
// fill the fourth slot; an `intype` $lod operand follows the address bracket.
class TEX_UNIFIED_2D_ARRAY_LEVEL_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$lod)),
      // Exactly one space separates the result vector from the address
      // bracket, matching the other texture records in this file.
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$l, $x, $y, $y\\}], $lod;",
      []>;
// Declares both variants of TEX_UNIFIED_2D_ARRAY_LEVEL_base: _R reads $t
// from an Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3551
// Unified-mode 2D-array tex.level instantiations: f32, s32 and u32 results
// with f32 inputs.  Both integer result flavours are held in Int32Regs.
defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL :
  TEX_UNIFIED_2D_ARRAY_LEVEL<
    "tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL :
  TEX_UNIFIED_2D_ARRAY_LEVEL<
    "tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL :
  TEX_UNIFIED_2D_ARRAY_LEVEL<
    "tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3561
// Base record for 2D-array tex.grad fetches that address the texture through
// a single $t operand supplied by `tex`.  The coordinate vector is printed as
// {$l, $x, $y, $y} with $l fixed to Int32Regs and $y repeated to fill the
// fourth slot; two two-element `intype` gradient vectors follow.
class TEX_UNIFIED_2D_ARRAY_GRAD_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex,
           (ins Int32Regs:$l, intype:$x, intype:$y,
                intype:$gradx0, intype:$gradx1,
                intype:$grady0, intype:$grady1)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
             " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
      []>;
// Declares both variants of TEX_UNIFIED_2D_ARRAY_GRAD_base: _R reads $t from
// an Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_2D_ARRAY_GRAD<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3579
// Unified-mode 2D-array tex.grad instantiations: f32, s32 and u32 results
// with f32 inputs.  Both integer result flavours are held in Int32Regs.
defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD :
  TEX_UNIFIED_2D_ARRAY_GRAD<
    "tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD :
  TEX_UNIFIED_2D_ARRAY_GRAD<
    "tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD :
  TEX_UNIFIED_2D_ARRAY_GRAD<
    "tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3589
// Base record for 3D tex fetches that address the texture through a single
// $t operand supplied by `tex`.  Reads three `intype` coordinates; $z is
// printed twice so that the coordinate vector has four elements.
class TEX_UNIFIED_3D_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$y, intype:$z)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
      []>;
// Declares both variants of TEX_UNIFIED_3D_base: _R reads $t from an
// Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_3D<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_3D_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_3D_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3602
// Unified-mode 3D tex instantiations: every pairing of an f32/s32/u32 result
// with s32 or f32 coordinates.  s32 and u32 values live in Int32Regs.
defm TEX_UNIFIED_3D_F32_S32 :
  TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_F32_F32 :
  TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_S32 :
  TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_S32_F32 :
  TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_S32 :
  TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_U32_F32 :
  TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3615
// Base record for 3D tex.level fetches that address the texture through a
// single $t operand supplied by `tex`.  Reads three `intype` coordinates
// ($z printed twice to make four elements) plus an `intype` $lod operand
// printed after the address bracket.
class TEX_UNIFIED_3D_LEVEL_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$x, $y, $z, $z\\}], $lod;",
      []>;
// Declares both variants of TEX_UNIFIED_3D_LEVEL_base: _R reads $t from an
// Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_3D_LEVEL<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_3D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_3D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3629
// Unified-mode 3D tex.level instantiations: f32, s32 and u32 results with
// f32 inputs.  Both integer result flavours are held in Int32Regs.
defm TEX_UNIFIED_3D_F32_F32_LEVEL :
  TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_F32_LEVEL :
  TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_F32_LEVEL :
  TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3636
// Base record for 3D tex.grad fetches that address the texture through a
// single $t operand supplied by `tex`.  Reads three `intype` coordinates and
// two three-component `intype` gradient vectors.  Each braced operand vector
// in the assembly string has four slots, so the third component is printed
// twice to fill the last.
class TEX_UNIFIED_3D_GRAD_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex,
           (ins intype:$x, intype:$y, intype:$z,
                intype:$gradx0, intype:$gradx1, intype:$gradx2,
                intype:$grady0, intype:$grady1, intype:$grady2)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
             " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
             " \\{$grady0, $grady1, $grady2, $grady2\\};",
      []>;
// Declares both variants of TEX_UNIFIED_3D_GRAD_base: _R reads $t from an
// Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_3D_GRAD<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_3D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_3D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3654
// Unified-mode 3D tex.grad instantiations: f32, s32 and u32 results with
// f32 inputs.  Both integer result flavours are held in Int32Regs.
defm TEX_UNIFIED_3D_F32_F32_GRAD :
  TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_F32_GRAD :
  TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_F32_GRAD :
  TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3661
// Base record for cube-map tex fetches that address the texture through a
// single $t operand supplied by `tex`.  Reads three `intype` coordinates; $z
// is printed twice so that the coordinate vector has four elements.
class TEX_UNIFIED_CUBE_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$y, intype:$z)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
      []>;
// Declares both variants of TEX_UNIFIED_CUBE_base: _R reads $t from an
// Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_CUBE<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_CUBE_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_CUBE_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3674
// Unified-mode cube-map tex instantiations: f32, s32 and u32 results with
// f32 coordinates.  Both integer result flavours are held in Int32Regs.
defm TEX_UNIFIED_CUBE_F32_F32 :
  TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_S32_F32 :
  TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_U32_F32 :
  TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3681
// Base record for cube-map tex.level fetches that address the texture
// through a single $t operand supplied by `tex`.  Reads three `intype`
// coordinates ($z printed twice to make four elements) plus an `intype` $lod
// operand printed after the address bracket.
class TEX_UNIFIED_CUBE_LEVEL_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$x, $y, $z, $z\\}], $lod;",
      []>;
// Declares both variants of TEX_UNIFIED_CUBE_LEVEL_base: _R reads $t from an
// Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_CUBE_LEVEL<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_CUBE_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_CUBE_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3697
// Unified-mode cube-map tex.level instantiations: f32, s32 and u32 results
// with f32 inputs.  Both integer result flavours are held in Int32Regs.
defm TEX_UNIFIED_CUBE_F32_F32_LEVEL :
  TEX_UNIFIED_CUBE_LEVEL<
    "tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_S32_F32_LEVEL :
  TEX_UNIFIED_CUBE_LEVEL<
    "tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_U32_F32_LEVEL :
  TEX_UNIFIED_CUBE_LEVEL<
    "tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3707
// Base record for cube-map-array tex fetches that address the texture
// through a single $t operand supplied by `tex`.  The coordinate vector is
// {$l, $x, $y, $z}: $l is always an Int32Regs operand, while the remaining
// three coordinates use `intype`.
class TEX_UNIFIED_CUBE_ARRAY_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];",
      []>;
// Declares both variants of TEX_UNIFIED_CUBE_ARRAY_base: _R reads $t from an
// Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_CUBE_ARRAY<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_CUBE_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_CUBE_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3722
// Unified-mode cube-map-array tex instantiations: f32, s32 and u32 results
// with f32 coordinates.  Both integer result flavours are held in Int32Regs.
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32 :
  TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32 :
  TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32 :
  TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3729
// Base record for cube-map-array tex.level fetches that address the texture
// through a single $t operand supplied by `tex`.  The coordinate vector is
// {$l, $x, $y, $z} with $l fixed to Int32Regs; an `intype` $lod operand is
// printed after the address bracket.
class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex,
           (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$l, $x, $y, $z\\}], $lod;",
      []>;
// Declares both variants of TEX_UNIFIED_CUBE_ARRAY_LEVEL_base: _R reads $t
// from an Int64Regs register, _I takes it as an i64imm immediate.
multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3746
3747defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3748  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
3749                                 Float32Regs, Float32Regs>;
3750defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3751  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
3752                                 Int32Regs, Float32Regs>;
3753defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3754  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
3755                                 Int32Regs, Float32Regs>;
3756
// 2D tld4 instruction.  Produces four `outtype` results ($v0..$v3) and prints
// the address as [$t, {$x, $y}].  `tex` must supply the operand named $t.
class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
                           NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
      !con(tex, (ins intype:$x, intype:$y)),
      inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
      []>;

// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                           NVPTXRegClass intype> {
  def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype,
                                (ins Int64Regs:$t)>;
  def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype,
                                (ins i64imm:$t)>;
}

// f32 results, one instruction per component selector (r, g, b, a).
defm TLD4_UNIFIED_R_2D_F32_F32 :
  TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_F32_F32 :
  TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_F32_F32 :
  TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_F32_F32 :
  TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

// s32 results.
defm TLD4_UNIFIED_R_2D_S32_F32 :
  TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_S32_F32 :
  TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_S32_F32 :
  TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_S32_F32 :
  TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

// u32 results.
defm TLD4_UNIFIED_R_2D_U32_F32 :
  TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_U32_F32 :
  TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_U32_F32 :
  TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_U32_F32 :
  TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3796
3797}
3798
3799
3800
3801//=== Surface load instructions
3802
// Single-result surface loads.  IsSuld is written as the integer 1 so it reads
// consistently with the `IsSuld = 2` and `IsSuld = 3` groups further down;
// TableGen's `true` literal is the same integer value, so nothing changes in
// the generated records.
let IsSuld = 1 in {
3804
// 1D surface load with a single `outtype` result $r, addressed as
// [$s, {$x}].  `surf` is prepended to the inputs and must supply $s.
class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r),
      !con(surf, (ins Int32Regs:$x)),
      inst # " \\{$r\\}, [$s, \\{$x\\}];",
      []>;

// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_base<inst, outtype,
                        (ins Int64Regs:$s)>;
  def _I : SULD_1D_base<inst, outtype,
                        (ins i64imm:$s)>;
}

// .clamp variants (b8 results use Int16Regs).
defm SULD_1D_I8_CLAMP  : SULD_1D<"suld.b.1d.b8.clamp",  Int16Regs>;
defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_1D_I8_TRAP  : SULD_1D<"suld.b.1d.b8.trap",  Int16Regs>;
defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_1D_I8_ZERO  : SULD_1D<"suld.b.1d.b8.zero",  Int16Regs>;
defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;
3829
// 1D-array surface load with a single `outtype` result $r, addressed as
// [$s, {$l, $x}].  `surf` is prepended to the inputs and must supply $s.
class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r),
      !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
      inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
      []>;

// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_ARRAY_base<inst, outtype,
                              (ins Int64Regs:$s)>;
  def _I : SULD_1D_ARRAY_base<inst, outtype,
                              (ins i64imm:$s)>;
}

// .clamp variants (b8 results use Int16Regs).
defm SULD_1D_ARRAY_I8_CLAMP :
  SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I16_CLAMP :
  SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I32_CLAMP :
  SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_I64_CLAMP :
  SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_1D_ARRAY_I8_TRAP :
  SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_I16_TRAP :
  SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_I32_TRAP :
  SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_I64_TRAP :
  SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_1D_ARRAY_I8_ZERO :
  SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_I16_ZERO :
  SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_I32_ZERO :
  SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_I64_ZERO :
  SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;
3866
// 2D surface load with a single `outtype` result $r, addressed as
// [$s, {$x, $y}].  `surf` is prepended to the inputs and must supply $s.
class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
      inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
      []>;

// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_base<inst, outtype,
                        (ins Int64Regs:$s)>;
  def _I : SULD_2D_base<inst, outtype,
                        (ins i64imm:$s)>;
}

// .clamp variants (b8 results use Int16Regs).
defm SULD_2D_I8_CLAMP  : SULD_2D<"suld.b.2d.b8.clamp",  Int16Regs>;
defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_2D_I8_TRAP  : SULD_2D<"suld.b.2d.b8.trap",  Int16Regs>;
defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_2D_I8_ZERO  : SULD_2D<"suld.b.2d.b8.zero",  Int16Regs>;
defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;
3891
// 2D-array surface load with a single `outtype` result $r.  The coordinate
// list is printed as {$l, $x, $y, $y}: $y is deliberately repeated so the
// list has four elements (the PTX ISA describes the .a2d coordinate as a
// four-element vector whose last element is ignored).
class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r),
      !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
      inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;

// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_ARRAY_base<inst, outtype,
                              (ins Int64Regs:$s)>;
  def _I : SULD_2D_ARRAY_base<inst, outtype,
                              (ins i64imm:$s)>;
}

// .clamp variants (b8 results use Int16Regs).
defm SULD_2D_ARRAY_I8_CLAMP :
  SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I16_CLAMP :
  SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I32_CLAMP :
  SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_I64_CLAMP :
  SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_2D_ARRAY_I8_TRAP :
  SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_I16_TRAP :
  SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_I32_TRAP :
  SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_I64_TRAP :
  SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_2D_ARRAY_I8_ZERO :
  SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_I16_ZERO :
  SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_I32_ZERO :
  SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_I64_ZERO :
  SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;
3916
// 3D surface load with a single `outtype` result $r.  The coordinate list is
// printed as {$x, $y, $z, $z}: $z is deliberately repeated so the list has
// four elements (the PTX ISA describes the .3d coordinate as a four-element
// vector whose last element is ignored).
class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
      inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;

// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_base<inst, outtype,
                        (ins Int64Regs:$s)>;
  def _I : SULD_3D_base<inst, outtype,
                        (ins i64imm:$s)>;
}

// .clamp variants (b8 results use Int16Regs).
defm SULD_3D_I8_CLAMP  : SULD_3D<"suld.b.3d.b8.clamp",  Int16Regs>;
defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_3D_I8_TRAP  : SULD_3D<"suld.b.3d.b8.trap",  Int16Regs>;
defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_3D_I8_ZERO  : SULD_3D<"suld.b.3d.b8.zero",  Int16Regs>;
defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
3941}
3942
3943let IsSuld = 2 in {
3944
// 1D surface load with two `outtype` results ($r, $g), addressed as
// [$s, {$x}].  `surf` is prepended to the inputs and must supply $s.
class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g),
      !con(surf, (ins Int32Regs:$x)),
      inst # " \\{$r, $g\\}, [$s, \\{$x\\}];",
      []>;

// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_V2_base<inst, outtype,
                           (ins Int64Regs:$s)>;
  def _I : SULD_1D_V2_base<inst, outtype,
                           (ins i64imm:$s)>;
}

// .clamp variants (b8 results use Int16Regs).
defm SULD_1D_V2I8_CLAMP :
  SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_V2I16_CLAMP :
  SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_V2I32_CLAMP :
  SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_V2I64_CLAMP :
  SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_1D_V2I8_TRAP :
  SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_V2I16_TRAP :
  SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_V2I32_TRAP :
  SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_V2I64_TRAP :
  SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_1D_V2I8_ZERO :
  SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_V2I16_ZERO :
  SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_V2I32_ZERO :
  SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_V2I64_ZERO :
  SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
3969
// 1D-array surface load with two `outtype` results ($r, $g), addressed as
// [$s, {$l, $x}].  `surf` is prepended to the inputs and must supply $s.
class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g),
      !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
      inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
      []>;

// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_ARRAY_V2_base<inst, outtype,
                                 (ins Int64Regs:$s)>;
  def _I : SULD_1D_ARRAY_V2_base<inst, outtype,
                                 (ins i64imm:$s)>;
}

// .clamp variants (b8 results use Int16Regs).
defm SULD_1D_ARRAY_V2I8_CLAMP :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_CLAMP :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_CLAMP :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_CLAMP :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_1D_ARRAY_V2I8_TRAP :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_TRAP :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_TRAP :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_TRAP :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_1D_ARRAY_V2I8_ZERO :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_ZERO :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_ZERO :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_ZERO :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;
4006
// 2D surface load with two `outtype` results ($r, $g), addressed as
// [$s, {$x, $y}].  `surf` is prepended to the inputs and must supply $s.
class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
      inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
      []>;

// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_V2_base<inst, outtype,
                           (ins Int64Regs:$s)>;
  def _I : SULD_2D_V2_base<inst, outtype,
                           (ins i64imm:$s)>;
}

// .clamp variants (b8 results use Int16Regs).
defm SULD_2D_V2I8_CLAMP :
  SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_V2I16_CLAMP :
  SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_V2I32_CLAMP :
  SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_V2I64_CLAMP :
  SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_2D_V2I8_TRAP :
  SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
defm SULD_2D_V2I16_TRAP :
  SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_V2I32_TRAP :
  SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_V2I64_TRAP :
  SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_2D_V2I8_ZERO :
  SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
defm SULD_2D_V2I16_ZERO :
  SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_V2I32_ZERO :
  SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_V2I64_ZERO :
  SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;
4043
// 2D-array surface load with two `outtype` results ($r, $g).  The coordinate
// list is printed as {$l, $x, $y, $y}: $y is deliberately repeated so the
// list has four elements (the PTX ISA describes the .a2d coordinate as a
// four-element vector whose last element is ignored).
class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g),
      !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
      inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;

// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_ARRAY_V2_base<inst, outtype,
                                 (ins Int64Regs:$s)>;
  def _I : SULD_2D_ARRAY_V2_base<inst, outtype,
                                 (ins i64imm:$s)>;
}

// .clamp variants (b8 results use Int16Regs).
defm SULD_2D_ARRAY_V2I8_CLAMP :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_CLAMP :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_CLAMP :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_CLAMP :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_2D_ARRAY_V2I8_TRAP :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_TRAP :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_TRAP :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_TRAP :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_2D_ARRAY_V2I8_ZERO :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_ZERO :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_ZERO :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_ZERO :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;
4080
// 3D surface load with two `outtype` results ($r, $g).  The coordinate list
// is printed as {$x, $y, $z, $z}: $z is deliberately repeated so the list has
// four elements (the PTX ISA describes the .3d coordinate as a four-element
// vector whose last element is ignored).
class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
      inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;

// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_V2_base<inst, outtype,
                           (ins Int64Regs:$s)>;
  def _I : SULD_3D_V2_base<inst, outtype,
                           (ins i64imm:$s)>;
}

// .clamp variants (b8 results use Int16Regs).
defm SULD_3D_V2I8_CLAMP :
  SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
defm SULD_3D_V2I16_CLAMP :
  SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
defm SULD_3D_V2I32_CLAMP :
  SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
defm SULD_3D_V2I64_CLAMP :
  SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_3D_V2I8_TRAP :
  SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
defm SULD_3D_V2I16_TRAP :
  SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
defm SULD_3D_V2I32_TRAP :
  SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
defm SULD_3D_V2I64_TRAP :
  SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_3D_V2I8_ZERO :
  SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
defm SULD_3D_V2I16_ZERO :
  SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
defm SULD_3D_V2I32_ZERO :
  SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
defm SULD_3D_V2I64_ZERO :
  SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
4105
4106}
4107
4108let IsSuld = 3 in {
4109
// 1D surface load with four `outtype` results ($r, $g, $b, $a), addressed as
// [$s, {$x}].  `surf` is prepended to the inputs and must supply $s.
class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(surf, (ins Int32Regs:$x)),
      inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
      []>;

// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_V4_base<inst, outtype,
                           (ins Int64Regs:$s)>;
  def _I : SULD_1D_V4_base<inst, outtype,
                           (ins i64imm:$s)>;
}

// Only b8/b16/b32 element sizes are instantiated for the four-result form.
// .clamp variants (b8 results use Int16Regs).
defm SULD_1D_V4I8_CLAMP :
  SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_V4I16_CLAMP :
  SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_V4I32_CLAMP :
  SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;

// .trap variants.
defm SULD_1D_V4I8_TRAP :
  SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_V4I16_TRAP :
  SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_V4I32_TRAP :
  SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;

// .zero variants.
defm SULD_1D_V4I8_ZERO :
  SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_V4I16_ZERO :
  SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_V4I32_ZERO :
  SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
4131
// 1D-array surface load with four `outtype` results ($r, $g, $b, $a),
// addressed as [$s, {$l, $x}].  `surf` is prepended to the inputs and must
// supply $s.
class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
      inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
      []>;

// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_ARRAY_V4_base<inst, outtype,
                                 (ins Int64Regs:$s)>;
  def _I : SULD_1D_ARRAY_V4_base<inst, outtype,
                                 (ins i64imm:$s)>;
}

// Only b8/b16/b32 element sizes are instantiated for the four-result form.
// .clamp variants (b8 results use Int16Regs).
defm SULD_1D_ARRAY_V4I8_CLAMP :
  SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_CLAMP :
  SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_CLAMP :
  SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;

// .trap variants.
defm SULD_1D_ARRAY_V4I8_TRAP :
  SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_TRAP :
  SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_TRAP :
  SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;

// .zero variants.
defm SULD_1D_ARRAY_V4I8_ZERO :
  SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_ZERO :
  SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_ZERO :
  SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;
4162
// 2D surface load with four `outtype` results ($r, $g, $b, $a), addressed as
// [$s, {$x, $y}].  `surf` is prepended to the inputs and must supply $s.
class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
      inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
      []>;

// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_V4_base<inst, outtype,
                           (ins Int64Regs:$s)>;
  def _I : SULD_2D_V4_base<inst, outtype,
                           (ins i64imm:$s)>;
}

// Only b8/b16/b32 element sizes are instantiated for the four-result form.
// .clamp variants (b8 results use Int16Regs).
defm SULD_2D_V4I8_CLAMP :
  SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_V4I16_CLAMP :
  SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_V4I32_CLAMP :
  SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;

// .trap variants.
defm SULD_2D_V4I8_TRAP :
  SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_V4I16_TRAP :
  SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_V4I32_TRAP :
  SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;

// .zero variants.
defm SULD_2D_V4I8_ZERO :
  SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_V4I16_ZERO :
  SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_V4I32_ZERO :
  SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
4184
// 2D-array surface load with four `outtype` results ($r, $g, $b, $a).  The
// coordinate list is printed as {$l, $x, $y, $y}: $y is deliberately repeated
// so the list has four elements (the PTX ISA describes the .a2d coordinate as
// a four-element vector whose last element is ignored).
class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
      inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;

// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_ARRAY_V4_base<inst, outtype,
                                 (ins Int64Regs:$s)>;
  def _I : SULD_2D_ARRAY_V4_base<inst, outtype,
                                 (ins i64imm:$s)>;
}

// Only b8/b16/b32 element sizes are instantiated for the four-result form.
// .clamp variants (b8 results use Int16Regs).
defm SULD_2D_ARRAY_V4I8_CLAMP :
  SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_CLAMP :
  SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_CLAMP :
  SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;

// .trap variants.
defm SULD_2D_ARRAY_V4I8_TRAP :
  SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_TRAP :
  SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_TRAP :
  SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;

// .zero variants.
defm SULD_2D_ARRAY_V4I8_ZERO :
  SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_ZERO :
  SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_ZERO :
  SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;
4215
// 3D surface load with four `outtype` results ($r, $g, $b, $a).  The
// coordinate list is printed as {$x, $y, $z, $z}: $z is deliberately repeated
// so the list has four elements (the PTX ISA describes the .3d coordinate as
// a four-element vector whose last element is ignored).
class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
      inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;

// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_V4_base<inst, outtype,
                           (ins Int64Regs:$s)>;
  def _I : SULD_3D_V4_base<inst, outtype,
                           (ins i64imm:$s)>;
}

// Only b8/b16/b32 element sizes are instantiated for the four-result form.
// .clamp variants (b8 results use Int16Regs).
defm SULD_3D_V4I8_CLAMP :
  SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
defm SULD_3D_V4I16_CLAMP :
  SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
defm SULD_3D_V4I32_CLAMP :
  SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;

// .trap variants.
defm SULD_3D_V4I8_TRAP :
  SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
defm SULD_3D_V4I16_TRAP :
  SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
defm SULD_3D_V4I32_TRAP :
  SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;

// .zero variants.
defm SULD_3D_V4I8_ZERO :
  SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
defm SULD_3D_V4I16_ZERO :
  SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
defm SULD_3D_V4I32_ZERO :
  SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;
4237
4238}
4239
4240//-----------------------------------
4241// Texture Query Intrinsics
4242//-----------------------------------
4243
// txq.* instructions.  Each query writes a 32-bit result to $d and comes in
// two forms: _R reads the operand $a from an Int64Regs register, _I takes it
// as an i64imm.  None carries a selection pattern of its own.
let IsSurfTexQuery = true in {

// channel_order
def TXQ_CHANNEL_ORDER_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "txq.channel_order.b32 \t$d, [$a];", []>;
def TXQ_CHANNEL_ORDER_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "txq.channel_order.b32 \t$d, [$a];", []>;

// channel_data_type
def TXQ_CHANNEL_DATA_TYPE_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "txq.channel_data_type.b32 \t$d, [$a];", []>;
def TXQ_CHANNEL_DATA_TYPE_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "txq.channel_data_type.b32 \t$d, [$a];", []>;

// width
def TXQ_WIDTH_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "txq.width.b32 \t$d, [$a];", []>;
def TXQ_WIDTH_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "txq.width.b32 \t$d, [$a];", []>;

// height
def TXQ_HEIGHT_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "txq.height.b32 \t$d, [$a];", []>;
def TXQ_HEIGHT_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "txq.height.b32 \t$d, [$a];", []>;

// depth
def TXQ_DEPTH_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "txq.depth.b32 \t$d, [$a];", []>;
def TXQ_DEPTH_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "txq.depth.b32 \t$d, [$a];", []>;

// array_size
def TXQ_ARRAY_SIZE_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "txq.array_size.b32 \t$d, [$a];", []>;
def TXQ_ARRAY_SIZE_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "txq.array_size.b32 \t$d, [$a];", []>;

// num_samples
def TXQ_NUM_SAMPLES_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "txq.num_samples.b32 \t$d, [$a];", []>;
def TXQ_NUM_SAMPLES_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "txq.num_samples.b32 \t$d, [$a];", []>;

// num_mipmap_levels
def TXQ_NUM_MIPMAP_LEVELS_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "txq.num_mipmap_levels.b32 \t$d, [$a];", []>;
def TXQ_NUM_MIPMAP_LEVELS_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "txq.num_mipmap_levels.b32 \t$d, [$a];", []>;

} // IsSurfTexQuery = true
4310
// Select each int_nvvm_txq_* intrinsic to the register (_R) form of the
// matching TXQ instruction.
def : Pat<(int_nvvm_txq_channel_order     Int64Regs:$tex),
          (TXQ_CHANNEL_ORDER_R            Int64Regs:$tex)>;
def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$tex),
          (TXQ_CHANNEL_DATA_TYPE_R        Int64Regs:$tex)>;
def : Pat<(int_nvvm_txq_width             Int64Regs:$tex),
          (TXQ_WIDTH_R                    Int64Regs:$tex)>;
def : Pat<(int_nvvm_txq_height            Int64Regs:$tex),
          (TXQ_HEIGHT_R                   Int64Regs:$tex)>;
def : Pat<(int_nvvm_txq_depth             Int64Regs:$tex),
          (TXQ_DEPTH_R                    Int64Regs:$tex)>;
def : Pat<(int_nvvm_txq_array_size        Int64Regs:$tex),
          (TXQ_ARRAY_SIZE_R               Int64Regs:$tex)>;
def : Pat<(int_nvvm_txq_num_samples       Int64Regs:$tex),
          (TXQ_NUM_SAMPLES_R              Int64Regs:$tex)>;
def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$tex),
          (TXQ_NUM_MIPMAP_LEVELS_R        Int64Regs:$tex)>;
4327
4328
4329//-----------------------------------
4330// Surface Query Intrinsics
4331//-----------------------------------
4332
// suq.* instructions.  Each query writes a 32-bit result to $d and comes in
// two forms: _R reads the operand $a from an Int64Regs register, _I takes it
// as an i64imm.  None carries a selection pattern of its own.
let IsSurfTexQuery = true in {

// channel_order
def SUQ_CHANNEL_ORDER_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "suq.channel_order.b32 \t$d, [$a];", []>;
def SUQ_CHANNEL_ORDER_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "suq.channel_order.b32 \t$d, [$a];", []>;

// channel_data_type
def SUQ_CHANNEL_DATA_TYPE_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "suq.channel_data_type.b32 \t$d, [$a];", []>;
def SUQ_CHANNEL_DATA_TYPE_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "suq.channel_data_type.b32 \t$d, [$a];", []>;

// width
def SUQ_WIDTH_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "suq.width.b32 \t$d, [$a];", []>;
def SUQ_WIDTH_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "suq.width.b32 \t$d, [$a];", []>;

// height
def SUQ_HEIGHT_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "suq.height.b32 \t$d, [$a];", []>;
def SUQ_HEIGHT_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "suq.height.b32 \t$d, [$a];", []>;

// depth
def SUQ_DEPTH_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "suq.depth.b32 \t$d, [$a];", []>;
def SUQ_DEPTH_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "suq.depth.b32 \t$d, [$a];", []>;

// array_size
def SUQ_ARRAY_SIZE_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "suq.array_size.b32 \t$d, [$a];", []>;
def SUQ_ARRAY_SIZE_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "suq.array_size.b32 \t$d, [$a];", []>;

} // IsSurfTexQuery = true
4383
// Select each int_nvvm_suq_* intrinsic to the register (_R) form of the
// matching SUQ instruction.
def : Pat<(int_nvvm_suq_channel_order     Int64Regs:$surf),
          (SUQ_CHANNEL_ORDER_R            Int64Regs:$surf)>;
def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$surf),
          (SUQ_CHANNEL_DATA_TYPE_R        Int64Regs:$surf)>;
def : Pat<(int_nvvm_suq_width             Int64Regs:$surf),
          (SUQ_WIDTH_R                    Int64Regs:$surf)>;
def : Pat<(int_nvvm_suq_height            Int64Regs:$surf),
          (SUQ_HEIGHT_R                   Int64Regs:$surf)>;
def : Pat<(int_nvvm_suq_depth             Int64Regs:$surf),
          (SUQ_DEPTH_R                    Int64Regs:$surf)>;
def : Pat<(int_nvvm_suq_array_size        Int64Regs:$surf),
          (SUQ_ARRAY_SIZE_R               Int64Regs:$surf)>;
4396
4397
4398//===- Handle Query -------------------------------------------------------===//
4399
4400// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// istypep.* handle-type predicates.  Each takes a 64-bit register operand $a,
// writes an Int1Regs result $d, and is selected directly from the
// corresponding int_nvvm_istypep_* intrinsic.
def ISTYPEP_SAMPLER :
  NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
            "istypep.samplerref \t$d, $a;",
            [(set Int1Regs:$d,
                  (int_nvvm_istypep_sampler Int64Regs:$a))]>;

def ISTYPEP_SURFACE :
  NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
            "istypep.surfref \t$d, $a;",
            [(set Int1Regs:$d,
                  (int_nvvm_istypep_surface Int64Regs:$a))]>;

def ISTYPEP_TEXTURE :
  NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
            "istypep.texref \t$d, $a;",
            [(set Int1Regs:$d,
                  (int_nvvm_istypep_texture Int64Regs:$a))]>;
4413
4414//===- Surface Stores -----------------------------------------------------===//
4415
4416let IsSust = true in {
4417
// 1D surface store of a single `intype` value $r, addressed as [$s, {$x}].
// The instruction has no outputs.  `surf` is prepended to the inputs and must
// supply $s.
class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$x, intype:$r)),
      inst # " \t[$s, \\{$x\\}], \\{$r\\};",
      []>;

// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SUST_1D<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_base<inst, intype,
                        (ins Int64Regs:$s)>;
  def _I : SUST_1D_base<inst, intype,
                        (ins i64imm:$s)>;
}

// sust.b, .clamp variants (b8 values use Int16Regs).
defm SUST_B_1D_B8_CLAMP  : SUST_1D<"sust.b.1d.b8.clamp",  Int16Regs>;
defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;

// sust.b, .trap variants.
defm SUST_B_1D_B8_TRAP  : SUST_1D<"sust.b.1d.b8.trap",  Int16Regs>;
defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;

// sust.b, .zero variants.
defm SUST_B_1D_B8_ZERO  : SUST_1D<"sust.b.1d.b8.zero",  Int16Regs>;
defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;

// sust.p: only .trap with b8/b16/b32 is instantiated.
defm SUST_P_1D_B8_TRAP  : SUST_1D<"sust.p.1d.b8.trap",  Int16Regs>;
defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
4446
// 1D surface store of a two-element data vector.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class shared by the data operands $r and $g.
//   surf   - dag supplying the leading surface operand $s.
// There are no outputs and the pattern list is empty.
class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
      !strconcat(inst, " \t[$s, \\{$x\\}], \\{$r, $g\\};"),
      []>;

// Emits two records per instantiation:
//   _R - surface handle $s is an Int64Regs operand.
//   _I - surface handle $s is an i64imm operand.
multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
}
4456
// Two-element 1D surface stores. Each defm goes through SUST_1D_V2 and
// therefore yields an _R and an _I record.
//   sust.b : v2.b8/b16/b32/b64, in .clamp, .trap and .zero forms.
//   sust.p : v2.b8/b16/b32, .trap form only.
// Both the b8 and the b16 variants take their data in Int16Regs.
defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;

defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;

defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;

defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
4475
// 1D surface store of a four-element data vector.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class shared by the data operands $r, $g, $b and $a.
//   surf   - dag supplying the leading surface operand $s.
// There are no outputs and the pattern list is empty.
class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$x, intype:$r, intype:$g, intype:$b, intype:$a)),
      !strconcat(inst, " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};"),
      []>;

// Emits two records per instantiation:
//   _R - surface handle $s is an Int64Regs operand.
//   _I - surface handle $s is an i64imm operand.
multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
}
4486
// Four-element 1D surface stores. Each defm goes through SUST_1D_V4 and
// therefore yields an _R and an _I record.
//   sust.b : v4.b8/b16/b32, in .clamp, .trap and .zero forms.
//   sust.p : v4.b8/b16/b32, .trap form only.
// No b64 variant is defined for the four-element form.
// Both the b8 and the b16 variants take their data in Int16Regs.
defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;
defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;

defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>;
defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;

defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>;
defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;

defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;
defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
4502
// 1D-array surface store of a single data element.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class of the data operand $r.
//   surf   - dag supplying the leading surface operand $s.
// The coordinate vector is printed as {$idx, $x}. There are no outputs and
// the pattern list is empty.
class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
      !strconcat(inst, " \t[$s, \\{$idx, $x\\}], \\{$r\\};"),
      []>;

// Emits two records per instantiation:
//   _R - surface handle $s is an Int64Regs operand.
//   _I - surface handle $s is an i64imm operand.
multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
}
4512
// Scalar 1D-array surface stores (mnemonic geometry "a1d"). Each defm goes
// through SUST_1D_ARRAY and therefore yields an _R and an _I record.
//   sust.b : b8/b16/b32/b64, in .clamp, .trap and .zero forms.
//   sust.p : b8/b16/b32, .trap form only.
// Both the b8 and the b16 variants take their data in Int16Regs.
defm SUST_B_1D_ARRAY_B8_CLAMP
  : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_B16_CLAMP
  : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_B32_CLAMP
  : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
defm SUST_B_1D_ARRAY_B64_CLAMP
  : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;

defm SUST_B_1D_ARRAY_B8_TRAP
  : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_B16_TRAP
  : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_B32_TRAP
  : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
defm SUST_B_1D_ARRAY_B64_TRAP
  : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;

defm SUST_B_1D_ARRAY_B8_ZERO
  : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_B16_ZERO
  : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_B32_ZERO
  : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
defm SUST_B_1D_ARRAY_B64_ZERO
  : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;

defm SUST_P_1D_ARRAY_B8_TRAP
  : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_B16_TRAP
  : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_B32_TRAP
  : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
4546
// 1D-array surface store of a two-element data vector.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class shared by the data operands $r and $g.
//   surf   - dag supplying the leading surface operand $s.
// The coordinate vector is printed as {$idx, $x}. There are no outputs and
// the pattern list is empty.
class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r, intype:$g)),
      !strconcat(inst, " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};"),
      []>;

// Emits two records per instantiation:
//   _R - surface handle $s is an Int64Regs operand.
//   _I - surface handle $s is an i64imm operand.
multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
}
4557
// Two-element 1D-array surface stores (mnemonic geometry "a1d"). Each defm
// goes through SUST_1D_ARRAY_V2 and therefore yields an _R and an _I record.
//   sust.b : v2.b8/b16/b32/b64, in .clamp, .trap and .zero forms.
//   sust.p : v2.b8/b16/b32, .trap form only.
// Both the b8 and the b16 variants take their data in Int16Regs.
defm SUST_B_1D_ARRAY_V2B8_CLAMP
  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B16_CLAMP
  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B32_CLAMP
  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
defm SUST_B_1D_ARRAY_V2B64_CLAMP
  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;

defm SUST_B_1D_ARRAY_V2B8_TRAP
  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B16_TRAP
  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B32_TRAP
  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
defm SUST_B_1D_ARRAY_V2B64_TRAP
  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;

defm SUST_B_1D_ARRAY_V2B8_ZERO
  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B16_ZERO
  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B32_ZERO
  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
defm SUST_B_1D_ARRAY_V2B64_ZERO
  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;

defm SUST_P_1D_ARRAY_V2B8_TRAP
  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V2B16_TRAP
  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V2B32_TRAP
  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
4591
// 1D-array surface store of a four-element data vector.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class shared by the data operands $r, $g, $b and $a.
//   surf   - dag supplying the leading surface operand $s.
// The coordinate vector is printed as {$idx, $x}. There are no outputs and
// the pattern list is empty.
class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$idx, Int32Regs:$x,
                intype:$r, intype:$g, intype:$b, intype:$a)),
      !strconcat(inst, " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};"),
      []>;

// Emits two records per instantiation:
//   _R - surface handle $s is an Int64Regs operand.
//   _I - surface handle $s is an i64imm operand.
multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
}
4602
// Four-element 1D-array surface stores (mnemonic geometry "a1d"). Each defm
// goes through SUST_1D_ARRAY_V4 and therefore yields an _R and an _I record.
//   sust.b : v4.b8/b16/b32, in .clamp, .trap and .zero forms.
//   sust.p : v4.b8/b16/b32, .trap form only.
// No b64 variant is defined for the four-element form.
// Both the b8 and the b16 variants take their data in Int16Regs.
defm SUST_B_1D_ARRAY_V4B8_CLAMP
  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B16_CLAMP
  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B32_CLAMP
  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;

defm SUST_B_1D_ARRAY_V4B8_TRAP
  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B16_TRAP
  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B32_TRAP
  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;

defm SUST_B_1D_ARRAY_V4B8_ZERO
  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B16_ZERO
  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B32_ZERO
  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;

defm SUST_P_1D_ARRAY_V4B8_TRAP
  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V4B16_TRAP
  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V4B32_TRAP
  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
4630
// 2D surface store of a single data element.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class of the data operand $r.
//   surf   - dag supplying the leading surface operand $s.
// The coordinate vector is printed as {$x, $y}. There are no outputs and the
// pattern list is empty.
class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
      !strconcat(inst, " \t[$s, \\{$x, $y\\}], \\{$r\\};"),
      []>;

// Emits two records per instantiation:
//   _R - surface handle $s is an Int64Regs operand.
//   _I - surface handle $s is an i64imm operand.
multiclass SUST_2D<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
}
4640
// Scalar 2D surface stores. Each defm goes through SUST_2D and therefore
// yields an _R and an _I record.
//   sust.b : b8/b16/b32/b64, in .clamp, .trap and .zero forms.
//   sust.p : b8/b16/b32, .trap form only.
// Both the b8 and the b16 variants take their data in Int16Regs.
defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;

defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;

defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;

defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
4659
// 2D surface store of a two-element data vector.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class shared by the data operands $r and $g.
//   surf   - dag supplying the leading surface operand $s.
// The coordinate vector is printed as {$x, $y}. There are no outputs and the
// pattern list is empty.
class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r, intype:$g)),
      !strconcat(inst, " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};"),
      []>;

// Emits two records per instantiation:
//   _R - surface handle $s is an Int64Regs operand.
//   _I - surface handle $s is an i64imm operand.
multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
}
4670
// Two-element 2D surface stores. Each defm goes through SUST_2D_V2 and
// therefore yields an _R and an _I record.
//   sust.b : v2.b8/b16/b32/b64, in .clamp, .trap and .zero forms.
//   sust.p : v2.b8/b16/b32, .trap form only.
// Both the b8 and the b16 variants take their data in Int16Regs.
defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;

defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;

defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;

defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
4689
// 2D surface store of a four-element data vector.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class shared by the data operands $r, $g, $b and $a.
//   surf   - dag supplying the leading surface operand $s.
// The coordinate vector is printed as {$x, $y}. There are no outputs and the
// pattern list is empty.
class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$x, Int32Regs:$y,
                intype:$r, intype:$g, intype:$b, intype:$a)),
      !strconcat(inst, " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};"),
      []>;

// Emits two records per instantiation:
//   _R - surface handle $s is an Int64Regs operand.
//   _I - surface handle $s is an i64imm operand.
multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
}
4700
// Four-element 2D surface stores. Each defm goes through SUST_2D_V4 and
// therefore yields an _R and an _I record.
//   sust.b : v4.b8/b16/b32, in .clamp, .trap and .zero forms.
//   sust.p : v4.b8/b16/b32, .trap form only.
// No b64 variant is defined for the four-element form.
// Both the b8 and the b16 variants take their data in Int16Regs.
defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;

defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;

defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;

defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
4716
// 2D-array surface store of a single data element.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class of the data operand $r.
//   surf   - dag supplying the leading surface operand $s.
// The coordinate vector is printed as {$idx, $x, $y, $y}: $y appears twice,
// so four elements are emitted from three coordinate operands.
// NOTE(review): presumably the fourth slot is padding that PTX ignores for
// this geometry -- confirm against the PTX ISA before changing it.
// There are no outputs and the pattern list is empty.
class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, intype:$r)),
      !strconcat(inst, " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};"),
      []>;

// Emits two records per instantiation:
//   _R - surface handle $s is an Int64Regs operand.
//   _I - surface handle $s is an i64imm operand.
multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
}
4727
// Scalar 2D-array surface stores (mnemonic geometry "a2d"). Each defm goes
// through SUST_2D_ARRAY and therefore yields an _R and an _I record.
//   sust.b : b8/b16/b32/b64, in .clamp, .trap and .zero forms.
//   sust.p : b8/b16/b32, .trap form only.
// Both the b8 and the b16 variants take their data in Int16Regs.
defm SUST_B_2D_ARRAY_B8_CLAMP
  : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_CLAMP
  : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_CLAMP
  : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_CLAMP
  : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;

defm SUST_B_2D_ARRAY_B8_TRAP
  : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_TRAP
  : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_TRAP
  : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_TRAP
  : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;

defm SUST_B_2D_ARRAY_B8_ZERO
  : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_ZERO
  : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_ZERO
  : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_ZERO
  : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;

defm SUST_P_2D_ARRAY_B8_TRAP
  : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_B16_TRAP
  : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_B32_TRAP
  : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
4761
// 2D-array surface store of a two-element data vector.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class shared by the data operands $r and $g.
//   surf   - dag supplying the leading surface operand $s.
// The coordinate vector is printed as {$idx, $x, $y, $y}: $y appears twice,
// so four elements are emitted from three coordinate operands.
// NOTE(review): presumably the fourth slot is padding that PTX ignores for
// this geometry -- confirm against the PTX ISA before changing it.
// There are no outputs and the pattern list is empty.
class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                intype:$r, intype:$g)),
      !strconcat(inst, " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};"),
      []>;

// Emits two records per instantiation:
//   _R - surface handle $s is an Int64Regs operand.
//   _I - surface handle $s is an i64imm operand.
multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
}
4772
// Two-element 2D-array surface stores (mnemonic geometry "a2d"). Each defm
// goes through SUST_2D_ARRAY_V2 and therefore yields an _R and an _I record.
//   sust.b : v2.b8/b16/b32/b64, in .clamp, .trap and .zero forms.
//   sust.p : v2.b8/b16/b32, .trap form only.
// Both the b8 and the b16 variants take their data in Int16Regs.
defm SUST_B_2D_ARRAY_V2B8_CLAMP
  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_CLAMP
  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_CLAMP
  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_CLAMP
  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;

defm SUST_B_2D_ARRAY_V2B8_TRAP
  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_TRAP
  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_TRAP
  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_TRAP
  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;

defm SUST_B_2D_ARRAY_V2B8_ZERO
  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_ZERO
  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_ZERO
  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_ZERO
  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;

defm SUST_P_2D_ARRAY_V2B8_TRAP
  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V2B16_TRAP
  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V2B32_TRAP
  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
4806
// 2D-array surface store of a four-element data vector.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class shared by the data operands $r, $g, $b and $a.
//   surf   - dag supplying the leading surface operand $s.
// The coordinate vector is printed as {$idx, $x, $y, $y}: $y appears twice,
// so four elements are emitted from three coordinate operands.
// NOTE(review): presumably the fourth slot is padding that PTX ignores for
// this geometry -- confirm against the PTX ISA before changing it.
// There are no outputs and the pattern list is empty.
class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                intype:$r, intype:$g, intype:$b, intype:$a)),
      !strconcat(inst,
                 " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};"),
      []>;

// Emits two records per instantiation:
//   _R - surface handle $s is an Int64Regs operand.
//   _I - surface handle $s is an i64imm operand.
multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
}
4817
// Four-element 2D-array surface stores (mnemonic geometry "a2d"). Each defm
// goes through SUST_2D_ARRAY_V4 and therefore yields an _R and an _I record.
//   sust.b : v4.b8/b16/b32, in .clamp, .trap and .zero forms.
//   sust.p : v4.b8/b16/b32, .trap form only.
// No b64 variant is defined for the four-element form.
// Both the b8 and the b16 variants take their data in Int16Regs.
defm SUST_B_2D_ARRAY_V4B8_CLAMP
  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_CLAMP
  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_CLAMP
  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;

defm SUST_B_2D_ARRAY_V4B8_TRAP
  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_TRAP
  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_TRAP
  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;

defm SUST_B_2D_ARRAY_V4B8_ZERO
  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_ZERO
  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_ZERO
  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;

defm SUST_P_2D_ARRAY_V4B8_TRAP
  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V4B16_TRAP
  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V4B32_TRAP
  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
4845
// 3D surface store of a single data element.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class of the data operand $r.
//   surf   - dag supplying the leading surface operand $s.
// The coordinate vector is printed as {$x, $y, $z, $z}: $z appears twice,
// so four elements are emitted from three coordinate operands.
// NOTE(review): presumably the fourth slot is padding that PTX ignores for
// this geometry -- confirm against the PTX ISA before changing it.
// There are no outputs and the pattern list is empty.
class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, intype:$r)),
      !strconcat(inst, " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};"),
      []>;

// Emits two records per instantiation:
//   _R - surface handle $s is an Int64Regs operand.
//   _I - surface handle $s is an i64imm operand.
multiclass SUST_3D<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
}
4856
// Scalar 3D surface stores. Each defm goes through SUST_3D and therefore
// yields an _R and an _I record.
//   sust.b : b8/b16/b32/b64, in .clamp, .trap and .zero forms.
//   sust.p : b8/b16/b32, .trap form only.
// Both the b8 and the b16 variants take their data in Int16Regs.
defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;

defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;

defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;

defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
4875
// 3D surface store of a two-element data vector.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class shared by the data operands $r and $g.
//   surf   - dag supplying the leading surface operand $s.
// The coordinate vector is printed as {$x, $y, $z, $z}: $z appears twice,
// so four elements are emitted from three coordinate operands.
// NOTE(review): presumably the fourth slot is padding that PTX ignores for
// this geometry -- confirm against the PTX ISA before changing it.
// There are no outputs and the pattern list is empty.
class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                intype:$r, intype:$g)),
      !strconcat(inst, " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};"),
      []>;

// Emits two records per instantiation:
//   _R - surface handle $s is an Int64Regs operand.
//   _I - surface handle $s is an i64imm operand.
multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
}
4886
// Two-element 3D surface stores. Each defm goes through SUST_3D_V2 and
// therefore yields an _R and an _I record.
//   sust.b : v2.b8/b16/b32/b64, in .clamp, .trap and .zero forms.
//   sust.p : v2.b8/b16/b32, .trap form only.
// Both the b8 and the b16 variants take their data in Int16Regs.
defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;

defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;

defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;

defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
4905
// 3D surface store of a four-element data vector.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class shared by the data operands $r, $g, $b and $a.
//   surf   - dag supplying the leading surface operand $s.
// The coordinate vector is printed as {$x, $y, $z, $z}: $z appears twice,
// so four elements are emitted from three coordinate operands.
// NOTE(review): presumably the fourth slot is padding that PTX ignores for
// this geometry -- confirm against the PTX ISA before changing it.
// There are no outputs and the pattern list is empty.
class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                intype:$r, intype:$g, intype:$b, intype:$a)),
      !strconcat(inst, " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};"),
      []>;

// Emits two records per instantiation:
//   _R - surface handle $s is an Int64Regs operand.
//   _I - surface handle $s is an i64imm operand.
multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
}
4916
// Four-element 3D surface stores. Each defm goes through SUST_3D_V4 and
// therefore yields an _R and an _I record.
//   sust.b : v4.b8/b16/b32, in .clamp, .trap and .zero forms.
//   sust.p : v4.b8/b16/b32, .trap form only.
// No b64 variant is defined for the four-element form.
// Both the b8 and the b16 variants take their data in Int16Regs.
defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;

defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;

defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;

defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
4932
4933}
4934
4935// Surface store instruction patterns
4936// I'm not sure why we can't just include these in the instruction definitions,
4937// but TableGen complains of type errors :(
4938
4939// .clamp variant
// 1D .clamp stores with the surface handle in a register: every intrinsic
// is selected to the _R form of the matching instruction, with all operands
// forwarded in the same order.

// Scalar data.
def : Pat<
  (int_nvvm_sust_b_1d_i8_clamp
     Int64Regs:$surf, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_B8_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i16_clamp
     Int64Regs:$surf, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_B16_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i32_clamp
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$r),
  (SUST_B_1D_B32_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i64_clamp
     Int64Regs:$surf, Int32Regs:$x, Int64Regs:$r),
  (SUST_B_1D_B64_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x, Int64Regs:$r)>;

// Two-element data.
def : Pat<
  (int_nvvm_sust_b_1d_v2i8_clamp
     Int64Regs:$surf, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B8_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i16_clamp
     Int64Regs:$surf, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B16_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i32_clamp
     Int64Regs:$surf, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_V2B32_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i64_clamp
     Int64Regs:$surf, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_V2B64_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g)>;

// Four-element data.
def : Pat<
  (int_nvvm_sust_b_1d_v4i8_clamp
     Int64Regs:$surf, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B8_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i16_clamp
     Int64Regs:$surf, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B16_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i32_clamp
     Int64Regs:$surf, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_V4B32_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
4993
4994
4995
// 1D-array .clamp stores with the surface handle in a register: every
// intrinsic is selected to the _R form of the matching instruction, with all
// operands forwarded in the same order. $idx is the operand that follows the
// handle and precedes $x.

// Scalar data.
def : Pat<
  (int_nvvm_sust_b_1d_array_i8_clamp
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_ARRAY_B8_CLAMP_R
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i16_clamp
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_ARRAY_B16_CLAMP_R
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i32_clamp
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
  (SUST_B_1D_ARRAY_B32_CLAMP_R
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i64_clamp
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
  (SUST_B_1D_ARRAY_B64_CLAMP_R
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r)>;

// Two-element data.
def : Pat<
  (int_nvvm_sust_b_1d_array_v2i8_clamp
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B8_CLAMP_R
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i16_clamp
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B16_CLAMP_R
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i32_clamp
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_ARRAY_V2B32_CLAMP_R
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i64_clamp
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_ARRAY_V2B64_CLAMP_R
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g)>;

// Four-element data.
def : Pat<
  (int_nvvm_sust_b_1d_array_v4i8_clamp
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B8_CLAMP_R
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i16_clamp
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B16_CLAMP_R
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i32_clamp
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_ARRAY_V4B32_CLAMP_R
     Int64Regs:$surf, Int32Regs:$idx, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5053
5054
5055
// 2D .clamp stores with the surface handle in a register: every intrinsic
// is selected to the _R form of the matching instruction, with all operands
// forwarded in the same order.

// Scalar data.
def : Pat<
  (int_nvvm_sust_b_2d_i8_clamp
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_B_2D_B8_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i16_clamp
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_B_2D_B16_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i32_clamp
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  (SUST_B_2D_B32_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i64_clamp
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
  (SUST_B_2D_B64_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;

// Two-element data.
def : Pat<
  (int_nvvm_sust_b_2d_v2i8_clamp
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B8_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i16_clamp
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B16_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i32_clamp
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_V2B32_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i64_clamp
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_V2B64_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g)>;

// Four-element data.
def : Pat<
  (int_nvvm_sust_b_2d_v4i8_clamp
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B8_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i16_clamp
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B16_CLAMP_R
     Int64Regs:$surf, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5107
5108def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
5109           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5110           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5111          (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5112           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5113
5114
5115
// int_nvvm_sust_b_2d_array_*_clamp selection patterns.
// Every intrinsic below selects the SUST_B_2D_ARRAY_*_CLAMP_R instruction
// with the matching element type, forwarding its operands unchanged and in
// the same order.  The i8 and i16 forms both take their data operands in
// Int16Regs.
// NOTE(review): $s is presumably the surface handle, $l the array layer and
// $x/$y the coordinates -- inferred from operand names only; confirm against
// the instruction definitions.

// One data operand ($r).
def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r)>;

// Two data operands ($r, $g).
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g)>;

// Four data operands ($r, $g, $b, $a).
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5186
5187
5188
// int_nvvm_sust_b_3d_*_clamp selection patterns.
// Every intrinsic below selects the SUST_B_3D_*_CLAMP_R instruction with the
// matching element type, forwarding its operands unchanged and in the same
// order.  The i8 and i16 forms both take their data operands in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- inferred from operand names only; confirm against the
// instruction definitions.

// One data operand ($r).
def : Pat<(int_nvvm_sust_b_3d_i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_B_3D_B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_B_3D_B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r),
          (SUST_B_3D_B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r),
          (SUST_B_3D_B64_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r)>;

// Two data operands ($r, $g).
def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r, Int64Regs:$g)>;

// Four data operands ($r, $g, $b, $a).
def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5265
5266
5267// .trap variant
// int_nvvm_sust_b_1d_*_trap selection patterns.
// Every intrinsic below selects the SUST_B_1D_*_TRAP_R instruction with the
// matching element type, forwarding its operands unchanged and in the same
// order.  The i8 and i16 forms both take their data operands in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// inferred from operand names only; confirm against the instruction
// definitions.

// One data operand ($r).
def : Pat<(int_nvvm_sust_b_1d_i8_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i16_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i32_trap
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r),
          (SUST_B_1D_B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i64_trap
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r),
          (SUST_B_1D_B64_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r)>;

// Two data operands ($r, $g).
def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g)>;

// Four data operands ($r, $g, $b, $a).
def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5321
5322
5323
// int_nvvm_sust_b_1d_array_*_trap selection patterns.
// Every intrinsic below selects the SUST_B_1D_ARRAY_*_TRAP_R instruction with
// the matching element type, forwarding its operands unchanged and in the
// same order.  The i8 and i16 forms both take their data operands in
// Int16Regs.
// NOTE(review): $s is presumably the surface handle, $l the array layer and
// $x the coordinate -- inferred from operand names only; confirm against the
// instruction definitions.

// One data operand ($r).
def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r)>;

// Two data operands ($r, $g).
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g)>;

// Four data operands ($r, $g, $b, $a).
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5381
5382
5383
// int_nvvm_sust_b_2d_*_trap selection patterns.
// Every intrinsic below selects the SUST_B_2D_*_TRAP_R instruction with the
// matching element type, forwarding its operands unchanged and in the same
// order.  The i8 and i16 forms both take their data operands in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- inferred from operand names only; confirm against the
// instruction definitions.

// One data operand ($r).
def : Pat<(int_nvvm_sust_b_2d_i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_B_2D_B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i64_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r),
          (SUST_B_2D_B64_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r)>;

// Two data operands ($r, $g).
def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_V2B64_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g)>;

// Four data operands ($r, $g, $b, $a).
def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5441
5442
5443
// int_nvvm_sust_b_2d_array_*_trap selection patterns.
// Every intrinsic below selects the SUST_B_2D_ARRAY_*_TRAP_R instruction with
// the matching element type, forwarding its operands unchanged and in the
// same order.  The i8 and i16 forms both take their data operands in
// Int16Regs.
// NOTE(review): $s is presumably the surface handle, $l the array layer and
// $x/$y the coordinates -- inferred from operand names only; confirm against
// the instruction definitions.

// One data operand ($r).
def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r)>;

// Two data operands ($r, $g).
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g)>;

// Four data operands ($r, $g, $b, $a).
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5514
5515
5516
// int_nvvm_sust_b_3d_*_trap selection patterns.
// Every intrinsic below selects the SUST_B_3D_*_TRAP_R instruction with the
// matching element type, forwarding its operands unchanged and in the same
// order.  The i8 and i16 forms both take their data operands in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- inferred from operand names only; confirm against the
// instruction definitions.

// One data operand ($r).
def : Pat<(int_nvvm_sust_b_3d_i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_B_3D_B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_B_3D_B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r),
          (SUST_B_3D_B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r),
          (SUST_B_3D_B64_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r)>;

// Two data operands ($r, $g).
def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r, Int64Regs:$g)>;

// Four data operands ($r, $g, $b, $a).
def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5593
5594
5595// .zero variant
// int_nvvm_sust_b_1d_*_zero selection patterns.
// Every intrinsic below selects the SUST_B_1D_*_ZERO_R instruction with the
// matching element type, forwarding its operands unchanged and in the same
// order.  The i8 and i16 forms both take their data operands in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// inferred from operand names only; confirm against the instruction
// definitions.

// One data operand ($r).
def : Pat<(int_nvvm_sust_b_1d_i8_zero
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_B8_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i16_zero
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_B16_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i32_zero
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r),
          (SUST_B_1D_B32_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i64_zero
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r),
          (SUST_B_1D_B64_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r)>;

// Two data operands ($r, $g).
def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g)>;

// Four data operands ($r, $g, $b, $a).
def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5649
5650
5651
// int_nvvm_sust_b_1d_array_*_zero selection patterns.
// Every intrinsic below selects the SUST_B_1D_ARRAY_*_ZERO_R instruction with
// the matching element type, forwarding its operands unchanged and in the
// same order.  The i8 and i16 forms both take their data operands in
// Int16Regs.
// NOTE(review): $s is presumably the surface handle, $l the array layer and
// $x the coordinate -- inferred from operand names only; confirm against the
// instruction definitions.

// One data operand ($r).
def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r)>;

// Two data operands ($r, $g).
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g)>;

// Four data operands ($r, $g, $b, $a).
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5709
5710
5711
// int_nvvm_sust_b_2d_*_zero selection patterns.
// Every intrinsic below selects the SUST_B_2D_*_ZERO_R instruction with the
// matching element type, forwarding its operands unchanged and in the same
// order.  The i8 and i16 forms both take their data operands in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- inferred from operand names only; confirm against the
// instruction definitions.

// One data operand ($r).
def : Pat<(int_nvvm_sust_b_2d_i8_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_B8_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i16_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_B16_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i32_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_B_2D_B32_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i64_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r),
          (SUST_B_2D_B64_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r)>;

// Two data operands ($r, $g).
def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_V2B64_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g)>;

// Four data operands ($r, $g, $b, $a).
def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5769
5770
5771
// int_nvvm_sust_b_2d_array_<ty>_zero -> SUST_B_2D_ARRAY_<TY>_ZERO_R.
// Operand order is $s (Int64Regs), $l, $x, $y (Int32Regs), followed by the
// data operands ($r, then $g, then $b/$a for the wider vector forms). 8-bit
// and 16-bit data are both passed in Int16Regs.

// Single-element stores.
def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r)>;

// Two-element stores.
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r, Int64Regs:$g)>;

// Four-element stores.
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5842
5843
5844
// int_nvvm_sust_b_3d_<ty>_zero -> SUST_B_3D_<TY>_ZERO_R.
// Operand order is $s (Int64Regs), $x, $y, $z (Int32Regs), followed by the
// data operands ($r, then $g, then $b/$a for the wider vector forms). 8-bit
// and 16-bit data are both passed in Int16Regs.

// Single-element stores.
def : Pat<(int_nvvm_sust_b_3d_i8_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r),
          (SUST_B_3D_B8_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r),
          (SUST_B_3D_B16_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r),
          (SUST_B_3D_B32_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int64Regs:$r),
          (SUST_B_3D_B64_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int64Regs:$r)>;

// Two-element stores.
def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int64Regs:$r, Int64Regs:$g)>;

// Four-element stores.
def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5921
5922
5923
5924
// int_nvvm_sust_p_1d_<ty>_trap -> SUST_P_1D_<TY>_TRAP_R.
// Operand order is $s (Int64Regs), $x (Int32Regs), followed by the data
// operands ($r, then $g, then $b/$a for the wider vector forms). 8-bit and
// 16-bit data are both passed in Int16Regs.

// Single-element stores.
def : Pat<(int_nvvm_sust_p_1d_i8_trap
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_P_1D_B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_i16_trap
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_P_1D_B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_i32_trap
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r),
          (SUST_P_1D_B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r)>;

// Two-element stores.
def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_V2B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_V2B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_P_1D_V2B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g)>;

// Four-element stores.
def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_V4B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_V4B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_1D_V4B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5969
5970
5971
// int_nvvm_sust_p_1d_array_<ty>_trap -> SUST_P_1D_ARRAY_<TY>_TRAP_R.
// Operand order is $s (Int64Regs), $l, $x (Int32Regs), followed by the data
// operands ($r, then $g, then $b/$a for the wider vector forms). 8-bit and
// 16-bit data are both passed in Int16Regs.

// Single-element stores.
def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_P_1D_ARRAY_B8_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_P_1D_ARRAY_B16_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r),
          (SUST_P_1D_ARRAY_B32_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r)>;

// Two-element stores.
def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_ARRAY_V2B8_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_ARRAY_V2B16_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_P_1D_ARRAY_V2B32_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g)>;

// Four-element stores.
def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_ARRAY_V4B8_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_ARRAY_V4B16_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_1D_ARRAY_V4B32_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6019
6020
6021
// int_nvvm_sust_p_2d_<ty>_trap -> SUST_P_2D_<TY>_TRAP_R.
// Operand order is $s (Int64Regs), $x, $y (Int32Regs), followed by the data
// operands ($r, then $g, then $b/$a for the wider vector forms). 8-bit and
// 16-bit data are both passed in Int16Regs.

// Single-element stores.
def : Pat<(int_nvvm_sust_p_2d_i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_P_2D_B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_P_2D_B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r),
          (SUST_P_2D_B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r)>;

// Two-element stores.
def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_V2B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g)>;

// Four-element stores.
def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_V4B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6069
6070
6071
// int_nvvm_sust_p_2d_array_<ty>_trap -> SUST_P_2D_ARRAY_<TY>_TRAP_R.
// Operand order is $s (Int64Regs), $l, $x, $y (Int32Regs), followed by the
// data operands ($r, then $g, then $b/$a for the wider vector forms). 8-bit
// and 16-bit data are both passed in Int16Regs.

// Single-element stores.
def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_P_2D_ARRAY_B8_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_P_2D_ARRAY_B16_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r),
          (SUST_P_2D_ARRAY_B32_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r)>;

// Two-element stores.
def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_ARRAY_V2B8_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_ARRAY_V2B16_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_ARRAY_V2B32_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g)>;

// Four-element stores.
def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_ARRAY_V4B8_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_ARRAY_V4B16_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_ARRAY_V4B32_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6130
6131
6132
// int_nvvm_sust_p_3d_<ty>_trap -> SUST_P_3D_<TY>_TRAP_R.
// Operand order is $s (Int64Regs), $x, $y, $z (Int32Regs), followed by the
// data operands ($r, then $g, then $b/$a for the wider vector forms). 8-bit
// and 16-bit data are both passed in Int16Regs.

// Single-element stores.
def : Pat<(int_nvvm_sust_p_3d_i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r),
          (SUST_P_3D_B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r),
          (SUST_P_3D_B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r),
          (SUST_P_3D_B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r)>;

// Two-element stores.
def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_P_3D_V2B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g)>;

// Four-element stores.
def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_3D_V4B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6195
6196//-----------------------------------
6197// Read Special Registers
6198//-----------------------------------
6199
// Instruction that copies the special register %<regname> into the 64-bit
// result $d using "mov.u64", selected for the operand-less intrinsic `intop`.
// `Preds` is handed to Requires<> and defaults to no predicates.
class PTX_READ_SREG_R64<string regname, Intrinsic intop,
                        list<Predicate> Preds = []>
    : NVPTXInst<(outs Int64Regs:$d), (ins),
                "mov.u64 \t$d, %" # regname # ";",
                [(set Int64Regs:$d, (intop))]>,
      Requires<Preds>;
6205
// Instruction that copies the special register %<regname> into the 32-bit
// result $d using "mov.u32", selected for the operand-less intrinsic `intop`.
// `Preds` is handed to Requires<> and defaults to no predicates.
class PTX_READ_SREG_R32<string regname, Intrinsic intop,
                        list<Predicate> Preds = []>
    : NVPTXInst<(outs Int32Regs:$d), (ins),
                "mov.u32 \t$d, %" # regname # ";",
                [(set Int32Regs:$d, (intop))]>,
      Requires<Preds>;
6211
// Defines four PTX_READ_SREG_R32 instructions, suffixed _x, _y, _z and _w,
// one per component of the special register <regname>. Component "x" reads
// "%<regname>.x" and is matched against the intrinsic named
// int_nvvm_read_ptx_sreg_<regname>_x (looked up by name), and likewise for
// the other components. `Preds` is applied to every generated instruction.
multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds = []> {
  foreach comp = ["x", "y", "z", "w"] in {
    defvar sreg = regname # "." # comp;
    defvar sreg_intr =
        !cast<Intrinsic>("int_nvvm_read_ptx_sreg_" # regname # "_" # comp);
    def "_" # comp : PTX_READ_SREG_R32<sreg, sreg_intr, Preds>;
  }
}
6219
// TODO: Add vector-read versions of the special registers.
6221
// Per-component (_x/_y/_z/_w) readers for %tid, %ntid, %ctaid and %nctaid.
// No predicates are attached to these.
defm INT_PTX_SREG_TID :
       PTX_READ_SREG_R32V4<"tid">;
defm INT_PTX_SREG_NTID :
       PTX_READ_SREG_R32V4<"ntid">;
defm INT_PTX_SREG_CTAID :
       PTX_READ_SREG_R32V4<"ctaid">;
defm INT_PTX_SREG_NCTAID :
       PTX_READ_SREG_R32V4<"nctaid">;
6226
// Cluster-related special registers. Every definition in this group is
// guarded by hasSM<90> and hasPTX<78>.

// Per-component (_x/_y/_z/_w) readers.
defm INT_PTX_SREG_CLUSTERID
    : PTX_READ_SREG_R32V4<"clusterid", [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_SREG_NCLUSTERID
    : PTX_READ_SREG_R32V4<"nclusterid", [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_SREG_CLUSTER_CTAID
    : PTX_READ_SREG_R32V4<"cluster_ctaid", [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_SREG_CLUSTER_NCTAID
    : PTX_READ_SREG_R32V4<"cluster_nctaid", [hasSM<90>, hasPTX<78>]>;

// Single 32-bit readers.
def INT_PTX_SREG_CLUSTER_CTARANK
    : PTX_READ_SREG_R32<"cluster_ctarank",
                        int_nvvm_read_ptx_sreg_cluster_ctarank,
                        [hasSM<90>, hasPTX<78>]>;
def INT_PTX_SREG_CLUSTER_NCTARANK
    : PTX_READ_SREG_R32<"cluster_nctarank",
                        int_nvvm_read_ptx_sreg_cluster_nctarank,
                        [hasSM<90>, hasPTX<78>]>;
6244
6245
// 32-bit readers for %laneid, %warpid, %nwarpid, %smid, %nsmid and %gridid.
def INT_PTX_SREG_LANEID
    : PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def INT_PTX_SREG_WARPID
    : PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID
    : PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
def INT_PTX_SREG_SMID
    : PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID
    : PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
def INT_PTX_SREG_GRIDID
    : PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
6258
// 32-bit readers for the %lanemask_{eq,le,lt,ge,gt} special registers.
def INT_PTX_SREG_LANEMASK_EQ
    : PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
def INT_PTX_SREG_LANEMASK_LE
    : PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
def INT_PTX_SREG_LANEMASK_LT
    : PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
def INT_PTX_SREG_LANEMASK_GE
    : PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
def INT_PTX_SREG_LANEMASK_GT
    : PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
6269
// %clock is read as a 32-bit value, %clock64 as a 64-bit value.
def INT_PTX_SREG_CLOCK
    : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64
    : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
6274
// 32-bit readers for %pm0 .. %pm3.
def INT_PTX_SREG_PM0
    : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1
    : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
def INT_PTX_SREG_PM2
    : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
def INT_PTX_SREG_PM3
    : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
6279
// WARP_SZ is emitted as a bare constant name rather than a '%'-prefixed
// register, so this instruction is written out by hand: PTX_READ_SREG_R32
// always emits "%<regname>" as the source operand.
// TODO: It would be nice to use PTX_READ_SREG_R32 here once it can handle
// such a constant.
def INT_PTX_SREG_WARPSIZE
    : NVPTXInst<(outs Int32Regs:$dst), (ins),
                "mov.u32 \t$dst, WARP_SZ;",
                [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
6285
// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
// In addition to target-independent fields provided by WMMA_REGS, it adds
// the fields commonly used to implement specific PTX instruction -- register
// types and names, constraints, parts of assembly, etc.
//
// Template arguments:
//   r  - the WMMA_REGS record describing the fragment; its geom, frag and
//        ptx_elt_type are forwarded to the WMMA_REGS base class.
//   op - name of the operation the fragment is used with. Only the values
//        "mma" and "ldmatrix" are tested for below; it affects nothing but
//        the Predicates list.
class WMMA_REGINFO<WMMA_REGS r, string op>
      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
  // NVPTX register types used to carry fragment data. Only f32 and f64 use
  // floating-point register classes; every other element type listed here is
  // carried in Int32Regs. There is no default clause, so an element type that
  // is not listed is rejected by TableGen.
  NVPTXRegClass regclass = !cond(
    !eq(ptx_elt_type, "f16") : Int32Regs,
    !eq(ptx_elt_type, "f32") : Float32Regs,
    !eq(ptx_elt_type, "f64") : Float64Regs,
    !eq(ptx_elt_type, "bf16") : Int32Regs,
    !eq(ptx_elt_type, "tf32") : Int32Regs,
    !eq(ptx_elt_type, "s32") : Int32Regs,
    !eq(ptx_elt_type, "b16") : Int32Regs,
    !eq(ptx_elt_type, "s8") : Int32Regs,
    !eq(ptx_elt_type, "u8") : Int32Regs,
    !eq(ptx_elt_type, "s4") : Int32Regs,
    !eq(ptx_elt_type, "u4") : Int32Regs,
    !eq(ptx_elt_type, "b1") : Int32Regs);

  // Instruction input/output arguments for the fragment: one `regclass` entry
  // per element of `regs` (presumably inherited from WMMA_REGS -- it is not
  // defined in this class).
  list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));

  // List of register names for the fragment -- ["ra0", "ra1",...]
  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;

  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
  string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";

  // Predicates for particular fragment variant. Technically those are
  // per-instruction predicates, but currently all fragments that can be used in
  // a given instruction are subject to the same constraints, so an instruction
  // can use predicates from any of its fragments. If/when this is no
  // longer the case, we can concat all per-fragment predicates to enforce that
  // all fragments of the instruction are viable.
  //
  // NOTE: !cond yields the value of the FIRST clause whose condition is true,
  // so the order of the clauses below is significant (e.g. the non-"mma"
  // m8n8k128 clause must stay ahead of the "mma" geometry list that also
  // names m8n8k128). There is no default clause; a fragment that matches none
  // of them is an error at TableGen time.
  list<Predicate> Predicates = !cond(
    // fp16 -> fp16/fp32 @ m16n16k16
    !and(!eq(geom, "m16n16k16"),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<60>],

    // f64 @ m8n8k4. This is the only f64/m8n8k4 clause; the non-f64 m8n8k4
    // case is handled further down.
    !and(!eq(geom,"m8n8k4"),
         !eq(ptx_elt_type, "f64")) : [hasSM<80>, hasPTX<70>],

    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
    !and(!or(!eq(geom, "m8n32k16"),
             !eq(geom, "m32n8k16")),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<61>],

    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !or(!eq(ptx_elt_type, "u8"),
             !eq(ptx_elt_type, "s8"),
             !eq(ptx_elt_type, "s32"))) : [hasSM<72>, hasPTX<63>],

    // bf16 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !eq(ptx_elt_type, "bf16")) : [hasSM<80>, hasPTX<70>],

    // tf32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "tf32")) : [hasSM<80>, hasPTX<70>],

    // f32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "f32")) : [hasSM<80>, hasPTX<70>],

    // b1 -> s32 @ m8n8k128(b1)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k128")) : [hasSM<75>, hasPTX<63>],

    // u4/s4 -> s32 @ m8n8k32 (u4/s4)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<63>],

    // m16n8k8 / m8n8k16, regardless of op and element type.
    !or(!eq(geom,"m16n8k8"),
        !eq(geom,"m8n8k16")) : [hasSM<75>, hasPTX<65>],

    // Any non-f64 element type @ m8n8k4.
    !and(!ne(ptx_elt_type,"f64"),
         !eq(geom, "m8n8k4")) : [hasSM<70>, hasPTX<64>],

    // mma m8n8k32 requires higher PTX version
    !and(!eq(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<65>],

    // Remaining "mma" geometries. m8n8k128 only gets here for op == "mma";
    // every other op was already matched by the b1 clause above.
    !and(!eq(op,"mma"),
         !or(!eq(geom, "m16n8k16"),
             !eq(geom, "m16n8k4"),
             !eq(geom, "m16n8k32"),
             !eq(geom, "m16n8k64"),
             !eq(geom, "m8n8k128"),
             !eq(geom, "m16n8k128"),
             !eq(geom, "m16n8k256"))) : [hasSM<80>, hasPTX<70>],

    // ldmatrix b16 @ m8n8
    !and(!eq(op,"ldmatrix"),
         !eq(ptx_elt_type,"b16"),
         !eq(geom, "m8n8")) : [hasSM<75>, hasPTX<65>]);

  // template DAGs for instruction inputs/output: one operand of type
  // ptx_regs[i] named reg_names[i] per fragment register.
  dag Outs = !dag(outs, ptx_regs, reg_names);
  dag Ins = !dag(ins, ptx_regs, reg_names);
}
6394
// Turns an (ins ...) operand dag into the selection pattern for `Intr`:
// `ins` is replaced with the intrinsic, and the memory operand classes
// MEMri, MEMri64 and imem are replaced with ADDRri, ADDRri64 and ADDRvar
// respectively. The resulting dag is available as `ret`.
class BuildPatternI<Intrinsic Intr, dag Ins> {
  dag ret = !foreach(arg, Ins,
                     !subst(imem, ADDRvar,
                       !subst(MEMri64, ADDRri64,
                         !subst(MEMri, ADDRri,
                           !subst(ins, Intr, arg)))));
}
6404
// PatFrag flavour of the operand-dag-to-pattern conversion: `ins` is replaced
// with the PatFrag `Intr`, and the memory operand classes MEMri, MEMri64 and
// imem are replaced with ADDRri, ADDRri64 and ADDRvar respectively. The
// resulting dag is available as `ret`.
class BuildPatternPF<PatFrag Intr, dag Ins> {
  dag ret = !foreach(arg, Ins,
                     !subst(imem, ADDRvar,
                       !subst(MEMri64, ADDRri64,
                         !subst(MEMri, ADDRri,
                           !subst(ins, Intr, arg)))));
}
6414
// Base class holding the WMMA-related fields shared by all MMA instructions.
//   _Intr - name of the intrinsic record the instruction is selected for.
//   _Args - operand dags; they are concatenated in list order.
// The NVPTXInst base is given empty outs/ins, "?" as its asm string and an
// empty pattern list here.
class WMMA_INSTR<string _Intr, list<dag> _Args>
    : NVPTXInst<(outs), (ins), "?", []> {
  // Intrinsic record looked up by name.
  Intrinsic Intr = !cast<Intrinsic>(_Intr);
  // Every dag of _Args appended, in order, onto an initially empty (ins).
  dag Args = !foldl((ins), _Args, acc, piece, !con(acc, piece));
  // Pattern of the form (intrinsic arg0, arg1, ...), built ahead of time from
  // Args.
  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
}
6424
//
// wmma.load.<frag>.sync${ptx:aligned}.<layout>.<geom>[|.global|.shared].<type>
//   {regs}, [$src][, $ldm];
//

class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
                DAGOperand SrcOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
               [!con((ins SrcOp:$src),
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {
  // The load/store intrinsics are overloaded on the address space of the
  // pointer, so matching goes through a PatFrag restricted to one space.
  // PFOperands mirrors the shape of Args as (ops node:$name, ...).
  dag PFOperands = !con((ops node:$src),
                        !if(WithStride, (ops node:$ldm), (ops)));
  // The same operands with the intrinsic as the dag operator.
  dag PFOperandsIntr = !foreach(item, PFOperands, !subst(ops, Intr, item));
  // PatFrag whose predicate accepts only the address space named by Space;
  // anything other than ".shared" / ".global" selects the generic check.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             PFOperandsIntr,
                             !if(!eq(Space, ".shared"), AS_match.shared,
                                 !if(!eq(Space, ".global"), AS_match.global,
                                     AS_match.generic))>;
  // Replace the default pattern with the address-space-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  // $ptx is an extra operand appended after the intrinsic arguments; the asm
  // string refers to it through the "aligned" modifier.
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = !strconcat(
      "wmma.load.", Frag.frag, ".sync", "${ptx:aligned}",
      ".", Layout,
      ".", Frag.geom,
      Space,
      ".", Frag.ptx_elt_type, " \t",
      Frag.regstring,
      ", [$src]",
      !if(WithStride, ", $ldm", ""),
      ";");
}
6464
//
// wmma.store.d.sync${ptx:aligned}.<layout>.<geom>[|.global|.shared].<type>
//   [$dst],{regs}[, $ldm];
//
class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
                   bit WithStride, DAGOperand DstOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
               [!con((ins DstOp:$dst),
                     Frag.Ins,
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {

  // The load/store intrinsics are overloaded on the address space of the
  // pointer, so matching goes through a PatFrag restricted to one space.
  // PFOperands mirrors the shape of Args as (ops node:$name, ...): the
  // destination, one node per fragment register, then the optional stride.
  dag PFOperands = !con(
      (ops node:$dst),
      !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
      !if(WithStride, (ops node:$ldm), (ops)));
  // PatFrag whose predicate accepts only the address space named by Space;
  // anything other than ".shared" / ".global" selects the generic check.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             !foreach(item, PFOperands, !subst(ops, Intr, item)),
                             !if(!eq(Space, ".shared"), AS_match.shared,
                                 !if(!eq(Space, ".global"), AS_match.global,
                                     AS_match.generic))>;
  // Replace the default pattern with the address-space-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  // A store produces no results; $ptx is appended after the intrinsic
  // arguments and is referenced by the asm string via the "aligned" modifier.
  let OutOperandList = (outs);
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let AsmString = !strconcat(
      "wmma.store.d.sync", "${ptx:aligned}",
      ".", Layout,
      ".", Frag.geom,
      Space,
      ".", Frag.ptx_elt_type,
      " \t[$dst],",
      Frag.regstring,
      !if(WithStride, ", $ldm", ""),
      ";");
}
6504
// Instantiate every supported wmma load/store variant. The anonymous defs are
// collected into MMA_LDSTs. For each (layout, stride, space, address operand)
// combination all loads are emitted first, followed by all stores.
defset list<WMMA_INSTR> MMA_LDSTs = {
  foreach mem_layout = ["row", "col"] in
  foreach with_stride = [false, true] in
  foreach addr_space = [".global", ".shared", ""] in
  foreach addr_op = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
    foreach ld_frag = NVVM_MMA_OPS.all_ld_ops in
      if NVVM_WMMA_LDST_SUPPORTED<ld_frag, mem_layout>.ret then
        def : WMMA_LOAD<WMMA_REGINFO<ld_frag, "load">,
                        mem_layout, addr_space, with_stride, addr_op>;
    foreach st_frag = NVVM_MMA_OPS.all_st_ops in
      if NVVM_WMMA_LDST_SUPPORTED<st_frag, mem_layout>.ret then
        def : WMMA_STORE_D<WMMA_REGINFO<st_frag, "store">,
                           mem_layout, addr_space, with_stride, addr_op>;
  } // addr_op, addr_space, with_stride, mem_layout
} // defset
6522
// Predicate list for an MMA instruction: the predicates of fragment A, plus
// [hasSM<80>, hasPTX<71>] when the b1 operation is ".and.popc".
class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  // Copies of the template arguments, kept as record fields.
  string Op = b1op;
  WMMA_REGINFO Frag = FragA;
  list<Predicate> ret = !listconcat(
    FragA.Predicates,
    !if(!ne(b1op, ".and.popc"), [], [hasSM<80>, hasPTX<71>])
  );
}
// WMMA.MMA
//
// wmma.mma<b1op>.sync${ptx:aligned}.<alayout>.<blayout>.<geom>[.<rnd>]<types>[.satfinite]
//   {d regs}, {a regs}, {b regs}, {c regs};
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
  : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
               [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires appears to have no effect on an Instruction that carries no
    // pattern. It is set regardless so that the Pat<> generated from this
    // record can reuse the same predicate list.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  // $ptx is appended after the intrinsic arguments and is referenced by the
  // asm string via the "aligned" modifier.
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // Type suffix: ".<D>.<C>" when fragment A is f16, ".<D>.<A>.<B>.<C>" for
  // every other element type.
  string TypeList = !if(!eq(FragA.ptx_elt_type, "f16"),
                        !strconcat(".", FragD.ptx_elt_type,
                                   ".", FragC.ptx_elt_type),
                        !strconcat(".", FragD.ptx_elt_type,
                                   ".", FragA.ptx_elt_type,
                                   ".", FragB.ptx_elt_type,
                                   ".", FragC.ptx_elt_type));
  let AsmString = !strconcat(
      "wmma.mma",
      b1op,
      ".sync",
      "${ptx:aligned}",
      ".", ALayout,
      ".", BLayout,
      ".", FragA.geom,
      // The rounding-mode suffix is emitted only when one was requested.
      !if(!eq(rnd, ""), "", !strconcat(".", rnd)),
      TypeList,
      !if(Satfinite, ".satfinite", ""), "\n\t\t",
      FragD.regstring, ",\n\t\t",
      FragA.regstring, ",\n\t\t",
      FragB.regstring, ",\n\t\t",
      FragC.regstring, ";");
}
6566
// Instantiate every supported wmma.mma variant; the anonymous defs are
// collected into WMMAs. Each entry of all_wmma_ops is indexed as
// [0]=A, [1]=B, [2]=C, [3]=D fragment.
defset list<WMMA_INSTR> WMMAs = {
  foreach a_layout = ["row", "col"] in
  foreach b_layout = ["row", "col"] in
  foreach sat = [0, 1] in
  foreach round = ["", "rn", "rz", "rm", "rp"] in
  foreach frags = NVVM_MMA_OPS.all_wmma_ops in
  foreach bit_op = NVVM_MMA_B1OPS<frags>.ret in {
    if NVVM_WMMA_SUPPORTED<frags, a_layout, b_layout, sat, round>.ret then {
      def : WMMA_MMA<WMMA_REGINFO<frags[0], "wmma.mma">,
                     WMMA_REGINFO<frags[1], "wmma.mma">,
                     WMMA_REGINFO<frags[2], "wmma.mma">,
                     WMMA_REGINFO<frags[3], "wmma.mma">,
                     a_layout, b_layout, sat, round, bit_op>;
    }
  } // bit_op, frags, round, sat, b_layout, a_layout
} // defset
6588
// MMA
//
// mma.sync.aligned.<geom>.<alayout>.<blayout>[.satfinite].<D>.<A>.<B>.<C><b1op>
//   {d regs}, {a regs}, {b regs}, {c regs};
class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
          WMMA_REGINFO FragC, WMMA_REGINFO FragD,
          string ALayout, string BLayout, int Satfinite, string b1op>
  : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
               [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires appears to have no effect on an Instruction that carries no
    // pattern. It is set regardless so that the Pat<> generated from this
    // record can reuse the same predicate list.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  // $ptx is appended after the intrinsic arguments.
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // Type suffix always lists all four fragments in D, A, B, C order.
  string TypeList = !strconcat(".", FragD.ptx_elt_type,
                               ".", FragA.ptx_elt_type,
                               ".", FragB.ptx_elt_type,
                               ".", FragC.ptx_elt_type);
  let AsmString = !strconcat(
      "mma.sync.aligned.",
      FragA.geom,
      ".", ALayout,
      ".", BLayout,
      !if(Satfinite, ".satfinite", ""),
      TypeList,
      b1op, "\n\t\t",
      FragD.regstring, ",\n\t\t",
      FragA.regstring, ",\n\t\t",
      FragB.regstring, ",\n\t\t",
      FragC.regstring, ";");
}
6616
// Instantiate every supported mma variant; the anonymous defs are collected
// into MMAs. Each entry of all_mma_ops is indexed as
// [0]=A, [1]=B, [2]=C, [3]=D fragment.
defset list<WMMA_INSTR> MMAs = {
  foreach a_layout = ["row", "col"] in
  foreach b_layout = ["row", "col"] in
  foreach sat = [0, 1] in
  foreach frags = NVVM_MMA_OPS.all_mma_ops in
  foreach bit_op = NVVM_MMA_B1OPS<frags>.ret in {
    if NVVM_MMA_SUPPORTED<frags, a_layout, b_layout, sat>.ret then {
      def : MMA<WMMA_REGINFO<frags[0], "mma">,
                WMMA_REGINFO<frags[1], "mma">,
                WMMA_REGINFO<frags[2], "mma">,
                WMMA_REGINFO<frags[3], "mma">,
                a_layout, b_layout, sat, bit_op>;
    }
  } // bit_op, frags, sat, b_layout, a_layout
} // defset
6636
//
// ldmatrix.sync.aligned.<geom>.<frag>[|.trans][|.shared].<type> {regs}, [$src];
//
class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
               DAGOperand SrcOp>
  : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
    Requires<Frag.Predicates> {
  // PatFrag whose predicate accepts only one address space: the shared check
  // for ".shared", the generic check for any other value of Space.
  PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
                             !if(!eq(Space, ".shared"), AS_match.shared,
                                 AS_match.generic)>;
  // Replace the default pattern with the address-space-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  // $ptx is appended after the intrinsic arguments.
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = !strconcat(
      "ldmatrix.sync.aligned.",
      Frag.geom,
      ".", Frag.frag,
      !if(Transposed, ".trans", ""),
      Space,
      ".", Frag.ptx_elt_type,
      " ", Frag.regstring, ", [$src];");
}
6661
// Instantiate every supported ldmatrix variant; the anonymous defs are
// collected into LDMATRIXs.
defset list<WMMA_INSTR> LDMATRIXs = {
  foreach is_trans = [false, true] in
  foreach addr_space = [".shared", ""] in
  foreach addr_op = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
    foreach ld_frag = NVVM_MMA_OPS.all_ldmatrix_ops in
      if NVVM_LDMATRIX_SUPPORTED<ld_frag>.ret then
        def : LDMATRIX<WMMA_REGINFO<ld_frag, "ldmatrix">,
                       is_trans, addr_space, addr_op>;
  } // addr_op, addr_space, is_trans
} // defset
6675
// Selection pattern mapping wi.IntrinsicPattern onto the instruction wi.
// Building non-flat dags is awkward: !subst cannot replace a dag node with
// another dag, so ptx.version is appended with !con only *after* !foreach has
// swapped the 'ins' operator for the instruction record.
class MMA_PAT<WMMA_INSTR wi>
  : Pat<wi.IntrinsicPattern,
        !con(!foreach(item, wi.Args, !subst(ins, wi, item)),
             (wi ptx.version))>,
    Requires<wi.Predicates>;

// Emit one intrinsic -> instruction pattern per MMA-family instruction.
foreach inst = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
  def : MMA_PAT<inst>;
6688
// mapa<suffix>.{u32,u64} $d, $a, $b
// One def per address width (32/64-bit $a and $d) and per form of $b
// (Int32Regs register or i32 immediate). All require sm_90 and PTX 7.8.
multiclass MAPA<string suffix, Intrinsic Intr> {
  // 32-bit address, register $b.
  def _32 : NVPTXInst<(outs Int32Regs:$d),
                      (ins Int32Regs:$a, Int32Regs:$b),
                      !strconcat("mapa", suffix, ".u32\t$d, $a, $b;"),
                      [(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>,
            Requires<[hasSM<90>, hasPTX<78>]>;
  // 32-bit address, immediate $b.
  def _32i : NVPTXInst<(outs Int32Regs:$d),
                       (ins Int32Regs:$a, i32imm:$b),
                       !strconcat("mapa", suffix, ".u32\t$d, $a, $b;"),
                       [(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>,
             Requires<[hasSM<90>, hasPTX<78>]>;
  // 64-bit address, register $b.
  def _64 : NVPTXInst<(outs Int64Regs:$d),
                      (ins Int64Regs:$a, Int32Regs:$b),
                      !strconcat("mapa", suffix, ".u64\t$d, $a, $b;"),
                      [(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>,
            Requires<[hasSM<90>, hasPTX<78>]>;
  // 64-bit address, immediate $b.
  def _64i : NVPTXInst<(outs Int64Regs:$d),
                       (ins Int64Regs:$a, i32imm:$b),
                       !strconcat("mapa", suffix, ".u64\t$d, $a, $b;"),
                       [(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>,
             Requires<[hasSM<90>, hasPTX<78>]>;
}

defm mapa                : MAPA<"", int_nvvm_mapa>;
defm mapa_shared_cluster : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluster>;
6710
6711
// getctarank<suffix>.{u32,u64} $d, $a
// One def per width of the address operand $a; the result $d is an Int32Regs
// register in both. All require sm_90 and PTX 7.8.
multiclass GETCTARANK<string suffix, Intrinsic Intr> {
  // 32-bit address operand.
  def _32 : NVPTXInst<(outs Int32Regs:$d),
                      (ins Int32Regs:$a),
                      !strconcat("getctarank", suffix, ".u32\t$d, $a;"),
                      [(set Int32Regs:$d, (Intr Int32Regs:$a))]>,
            Requires<[hasSM<90>, hasPTX<78>]>;
  // 64-bit address operand.
  def _64 : NVPTXInst<(outs Int32Regs:$d),
                      (ins Int64Regs:$a),
                      !strconcat("getctarank", suffix, ".u64\t$d, $a;"),
                      [(set Int32Regs:$d, (Intr Int64Regs:$a))]>,
            Requires<[hasSM<90>, hasPTX<78>]>;
}

defm getctarank                : GETCTARANK<"", int_nvvm_getctarank>;
defm getctarank_shared_cluster : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;
6725
// Reads the %is_explicit_cluster special register into a predicate register.
// Selected for int_nvvm_is_explicit_cluster; requires sm_90 and PTX 7.8.
def is_explicit_cluster
  : NVPTXInst<(outs Int1Regs:$d),
              (ins),
              "mov.pred\t$d, %is_explicit_cluster;",
              [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
    Requires<[hasSM<90>, hasPTX<78>]>;
6730