//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Floating-point immediate leaves. Each predicate converts the node's APFloat
// to the host type and compares it against the named constant.
def immFloat0 : PatLeaf<(fpimm), [{
    float f = (float)N->getValueAPF().convertToFloat();
    return (f==0.0f);
}]>;

def immFloat1 : PatLeaf<(fpimm), [{
    float f = (float)N->getValueAPF().convertToFloat();
    return (f==1.0f);
}]>;

def immDouble0 : PatLeaf<(fpimm), [{
    double d = (double)N->getValueAPF().convertToDouble();
    return (d==0.0);
}]>;

def immDouble1 : PatLeaf<(fpimm), [{
    double d = (double)N->getValueAPF().convertToDouble();
    return (d==1.0);
}]>;

// Code fragments that test the address space of a memory SDNode via
// ChkMemSDNodeAddressSpace.
def AS_match {
  code generic = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];
  code shared = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];
  code global = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}

// A node that will be replaced with the current PTX version.
class PTX {
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  }]>;
  // (i32 0) will be XForm'ed to the currently used PTX version.
  dag version = (PTXVerXform (i32 0));
}
def ptx : PTX;

// Generates list of n sequential register names.
// E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2" ]
class RegSeq<int n, string prefix> {
  // Recursive: names for 0..n-2 followed by prefix # (n-1); empty when n == 0.
  list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
                                        [prefix # !sub(n, 1)]),
                            []);
}

// Values iterated for the threadmask_imm flag of SHFL_INSTR: both the register
// and immediate forms when sync is set, otherwise only the single value 0
// (the non-sync form has no threadmask operand).
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync, [0, 1], [0]);
}

//-----------------------------------
// Synchronization and shuffle functions
//-----------------------------------
let isConvergent = true in {
def INT_BARRIER0 : NVPTXInst<(outs), (ins),
                  "bar.sync \t0;",
      [(int_nvvm_barrier0)]>;
def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
                  "bar.sync \t$src1;",
      [(int_nvvm_barrier_n Int32Regs:$src1)]>;
def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                  "bar.sync \t$src1, $src2;",
      [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// The reducing barriers first turn the i32 $pred into a PTX predicate with
// setp.ne; the and/or forms convert the predicate result back with selp.
def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
             "}}"),
      [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             ".reg .pred \t%p2; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.and.pred \t%p2, 0, %p1; \n\t",
             "selp.u32 \t$dst, 1, 0, %p2; \n\t",
             "}}"),
      [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             ".reg .pred \t%p2; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.or.pred \t%p2, 0, %p1; \n\t",
             "selp.u32 \t$dst, 1, 0, %p2; \n\t",
             "}}"),
      [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;

def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
                             [(int_nvvm_bar_sync imm:$i)]>;

def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
                             [(int_nvvm_bar_warp_sync imm:$i)]>,
        Requires<[hasPTX60, hasSM30]>;
def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
                             [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
        Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
                                   [(int_nvvm_barrier_sync imm:$i)]>,
        Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
                                   [(int_nvvm_barrier_sync Int32Regs:$i)]>,
        Requires<[hasPTX60, hasSM30]>;

// barrier.sync with an explicit thread count; one record per reg/imm
// combination of ($id, $cnt).
def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
        Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
        Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
        Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
        Requires<[hasPTX60, hasSM30]>;

// One shfl / shfl.sync instruction variant.
//   sync           - emit the .sync form (adds a leading $threadmask input).
//   mode           - PTX shuffle mode string ("up", "down", "bfly", "idx").
//   reg            - "i32" or "f32"; selects the data register class.
//   return_pred    - also produce the Int1Regs $pred output ("$dst|$pred").
//   offset_imm / mask_imm / threadmask_imm
//                  - take the respective operand as an i32 immediate rather
//                    than an Int32Regs register.
// The operand lists, asm string and selection pattern are all derived from
// these flags; the intrinsic is looked up by its constructed name.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
      : NVPTXInst<(outs), (ins), "?", []> {
  NVPTXRegClass rc = !cond(
    !eq(reg, "i32"): Int32Regs,
    !eq(reg, "f32"): Float32Regs);
  string IntrName = "int_nvvm_shfl_"
                    # !if(sync, "sync_", "")
                    # mode
                    # "_" # reg
                    # !if(return_pred, "p", "");
  Intrinsic Intr = !cast<Intrinsic>(IntrName);
  let InOperandList = !con(
    !if(sync,
        !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
        (ins)),
    (ins rc:$src),
    !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
    !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
    );
  let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
  let AsmString = "shfl."
     # !if(sync, "sync.", "")
     # mode # ".b32\t"
     # "$dst"
     # !if(return_pred, "|$pred", "") # ", "
     # "$src, $offset, $mask"
     # !if(sync, ", $threadmask", "")
     # ";"
     ;
  // Build (set <outs...>, (Intr <ins...>)) from the operand lists, rewriting
  // i32imm operands to the `imm` pattern leaf.
  let Pattern = [!con(
      !foreach(tmp, OutOperandList,
             !subst(outs, set,
             !subst(i32imm, imm, tmp))),
      (set !foreach(tmp, InOperandList,
                    !subst(ins, Intr,
                    !subst(i32imm, imm, tmp))))
  )];
}

foreach sync = [false, true] in {
  foreach mode = ["up", "down", "bfly", "idx"] in {
    foreach regclass = ["i32", "f32"] in {
      foreach return_pred = [false, true] in {
        foreach offset_imm = [false, true] in {
          foreach mask_imm = [false, true] in {
            foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
              // shfl.sync is gated on hasPTX60 like every other *.sync
              // instruction in this file (bar.warp.sync, vote.sync,
              // match.*.sync); the legacy form keeps the hasSHFL check.
              def : SHFL_INSTR<sync, mode, regclass, return_pred,
                               offset_imm, mask_imm, threadmask_imm>,
                    Requires<!if(sync, [hasSM30, hasPTX60], [hasSM30, hasSHFL])>;
            }
          }
        }
      }
    }
  }
}

// vote.{all,any,uni,ballot}
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
              "vote." # mode # " \t$dest, $pred;",
              [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;

// vote.sync.{all,any,uni,ballot}
// Suffix i/r selects an immediate or register $mask.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
              "vote.sync." # mode # " \t$dest, $pred, $mask;",
              [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
  def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
              "vote.sync." # mode #" \t$dest, $pred, $mask;",
              [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;

// match.any.sync.{b32,b64}: four records covering the register/immediate
// combinations of $mask and $value (see each record's ins list).
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
                                        i32imm>;
defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
                                        i64imm>;

// match.all.sync.{b32,b64} returning both the result and a predicate
// ("$dest|$pred"); same four operand-kind combinations as MATCH_ANY_SYNC.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}
defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
                                         i32imm>;
defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
                                         i64imm>;

// redux.sync.<BinOp>.<PTXType> on 32-bit registers.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
          "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
          [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX70, hasSM80]>;
}

defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;

} // isConvergent = true

//-----------------------------------
// Explicit Memory Fence Functions
//-----------------------------------
class MEMBAR<string StrOp, Intrinsic IntOP> :
              NVPTXInst<(outs), (ins),
            StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;


//-----------------------------------
// Async Copy Functions
//-----------------------------------

// In the multiclasses below the _32 / _64 suffix selects Int32Regs or
// Int64Regs address operands.
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
            !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
            [(Intrin Int32Regs:$addr)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
            !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
            [(Intrin Int64Regs:$addr)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_MBARRIER_ARRIVE :
  CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<"", ".shared", int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", "", int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared", int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;

multiclass CP_ASYNC_CA_SHARED_GLOBAL_I<string cpsize, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
            !strconcat("cp.async.ca.shared.global [$dst], [$src], ", cpsize, ";"),
            [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
            !strconcat("cp.async.ca.shared.global [$dst], [$src], ", cpsize, ";"),
            [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_CA_SHARED_GLOBAL_4 :
  CP_ASYNC_CA_SHARED_GLOBAL_I<"4", int_nvvm_cp_async_ca_shared_global_4>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8 :
  CP_ASYNC_CA_SHARED_GLOBAL_I<"8", int_nvvm_cp_async_ca_shared_global_8>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16 :
  CP_ASYNC_CA_SHARED_GLOBAL_I<"16", int_nvvm_cp_async_ca_shared_global_16>;

multiclass CP_ASYNC_CG_SHARED_GLOBAL<string cpsize, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
            !strconcat("cp.async.cg.shared.global [$dst], [$src], ", cpsize, ";"),
            [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
            !strconcat("cp.async.cg.shared.global [$dst], [$src], ", cpsize, ";"),
            [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_CG_SHARED_GLOBAL_16 :
  CP_ASYNC_CG_SHARED_GLOBAL<"16", int_nvvm_cp_async_cg_shared_global_16>;

def CP_ASYNC_COMMIT_GROUP :
  NVPTXInst<(outs), (ins), "cp.async.commit_group;", [(int_nvvm_cp_async_commit_group)]>,
  Requires<[hasPTX70, hasSM80]>;

def CP_ASYNC_WAIT_GROUP :
  NVPTXInst<(outs), (ins i32imm:$n), "cp.async.wait_group $n;",
  [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
  Requires<[hasPTX70, hasSM80]>;

def CP_ASYNC_WAIT_ALL :
  NVPTXInst<(outs), (ins), "cp.async.wait_all;",
  [(int_nvvm_cp_async_wait_all)]>,
  Requires<[hasPTX70, hasSM80]>;

//-----------------------------------
// MBarrier Functions
//-----------------------------------

// As above, _32 / _64 select the register class of $addr. AddrSpace is
// spliced into the mnemonic ("" or ".shared").
multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
    [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
    [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_INIT : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED : MBARRIER_INIT<".shared",
                                          int_nvvm_mbarrier_init_shared>;

multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
           !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
    [(Intrin Int32Regs:$addr)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
           !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
    [(Intrin Int64Regs:$addr)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared",
                                            int_nvvm_mbarrier_inval_shared>;

multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
           !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
    [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
           !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
    [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED :
  MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;

multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int32Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
    [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int64Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
    [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_NOCOMPLETE :
  MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_NOCOMPLETE<".shared", int_nvvm_mbarrier_arrive_noComplete_shared>;

multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
           !strconcat("mbarrier.arrive_drop", AddrSpace,
                      ".b64 $state, [$addr];"),
           [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
           !strconcat("mbarrier.arrive_drop", AddrSpace,
                      ".b64 $state, [$addr];"),
           [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_DROP :
  MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED :
  MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;

multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int32Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
           [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int64Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
           [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_DROP_NOCOMPLETE :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<".shared",
                       int_nvvm_mbarrier_arrive_drop_noComplete_shared>;

multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state),
           !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
           [(set Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state),
           !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
           [(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_TEST_WAIT :
  MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED :
  MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;

class MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
           NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
           "mbarrier.pending_count.b64 $res, $state;",
           [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
    Requires<[hasPTX70, hasSM80]>;

def MBARRIER_PENDING_COUNT :
  MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;

//-----------------------------------
// Math Functions
//-----------------------------------

// Map min(1.0, max(0.0, x)) to sat(x)
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
// NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.
521 522def : Pat<(int_nvvm_fmin_f immFloat1, 523 (int_nvvm_fmax_f immFloat0, Float32Regs:$a)), 524 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; 525def : Pat<(int_nvvm_fmin_f immFloat1, 526 (int_nvvm_fmax_f Float32Regs:$a, immFloat0)), 527 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; 528def : Pat<(int_nvvm_fmin_f 529 (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1), 530 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; 531def : Pat<(int_nvvm_fmin_f 532 (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1), 533 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; 534 535def : Pat<(int_nvvm_fmin_d immDouble1, 536 (int_nvvm_fmax_d immDouble0, Float64Regs:$a)), 537 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; 538def : Pat<(int_nvvm_fmin_d immDouble1, 539 (int_nvvm_fmax_d Float64Regs:$a, immDouble0)), 540 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; 541def : Pat<(int_nvvm_fmin_d 542 (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1), 543 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; 544def : Pat<(int_nvvm_fmin_d 545 (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1), 546 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; 547 548 549// We need a full string for OpcStr here because we need to deal with case like 550// INT_PTX_RECIP. 551class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass, 552 NVPTXRegClass src_regclass, Intrinsic IntOP> 553 : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0), 554 OpcStr, 555 [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>; 556 557// We need a full string for OpcStr here because we need to deal with the case 558// like INT_PTX_NATIVE_POWR_F. 
559class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass, 560 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP> 561 : NVPTXInst<(outs t_regclass:$dst), 562 (ins s0_regclass:$src0, s1_regclass:$src1), 563 OpcStr, 564 [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>; 565 566class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass, 567 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, 568 NVPTXRegClass s2_regclass, Intrinsic IntOP> 569 : NVPTXInst<(outs t_regclass:$dst), 570 (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2), 571 OpcStr, 572 [(set t_regclass:$dst, 573 (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>; 574 575// 576// MISC 577// 578 579def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs, 580 Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>; 581 582// 583// Min Max 584// 585 586def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs, 587 Float32Regs, Float32Regs, int_nvvm_fmin_f>; 588def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;", 589 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>; 590 591def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs, 592 Float32Regs, Float32Regs, int_nvvm_fmax_f>; 593def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;", 594 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>; 595 596def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs, 597 Float64Regs, Float64Regs, int_nvvm_fmin_d>; 598def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs, 599 Float64Regs, Float64Regs, int_nvvm_fmax_d>; 600 601 602// 603// Multiplication 604// 605 606def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs, 607 Int32Regs, Int32Regs, int_nvvm_mulhi_i>; 608def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs, 609 Int32Regs, Int32Regs, int_nvvm_mulhi_ui>; 
610 611def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs, 612 Int64Regs, Int64Regs, int_nvvm_mulhi_ll>; 613def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs, 614 Int64Regs, Int64Regs, int_nvvm_mulhi_ull>; 615 616def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;", 617 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>; 618def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;", 619 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>; 620def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;", 621 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>; 622def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;", 623 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>; 624def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;", 625 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>; 626def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;", 627 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>; 628def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;", 629 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>; 630def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;", 631 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>; 632 633def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;", 634 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>; 635def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;", 636 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>; 637def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;", 638 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>; 639def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;", 640 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>; 641 642def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 
\t$dst, $src0, $src1;", 643 Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>; 644def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;", 645 Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>; 646 647// 648// Div 649// 650 651def INT_NVVM_DIV_APPROX_FTZ_F 652 : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs, 653 Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>; 654def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;", 655 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>; 656 657def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;", 658 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>; 659def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;", 660 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>; 661def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;", 662 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>; 663def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;", 664 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>; 665def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;", 666 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>; 667def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;", 668 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>; 669def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;", 670 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>; 671def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;", 672 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>; 673 674def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;", 675 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>; 676def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;", 677 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>; 678def INT_NVVM_DIV_RM_D : 
F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;", 679 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>; 680def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;", 681 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>; 682 683// 684// Sad 685// 686 687def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;", 688 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>; 689def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;", 690 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>; 691 692// 693// Floor Ceil 694// 695 696def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a), 697 (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>; 698def : Pat<(int_nvvm_floor_f Float32Regs:$a), 699 (CVT_f32_f32 Float32Regs:$a, CvtRMI)>; 700def : Pat<(int_nvvm_floor_d Float64Regs:$a), 701 (CVT_f64_f64 Float64Regs:$a, CvtRMI)>; 702 703def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a), 704 (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>; 705def : Pat<(int_nvvm_ceil_f Float32Regs:$a), 706 (CVT_f32_f32 Float32Regs:$a, CvtRPI)>; 707def : Pat<(int_nvvm_ceil_d Float64Regs:$a), 708 (CVT_f64_f64 Float64Regs:$a, CvtRPI)>; 709 710// 711// Abs 712// 713 714def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs, 715 Float32Regs, int_nvvm_fabs_ftz_f>; 716def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs, 717 Float32Regs, int_nvvm_fabs_f>; 718 719def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs, 720 Float64Regs, int_nvvm_fabs_d>; 721 722// 723// Round 724// 725 726def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a), 727 (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>; 728def : Pat<(int_nvvm_round_f Float32Regs:$a), 729 (CVT_f32_f32 Float32Regs:$a, CvtRNI)>; 730def : Pat<(int_nvvm_round_d Float64Regs:$a), 731 (CVT_f64_f64 Float64Regs:$a, CvtRNI)>; 732 733// 734// Trunc 735// 736 737def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a), 738 (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>; 739def : Pat<(int_nvvm_trunc_f 
Float32Regs:$a), 740 (CVT_f32_f32 Float32Regs:$a, CvtRZI)>; 741def : Pat<(int_nvvm_trunc_d Float64Regs:$a), 742 (CVT_f64_f64 Float64Regs:$a, CvtRZI)>; 743 744// 745// Saturate 746// 747 748def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a), 749 (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>; 750def : Pat<(int_nvvm_saturate_f Float32Regs:$a), 751 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; 752def : Pat<(int_nvvm_saturate_d Float64Regs:$a), 753 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; 754 755// 756// Exp2 Log2 757// 758 759def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;", 760 Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>; 761def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;", 762 Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>; 763def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;", 764 Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>; 765 766def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;", 767 Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>; 768def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;", 769 Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>; 770def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;", 771 Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>; 772 773// 774// Sin Cos 775// 776 777def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;", 778 Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>; 779def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;", 780 Float32Regs, Float32Regs, int_nvvm_sin_approx_f>; 781 782def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;", 783 Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>; 784def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;", 785 Float32Regs, Float32Regs, int_nvvm_cos_approx_f>; 786 787// 788// Fma 789// 790 791def INT_NVVM_FMA_RN_FTZ_F 792 : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", 
Float32Regs, 793 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>; 794def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;", 795 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>; 796def INT_NVVM_FMA_RZ_FTZ_F 797 : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, 798 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>; 799def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;", 800 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>; 801def INT_NVVM_FMA_RM_FTZ_F 802 : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, 803 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>; 804def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;", 805 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>; 806def INT_NVVM_FMA_RP_FTZ_F 807 : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, 808 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>; 809def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;", 810 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>; 811 812def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;", 813 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>; 814def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;", 815 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>; 816def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;", 817 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>; 818def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;", 819 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>; 820 821// 822// Rcp 823// 824 825def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;", 826 Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>; 
// Reciprocal, continued: remaining f32 rounding-mode variants (each with and
// without .ftz), then the f64 variants.
def INT_NVVM_RCP_RN_F
  : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
  : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
  : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
  : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
  : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
  : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
  : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_rcp_rp_f>;

def INT_NVVM_RCP_RN_D
  : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs, Float64Regs,
             int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
  : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs, Float64Regs,
             int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
  : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs, Float64Regs,
             int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
  : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs, Float64Regs,
             int_nvvm_rcp_rp_d>;

def INT_NVVM_RCP_APPROX_FTZ_D
  : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;", Float64Regs, Float64Regs,
             int_nvvm_rcp_approx_ftz_d>;

//
// Sqrt
//

// f32: four rounding modes and the approximate form, each with/without .ftz.
def INT_NVVM_SQRT_RN_FTZ_F
  : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
  : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
  : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
  : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
  : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
  : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
  : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
  : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F
  : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
  : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_sqrt_approx_f>;

// f64: four rounding modes.
def INT_NVVM_SQRT_RN_D
  : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs, Float64Regs,
             int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
  : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs, Float64Regs,
             int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
  : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs, Float64Regs,
             int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
  : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs, Float64Regs,
             int_nvvm_sqrt_rp_d>;

// The plain nvvm_sqrt intrinsic selects one of the f32 instructions above,
// depending on the doF32FTZ / do_SQRTF32_RN predicates. The patterns are
// listed from most to least constrained; keep this order.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;

//
// Rsqrt
//

def INT_NVVM_RSQRT_APPROX_FTZ_F
  : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F
  : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
             int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
  : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;", Float64Regs, Float64Regs,
             int_nvvm_rsqrt_approx_d>;

//
// Add
//

// f32: four rounding modes, each with/without .ftz.
def INT_NVVM_ADD_RN_FTZ_F
  : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
             Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
  : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;", Float32Regs,
             Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F
  : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
             Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
  : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;", Float32Regs,
             Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F
  : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
             Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
  : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;", Float32Regs,
             Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F
  : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
             Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
  : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;", Float32Regs,
             Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// f64: four rounding modes.
def INT_NVVM_ADD_RN_D
  : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;", Float64Regs,
             Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
  : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;", Float64Regs,
             Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
  : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;", Float64Regs,
             Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D
  : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;", Float64Regs,
             Float64Regs, Float64Regs, int_nvvm_add_rp_d>;

//
// Convert
//

// Conversion intrinsics are not separate instructions: each one is rewritten
// to the matching CVT_<dst>_<src> instruction with a conversion-mode operand.

// f64 -> f32
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP)>;

// f64 -> s32
def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u32
def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;

// s32 -> f64
def : Pat<(int_nvvm_i2d_rn Int32Regs:$a), (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz Int32Regs:$a), (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm Int32Regs:$a), (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp Int32Regs:$a), (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f64 (the .rp variant follows immediately after this group)
def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a), (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a), (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a), (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
// u32 -> f64, round towards +inf (last entry of the ui2d group).
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a), (CVT_f64_u32 Int32Regs:$a, CvtRP)>;

// f32 -> s32, four integer-rounding modes, each with/without .ftz.
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u32, four integer-rounding modes, each with/without .ftz.
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;

// s32 -> f32
def : Pat<(int_nvvm_i2f_rn Int32Regs:$a), (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$a), (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$a), (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$a), (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f32
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a), (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a), (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a), (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a), (CVT_f32_u32 Int32Regs:$a, CvtRP)>;

// Pack two 32-bit registers into one 64-bit float register with a single
// mov.b64 from a {$src0, $src1} vector operand.
def INT_NVVM_LOHI_I2D
  : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};", Float64Regs,
             Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

// Unpack a 64-bit float register into a pair of 32-bit registers and keep
// only the first element; the other half lands in a scratch %temp register.
def INT_NVVM_D2I_LO
  : F_MATH_1<!strconcat("{{\n\t",
                        ".reg .b32 %temp; \n\t",
                        "mov.b64 \t{$dst, %temp}, $src0;\n\t",
                        "}}"),
             Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
// Same as above, but keeps the second element of the pair.
def INT_NVVM_D2I_HI
  : F_MATH_1<!strconcat("{{\n\t",
                        ".reg .b32 %temp; \n\t",
                        "mov.b64 \t{%temp, $dst}, $src0;\n\t",
                        "}}"),
             Int32Regs, Float64Regs, int_nvvm_d2i_hi>;

// f32 -> s64
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u64
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;

// f64 -> s64
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u64
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;

// s64 -> f32
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a), (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a), (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a), (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a), (CVT_f32_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f32
def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a), (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a), (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a), (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a), (CVT_f32_u64 Int64Regs:$a, CvtRP)>;

// s64 -> f64
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a), (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a), (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a), (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a), (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f64
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a), (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a), (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a), (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a), (CVT_f64_u64 Int64Regs:$a, CvtRP)>;


// f32 -> f16: convert with CVT_f16_f32, then pass the result through
// BITCONVERT_16_F2I.
def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;

//
// Bitcast
//

// Bit-preserving moves between integer and float registers of equal width.
def INT_NVVM_BITCAST_F2I
  : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs, Float32Regs,
             int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F
  : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs, Int32Regs,
             int_nvvm_bitcast_i2f>;

def INT_NVVM_BITCAST_LL2D
  : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs, Int64Regs,
             int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL
  : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs, Float64Regs,
             int_nvvm_bitcast_d2ll>;

//
// FNS
//

// fns.b32 with (mask, base, offset) operands. `ins` is the operand list and
// `Operands` the intrinsic DAG to match; gated on hasPTX60 and hasSM30.
class INT_FNS_MBO<dag ins, dag Operands>
  : NVPTXInst<(outs Int32Regs:$dst), ins,
              "fns.b32 \t$dst, $mask, $base, $offset;",
              [(set Int32Regs:$dst, Operands )]>,
    Requires<[hasPTX60, hasSM30]>;

// One def per register ('r') / immediate ('i') combination of
// mask, base and offset, in that order.
def INT_FNS_rrr
  : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
      (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_rri
  : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
      (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_rir
  : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
      (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_rii
  : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
      (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr
  : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
      (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_iri
  : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
      (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_iir
  : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
      (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii
  : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
      (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;

//-----------------------------------
// Atomic Functions
//-----------------------------------

// PatFrag wrappers that add an address-space check (see AS_match) on top of
// an atomic fragment.
class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.global>;
class ATOMIC_SHARED_CHK <dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.shared>;
class ATOMIC_GENERIC_CHK <dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.generic>;

// Two-operand atomic "atom<SpaceStr><OpcStr><TypeStr> $dst, [$addr], $b" for
// one pointer register class. Emits a register form (reg) and an immediate
// form (imm) of the value operand; both are gated on Pred.
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          list<Predicate> Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
        !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
        [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
      Requires<Pred>;
  def imm
    : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
        !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
        [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
      Requires<Pred>;
}
// Instantiates F_ATOMIC_2_imp for 32-bit (p32) and 64-bit (p64) pointers.
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      SDNode IMM, list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, IMM, Pred>;
  defm p64
    : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, IMM, Pred>;
}

// Two-operand atomic that negates its value operand first: $b is negated
// into a scratch register `temp`, which is then used as the atom operand.
// TypeStr is the bare bit width here (it is pasted after ".s" and ".u").
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, list<Predicate> Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
        !strconcat(
          "{{ \n\t",
          ".reg \t.s", TypeStr, " temp; \n\t",
          "neg.s", TypeStr, " \ttemp, $b; \n\t",
          "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
          "}}"),
        [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
      Requires<Pred>;
}
// Instantiates F_ATOMIC_2_NEG_imp for 32-bit (p32) and 64-bit (p64) pointers.
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
                          string TypeStr, string OpcStr, PatFrag IntOp,
                          list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                         IntOp, Pred>;
  defm p64
    : F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                         IntOp, Pred>;
}

// Three-operand atomic "atom... $dst, [$addr], $b, $c" for one pointer
// register class. Emits all four register/immediate combinations of $b and
// $c: reg (r,r), imm1 (i,r), imm2 (r,i), imm3 (i,i).
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType,
                          list<Predicate> Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
        (ins ptrclass:$addr, regclass:$b, regclass:$c),
        !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
        [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
      Requires<Pred>;

  def imm1
    : NVPTXInst<(outs regclass:$dst),
        (ins ptrclass:$addr, IMMType:$b, regclass:$c),
        !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
        [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
      Requires<Pred>;

  def imm2
    : NVPTXInst<(outs regclass:$dst),
        (ins ptrclass:$addr, regclass:$b, IMMType:$c),
        !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
        [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
      Requires<Pred>;

  def imm3
    : NVPTXInst<(outs regclass:$dst),
        (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
        !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
        [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
      Requires<Pred>;
}
// Instantiates F_ATOMIC_3_imp for 32-bit (p32) and 64-bit (p64) pointers.
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, Pred>;
  defm p64
    : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, Pred>;
}

// atom_add

// Address-space-checked fragments for integer atomic add (32- and 64-bit).
def atomic_load_add_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_add_64 node:$a, node:$b)>;
// Fragments without a width suffix wrap atomic_load_fadd (floating point).
def atomic_load_add_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_fadd node:$a, node:$b)>;
// Shared and generic address-space fragments for floating-point atomic add
// (both wrap atomic_load_fadd).
def atomic_load_add_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_fadd node:$a, node:$b)>;

// Integer atom.add, 32-bit. The *_USE_G variant matches the generic-space
// fragment but prints the ".global" form of the instruction.
defm INT_PTX_ATOM_ADD_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
               atomic_load_add_32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm>;

// Integer atom.add, 64-bit.
defm INT_PTX_ATOM_ADD_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
               atomic_load_add_64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm>;

// Floating-point atom.add, f32.
defm INT_PTX_ATOM_ADD_G_F32
  : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
               atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32
  : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
               atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32
  : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
               atomic_load_add_gen, f32imm, fpimm>;

// Floating-point atom.add, f64; only selected when hasAtomAddF64 holds.
defm INT_PTX_ATOM_ADD_G_F64
  : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
               atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64
  : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
               atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64
  : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
               atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;

// atom_sub

// Address-space-checked fragments for integer atomic subtract.
def atomic_load_sub_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_sub_64 node:$a, node:$b)>;

// Subtraction is emitted through F_ATOMIC_2_NEG with the ".add" opcode,
// i.e. as an atomic add of the negated operand.
defm INT_PTX_ATOM_SUB_G_32
  : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_32_g>;
defm INT_PTX_ATOM_SUB_G_64
  : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_64_g>;
defm INT_PTX_ATOM_SUB_GEN_32
  : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
                   atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G
  : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_S_32
  : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
                   atomic_load_sub_32_s>;
defm INT_PTX_ATOM_SUB_S_64
  : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
                   atomic_load_sub_64_s>;
defm INT_PTX_ATOM_SUB_GEN_64
  : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
                   atomic_load_sub_64_gen>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G
  : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_64_gen>;

// atom_swap

// Address-space-checked fragments for atomic swap.
def atomic_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_swap_64 node:$a, node:$b)>;

// Swap is printed as atom.exch on untyped .b32 / .b64 data.
defm INT_PTX_ATOM_SWAP_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
               atomic_swap_32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
               atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
               atomic_swap_64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
               atomic_swap_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_64_gen, i64imm, imm>;

// atom_max

// Address-space-checked fragments for signed atomic max (the remaining max
// fragments follow this group).
def atomic_load_max_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_umax_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umax_64 node:$a, node:$b)>;

// atom.max instantiations.  Signed forms use the .s32/.s64 type strings,
// unsigned forms use .u32/.u64.  Each *_USE_G entry pairs the generic-space
// fragment (*_gen) with the ".global" space string.
defm INT_PTX_ATOM_LOAD_MAX_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".max",
               atomic_load_max_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
               atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
               atomic_load_max_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".max",
               atomic_load_max_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
               atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
               atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".max",
               atomic_load_umax_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
               atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
               atomic_load_umax_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".max",
               atomic_load_umax_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
               atomic_load_umax_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
               atomic_load_umax_64_gen, i64imm, imm>;

// atom_min
//
// Fragment suffixes select the address-space check wrapper:
//   _g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK,
//   _gen -> ATOMIC_GENERIC_CHK.

def atomic_load_min_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umin_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_LOAD_MIN_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".min",
               atomic_load_min_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
               atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
               atomic_load_min_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".min",
               atomic_load_min_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
               atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
               atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".min",
               atomic_load_umin_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
               atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
               atomic_load_umin_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".min",
               atomic_load_umin_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
               atomic_load_umin_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
               atomic_load_umin_64_gen, i64imm, imm>;

// atom_inc  atom_dec
//
// These fragments wrap the int_nvvm_atomic_load_{inc,dec}_32 intrinsics
// rather than generic atomic_load_* nodes; only 32-bit forms are defined.

def atomic_load_inc_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;

defm INT_PTX_ATOM_INC_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_g, i32imm, imm>;
defm INT_PTX_ATOM_INC_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
               atomic_load_inc_32_s, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_g, i32imm, imm>;
defm INT_PTX_ATOM_DEC_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
               atomic_load_dec_32_s, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm>;

// atom_and

def atomic_load_and_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_and_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_AND_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_g, i32imm, imm>;
defm INT_PTX_ATOM_AND_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
               atomic_load_and_32_s, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_64_g, i64imm, imm>;
defm INT_PTX_ATOM_AND_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
               atomic_load_and_64_s, i64imm, imm>;
defm INT_PTX_ATOM_AND_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
               atomic_load_and_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_64_gen, i64imm, imm>;

// atom_or

def atomic_load_or_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_or_64 node:$a, node:$b)>;

// Note: the .shared entries come last within each width here; the
// definition order is kept exactly as it was.
defm INT_PTX_ATOM_OR_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_32_g, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
               atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
               atomic_load_or_32_s, i32imm, imm>;
defm INT_PTX_ATOM_OR_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_64_g, i64imm, imm>;
defm INT_PTX_ATOM_OR_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
               atomic_load_or_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_OR_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
               atomic_load_or_64_s, i64imm, imm>;

// atom_xor

def atomic_load_xor_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_xor_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_XOR_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_32_g, i32imm, imm>;
defm INT_PTX_ATOM_XOR_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
               atomic_load_xor_32_s, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
               atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_64_g, i64imm, imm>;
defm INT_PTX_ATOM_XOR_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
               atomic_load_xor_64_s, i64imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
               atomic_load_xor_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_64_gen, i64imm, imm>;

// atom_cas
//
// Compare-and-swap fragments take three operands ($a, $b, $c) and are
// instantiated through F_ATOMIC_3 instead of F_ATOMIC_2.

def atomic_cmp_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;

defm INT_PTX_ATOM_CAS_G_32
  : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_32_g, i32imm>;
defm INT_PTX_ATOM_CAS_S_32
  : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
               atomic_cmp_swap_32_s, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32
  : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
               atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G
  : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_G_64
  : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_64_g, i64imm>;
defm INT_PTX_ATOM_CAS_S_64
  : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
               atomic_cmp_swap_64_s, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64
  : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
               atomic_cmp_swap_64_gen, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G
  : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_64_gen, i64imm>;

// Support for scoped atomic operations.  Matches
//   int_nvvm_atomic_{op}_{space}_{type}_{scope}
// and converts it into the appropriate instruction.
// NOTE: not all possible combinations are implemented.
//  'space' is limited to generic as it's the only one needed to support CUDA.
//  'scope' = 'gpu' is default and is handled by regular atomic instructions.
//
// ATOM23_impl is the common instruction record for the 2- and 3-operand
// scoped atomics: a single $result output in `regclass`, caller-supplied
// input operands (`ins`) and match pattern (`Operands`), guarded by `Preds`.
class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
                  dag ins, dag Operands>
  : NVPTXInst<(outs regclass:$result), ins,
              AsmStr,
              [(set regclass:$result, Operands)]>,
    Requires<Preds>;

// Define instruction variants for all addressing modes.
// Two-operand form: emits four anonymous ATOM23_impl records covering
// {32-bit, 64-bit} address register x {register, immediate} value operand.
// The register-operand records carry AddedComplexity = 1; the
// immediate-operand records keep the default.
multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b),
                      (Intr Int32Regs:$src, regclass:$b)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b),
                      (Intr Int64Regs:$src, regclass:$b)>;
  }
  // tablegen can't infer argument types from Intrinsic (though it can
  // from Instruction) so we have to enforce specific type on
  // immediates via explicit cast to ImmTy.
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
}

// Three-operand form: same idea as ATOM2P_impl, but with every
// register/immediate permutation of $b and $c, for both address widths.
multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // $b and $c both in registers.
  let AddedComplexity = 2 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, regclass:$c),
                      (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, regclass:$c),
                      (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
  }
  // Exactly one of $b / $c is an immediate.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, ImmType:$b, regclass:$c),
                      (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, ImmType:$b, regclass:$c),
                      (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, ImmType:$c),
                      (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, ImmType:$c),
                      (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
  }
  // $b and $c both immediates (default complexity).
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}

// Constructs intrinsic name and instruction asm strings.
//
// Asm string:     atom[.<space>][.<scope>].<op>.<type> $result, [$src], $b
//   - the space qualifier is omitted when SpaceStr is "gen"
//   - the scope qualifier is omitted when ScopeStr is "gpu"
// Intrinsic name: int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>]
//   - the scope suffix is omitted only when ScopeStr is empty
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM2P_impl<"atom"
                       # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
                       # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
                       # "." # OpStr # "." # TypeStr
                       # " \t$result, [$src], $b;",
                     !cast<Intrinsic>(
                       "int_nvvm_atomic_" # OpStr
                         # "_" # SpaceStr # "_" # IntTypeStr
                         # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
                     regclass, ImmType, Imm, ImmTy, Preds>;
}
// Three-operand counterpart of ATOM2N_impl; the asm string additionally
// carries the $c operand.
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM3P_impl<"atom"
                       # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
                       # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
                       # "." # OpStr # "." # TypeStr
                       # " \t$result, [$src], $b, $c;",
                     !cast<Intrinsic>(
                       "int_nvvm_atomic_" # OpStr
                         # "_" # SpaceStr # "_" # IntTypeStr
                         # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
                     regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for different address spaces.
// For now we only need variants for generic space pointers.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  defm _gen_
    : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                  regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  defm _gen_
    : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                  regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for different scopes of atomic op.
// Both scopes append hasAtomScope to the caller's predicate list.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // .gpu scope is default and is currently covered by existing
  // atomics w/o explicitly specified scope.
  defm _cta
    : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds,[hasAtomScope])>;
  defm _sys
    : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds,[hasAtomScope])>;
}
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // No need to define ".gpu"-scoped atomics. They do the same thing
  // as the regular, non-scoped atomics defined elsewhere.
  defm _cta
    : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds,[hasAtomScope])>;
  defm _sys
    : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds,[hasAtomScope])>;
}

// Per-operation type tables.  Each entry supplies: the intrinsic type letter
// ("i" or "f"), the PTX type string, the register class, the immediate
// operand and node, the immediate value type, and any extra predicates.

// atom.add
multiclass ATOM2_add_impl<string OpStr> {
  defm _s32
    : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  defm _u32
    : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  defm _u64
    : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
  defm _f32
    : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32, []>;
  defm _f64
    : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
                  [hasAtomAddF64]>;
}

// atom.{and,or,xor}
multiclass ATOM2_bitwise_impl<string OpStr> {
  defm _b32
    : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64
    : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
                  [hasAtomBitwise64]>;
}

// atom.exch
multiclass ATOM2_exch_impl<string OpStr> {
  defm _b32
    : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64
    : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
}

// atom.{min,max}
multiclass ATOM2_minmax_impl<string OpStr> {
  defm _s32
    : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  defm _u32
    : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  defm _s64
    : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
                  [hasAtomMinMax64]>;
  defm _u64
    : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
                  [hasAtomMinMax64]>;
}

// atom.{inc,dec}
multiclass ATOM2_incdec_impl<string OpStr> {
  defm _u32
    : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
}

// atom.cas
multiclass ATOM3_cas_impl<string OpStr> {
  defm _b32
    : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64
    : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
}

// Scoped-atomic instantiations, one per PTX atom operation.
defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR  : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;

//-----------------------------------
// Support for ldu on sm_20 or later
//-----------------------------------

// Don't annotate ldu instructions as mayLoad, as they load from memory that is
// read-only in a kernel.

// Scalar

// One record per address operand kind: 32-bit register, 64-bit register,
// imemAny, MEMri and MEMri64.  The records have empty patterns.  TyStr
// supplies the type suffix and the operand list of the asm string.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  def areg
    : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                "ldu.global." # TyStr, []>,
      Requires<[hasLDU]>;
  def areg64
    : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                "ldu.global." # TyStr, []>,
      Requires<[hasLDU]>;
  def avar
    : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                "ldu.global." # TyStr, []>,
      Requires<[hasLDU]>;
  def ari
    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                "ldu.global." # TyStr, []>,
      Requires<[hasLDU]>;
  def ari64
    : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                "ldu.global." # TyStr, []>,
      Requires<[hasLDU]>;
}

defm INT_PTX_LDU_GLOBAL_i8
  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16
  : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f16
  : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDU_GLOBAL_f16x2
  : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_GLOBAL_f32
  : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64
  : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDU_GLOBAL_p32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_p64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;

// vector

// Elementized vector ldu
//
// Each multiclass below defines one record per address operand kind
// (_areg32, _areg64, _ari32, _ari64, _avar) with one output register per
// vector element.  All records have empty patterns.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int32Regs:$src),
                "ldu.global." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int64Regs:$src),
                "ldu.global." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri:$src),
                "ldu.global." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri64:$src),
                "ldu.global." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins imemAny:$src),
                "ldu.global." # TyStr, []>;
}

multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int32Regs:$src),
                "ldu.global." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int64Regs:$src),
                "ldu.global." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri:$src),
                "ldu.global." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri64:$src),
                "ldu.global." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins imemAny:$src),
                "ldu.global." # TyStr, []>;
}

// Two-element vector ldu.
defm INT_PTX_LDU_G_v2i8_ELE
  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f16_ELE
  : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDU_G_v2f16x2_ELE
  : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
// Four-element vector ldu.
defm INT_PTX_LDU_G_v4i8_ELE
  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE
  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE
  : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE
  : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;


//-----------------------------------
// Support for ldg on sm_35 or later
//-----------------------------------

// Don't annotate ld.global.nc as mayLoad, because these loads go through the
// non-coherent texture cache, and therefore the values read must be read-only
// during the lifetime of the kernel.

// Scalar ld.global.nc: one record per address operand kind (32-bit register,
// 64-bit register, imemAny, MEMri, MEMri64).  The records have empty
// patterns.  TyStr supplies the type suffix and the operand list of the
// asm string.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  def areg
    : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def areg64
    : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def avar
    : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def ari
    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def ari64
    : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
}

defm INT_PTX_LDG_GLOBAL_i8
  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16
  : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32
  : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64
  : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f16
  : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDG_GLOBAL_f16x2
  : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_GLOBAL_f32
  : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64
  : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDG_GLOBAL_p32
  : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_p64
  : LDG_G<"u64 \t$result, [$src];", Int64Regs>;

// vector

// Elementized vector ldg
//
// One record per address operand kind (_areg32, _areg64, _ari32, _ari64,
// _avar), with one output register per vector element and empty patterns.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>;
}

multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>;
}

// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
defm INT_PTX_LDG_G_v2i8_ELE
  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f16_ELE
  : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDG_G_v2f16x2_ELE
  : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDG_G_v4i8_ELE
  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v4f16_ELE
  : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDG_G_v4f16x2_ELE
  : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;


// Non-generic -> generic address conversion (cvta.<space>).
//   _yes      : 32-bit source and result
//   _yes_64   : 64-bit source and result
//   _yes_6432 : 32-bit source widened with cvt.u64.u32 before a 64-bit cvta;
//               only available under useShortPtr.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_6432
    : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
                "{{ .reg .b64 %tmp;\n\t"
                #" cvt.u64.u32 \t%tmp, $src;\n\t"
                #" cvta." # Str # ".u64 \t$result, %tmp; }}",
                [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
      Requires<[useShortPtr]>;
}

// Generic -> non-generic address conversion (cvta.to.<space>).
//   _yes      : 32-bit source and result
//   _yes_64   : 64-bit source and result
//   _yes_3264 : 64-bit cvta.to followed by cvt.u32.u64 to a 32-bit result;
//               only available under useShortPtr.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta.to." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta.to." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_3264
    : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
                "{{ .reg .b64 %tmp;\n\t"
                #" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
                #" cvt.u32.u64 \t$result, %tmp; }}",
                [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
      Requires<[useShortPtr]>;
}

defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;

defm cvta_to_local  : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;


// nvvm.ptr.gen.to.param
// Lowered to a plain register move, in 32- and 64-bit pointer flavours.
def nvvm_ptr_gen_to_param
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              "mov.u32 \t$result, $src;",
              [(set Int32Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              "mov.u64 \t$result, $src;",
              [(set Int64Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;


// nvvm.move intrinsics
def nvvm_move_i16
  : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
              "mov.b16 \t$r, $s;",
              [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.b32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.b64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float
  : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
              "mov.f32 \t$r, $s;",
              [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double
  : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
              "mov.f64 \t$r, $s;",
              [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;
def nvvm_move_ptr32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.u32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.u64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;

// @TODO: Are these actually needed, or will we always just see symbols
// copied to registers first?
/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
                             "mov.u32 \t$r, $s;",
                             [(set Int32Regs:$r,
                               (int_nvvm_move_ptr texternalsym:$s))]>;
def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
                             "mov.u64 \t$r, $s;",
                             [(set Int64Regs:$r,
                               (int_nvvm_move_ptr texternalsym:$s))]>;*/


// MoveParam        %r1, param
// ptr_local_to_gen %r2, %r1
// ptr_gen_to_local %r3, %r2
// ->
// mov %r1, param

// @TODO: Revisit this.  There is a type
// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
// instructions are not currently defined.  However, we can use the ptr
// variants and the asm printer will do the right thing.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen
                   (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen
                   (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;

def texsurf_handles
  : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
              "mov.u64 \t$result, $src;", []>;

//-----------------------------------
// Compiler Error Warn
// - Just ignore them in codegen
//-----------------------------------

// Each intrinsic is emitted as a PTX comment only, for 32- and 64-bit
// operands.
def INT_NVVM_COMPILER_WARN_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int64Regs:$a)]>;


// isspacep

def ISSPACEP_CONST_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.const \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
    Requires<[hasPTX31]>;
def ISSPACEP_CONST_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.const \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
    Requires<[hasPTX31]>;
def ISSPACEP_GLOBAL_32
  : NVPTXInst<(outs Int1Regs:$d), (ins
Int32Regs:$a), 2265 "isspacep.global \t$d, $a;", 2266 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>; 2267def ISSPACEP_GLOBAL_64 2268 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2269 "isspacep.global \t$d, $a;", 2270 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>; 2271def ISSPACEP_LOCAL_32 2272 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 2273 "isspacep.local \t$d, $a;", 2274 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>; 2275def ISSPACEP_LOCAL_64 2276 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2277 "isspacep.local \t$d, $a;", 2278 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>; 2279def ISSPACEP_SHARED_32 2280 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 2281 "isspacep.shared \t$d, $a;", 2282 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>; 2283def ISSPACEP_SHARED_64 2284 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2285 "isspacep.shared \t$d, $a;", 2286 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>; 2287 2288 2289// Special register reads 2290def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d), 2291 (ins SpecialRegs:$r), 2292 "mov.b32 \t$d, $r;", []>; 2293 2294def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>; 2295def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>; 2296def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>; 2297def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>; 2298def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>; 2299def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>; 2300def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>; 2301def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>; 2302def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>; 2303def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>; 2304def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>; 2305def 
: Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>; 2306def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>; 2307def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>; 2308def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>; 2309def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>; 2310def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>; 2311def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>; 2312def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>; 2313def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>; 2314def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>; 2315def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>; 2316def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>; 2317def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>; 2318def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>; 2319def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>; 2320def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>; 2321def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>; 2322def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>; 2323def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>; 2324def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>; 2325def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>; 2326 2327 2328// rotate builtin support 2329 2330def ROTATE_B32_HW_IMM 2331 : NVPTXInst<(outs Int32Regs:$dst), 2332 (ins Int32Regs:$src, i32imm:$amt), 2333 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", 2334 [(set Int32Regs:$dst, 2335 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>, 2336 Requires<[hasHWROT32]> ; 2337 2338def ROTATE_B32_HW_REG 2339 : NVPTXInst<(outs Int32Regs:$dst), 2340 (ins Int32Regs:$src, Int32Regs:$amt), 2341 "shf.l.wrap.b32 
\t$dst, $src, $src, $amt;", 2342 [(set Int32Regs:$dst, 2343 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>, 2344 Requires<[hasHWROT32]> ; 2345 2346def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)), 2347 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, 2348 Requires<[noHWROT32]> ; 2349 2350def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt), 2351 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>, 2352 Requires<[noHWROT32]> ; 2353 2354let hasSideEffects = false in { 2355 def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), 2356 !strconcat("{{\n\t", 2357 ".reg .b32 %dummy;\n\t", 2358 "mov.b64 \t{$dst,%dummy}, $src;\n\t", 2359 "}}"), 2360 []> ; 2361 2362 def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), 2363 !strconcat("{{\n\t", 2364 ".reg .b32 %dummy;\n\t", 2365 "mov.b64 \t{%dummy,$dst}, $src;\n\t", 2366 "}}"), 2367 []> ; 2368} 2369 2370let hasSideEffects = false in { 2371 def PACK_TWO_INT32 2372 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi), 2373 "mov.b64 \t$dst, {{$lo, $hi}};", []> ; 2374} 2375 2376def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src), 2377 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src), 2378 (GET_LO_INT64 Int64Regs:$src))> ; 2379 2380// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so 2381// no side effects. 
2382let hasSideEffects = false in { 2383 def SHF_L_WRAP_B32_IMM 2384 : NVPTXInst<(outs Int32Regs:$dst), 2385 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), 2386 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2387 Requires<[hasHWROT32]>; 2388 2389 def SHF_L_WRAP_B32_REG 2390 : NVPTXInst<(outs Int32Regs:$dst), 2391 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), 2392 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2393 Requires<[hasHWROT32]>; 2394 2395 def SHF_R_WRAP_B32_IMM 2396 : NVPTXInst<(outs Int32Regs:$dst), 2397 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), 2398 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2399 Requires<[hasHWROT32]>; 2400 2401 def SHF_R_WRAP_B32_REG 2402 : NVPTXInst<(outs Int32Regs:$dst), 2403 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), 2404 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2405 Requires<[hasHWROT32]>; 2406} 2407 2408// HW version of rotate 64 2409def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), 2410 (PACK_TWO_INT32 2411 (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), 2412 (GET_LO_INT64 Int64Regs:$src), imm:$amt), 2413 (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), 2414 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>, 2415 Requires<[hasHWROT32]>; 2416 2417def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), 2418 (PACK_TWO_INT32 2419 (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), 2420 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt), 2421 (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), 2422 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>, 2423 Requires<[hasHWROT32]>; 2424 2425 2426def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), 2427 (PACK_TWO_INT32 2428 (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), 2429 (GET_HI_INT64 Int64Regs:$src), imm:$amt), 2430 (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), 2431 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>, 2432 Requires<[hasHWROT32]>; 2433 2434def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), 
2435 (PACK_TWO_INT32 2436 (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), 2437 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt), 2438 (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), 2439 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>, 2440 Requires<[hasHWROT32]>; 2441 2442// SW version of rotate 64 2443def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), 2444 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, 2445 Requires<[noHWROT32]>; 2446def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), 2447 (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>, 2448 Requires<[noHWROT32]>; 2449def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), 2450 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>, 2451 Requires<[noHWROT32]>; 2452def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), 2453 (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>, 2454 Requires<[noHWROT32]>; 2455 2456 2457//----------------------------------- 2458// Texture Intrinsics 2459//----------------------------------- 2460 2461// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be 2462// also defined in NVPTXReplaceImageHandles.cpp 2463 2464// texmode_independent 2465let IsTex = true, IsTexModeUnified = false in { 2466// Texture fetch instructions using handles 2467def TEX_1D_F32_S32 2468 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2469 Float32Regs:$b, Float32Regs:$a), 2470 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), 2471 "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2472 []>; 2473def TEX_1D_F32_F32 2474 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2475 Float32Regs:$b, Float32Regs:$a), 2476 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), 2477 "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2478 []>; 2479def TEX_1D_F32_F32_LEVEL 2480 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2481 Float32Regs:$b, Float32Regs:$a), 2482 (ins Int64Regs:$t, Int64Regs:$s, 
Float32Regs:$x, Float32Regs:$lod), 2483 "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2484 "[$t, $s, \\{$x\\}], $lod;", 2485 []>; 2486def TEX_1D_F32_F32_GRAD 2487 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2488 Float32Regs:$b, Float32Regs:$a), 2489 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2490 Float32Regs:$gradx, Float32Regs:$grady), 2491 "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2492 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 2493 []>; 2494def TEX_1D_S32_S32 2495 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2496 Int32Regs:$b, Int32Regs:$a), 2497 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), 2498 "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2499 []>; 2500def TEX_1D_S32_F32 2501 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2502 Int32Regs:$b, Int32Regs:$a), 2503 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), 2504 "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2505 []>; 2506def TEX_1D_S32_F32_LEVEL 2507 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2508 Int32Regs:$b, Int32Regs:$a), 2509 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2510 Float32Regs:$lod), 2511 "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2512 "[$t, $s, \\{$x\\}], $lod;", 2513 []>; 2514def TEX_1D_S32_F32_GRAD 2515 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2516 Int32Regs:$b, Int32Regs:$a), 2517 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2518 Float32Regs:$gradx, Float32Regs:$grady), 2519 "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2520 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 2521 []>; 2522def TEX_1D_U32_S32 2523 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2524 Int32Regs:$b, Int32Regs:$a), 2525 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), 2526 "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2527 []>; 2528def TEX_1D_U32_F32 2529 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2530 Int32Regs:$b, Int32Regs:$a), 2531 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), 2532 
"tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2533 []>; 2534def TEX_1D_U32_F32_LEVEL 2535 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2536 Int32Regs:$b, Int32Regs:$a), 2537 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2538 Float32Regs:$lod), 2539 "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2540 "[$t, $s, \\{$x\\}], $lod;", 2541 []>; 2542def TEX_1D_U32_F32_GRAD 2543 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2544 Int32Regs:$b, Int32Regs:$a), 2545 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2546 Float32Regs:$gradx, Float32Regs:$grady), 2547 "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2548 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 2549 []>; 2550 2551def TEX_1D_ARRAY_F32_S32 2552 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2553 Float32Regs:$b, Float32Regs:$a), 2554 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 2555 "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 2556 "[$t, $s, \\{$l, $x\\}];", 2557 []>; 2558def TEX_1D_ARRAY_F32_F32 2559 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2560 Float32Regs:$b, Float32Regs:$a), 2561 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), 2562 "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2563 "[$t, $s, \\{$l, $x\\}];", 2564 []>; 2565def TEX_1D_ARRAY_F32_F32_LEVEL 2566 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2567 Float32Regs:$b, Float32Regs:$a), 2568 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2569 Float32Regs:$lod), 2570 "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2571 "[$t, $s, \\{$l, $x\\}], $lod;", 2572 []>; 2573def TEX_1D_ARRAY_F32_F32_GRAD 2574 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2575 Float32Regs:$b, Float32Regs:$a), 2576 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2577 Float32Regs:$gradx, Float32Regs:$grady), 2578 "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2579 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 2580 []>; 2581def TEX_1D_ARRAY_S32_S32 2582 : 
NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2583 Int32Regs:$b, Int32Regs:$a), 2584 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 2585 "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 2586 "[$t, $s, \\{$l, $x\\}];", 2587 []>; 2588def TEX_1D_ARRAY_S32_F32 2589 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2590 Int32Regs:$b, Int32Regs:$a), 2591 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), 2592 "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2593 "[$t, $s, \\{$l, $x\\}];", 2594 []>; 2595def TEX_1D_ARRAY_S32_F32_LEVEL 2596 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2597 Int32Regs:$b, Int32Regs:$a), 2598 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2599 Float32Regs:$lod), 2600 "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2601 "[$t, $s, \\{$l, $x\\}], $lod;", 2602 []>; 2603def TEX_1D_ARRAY_S32_F32_GRAD 2604 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2605 Int32Regs:$b, Int32Regs:$a), 2606 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2607 Float32Regs:$gradx, Float32Regs:$grady), 2608 "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2609 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 2610 []>; 2611def TEX_1D_ARRAY_U32_S32 2612 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2613 Int32Regs:$b, Int32Regs:$a), 2614 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 2615 "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 2616 "[$t, $s, \\{$l, $x\\}];", 2617 []>; 2618def TEX_1D_ARRAY_U32_F32 2619 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2620 Int32Regs:$b, Int32Regs:$a), 2621 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), 2622 "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2623 "[$t, $s, \\{$l, $x\\}];", 2624 []>; 2625def TEX_1D_ARRAY_U32_F32_LEVEL 2626 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2627 Int32Regs:$b, Int32Regs:$a), 2628 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2629 Float32Regs:$lod), 2630 "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, 
$a\\}, " 2631 "[$t, $s, \\{$l, $x\\}], $lod;", 2632 []>; 2633def TEX_1D_ARRAY_U32_F32_GRAD 2634 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2635 Int32Regs:$b, Int32Regs:$a), 2636 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2637 Float32Regs:$gradx, Float32Regs:$grady), 2638 "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2639 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 2640 []>; 2641 2642def TEX_2D_F32_S32 2643 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2644 Float32Regs:$b, Float32Regs:$a), 2645 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 2646 "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 2647 "[$t, $s, \\{$x, $y\\}];", 2648 []>; 2649def TEX_2D_F32_F32 2650 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2651 Float32Regs:$b, Float32Regs:$a), 2652 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2653 "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2654 "[$t, $s, \\{$x, $y\\}];", 2655 []>; 2656def TEX_2D_F32_F32_LEVEL 2657 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2658 Float32Regs:$b, Float32Regs:$a), 2659 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2660 Float32Regs:$lod), 2661 "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2662 "[$t, $s, \\{$x, $y\\}], $lod;", 2663 []>; 2664def TEX_2D_F32_F32_GRAD 2665 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2666 Float32Regs:$b, Float32Regs:$a), 2667 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2668 Float32Regs:$gradx0, Float32Regs:$gradx1, 2669 Float32Regs:$grady0, Float32Regs:$grady1), 2670 "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2671 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 2672 "\\{$grady0, $grady1\\};", 2673 []>; 2674def TEX_2D_S32_S32 2675 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2676 Int32Regs:$b, Int32Regs:$a), 2677 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 2678 "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 2679 "[$t, $s, \\{$x, $y\\}];", 2680 
[]>; 2681def TEX_2D_S32_F32 2682 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2683 Int32Regs:$b, Int32Regs:$a), 2684 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2685 "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2686 "[$t, $s, \\{$x, $y\\}];", 2687 []>; 2688def TEX_2D_S32_F32_LEVEL 2689 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2690 Int32Regs:$b, Int32Regs:$a), 2691 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2692 Float32Regs:$lod), 2693 "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2694 "[$t, $s, \\{$x, $y\\}], $lod;", 2695 []>; 2696def TEX_2D_S32_F32_GRAD 2697 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2698 Int32Regs:$b, Int32Regs:$a), 2699 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2700 Float32Regs:$gradx0, Float32Regs:$gradx1, 2701 Float32Regs:$grady0, Float32Regs:$grady1), 2702 "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2703 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 2704 "\\{$grady0, $grady1\\};", 2705 []>; 2706def TEX_2D_U32_S32 2707 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2708 Int32Regs:$b, Int32Regs:$a), 2709 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 2710 "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 2711 "[$t, $s, \\{$x, $y\\}];", 2712 []>; 2713def TEX_2D_U32_F32 2714 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2715 Int32Regs:$b, Int32Regs:$a), 2716 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2717 "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2718 "[$t, $s, \\{$x, $y\\}];", 2719 []>; 2720def TEX_2D_U32_F32_LEVEL 2721 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2722 Int32Regs:$b, Int32Regs:$a), 2723 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2724 Float32Regs:$lod), 2725 "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2726 "[$t, $s, \\{$x, $y\\}], $lod;", 2727 []>; 2728def TEX_2D_U32_F32_GRAD 2729 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2730 Int32Regs:$b, Int32Regs:$a), 2731 (ins 
Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2732 Float32Regs:$gradx0, Float32Regs:$gradx1, 2733 Float32Regs:$grady0, Float32Regs:$grady1), 2734 "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2735 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 2736 "\\{$grady0, $grady1\\};", 2737 []>; 2738 2739def TEX_2D_ARRAY_F32_S32 2740 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2741 Float32Regs:$b, Float32Regs:$a), 2742 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 2743 Int32Regs:$y), 2744 "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 2745 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2746 []>; 2747def TEX_2D_ARRAY_F32_F32 2748 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2749 Float32Regs:$b, Float32Regs:$a), 2750 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2751 Float32Regs:$y), 2752 "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2753 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2754 []>; 2755def TEX_2D_ARRAY_F32_F32_LEVEL 2756 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2757 Float32Regs:$b, Float32Regs:$a), 2758 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2759 Float32Regs:$y, Float32Regs:$lod), 2760 "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2761 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", 2762 []>; 2763def TEX_2D_ARRAY_F32_F32_GRAD 2764 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2765 Float32Regs:$b, Float32Regs:$a), 2766 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2767 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1, 2768 Float32Regs:$grady0, Float32Regs:$grady1), 2769 "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2770 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 2771 "\\{$grady0, $grady1\\};", 2772 []>; 2773def TEX_2D_ARRAY_S32_S32 2774 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2775 Int32Regs:$b, Int32Regs:$a), 2776 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 2777 Int32Regs:$y), 2778 "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, 
$a\\}, " 2779 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2780 []>; 2781def TEX_2D_ARRAY_S32_F32 2782 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2783 Int32Regs:$b, Int32Regs:$a), 2784 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2785 Float32Regs:$y), 2786 "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2787 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2788 []>; 2789def TEX_2D_ARRAY_S32_F32_LEVEL 2790 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2791 Int32Regs:$b, Int32Regs:$a), 2792 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2793 Float32Regs:$y, Float32Regs:$lod), 2794 "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2795 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", 2796 []>; 2797def TEX_2D_ARRAY_S32_F32_GRAD 2798 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2799 Int32Regs:$b, Int32Regs:$a), 2800 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2801 Float32Regs:$y, 2802 Float32Regs:$gradx0, Float32Regs:$gradx1, 2803 Float32Regs:$grady0, Float32Regs:$grady1), 2804 "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2805 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 2806 "\\{$grady0, $grady1\\};", 2807 []>; 2808def TEX_2D_ARRAY_U32_S32 2809 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2810 Int32Regs:$b, Int32Regs:$a), 2811 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 2812 Int32Regs:$y), 2813 "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 2814 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2815 []>; 2816def TEX_2D_ARRAY_U32_F32 2817 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2818 Int32Regs:$b, Int32Regs:$a), 2819 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2820 Float32Regs:$y), 2821 "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2822 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2823 []>; 2824def TEX_2D_ARRAY_U32_F32_LEVEL 2825 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2826 Int32Regs:$b, Int32Regs:$a), 2827 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2828 Float32Regs:$y, 
Float32Regs:$lod), 2829 "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2830 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", 2831 []>; 2832def TEX_2D_ARRAY_U32_F32_GRAD 2833 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2834 Int32Regs:$b, Int32Regs:$a), 2835 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2836 Float32Regs:$y, 2837 Float32Regs:$gradx0, Float32Regs:$gradx1, 2838 Float32Regs:$grady0, Float32Regs:$grady1), 2839 "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2840 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 2841 "\\{$grady0, $grady1\\};", 2842 []>; 2843 2844def TEX_3D_F32_S32 2845 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2846 Float32Regs:$b, Float32Regs:$a), 2847 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 2848 Int32Regs:$z), 2849 "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 2850 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2851 []>; 2852def TEX_3D_F32_F32 2853 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2854 Float32Regs:$b, Float32Regs:$a), 2855 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2856 Float32Regs:$z), 2857 "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2858 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2859 []>; 2860def TEX_3D_F32_F32_LEVEL 2861 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2862 Float32Regs:$b, Float32Regs:$a), 2863 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2864 Float32Regs:$z, Float32Regs:$lod), 2865 "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2866 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2867 []>; 2868def TEX_3D_F32_F32_GRAD 2869 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2870 Float32Regs:$b, Float32Regs:$a), 2871 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2872 Float32Regs:$z, 2873 Float32Regs:$gradx0, Float32Regs:$gradx1, 2874 Float32Regs:$gradx2, Float32Regs:$grady0, 2875 Float32Regs:$grady1, Float32Regs:$grady2), 2876 "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2877 "[$t, $s, \\{$x, $y, $z, 
$z\\}], " 2878 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " 2879 "\\{$grady0, $grady1, $grady2, $grady2\\};", 2880 []>; 2881def TEX_3D_S32_S32 2882 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2883 Int32Regs:$b, Int32Regs:$a), 2884 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 2885 Int32Regs:$z), 2886 "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 2887 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2888 []>; 2889def TEX_3D_S32_F32 2890 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2891 Int32Regs:$b, Int32Regs:$a), 2892 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2893 Float32Regs:$z), 2894 "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2895 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2896 []>; 2897def TEX_3D_S32_F32_LEVEL 2898 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2899 Int32Regs:$b, Int32Regs:$a), 2900 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2901 Float32Regs:$z, Float32Regs:$lod), 2902 "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2903 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2904 []>; 2905def TEX_3D_S32_F32_GRAD 2906 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2907 Int32Regs:$b, Int32Regs:$a), 2908 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2909 Float32Regs:$z, 2910 Float32Regs:$gradx0, Float32Regs:$gradx1, 2911 Float32Regs:$gradx2, Float32Regs:$grady0, 2912 Float32Regs:$grady1, Float32Regs:$grady2), 2913 "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2914 "[$t, $s, \\{$x, $y, $z, $z\\}], " 2915 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " 2916 "\\{$grady0, $grady1, $grady2, $grady2\\};", 2917 []>; 2918def TEX_3D_U32_S32 2919 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2920 Int32Regs:$b, Int32Regs:$a), 2921 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 2922 Int32Regs:$z), 2923 "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 2924 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2925 []>; 2926def TEX_3D_U32_F32 2927 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2928 Int32Regs:$b, 
Int32Regs:$a), 2929 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2930 Float32Regs:$z), 2931 "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2932 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2933 []>; 2934def TEX_3D_U32_F32_LEVEL 2935 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2936 Int32Regs:$b, Int32Regs:$a), 2937 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2938 Float32Regs:$z, Float32Regs:$lod), 2939 "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2940 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2941 []>; 2942def TEX_3D_U32_F32_GRAD 2943 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2944 Int32Regs:$b, Int32Regs:$a), 2945 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2946 Float32Regs:$z, 2947 Float32Regs:$gradx0, Float32Regs:$gradx1, 2948 Float32Regs:$gradx2, Float32Regs:$grady0, 2949 Float32Regs:$grady1, Float32Regs:$grady2), 2950 "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2951 "[$t, $s, \\{$x, $y, $z, $z\\}], " 2952 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " 2953 "\\{$grady0, $grady1, $grady2, $grady2\\};", 2954 []>; 2955 2956def TEX_CUBE_F32_F32 2957 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2958 Float32Regs:$b, Float32Regs:$a), 2959 (ins Int64Regs:$t, Int64Regs:$s, 2960 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 2961 "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2962 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2963 []>; 2964def TEX_CUBE_F32_F32_LEVEL 2965 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2966 Float32Regs:$b, Float32Regs:$a), 2967 (ins Int64Regs:$t, Int64Regs:$s, 2968 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 2969 Float32Regs:$lod), 2970 "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2971 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2972 []>; 2973def TEX_CUBE_S32_F32 2974 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2975 Int32Regs:$b, Int32Regs:$a), 2976 (ins Int64Regs:$t, Int64Regs:$s, 2977 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 2978 "tex.cube.v4.s32.f32 
\t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;

// tex.cube fetches taking $t and $s plus (x, y, z) f32 coordinates. Each asm
// string repeats $z in the fourth slot of the coordinate vector.

// s32 results, extra $lod operand (tex.level).
def TEX_CUBE_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
    []>;

// u32 results.
def TEX_CUBE_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;

// u32 results, extra $lod operand (tex.level).
def TEX_CUBE_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
    []>;

// tex.acube fetches: $t, $s, an Int32Regs $l operand, and (x, y, z) f32
// coordinates. $l occupies the first slot of the coordinate vector.

// f32 results.
def TEX_CUBE_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}];",
    []>;

// f32 results, extra $lod operand (tex.level).
def TEX_CUBE_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
    []>;

// s32 results.
def TEX_CUBE_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z),
    "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}];",
    []>;

// tex.acube fetches: $t, $s, an Int32Regs $l operand, and (x, y, z) f32
// coordinates. $l occupies the first slot of the coordinate vector.

// s32 results, extra $lod operand (tex.level).
def TEX_CUBE_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
    []>;

// u32 results.
def TEX_CUBE_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}];",
    []>;

// u32 results, extra $lod operand (tex.level).
def TEX_CUBE_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
    []>;

// tld4.<comp>.2d instructions: $t, $s and (x, y) f32 coordinates in, four
// values $v0..$v3 out. The R/G/B/A in the def name matches the .r/.g/.b/.a
// suffix in the mnemonic.

// .r, f32 results.
def TLD4_R_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;

// .g, f32 results.
def TLD4_G_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;

// .b, f32 results.
def TLD4_B_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.f32.f32 \t\\{$v0,
$v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;

// tld4.<comp>.2d instructions: $t, $s and (x, y) f32 coordinates in, four
// values $v0..$v3 out. The R/G/B/A in the def name matches the .r/.g/.b/.a
// suffix in the mnemonic.

// .a, f32 results.
def TLD4_A_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;

// .r, s32 results.
def TLD4_R_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;

// .g, s32 results.
def TLD4_G_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;

// .b, s32 results.
def TLD4_B_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;

// .a, s32 results.
def TLD4_A_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;

// .r, u32 results.
def TLD4_R_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;

// .g, u32 results.
def TLD4_G_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.u32.f32
\t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;

// tld4.b.2d, u32 results, f32 coordinates, $t and $s operands.
def TLD4_B_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;

// tld4.a.2d, u32 results, f32 coordinates, $t and $s operands.
def TLD4_A_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
}


// texmode_unified
// Every def in this scope is tagged IsTex and IsTexModeUnified, and takes a
// single Int64Regs $t operand (there is no $s operand).
let IsTex = true, IsTexModeUnified = true in {
// Texture fetch instructions using handles

// tex.1d, f32 results, s32 coordinate.
def TEX_UNIFIED_1D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x),
    "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;

// tex.1d, f32 results, f32 coordinate.
def TEX_UNIFIED_1D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x),
    "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;

// tex.level.1d, f32 results, f32 coordinate, extra $lod operand.
def TEX_UNIFIED_1D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], $lod;",
    []>;

// tex.grad.1d, f32 results, f32 coordinate, extra $gradx / $grady operands.
def TEX_UNIFIED_1D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;

// tex.1d, s32 results, s32 coordinate.
def TEX_UNIFIED_1D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x),
    "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;

// Unified-mode tex.1d fetches: a single $t operand plus one $x coordinate.

// s32 results, f32 coordinate.
def TEX_UNIFIED_1D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x),
    "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;

// s32 results, f32 coordinate, extra $lod operand (tex.level).
def TEX_UNIFIED_1D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], $lod;",
    []>;

// s32 results, f32 coordinate, extra $gradx / $grady operands (tex.grad).
def TEX_UNIFIED_1D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;

// u32 results, s32 coordinate.
def TEX_UNIFIED_1D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x),
    "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;

// u32 results, f32 coordinate.
def TEX_UNIFIED_1D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x),
    "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;

// u32 results, f32 coordinate, extra $lod operand (tex.level).
def TEX_UNIFIED_1D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], $lod;",
    []>;

// u32 results, f32 coordinate, extra $gradx / $grady operands (tex.grad).
def TEX_UNIFIED_1D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], \\{$gradx\\},
\\{$grady\\};",
    []>;

// Unified-mode tex.a1d fetches: a single $t operand, an Int32Regs $l operand
// placed first in the coordinate vector, and one $x coordinate.

// f32 results, s32 coordinate.
def TEX_UNIFIED_1D_ARRAY_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;

// f32 results, f32 coordinate.
def TEX_UNIFIED_1D_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;

// f32 results, f32 coordinate, extra $lod operand (tex.level).
def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$lod),
    "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], $lod;",
    []>;

// f32 results, f32 coordinate, extra $gradx / $grady operands (tex.grad).
def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;

// s32 results, s32 coordinate.
def TEX_UNIFIED_1D_ARRAY_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;

// s32 results, f32 coordinate.
def TEX_UNIFIED_1D_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;

// s32 results, f32 coordinate, extra $lod operand (tex.level).
def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$lod),
    "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], $lod;",
    []>;

// s32 results, f32 coordinate, extra $gradx / $grady operands (tex.grad).
def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;

// u32 results, s32 coordinate.
def TEX_UNIFIED_1D_ARRAY_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;

// u32 results, f32 coordinate.
def TEX_UNIFIED_1D_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;

// u32 results, f32 coordinate, extra $lod operand (tex.level).
def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$lod),
    "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], $lod;",
    []>;

// u32 results, f32 coordinate, extra $gradx / $grady operands (tex.grad).
def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;

// Unified-mode tex.2d fetches: a single $t operand plus (x, y) coordinates.

// f32 results, s32 coordinates.
def TEX_UNIFIED_2D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
    "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;

// f32 results, f32 coordinates.
def TEX_UNIFIED_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;

// Unified-mode tex.2d fetches: a single $t operand plus (x, y) coordinates.

// f32 results, f32 coordinates, extra $lod operand (tex.level).
def TEX_UNIFIED_2D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], $lod;",
    []>;

// f32 results, f32 coordinates, extra two-element gradient operands
// (tex.grad).
def TEX_UNIFIED_2D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;

// s32 results, s32 coordinates.
def TEX_UNIFIED_2D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
    "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;

// s32 results, f32 coordinates.
def TEX_UNIFIED_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;

// s32 results, f32 coordinates, extra $lod operand (tex.level).
def TEX_UNIFIED_2D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], $lod;",
    []>;

// s32 results, f32 coordinates, extra two-element gradient operands
// (tex.grad).
def TEX_UNIFIED_2D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0,
Float32Regs:$grady1),
    "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;

// Unified-mode tex.2d fetches: a single $t operand plus (x, y) coordinates.

// u32 results, s32 coordinates.
def TEX_UNIFIED_2D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
    "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;

// u32 results, f32 coordinates.
def TEX_UNIFIED_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;

// u32 results, f32 coordinates, extra $lod operand (tex.level).
def TEX_UNIFIED_2D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], $lod;",
    []>;

// u32 results, f32 coordinates, extra two-element gradient operands
// (tex.grad).
def TEX_UNIFIED_2D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;

// Unified-mode tex.a2d fetches: a single $t operand, an Int32Regs $l operand
// placed first in the coordinate vector, and (x, y) coordinates. Each asm
// string repeats $y in the fourth slot of the coordinate vector.

// f32 results, s32 coordinates.
def TEX_UNIFIED_2D_ARRAY_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
         Int32Regs:$y),
    "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;

// f32 results, f32 coordinates.
def TEX_UNIFIED_2D_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y),
    "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y,
$y\\}];",
    []>;

// Unified-mode tex.a2d fetches: a single $t operand, an Int32Regs $l operand
// placed first in the coordinate vector, and (x, y) coordinates. Each asm
// string repeats $y in the fourth slot of the coordinate vector.

// f32 results, f32 coordinates, extra $lod operand (tex.level).
def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], $lod;",
    []>;

// f32 results, f32 coordinates, extra two-element gradient operands
// (tex.grad).
def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;

// s32 results, s32 coordinates.
def TEX_UNIFIED_2D_ARRAY_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Int32Regs:$x, Int32Regs:$y),
    "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;

// s32 results, f32 coordinates.
def TEX_UNIFIED_2D_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y),
    "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;

// s32 results, f32 coordinates, extra $lod operand (tex.level).
def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], $lod;",
    []>;

// s32 results, f32 coordinates, extra two-element gradient operands
// (tex.grad).
def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;

// Unified-mode tex.a2d fetches: a single $t operand, an Int32Regs $l operand
// placed first in the coordinate vector, and (x, y) coordinates. Each asm
// string repeats $y in the fourth slot of the coordinate vector.

// u32 results, s32 coordinates.
def TEX_UNIFIED_2D_ARRAY_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Int32Regs:$x, Int32Regs:$y),
    "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;

// u32 results, f32 coordinates.
def TEX_UNIFIED_2D_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y),
    "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;

// u32 results, f32 coordinates, extra $lod operand (tex.level).
def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], $lod;",
    []>;

// u32 results, f32 coordinates, extra two-element gradient operands
// (tex.grad).
def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;

// Unified-mode tex.3d fetches: a single $t operand plus (x, y, z)
// coordinates. Each asm string repeats $z in the fourth coordinate slot.

// f32 results, s32 coordinates.
def TEX_UNIFIED_3D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;

// f32 results, f32 coordinates.
def TEX_UNIFIED_3D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z),
    "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;

// Unified-mode tex.3d fetches: a single $t operand plus (x, y, z)
// coordinates. Each asm string repeats $z in the fourth coordinate slot.

// f32 results, f32 coordinates, extra $lod operand (tex.level).
def TEX_UNIFIED_3D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;

// f32 results, f32 coordinates, extra gradient operands (tex.grad).
// $gradx2 / $grady2 are likewise repeated in the fourth vector slot.
def TEX_UNIFIED_3D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
         Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;

// s32 results, s32 coordinates.
def TEX_UNIFIED_3D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;

// s32 results, f32 coordinates.
def TEX_UNIFIED_3D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;

// s32 results, f32 coordinates, extra $lod operand (tex.level).
def TEX_UNIFIED_3D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;

// s32 results, f32 coordinates, extra gradient operands (tex.grad).
def TEX_UNIFIED_3D_S32_F32_GRAD :
NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
         Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;

// Unified-mode tex.3d fetches: a single $t operand plus (x, y, z)
// coordinates. Each asm string repeats $z in the fourth coordinate slot.

// u32 results, s32 coordinates.
def TEX_UNIFIED_3D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;

// u32 results, f32 coordinates.
def TEX_UNIFIED_3D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;

// u32 results, f32 coordinates, extra $lod operand (tex.level).
def TEX_UNIFIED_3D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;

// u32 results, f32 coordinates, extra gradient operands (tex.grad).
// $gradx2 / $grady2 are likewise repeated in the fourth vector slot.
def TEX_UNIFIED_3D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
         Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;

// Unified-mode tex.cube, f32 results, f32 coordinates.
def TEX_UNIFIED_CUBE_F32_F32 : NVPTXInst<
    (outs
Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;

// Unified-mode tex.cube fetches: a single $t operand plus (x, y, z) f32
// coordinates. Each asm string repeats $z in the fourth coordinate slot.

// f32 results, extra $lod operand (tex.level).
def TEX_UNIFIED_CUBE_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;

// s32 results.
def TEX_UNIFIED_CUBE_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;

// s32 results, extra $lod operand (tex.level).
def TEX_UNIFIED_CUBE_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;

// u32 results.
def TEX_UNIFIED_CUBE_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;

// u32 results, extra $lod operand (tex.level).
def TEX_UNIFIED_CUBE_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;

// Unified-mode tex.acube, f32 results, f32 coordinates.
def TEX_UNIFIED_CUBE_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins
Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}];",
    []>;

// Unified-mode tex.acube fetches: a single $t operand, an Int32Regs $l
// operand placed first in the coordinate vector, and (x, y, z) f32
// coordinates.

// f32 results, extra $lod operand (tex.level).
def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}], $lod;",
    []>;

// s32 results.
def TEX_UNIFIED_CUBE_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}];",
    []>;

// s32 results, extra $lod operand (tex.level).
def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}], $lod;",
    []>;

// u32 results.
def TEX_UNIFIED_CUBE_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}];",
    []>;

// u32 results, extra $lod operand (tex.level).
def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}], $lod;",
    []>;

// Unified-mode tld4.r.2d, f32 results, f32 coordinates.
def TLD4_UNIFIED_R_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1,
Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;

// Unified-mode tld4.<comp>.2d instructions: a single $t operand and (x, y)
// f32 coordinates in, four values $v0..$v3 out. The R/G/B/A in the def name
// matches the .r/.g/.b/.a suffix in the mnemonic.

// .g, f32 results.
def TLD4_UNIFIED_G_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;

// .b, f32 results.
def TLD4_UNIFIED_B_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;

// .a, f32 results.
def TLD4_UNIFIED_A_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;

// .r, s32 results.
def TLD4_UNIFIED_R_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;

// .g, s32 results.
def TLD4_UNIFIED_G_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;

// .b, s32 results.
def TLD4_UNIFIED_B_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;

// .a, s32 results.
def TLD4_UNIFIED_A_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins
Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;

// Unified-mode tld4.<comp>.2d instructions with u32 results: a single $t
// operand and (x, y) f32 coordinates in, four values $v0..$v3 out.

// .r component.
def TLD4_UNIFIED_R_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;

// .g component.
def TLD4_UNIFIED_G_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;

// .b component.
def TLD4_UNIFIED_B_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;

// .a component.
def TLD4_UNIFIED_A_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
}



//=== Surface load instructions
// .clamp variant
// Every def in this scope is tagged IsSuld. Each takes an Int64Regs $s
// operand plus Int32Regs coordinates and produces a single result $r.
let IsSuld = true in {
// suld.b.1d, b8 element; the result is held in an Int16Regs register.
def SULD_1D_I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
    []>;

// suld.b.1d, b16 element.
def SULD_1D_I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
    []>;

// suld.b.1d, b32 element.
def SULD_1D_I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
    []>;

// suld.b.1d, b64 element.
def SULD_1D_I64_CLAMP : NVPTXInst<
    (outs Int64Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b64.clamp \\{$r\\}, [$s,
\\{$x\\}];", 3853 []>; 3854 3855def SULD_1D_ARRAY_I8_CLAMP 3856 : NVPTXInst<(outs Int16Regs:$r), 3857 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3858 "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", 3859 []>; 3860def SULD_1D_ARRAY_I16_CLAMP 3861 : NVPTXInst<(outs Int16Regs:$r), 3862 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3863 "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", 3864 []>; 3865def SULD_1D_ARRAY_I32_CLAMP 3866 : NVPTXInst<(outs Int32Regs:$r), 3867 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3868 "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", 3869 []>; 3870def SULD_1D_ARRAY_I64_CLAMP 3871 : NVPTXInst<(outs Int64Regs:$r), 3872 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3873 "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", 3874 []>; 3875 3876def SULD_2D_I8_CLAMP 3877 : NVPTXInst<(outs Int16Regs:$r), 3878 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3879 "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", 3880 []>; 3881def SULD_2D_I16_CLAMP 3882 : NVPTXInst<(outs Int16Regs:$r), 3883 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3884 "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", 3885 []>; 3886def SULD_2D_I32_CLAMP 3887 : NVPTXInst<(outs Int32Regs:$r), 3888 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3889 "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", 3890 []>; 3891def SULD_2D_I64_CLAMP 3892 : NVPTXInst<(outs Int64Regs:$r), 3893 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3894 "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", 3895 []>; 3896 3897def SULD_2D_ARRAY_I8_CLAMP 3898 : NVPTXInst<(outs Int16Regs:$r), 3899 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3900 "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3901 []>; 3902def SULD_2D_ARRAY_I16_CLAMP 3903 : NVPTXInst<(outs Int16Regs:$r), 3904 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3905 "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3906 []>; 3907def SULD_2D_ARRAY_I32_CLAMP 
3908 : NVPTXInst<(outs Int32Regs:$r), 3909 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3910 "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3911 []>; 3912def SULD_2D_ARRAY_I64_CLAMP 3913 : NVPTXInst<(outs Int64Regs:$r), 3914 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3915 "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3916 []>; 3917 3918def SULD_3D_I8_CLAMP 3919 : NVPTXInst<(outs Int16Regs:$r), 3920 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3921 "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 3922 []>; 3923def SULD_3D_I16_CLAMP 3924 : NVPTXInst<(outs Int16Regs:$r), 3925 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3926 "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 3927 []>; 3928def SULD_3D_I32_CLAMP 3929 : NVPTXInst<(outs Int32Regs:$r), 3930 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3931 "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 3932 []>; 3933def SULD_3D_I64_CLAMP 3934 : NVPTXInst<(outs Int64Regs:$r), 3935 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3936 "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 3937 []>; 3938} 3939 3940let IsSuld = 2 in { 3941def SULD_1D_V2I8_CLAMP 3942 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3943 (ins Int64Regs:$s, Int32Regs:$x), 3944 "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", 3945 []>; 3946def SULD_1D_V2I16_CLAMP 3947 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3948 (ins Int64Regs:$s, Int32Regs:$x), 3949 "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", 3950 []>; 3951def SULD_1D_V2I32_CLAMP 3952 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 3953 (ins Int64Regs:$s, Int32Regs:$x), 3954 "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", 3955 []>; 3956def SULD_1D_V2I64_CLAMP 3957 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 3958 (ins Int64Regs:$s, Int32Regs:$x), 3959 "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, 
\\{$x\\}];", 3960 []>; 3961 3962def SULD_1D_ARRAY_V2I8_CLAMP 3963 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3964 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3965 "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3966 []>; 3967def SULD_1D_ARRAY_V2I16_CLAMP 3968 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3969 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3970 "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3971 []>; 3972def SULD_1D_ARRAY_V2I32_CLAMP 3973 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 3974 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3975 "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3976 []>; 3977def SULD_1D_ARRAY_V2I64_CLAMP 3978 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 3979 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3980 "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3981 []>; 3982 3983def SULD_2D_V2I8_CLAMP 3984 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3985 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3986 "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 3987 []>; 3988def SULD_2D_V2I16_CLAMP 3989 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3990 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3991 "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 3992 []>; 3993def SULD_2D_V2I32_CLAMP 3994 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 3995 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3996 "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 3997 []>; 3998def SULD_2D_V2I64_CLAMP 3999 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4000 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4001 "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4002 []>; 4003 4004def SULD_2D_ARRAY_V2I8_CLAMP 4005 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4006 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4007 "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, " 4008 "[$s, \\{$l, $x, $y, $y\\}];", 4009 []>; 4010def 
SULD_2D_ARRAY_V2I16_CLAMP 4011 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4012 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4013 "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, " 4014 "[$s, \\{$l, $x, $y, $y\\}];", 4015 []>; 4016def SULD_2D_ARRAY_V2I32_CLAMP 4017 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4018 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4019 "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, " 4020 "[$s, \\{$l, $x, $y, $y\\}];", 4021 []>; 4022def SULD_2D_ARRAY_V2I64_CLAMP 4023 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4024 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4025 "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, " 4026 "[$s, \\{$l, $x, $y, $y\\}];", 4027 []>; 4028 4029def SULD_3D_V2I8_CLAMP 4030 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4031 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4032 "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4033 []>; 4034def SULD_3D_V2I16_CLAMP 4035 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4036 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4037 "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4038 []>; 4039def SULD_3D_V2I32_CLAMP 4040 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4041 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4042 "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4043 []>; 4044def SULD_3D_V2I64_CLAMP 4045 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4046 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4047 "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4048 []>; 4049} 4050 4051let IsSuld = 3 in { 4052def SULD_1D_V4I8_CLAMP 4053 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4054 (ins Int64Regs:$s, Int32Regs:$x), 4055 "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4056 []>; 4057def SULD_1D_V4I16_CLAMP 4058 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, 
Int16Regs:$b, Int16Regs:$a), 4059 (ins Int64Regs:$s, Int32Regs:$x), 4060 "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4061 []>; 4062def SULD_1D_V4I32_CLAMP 4063 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4064 (ins Int64Regs:$s, Int32Regs:$x), 4065 "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4066 []>; 4067 4068def SULD_1D_ARRAY_V4I8_CLAMP 4069 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4070 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4071 "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " 4072 "[$s, \\{$l, $x\\}];", 4073 []>; 4074def SULD_1D_ARRAY_V4I16_CLAMP 4075 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4076 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4077 "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " 4078 "[$s, \\{$l, $x\\}];", 4079 []>; 4080def SULD_1D_ARRAY_V4I32_CLAMP 4081 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4082 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4083 "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " 4084 "[$s, \\{$l, $x\\}];", 4085 []>; 4086 4087def SULD_2D_V4I8_CLAMP 4088 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4089 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4090 "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4091 []>; 4092def SULD_2D_V4I16_CLAMP 4093 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4094 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4095 "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4096 []>; 4097def SULD_2D_V4I32_CLAMP 4098 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4099 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4100 "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4101 []>; 4102 4103def SULD_2D_ARRAY_V4I8_CLAMP 4104 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, 
Int16Regs:$a), 4105 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4106 "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " 4107 "[$s, \\{$l, $x, $y, $y\\}];", 4108 []>; 4109def SULD_2D_ARRAY_V4I16_CLAMP 4110 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4111 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4112 "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " 4113 "[$s, \\{$l, $x, $y, $y\\}];", 4114 []>; 4115def SULD_2D_ARRAY_V4I32_CLAMP 4116 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4117 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4118 "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " 4119 "[$s, \\{$l, $x, $y, $y\\}];", 4120 []>; 4121 4122 4123def SULD_3D_V4I8_CLAMP 4124 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4125 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4126 "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " 4127 "[$s, \\{$x, $y, $z, $z\\}];", 4128 []>; 4129def SULD_3D_V4I16_CLAMP 4130 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4131 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4132 "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " 4133 "[$s, \\{$x, $y, $z, $z\\}];", 4134 []>; 4135def SULD_3D_V4I32_CLAMP 4136 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4137 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4138 "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " 4139 "[$s, \\{$x, $y, $z, $z\\}];", 4140 []>; 4141} 4142 4143 4144// .trap variant 4145let IsSuld = true in { 4146def SULD_1D_I8_TRAP 4147 : NVPTXInst<(outs Int16Regs:$r), 4148 (ins Int64Regs:$s, Int32Regs:$x), 4149 "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];", 4150 []>; 4151def SULD_1D_I16_TRAP 4152 : NVPTXInst<(outs Int16Regs:$r), 4153 (ins Int64Regs:$s, Int32Regs:$x), 4154 "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];", 4155 []>; 4156def SULD_1D_I32_TRAP 4157 : 
NVPTXInst<(outs Int32Regs:$r), 4158 (ins Int64Regs:$s, Int32Regs:$x), 4159 "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];", 4160 []>; 4161def SULD_1D_I64_TRAP 4162 : NVPTXInst<(outs Int64Regs:$r), 4163 (ins Int64Regs:$s, Int32Regs:$x), 4164 "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];", 4165 []>; 4166 4167def SULD_1D_ARRAY_I8_TRAP 4168 : NVPTXInst<(outs Int16Regs:$r), 4169 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4170 "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 4171 []>; 4172def SULD_1D_ARRAY_I16_TRAP 4173 : NVPTXInst<(outs Int16Regs:$r), 4174 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4175 "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 4176 []>; 4177def SULD_1D_ARRAY_I32_TRAP 4178 : NVPTXInst<(outs Int32Regs:$r), 4179 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4180 "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 4181 []>; 4182def SULD_1D_ARRAY_I64_TRAP 4183 : NVPTXInst<(outs Int64Regs:$r), 4184 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4185 "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 4186 []>; 4187 4188def SULD_2D_I8_TRAP 4189 : NVPTXInst<(outs Int16Regs:$r), 4190 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4191 "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 4192 []>; 4193def SULD_2D_I16_TRAP 4194 : NVPTXInst<(outs Int16Regs:$r), 4195 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4196 "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 4197 []>; 4198def SULD_2D_I32_TRAP 4199 : NVPTXInst<(outs Int32Regs:$r), 4200 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4201 "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 4202 []>; 4203def SULD_2D_I64_TRAP 4204 : NVPTXInst<(outs Int64Regs:$r), 4205 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4206 "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 4207 []>; 4208 4209def SULD_2D_ARRAY_I8_TRAP 4210 : NVPTXInst<(outs Int16Regs:$r), 4211 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4212 "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, 
$y\\}];", 4213 []>; 4214def SULD_2D_ARRAY_I16_TRAP 4215 : NVPTXInst<(outs Int16Regs:$r), 4216 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4217 "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4218 []>; 4219def SULD_2D_ARRAY_I32_TRAP 4220 : NVPTXInst<(outs Int32Regs:$r), 4221 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4222 "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4223 []>; 4224def SULD_2D_ARRAY_I64_TRAP 4225 : NVPTXInst<(outs Int64Regs:$r), 4226 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4227 "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4228 []>; 4229 4230def SULD_3D_I8_TRAP 4231 : NVPTXInst<(outs Int16Regs:$r), 4232 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4233 "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4234 []>; 4235def SULD_3D_I16_TRAP 4236 : NVPTXInst<(outs Int16Regs:$r), 4237 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4238 "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4239 []>; 4240def SULD_3D_I32_TRAP 4241 : NVPTXInst<(outs Int32Regs:$r), 4242 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4243 "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4244 []>; 4245def SULD_3D_I64_TRAP 4246 : NVPTXInst<(outs Int64Regs:$r), 4247 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4248 "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4249 []>; 4250} 4251 4252let IsSuld = 2 in { 4253def SULD_1D_V2I8_TRAP 4254 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4255 (ins Int64Regs:$s, Int32Regs:$x), 4256 "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4257 []>; 4258def SULD_1D_V2I16_TRAP 4259 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4260 (ins Int64Regs:$s, Int32Regs:$x), 4261 "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4262 []>; 4263def SULD_1D_V2I32_TRAP 4264 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4265 (ins Int64Regs:$s, 
Int32Regs:$x), 4266 "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4267 []>; 4268def SULD_1D_V2I64_TRAP 4269 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4270 (ins Int64Regs:$s, Int32Regs:$x), 4271 "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4272 []>; 4273 4274def SULD_1D_ARRAY_V2I8_TRAP 4275 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4276 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4277 "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4278 []>; 4279def SULD_1D_ARRAY_V2I16_TRAP 4280 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4281 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4282 "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4283 []>; 4284def SULD_1D_ARRAY_V2I32_TRAP 4285 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4286 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4287 "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4288 []>; 4289def SULD_1D_ARRAY_V2I64_TRAP 4290 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4291 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4292 "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4293 []>; 4294 4295def SULD_2D_V2I8_TRAP 4296 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4297 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4298 "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4299 []>; 4300def SULD_2D_V2I16_TRAP 4301 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4302 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4303 "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4304 []>; 4305def SULD_2D_V2I32_TRAP 4306 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4307 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4308 "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4309 []>; 4310def SULD_2D_V2I64_TRAP 4311 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4312 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4313 "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4314 []>; 4315 4316def SULD_2D_ARRAY_V2I8_TRAP 4317 : 
NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4318 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4319 "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, " 4320 "[$s, \\{$l, $x, $y, $y\\}];", 4321 []>; 4322def SULD_2D_ARRAY_V2I16_TRAP 4323 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4324 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4325 "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, " 4326 "[$s, \\{$l, $x, $y, $y\\}];", 4327 []>; 4328def SULD_2D_ARRAY_V2I32_TRAP 4329 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4330 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4331 "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, " 4332 "[$s, \\{$l, $x, $y, $y\\}];", 4333 []>; 4334def SULD_2D_ARRAY_V2I64_TRAP 4335 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4336 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4337 "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, " 4338 "[$s, \\{$l, $x, $y, $y\\}];", 4339 []>; 4340 4341def SULD_3D_V2I8_TRAP 4342 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4343 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4344 "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4345 []>; 4346def SULD_3D_V2I16_TRAP 4347 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4348 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4349 "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4350 []>; 4351def SULD_3D_V2I32_TRAP 4352 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4353 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4354 "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4355 []>; 4356def SULD_3D_V2I64_TRAP 4357 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4358 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4359 "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4360 []>; 4361} 4362 4363let IsSuld = 3 in { 4364def SULD_1D_V4I8_TRAP 4365 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4366 (ins 
Int64Regs:$s, Int32Regs:$x), 4367 "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4368 []>; 4369def SULD_1D_V4I16_TRAP 4370 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4371 (ins Int64Regs:$s, Int32Regs:$x), 4372 "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4373 []>; 4374def SULD_1D_V4I32_TRAP 4375 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4376 (ins Int64Regs:$s, Int32Regs:$x), 4377 "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4378 []>; 4379 4380def SULD_1D_ARRAY_V4I8_TRAP 4381 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4382 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4383 "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, " 4384 "[$s, \\{$l, $x\\}];", 4385 []>; 4386def SULD_1D_ARRAY_V4I16_TRAP 4387 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4388 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4389 "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, " 4390 "[$s, \\{$l, $x\\}];", 4391 []>; 4392def SULD_1D_ARRAY_V4I32_TRAP 4393 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4394 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4395 "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, " 4396 "[$s, \\{$l, $x\\}];", 4397 []>; 4398 4399def SULD_2D_V4I8_TRAP 4400 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4401 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4402 "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4403 []>; 4404def SULD_2D_V4I16_TRAP 4405 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4406 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4407 "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4408 []>; 4409def SULD_2D_V4I32_TRAP 4410 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4411 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4412 
"suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4413 []>; 4414 4415def SULD_2D_ARRAY_V4I8_TRAP 4416 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4417 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4418 "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, " 4419 "[$s, \\{$l, $x, $y, $y\\}];", 4420 []>; 4421def SULD_2D_ARRAY_V4I16_TRAP 4422 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4423 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4424 "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, " 4425 "[$s, \\{$l, $x, $y, $y\\}];", 4426 []>; 4427def SULD_2D_ARRAY_V4I32_TRAP 4428 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4429 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4430 "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, " 4431 "[$s, \\{$l, $x, $y, $y\\}];", 4432 []>; 4433 4434 4435def SULD_3D_V4I8_TRAP 4436 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4437 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4438 "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, " 4439 "[$s, \\{$x, $y, $z, $z\\}];", 4440 []>; 4441def SULD_3D_V4I16_TRAP 4442 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4443 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4444 "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, " 4445 "[$s, \\{$x, $y, $z, $z\\}];", 4446 []>; 4447def SULD_3D_V4I32_TRAP 4448 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4449 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4450 "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, " 4451 "[$s, \\{$x, $y, $z, $z\\}];", 4452 []>; 4453} 4454 4455// .zero variant 4456let IsSuld = true in { 4457def SULD_1D_I8_ZERO 4458 : NVPTXInst<(outs Int16Regs:$r), 4459 (ins Int64Regs:$s, Int32Regs:$x), 4460 "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];", 4461 []>; 4462def SULD_1D_I16_ZERO 4463 : 
NVPTXInst<(outs Int16Regs:$r), 4464 (ins Int64Regs:$s, Int32Regs:$x), 4465 "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];", 4466 []>; 4467def SULD_1D_I32_ZERO 4468 : NVPTXInst<(outs Int32Regs:$r), 4469 (ins Int64Regs:$s, Int32Regs:$x), 4470 "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];", 4471 []>; 4472def SULD_1D_I64_ZERO 4473 : NVPTXInst<(outs Int64Regs:$r), 4474 (ins Int64Regs:$s, Int32Regs:$x), 4475 "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];", 4476 []>; 4477 4478def SULD_1D_ARRAY_I8_ZERO 4479 : NVPTXInst<(outs Int16Regs:$r), 4480 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4481 "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4482 []>; 4483def SULD_1D_ARRAY_I16_ZERO 4484 : NVPTXInst<(outs Int16Regs:$r), 4485 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4486 "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4487 []>; 4488def SULD_1D_ARRAY_I32_ZERO 4489 : NVPTXInst<(outs Int32Regs:$r), 4490 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4491 "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4492 []>; 4493def SULD_1D_ARRAY_I64_ZERO 4494 : NVPTXInst<(outs Int64Regs:$r), 4495 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4496 "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4497 []>; 4498 4499def SULD_2D_I8_ZERO 4500 : NVPTXInst<(outs Int16Regs:$r), 4501 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4502 "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4503 []>; 4504def SULD_2D_I16_ZERO 4505 : NVPTXInst<(outs Int16Regs:$r), 4506 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4507 "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4508 []>; 4509def SULD_2D_I32_ZERO 4510 : NVPTXInst<(outs Int32Regs:$r), 4511 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4512 "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4513 []>; 4514def SULD_2D_I64_ZERO 4515 : NVPTXInst<(outs Int64Regs:$r), 4516 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4517 "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4518 []>; 4519 4520def SULD_2D_ARRAY_I8_ZERO 
4521 : NVPTXInst<(outs Int16Regs:$r), 4522 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4523 "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4524 []>; 4525def SULD_2D_ARRAY_I16_ZERO 4526 : NVPTXInst<(outs Int16Regs:$r), 4527 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4528 "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4529 []>; 4530def SULD_2D_ARRAY_I32_ZERO 4531 : NVPTXInst<(outs Int32Regs:$r), 4532 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4533 "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4534 []>; 4535def SULD_2D_ARRAY_I64_ZERO 4536 : NVPTXInst<(outs Int64Regs:$r), 4537 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4538 "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4539 []>; 4540 4541def SULD_3D_I8_ZERO 4542 : NVPTXInst<(outs Int16Regs:$r), 4543 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4544 "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4545 []>; 4546def SULD_3D_I16_ZERO 4547 : NVPTXInst<(outs Int16Regs:$r), 4548 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4549 "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4550 []>; 4551def SULD_3D_I32_ZERO 4552 : NVPTXInst<(outs Int32Regs:$r), 4553 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4554 "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4555 []>; 4556def SULD_3D_I64_ZERO 4557 : NVPTXInst<(outs Int64Regs:$r), 4558 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4559 "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4560 []>; 4561} 4562 4563let IsSuld = 2 in { 4564def SULD_1D_V2I8_ZERO 4565 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4566 (ins Int64Regs:$s, Int32Regs:$x), 4567 "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4568 []>; 4569def SULD_1D_V2I16_ZERO 4570 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4571 (ins Int64Regs:$s, Int32Regs:$x), 4572 
"suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4573 []>; 4574def SULD_1D_V2I32_ZERO 4575 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4576 (ins Int64Regs:$s, Int32Regs:$x), 4577 "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4578 []>; 4579def SULD_1D_V2I64_ZERO 4580 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4581 (ins Int64Regs:$s, Int32Regs:$x), 4582 "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4583 []>; 4584 4585def SULD_1D_ARRAY_V2I8_ZERO 4586 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4587 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4588 "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4589 []>; 4590def SULD_1D_ARRAY_V2I16_ZERO 4591 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4592 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4593 "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4594 []>; 4595def SULD_1D_ARRAY_V2I32_ZERO 4596 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4597 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4598 "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4599 []>; 4600def SULD_1D_ARRAY_V2I64_ZERO 4601 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4602 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4603 "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4604 []>; 4605 4606def SULD_2D_V2I8_ZERO 4607 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4608 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4609 "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4610 []>; 4611def SULD_2D_V2I16_ZERO 4612 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4613 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4614 "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4615 []>; 4616def SULD_2D_V2I32_ZERO 4617 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4618 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4619 "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4620 []>; 4621def SULD_2D_V2I64_ZERO 4622 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 
4623 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4624 "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4625 []>; 4626 4627def SULD_2D_ARRAY_V2I8_ZERO 4628 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4629 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4630 "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, " 4631 "[$s, \\{$l, $x, $y, $y\\}];", 4632 []>; 4633def SULD_2D_ARRAY_V2I16_ZERO 4634 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4635 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4636 "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, " 4637 "[$s, \\{$l, $x, $y, $y\\}];", 4638 []>; 4639def SULD_2D_ARRAY_V2I32_ZERO 4640 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4641 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4642 "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, " 4643 "[$s, \\{$l, $x, $y, $y\\}];", 4644 []>; 4645def SULD_2D_ARRAY_V2I64_ZERO 4646 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4647 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4648 "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, " 4649 "[$s, \\{$l, $x, $y, $y\\}];", 4650 []>; 4651 4652def SULD_3D_V2I8_ZERO 4653 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4654 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4655 "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4656 []>; 4657def SULD_3D_V2I16_ZERO 4658 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4659 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4660 "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4661 []>; 4662def SULD_3D_V2I32_ZERO 4663 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4664 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4665 "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4666 []>; 4667def SULD_3D_V2I64_ZERO 4668 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4669 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4670 "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, 
$z\\}];", 4671 []>; 4672} 4673 4674let IsSuld = 3 in { 4675def SULD_1D_V4I8_ZERO 4676 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4677 (ins Int64Regs:$s, Int32Regs:$x), 4678 "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4679 []>; 4680def SULD_1D_V4I16_ZERO 4681 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4682 (ins Int64Regs:$s, Int32Regs:$x), 4683 "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4684 []>; 4685def SULD_1D_V4I32_ZERO 4686 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4687 (ins Int64Regs:$s, Int32Regs:$x), 4688 "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4689 []>; 4690 4691def SULD_1D_ARRAY_V4I8_ZERO 4692 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4693 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4694 "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, " 4695 "[$s, \\{$l, $x\\}];", 4696 []>; 4697def SULD_1D_ARRAY_V4I16_ZERO 4698 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4699 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4700 "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, " 4701 "[$s, \\{$l, $x\\}];", 4702 []>; 4703def SULD_1D_ARRAY_V4I32_ZERO 4704 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4705 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4706 "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, " 4707 "[$s, \\{$l, $x\\}];", 4708 []>; 4709 4710def SULD_2D_V4I8_ZERO 4711 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4712 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4713 "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4714 []>; 4715def SULD_2D_V4I16_ZERO 4716 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4717 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4718 "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4719 []>; 4720def 
SULD_2D_V4I32_ZERO 4721 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4722 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4723 "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4724 []>; 4725 4726def SULD_2D_ARRAY_V4I8_ZERO 4727 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4728 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4729 "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, " 4730 "[$s, \\{$l, $x, $y, $y\\}];", 4731 []>; 4732def SULD_2D_ARRAY_V4I16_ZERO 4733 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4734 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4735 "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, " 4736 "[$s, \\{$l, $x, $y, $y\\}];", 4737 []>; 4738def SULD_2D_ARRAY_V4I32_ZERO 4739 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4740 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4741 "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, " 4742 "[$s, \\{$l, $x, $y, $y\\}];", 4743 []>; 4744 4745 4746def SULD_3D_V4I8_ZERO 4747 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4748 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4749 "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, " 4750 "[$s, \\{$x, $y, $z, $z\\}];", 4751 []>; 4752def SULD_3D_V4I16_ZERO 4753 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4754 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4755 "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, " 4756 "[$s, \\{$x, $y, $z, $z\\}];", 4757 []>; 4758def SULD_3D_V4I32_ZERO 4759 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4760 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4761 "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, " 4762 "[$s, \\{$x, $y, $z, $z\\}];", 4763 []>; 4764} 4765 4766//----------------------------------- 4767// Texture Query Intrinsics 
//-----------------------------------

// Common shape of every texture query instruction in this section: one
// Int64Regs operand ($a) in, one Int32Regs result ($d) out, printed as
// "txq.<attr>.b32". The built-in pattern list is left empty; instruction
// selection is driven by the separate Pat records further down.
class TXQ_B32_QUERY<string attr>
  : NVPTXInst<(outs Int32Regs:$d),
              (ins Int64Regs:$a),
              "txq." # attr # ".b32 \t$d, [$a];",
              []>;

// Selects `inst` for a call to the texture query intrinsic `intr`.
class TXQ_B32_PAT<Intrinsic intr, NVPTXInst inst>
  : Pat<(intr Int64Regs:$a), (inst Int64Regs:$a)>;

let IsSurfTexQuery = true in {
def TXQ_CHANNEL_ORDER     : TXQ_B32_QUERY<"channel_order">;
def TXQ_CHANNEL_DATA_TYPE : TXQ_B32_QUERY<"channel_data_type">;
def TXQ_WIDTH             : TXQ_B32_QUERY<"width">;
def TXQ_HEIGHT            : TXQ_B32_QUERY<"height">;
def TXQ_DEPTH             : TXQ_B32_QUERY<"depth">;
def TXQ_ARRAY_SIZE        : TXQ_B32_QUERY<"array_size">;
def TXQ_NUM_SAMPLES       : TXQ_B32_QUERY<"num_samples">;
def TXQ_NUM_MIPMAP_LEVELS : TXQ_B32_QUERY<"num_mipmap_levels">;
}

def : TXQ_B32_PAT<int_nvvm_txq_channel_order,     TXQ_CHANNEL_ORDER>;
def : TXQ_B32_PAT<int_nvvm_txq_channel_data_type, TXQ_CHANNEL_DATA_TYPE>;
def : TXQ_B32_PAT<int_nvvm_txq_width,             TXQ_WIDTH>;
def : TXQ_B32_PAT<int_nvvm_txq_height,            TXQ_HEIGHT>;
def : TXQ_B32_PAT<int_nvvm_txq_depth,             TXQ_DEPTH>;
def : TXQ_B32_PAT<int_nvvm_txq_array_size,        TXQ_ARRAY_SIZE>;
def : TXQ_B32_PAT<int_nvvm_txq_num_samples,       TXQ_NUM_SAMPLES>;
def : TXQ_B32_PAT<int_nvvm_txq_num_mipmap_levels, TXQ_NUM_MIPMAP_LEVELS>;


//-----------------------------------
// Surface Query Intrinsics
//-----------------------------------

// Common shape of every surface query instruction in this section: one
// Int64Regs operand ($a) in, one Int32Regs result ($d) out, printed as
// "suq.<attr>.b32". The built-in pattern list is left empty; instruction
// selection is driven by the separate Pat records further down.
class SUQ_B32_QUERY<string attr>
  : NVPTXInst<(outs Int32Regs:$d),
              (ins Int64Regs:$a),
              "suq." # attr # ".b32 \t$d, [$a];",
              []>;

// Selects `inst` for a call to the surface query intrinsic `intr`.
class SUQ_B32_PAT<Intrinsic intr, NVPTXInst inst>
  : Pat<(intr Int64Regs:$a), (inst Int64Regs:$a)>;

let IsSurfTexQuery = true in {
def SUQ_CHANNEL_ORDER     : SUQ_B32_QUERY<"channel_order">;
def SUQ_CHANNEL_DATA_TYPE : SUQ_B32_QUERY<"channel_data_type">;
def SUQ_WIDTH             : SUQ_B32_QUERY<"width">;
def SUQ_HEIGHT            : SUQ_B32_QUERY<"height">;
def SUQ_DEPTH             : SUQ_B32_QUERY<"depth">;
def SUQ_ARRAY_SIZE        : SUQ_B32_QUERY<"array_size">;
}

def : SUQ_B32_PAT<int_nvvm_suq_channel_order,     SUQ_CHANNEL_ORDER>;
def : SUQ_B32_PAT<int_nvvm_suq_channel_data_type, SUQ_CHANNEL_DATA_TYPE>;
def : SUQ_B32_PAT<int_nvvm_suq_width,             SUQ_WIDTH>;
def : SUQ_B32_PAT<int_nvvm_suq_height,            SUQ_HEIGHT>;
def : SUQ_B32_PAT<int_nvvm_suq_depth,             SUQ_DEPTH>;
def : SUQ_B32_PAT<int_nvvm_suq_array_size,        SUQ_ARRAY_SIZE>;


//===- Handle Query -------------------------------------------------------===//

// TODO: These intrinsics are not yet finalized, pending PTX ISA design work

// "istypep.<reftype>" writes an Int1Regs result ($d) for the Int64Regs
// operand ($a). Unlike the query instructions above, the selection pattern
// for `intr` is attached directly to the instruction.
class ISTYPEP_QUERY<string reftype, Intrinsic intr>
  : NVPTXInst<(outs Int1Regs:$d),
              (ins Int64Regs:$a),
              "istypep." # reftype # " \t$d, $a;",
              [(set Int1Regs:$d, (intr Int64Regs:$a))]>;

def ISTYPEP_SAMPLER : ISTYPEP_QUERY<"samplerref", int_nvvm_istypep_sampler>;
def ISTYPEP_SURFACE : ISTYPEP_QUERY<"surfref",    int_nvvm_istypep_surface>;
def ISTYPEP_TEXTURE : ISTYPEP_QUERY<"texref",     int_nvvm_istypep_texture>;

//===- Surface Stores -----------------------------------------------------===//

// Helper shapes for the unformatted 1D ".clamp" stores defined below.
// Operands are $s (Int64Regs), the coordinate $x (Int32Regs), and then one,
// two or four data registers of class `rc`. None of them carries a
// selection pattern.
class SUST_B_1D_CLAMP_SCALAR<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, rc:$r),
              "sust.b.1d." # ty # ".clamp \t[$s, \\{$x\\}], \\{$r\\};",
              []>;

class SUST_B_1D_CLAMP_VEC2<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g),
              "sust.b.1d.v2." # ty # ".clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;

class SUST_B_1D_CLAMP_VEC4<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g, rc:$b, rc:$a),
              "sust.b.1d.v4." # ty
                # ".clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
              []>;

let IsSust = true in {
// Unformatted
// .clamp variant
// Note: the b8 forms use Int16Regs, the same register class as the b16 forms.
def SUST_B_1D_B8_CLAMP    : SUST_B_1D_CLAMP_SCALAR<"b8",  Int16Regs>;
def SUST_B_1D_B16_CLAMP   : SUST_B_1D_CLAMP_SCALAR<"b16", Int16Regs>;
def SUST_B_1D_B32_CLAMP   : SUST_B_1D_CLAMP_SCALAR<"b32", Int32Regs>;
def SUST_B_1D_B64_CLAMP   : SUST_B_1D_CLAMP_SCALAR<"b64", Int64Regs>;
def SUST_B_1D_V2B8_CLAMP  : SUST_B_1D_CLAMP_VEC2<"b8",  Int16Regs>;
def SUST_B_1D_V2B16_CLAMP : SUST_B_1D_CLAMP_VEC2<"b16", Int16Regs>;
def SUST_B_1D_V2B32_CLAMP : SUST_B_1D_CLAMP_VEC2<"b32", Int32Regs>;
def SUST_B_1D_V2B64_CLAMP : SUST_B_1D_CLAMP_VEC2<"b64", Int64Regs>;
def SUST_B_1D_V4B8_CLAMP  : SUST_B_1D_CLAMP_VEC4<"b8",  Int16Regs>;
4935def SUST_B_1D_V4B16_CLAMP 4936 : NVPTXInst<(outs), 4937 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 4938 Int16Regs:$b, Int16Regs:$a), 4939 "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 4940 []>; 4941def SUST_B_1D_V4B32_CLAMP 4942 : NVPTXInst<(outs), 4943 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, 4944 Int32Regs:$b, Int32Regs:$a), 4945 "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 4946 []>; 4947 4948 4949def SUST_B_1D_ARRAY_B8_CLAMP 4950 : NVPTXInst<(outs), 4951 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 4952 "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4953 []>; 4954def SUST_B_1D_ARRAY_B16_CLAMP 4955 : NVPTXInst<(outs), 4956 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 4957 "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4958 []>; 4959def SUST_B_1D_ARRAY_B32_CLAMP 4960 : NVPTXInst<(outs), 4961 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), 4962 "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4963 []>; 4964def SUST_B_1D_ARRAY_B64_CLAMP 4965 : NVPTXInst<(outs), 4966 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), 4967 "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4968 []>; 4969def SUST_B_1D_ARRAY_V2B8_CLAMP 4970 : NVPTXInst<(outs), 4971 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 4972 Int16Regs:$g), 4973 "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4974 []>; 4975def SUST_B_1D_ARRAY_V2B16_CLAMP 4976 : NVPTXInst<(outs), 4977 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 4978 Int16Regs:$g), 4979 "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4980 []>; 4981def SUST_B_1D_ARRAY_V2B32_CLAMP 4982 : NVPTXInst<(outs), 4983 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 4984 Int32Regs:$g), 4985 "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4986 []>; 4987def 
SUST_B_1D_ARRAY_V2B64_CLAMP 4988 : NVPTXInst<(outs), 4989 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, 4990 Int64Regs:$g), 4991 "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4992 []>; 4993def SUST_B_1D_ARRAY_V4B8_CLAMP 4994 : NVPTXInst<(outs), 4995 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 4996 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4997 "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], " 4998 "\\{$r, $g, $b, $a\\};", 4999 []>; 5000def SUST_B_1D_ARRAY_V4B16_CLAMP 5001 : NVPTXInst<(outs), 5002 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5003 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5004 "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], " 5005 "\\{$r, $g, $b, $a\\};", 5006 []>; 5007def SUST_B_1D_ARRAY_V4B32_CLAMP 5008 : NVPTXInst<(outs), 5009 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5010 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5011 "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], " 5012 "\\{$r, $g, $b, $a\\};", 5013 []>; 5014 5015 5016def SUST_B_2D_B8_CLAMP 5017 : NVPTXInst<(outs), 5018 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5019 "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 5020 []>; 5021def SUST_B_2D_B16_CLAMP 5022 : NVPTXInst<(outs), 5023 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5024 "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 5025 []>; 5026def SUST_B_2D_B32_CLAMP 5027 : NVPTXInst<(outs), 5028 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5029 "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 5030 []>; 5031def SUST_B_2D_B64_CLAMP 5032 : NVPTXInst<(outs), 5033 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5034 "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 5035 []>; 5036def SUST_B_2D_V2B8_CLAMP 5037 : NVPTXInst<(outs), 5038 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5039 Int16Regs:$g), 5040 "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], 
\\{$r, $g\\};", 5041 []>; 5042def SUST_B_2D_V2B16_CLAMP 5043 : NVPTXInst<(outs), 5044 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5045 Int16Regs:$g), 5046 "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5047 []>; 5048def SUST_B_2D_V2B32_CLAMP 5049 : NVPTXInst<(outs), 5050 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5051 Int32Regs:$g), 5052 "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5053 []>; 5054def SUST_B_2D_V2B64_CLAMP 5055 : NVPTXInst<(outs), 5056 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 5057 Int64Regs:$g), 5058 "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5059 []>; 5060def SUST_B_2D_V4B8_CLAMP 5061 : NVPTXInst<(outs), 5062 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5063 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5064 "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], " 5065 "\\{$r, $g, $b, $a\\};", 5066 []>; 5067def SUST_B_2D_V4B16_CLAMP 5068 : NVPTXInst<(outs), 5069 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5070 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5071 "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], " 5072 "\\{$r, $g, $b, $a\\};", 5073 []>; 5074def SUST_B_2D_V4B32_CLAMP 5075 : NVPTXInst<(outs), 5076 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5077 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5078 "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], " 5079 "\\{$r, $g, $b, $a\\};", 5080 []>; 5081 5082 5083def SUST_B_2D_ARRAY_B8_CLAMP 5084 : NVPTXInst<(outs), 5085 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5086 Int16Regs:$r), 5087 "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5088 []>; 5089def SUST_B_2D_ARRAY_B16_CLAMP 5090 : NVPTXInst<(outs), 5091 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5092 Int16Regs:$r), 5093 "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5094 []>; 5095def SUST_B_2D_ARRAY_B32_CLAMP 5096 : NVPTXInst<(outs), 5097 (ins 
Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5098 Int32Regs:$r), 5099 "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5100 []>; 5101def SUST_B_2D_ARRAY_B64_CLAMP 5102 : NVPTXInst<(outs), 5103 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5104 Int64Regs:$r), 5105 "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5106 []>; 5107def SUST_B_2D_ARRAY_V2B8_CLAMP 5108 : NVPTXInst<(outs), 5109 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5110 Int16Regs:$r, Int16Regs:$g), 5111 "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 5112 "\\{$r, $g\\};", 5113 []>; 5114def SUST_B_2D_ARRAY_V2B16_CLAMP 5115 : NVPTXInst<(outs), 5116 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5117 Int16Regs:$r, Int16Regs:$g), 5118 "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 5119 "\\{$r, $g\\};", 5120 []>; 5121def SUST_B_2D_ARRAY_V2B32_CLAMP 5122 : NVPTXInst<(outs), 5123 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5124 Int32Regs:$r, Int32Regs:$g), 5125 "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 5126 "\\{$r, $g\\};", 5127 []>; 5128def SUST_B_2D_ARRAY_V2B64_CLAMP 5129 : NVPTXInst<(outs), 5130 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5131 Int64Regs:$r, Int64Regs:$g), 5132 "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 5133 "\\{$r, $g\\};", 5134 []>; 5135def SUST_B_2D_ARRAY_V4B8_CLAMP 5136 : NVPTXInst<(outs), 5137 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5138 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5139 "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 5140 "\\{$r, $g, $b, $a\\};", 5141 []>; 5142def SUST_B_2D_ARRAY_V4B16_CLAMP 5143 : NVPTXInst<(outs), 5144 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5145 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5146 "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 5147 "\\{$r, $g, $b, $a\\};", 
5148 []>; 5149def SUST_B_2D_ARRAY_V4B32_CLAMP 5150 : NVPTXInst<(outs), 5151 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5152 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5153 "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 5154 "\\{$r, $g, $b, $a\\};", 5155 []>; 5156 5157 5158def SUST_B_3D_B8_CLAMP 5159 : NVPTXInst<(outs), 5160 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5161 Int16Regs:$r), 5162 "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5163 []>; 5164def SUST_B_3D_B16_CLAMP 5165 : NVPTXInst<(outs), 5166 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5167 Int16Regs:$r), 5168 "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5169 []>; 5170def SUST_B_3D_B32_CLAMP 5171 : NVPTXInst<(outs), 5172 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5173 Int32Regs:$r), 5174 "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5175 []>; 5176def SUST_B_3D_B64_CLAMP 5177 : NVPTXInst<(outs), 5178 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5179 Int64Regs:$r), 5180 "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5181 []>; 5182def SUST_B_3D_V2B8_CLAMP 5183 : NVPTXInst<(outs), 5184 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5185 Int16Regs:$r, Int16Regs:$g), 5186 "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5187 "\\{$r, $g\\};", 5188 []>; 5189def SUST_B_3D_V2B16_CLAMP 5190 : NVPTXInst<(outs), 5191 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5192 Int16Regs:$r, Int16Regs:$g), 5193 "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5194 "\\{$r, $g\\};", 5195 []>; 5196def SUST_B_3D_V2B32_CLAMP 5197 : NVPTXInst<(outs), 5198 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5199 Int32Regs:$r, Int32Regs:$g), 5200 "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5201 "\\{$r, $g\\};", 5202 []>; 5203def SUST_B_3D_V2B64_CLAMP 5204 : NVPTXInst<(outs), 5205 (ins 
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5206 Int64Regs:$r, Int64Regs:$g), 5207 "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5208 "\\{$r, $g\\};", 5209 []>; 5210def SUST_B_3D_V4B8_CLAMP 5211 : NVPTXInst<(outs), 5212 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5213 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5214 "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5215 "\\{$r, $g, $b, $a\\};", 5216 []>; 5217def SUST_B_3D_V4B16_CLAMP 5218 : NVPTXInst<(outs), 5219 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5220 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5221 "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5222 "\\{$r, $g, $b, $a\\};", 5223 []>; 5224def SUST_B_3D_V4B32_CLAMP 5225 : NVPTXInst<(outs), 5226 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5227 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5228 "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5229 "\\{$r, $g, $b, $a\\};", 5230 []>; 5231 5232 5233// .trap variant 5234def SUST_B_1D_B8_TRAP 5235 : NVPTXInst<(outs), 5236 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5237 "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", 5238 []>; 5239def SUST_B_1D_B16_TRAP 5240 : NVPTXInst<(outs), 5241 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5242 "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", 5243 []>; 5244def SUST_B_1D_B32_TRAP 5245 : NVPTXInst<(outs), 5246 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5247 "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", 5248 []>; 5249def SUST_B_1D_B64_TRAP 5250 : NVPTXInst<(outs), 5251 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 5252 "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};", 5253 []>; 5254def SUST_B_1D_V2B8_TRAP 5255 : NVPTXInst<(outs), 5256 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5257 "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5258 []>; 5259def SUST_B_1D_V2B16_TRAP 5260 : NVPTXInst<(outs), 5261 (ins 
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5262 "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5263 []>; 5264def SUST_B_1D_V2B32_TRAP 5265 : NVPTXInst<(outs), 5266 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5267 "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5268 []>; 5269def SUST_B_1D_V2B64_TRAP 5270 : NVPTXInst<(outs), 5271 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5272 "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5273 []>; 5274def SUST_B_1D_V4B8_TRAP 5275 : NVPTXInst<(outs), 5276 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5277 Int16Regs:$b, Int16Regs:$a), 5278 "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5279 []>; 5280def SUST_B_1D_V4B16_TRAP 5281 : NVPTXInst<(outs), 5282 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5283 Int16Regs:$b, Int16Regs:$a), 5284 "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5285 []>; 5286def SUST_B_1D_V4B32_TRAP 5287 : NVPTXInst<(outs), 5288 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, 5289 Int32Regs:$b, Int32Regs:$a), 5290 "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5291 []>; 5292 5293 5294def SUST_B_1D_ARRAY_B8_TRAP 5295 : NVPTXInst<(outs), 5296 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5297 "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5298 []>; 5299def SUST_B_1D_ARRAY_B16_TRAP 5300 : NVPTXInst<(outs), 5301 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5302 "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5303 []>; 5304def SUST_B_1D_ARRAY_B32_TRAP 5305 : NVPTXInst<(outs), 5306 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), 5307 "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5308 []>; 5309def SUST_B_1D_ARRAY_B64_TRAP 5310 : NVPTXInst<(outs), 5311 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), 5312 "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], 
\\{$r\\};", 5313 []>; 5314def SUST_B_1D_ARRAY_V2B8_TRAP 5315 : NVPTXInst<(outs), 5316 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5317 Int16Regs:$g), 5318 "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5319 []>; 5320def SUST_B_1D_ARRAY_V2B16_TRAP 5321 : NVPTXInst<(outs), 5322 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5323 Int16Regs:$g), 5324 "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5325 []>; 5326def SUST_B_1D_ARRAY_V2B32_TRAP 5327 : NVPTXInst<(outs), 5328 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5329 Int32Regs:$g), 5330 "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5331 []>; 5332def SUST_B_1D_ARRAY_V2B64_TRAP 5333 : NVPTXInst<(outs), 5334 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, 5335 Int64Regs:$g), 5336 "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5337 []>; 5338def SUST_B_1D_ARRAY_V4B8_TRAP 5339 : NVPTXInst<(outs), 5340 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5341 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5342 "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " 5343 "\\{$r, $g, $b, $a\\};", 5344 []>; 5345def SUST_B_1D_ARRAY_V4B16_TRAP 5346 : NVPTXInst<(outs), 5347 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5348 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5349 "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " 5350 "\\{$r, $g, $b, $a\\};", 5351 []>; 5352def SUST_B_1D_ARRAY_V4B32_TRAP 5353 : NVPTXInst<(outs), 5354 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5355 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5356 "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " 5357 "\\{$r, $g, $b, $a\\};", 5358 []>; 5359 5360 5361def SUST_B_2D_B8_TRAP 5362 : NVPTXInst<(outs), 5363 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5364 "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5365 []>; 5366def SUST_B_2D_B16_TRAP 5367 : NVPTXInst<(outs), 5368 (ins 
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5369 "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5370 []>; 5371def SUST_B_2D_B32_TRAP 5372 : NVPTXInst<(outs), 5373 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5374 "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5375 []>; 5376def SUST_B_2D_B64_TRAP 5377 : NVPTXInst<(outs), 5378 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5379 "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5380 []>; 5381def SUST_B_2D_V2B8_TRAP 5382 : NVPTXInst<(outs), 5383 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5384 Int16Regs:$g), 5385 "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5386 []>; 5387def SUST_B_2D_V2B16_TRAP 5388 : NVPTXInst<(outs), 5389 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5390 Int16Regs:$g), 5391 "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5392 []>; 5393def SUST_B_2D_V2B32_TRAP 5394 : NVPTXInst<(outs), 5395 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5396 Int32Regs:$g), 5397 "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5398 []>; 5399def SUST_B_2D_V2B64_TRAP 5400 : NVPTXInst<(outs), 5401 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 5402 Int64Regs:$g), 5403 "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5404 []>; 5405def SUST_B_2D_V4B8_TRAP 5406 : NVPTXInst<(outs), 5407 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5408 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5409 "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " 5410 "\\{$r, $g, $b, $a\\};", 5411 []>; 5412def SUST_B_2D_V4B16_TRAP 5413 : NVPTXInst<(outs), 5414 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5415 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5416 "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " 5417 "\\{$r, $g, $b, $a\\};", 5418 []>; 5419def SUST_B_2D_V4B32_TRAP 5420 : NVPTXInst<(outs), 5421 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 
Int32Regs:$r, 5422 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5423 "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " 5424 "\\{$r, $g, $b, $a\\};", 5425 []>; 5426 5427 5428def SUST_B_2D_ARRAY_B8_TRAP 5429 : NVPTXInst<(outs), 5430 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5431 Int16Regs:$r), 5432 "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5433 []>; 5434def SUST_B_2D_ARRAY_B16_TRAP 5435 : NVPTXInst<(outs), 5436 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5437 Int16Regs:$r), 5438 "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5439 []>; 5440def SUST_B_2D_ARRAY_B32_TRAP 5441 : NVPTXInst<(outs), 5442 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5443 Int32Regs:$r), 5444 "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5445 []>; 5446def SUST_B_2D_ARRAY_B64_TRAP 5447 : NVPTXInst<(outs), 5448 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5449 Int64Regs:$r), 5450 "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5451 []>; 5452def SUST_B_2D_ARRAY_V2B8_TRAP 5453 : NVPTXInst<(outs), 5454 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5455 Int16Regs:$r, Int16Regs:$g), 5456 "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5457 "\\{$r, $g\\};", 5458 []>; 5459def SUST_B_2D_ARRAY_V2B16_TRAP 5460 : NVPTXInst<(outs), 5461 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5462 Int16Regs:$r, Int16Regs:$g), 5463 "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5464 "\\{$r, $g\\};", 5465 []>; 5466def SUST_B_2D_ARRAY_V2B32_TRAP 5467 : NVPTXInst<(outs), 5468 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5469 Int32Regs:$r, Int32Regs:$g), 5470 "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5471 "\\{$r, $g\\};", 5472 []>; 5473def SUST_B_2D_ARRAY_V2B64_TRAP 5474 : NVPTXInst<(outs), 5475 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5476 Int64Regs:$r, Int64Regs:$g), 
5477 "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5478 "\\{$r, $g\\};", 5479 []>; 5480def SUST_B_2D_ARRAY_V4B8_TRAP 5481 : NVPTXInst<(outs), 5482 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5483 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5484 "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5485 "\\{$r, $g, $b, $a\\};", 5486 []>; 5487def SUST_B_2D_ARRAY_V4B16_TRAP 5488 : NVPTXInst<(outs), 5489 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5490 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5491 "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5492 "\\{$r, $g, $b, $a\\};", 5493 []>; 5494def SUST_B_2D_ARRAY_V4B32_TRAP 5495 : NVPTXInst<(outs), 5496 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5497 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5498 "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5499 "\\{$r, $g, $b, $a\\};", 5500 []>; 5501 5502 5503def SUST_B_3D_B8_TRAP 5504 : NVPTXInst<(outs), 5505 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5506 Int16Regs:$r), 5507 "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5508 []>; 5509def SUST_B_3D_B16_TRAP 5510 : NVPTXInst<(outs), 5511 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5512 Int16Regs:$r), 5513 "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5514 []>; 5515def SUST_B_3D_B32_TRAP 5516 : NVPTXInst<(outs), 5517 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5518 Int32Regs:$r), 5519 "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5520 []>; 5521def SUST_B_3D_B64_TRAP 5522 : NVPTXInst<(outs), 5523 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5524 Int64Regs:$r), 5525 "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5526 []>; 5527def SUST_B_3D_V2B8_TRAP 5528 : NVPTXInst<(outs), 5529 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5530 Int16Regs:$r, Int16Regs:$g), 5531 
"sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;

// Operand conventions shared by the surface-store records in this section:
//   $s              64-bit register printed as the base of `[$s, {...}]`.
//   $idx/$x/$y/$z   32-bit registers printed inside the coordinate braces.
//                   The a2d and 3d forms print the last coordinate twice, so
//                   the brace list always has four entries there (presumably
//                   padding required by PTX -- confirm against the PTX ISA).
//   $r/$g/$b/$a     data registers; b8 and b16 data both use Int16Regs.
// Every record has no outputs and an empty pattern list; selection is done
// by standalone Pat records.

def SUST_B_3D_V2B16_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};", []>;
def SUST_B_3D_V2B32_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};", []>;
def SUST_B_3D_V2B64_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};", []>;
def SUST_B_3D_V4B8_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
def SUST_B_3D_V4B16_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
def SUST_B_3D_V4B32_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};", []>;


// sust.b with the .zero suffix -- 1d surfaces.
def SUST_B_1D_B8_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};", []>;
def SUST_B_1D_B16_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};", []>;
def SUST_B_1D_B32_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
    "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};", []>;
def SUST_B_1D_B64_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
    "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};", []>;
def SUST_B_1D_V2B8_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", []>;
def SUST_B_1D_V2B16_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", []>;
def SUST_B_1D_V2B32_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
    "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", []>;
def SUST_B_1D_V2B64_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
    "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", []>;
def SUST_B_1D_V4B8_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", []>;
def SUST_B_1D_V4B16_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", []>;
def SUST_B_1D_V4B32_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", []>;


// sust.b .zero -- a1d surfaces ($idx is printed ahead of $x).
def SUST_B_1D_ARRAY_B8_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
    "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", []>;
def SUST_B_1D_ARRAY_B16_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
    "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", []>;
def SUST_B_1D_ARRAY_B32_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
    "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", []>;
def SUST_B_1D_ARRAY_B64_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
    "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", []>;
def SUST_B_1D_ARRAY_V2B8_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", []>;
def SUST_B_1D_ARRAY_V2B16_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", []>;
def SUST_B_1D_ARRAY_V2B32_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", []>;
def SUST_B_1D_ARRAY_V2B64_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", []>;
def SUST_B_1D_ARRAY_V4B8_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
def SUST_B_1D_ARRAY_V4B16_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
def SUST_B_1D_ARRAY_V4B32_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};", []>;


// sust.b .zero -- 2d surfaces.
def SUST_B_2D_B8_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", []>;
def SUST_B_2D_B16_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", []>;
def SUST_B_2D_B32_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
    "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", []>;
def SUST_B_2D_B64_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
    "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", []>;
def SUST_B_2D_V2B8_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", []>;
def SUST_B_2D_V2B16_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", []>;
def SUST_B_2D_V2B32_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", []>;
def SUST_B_2D_V2B64_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", []>;
def SUST_B_2D_V4B8_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
def SUST_B_2D_V4B16_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
def SUST_B_2D_V4B32_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};", []>;


// sust.b .zero -- a2d surfaces ($y is printed twice in the coordinate list).
def SUST_B_2D_ARRAY_B8_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", []>;
def SUST_B_2D_ARRAY_B16_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", []>;
def SUST_B_2D_ARRAY_B32_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", []>;
def SUST_B_2D_ARRAY_B64_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r),
    "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", []>;
def SUST_B_2D_ARRAY_V2B8_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};", []>;
def SUST_B_2D_ARRAY_V2B16_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};", []>;
def SUST_B_2D_ARRAY_V2B32_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};", []>;
def SUST_B_2D_ARRAY_V2B64_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};", []>;
def SUST_B_2D_ARRAY_V4B8_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
def SUST_B_2D_ARRAY_V4B16_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
def SUST_B_2D_ARRAY_V4B32_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};", []>;


// sust.b .zero -- 3d surfaces ($z is printed twice in the coordinate list).
def SUST_B_3D_B8_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", []>;
def SUST_B_3D_B16_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", []>;
def SUST_B_3D_B32_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r),
    "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", []>;
def SUST_B_3D_B64_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r),
    "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", []>;
def SUST_B_3D_V2B8_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};", []>;
def SUST_B_3D_V2B16_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};", []>;
def SUST_B_3D_V2B32_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};", []>;
def SUST_B_3D_V2B64_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};", []>;
def SUST_B_3D_V4B8_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
def SUST_B_3D_V4B16_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
def SUST_B_3D_V4B32_ZERO : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};", []>;



// Formatted stores (sust.p), .trap suffix. Only b8/b16/b32 element widths
// are defined in this group.

// sust.p .trap -- 1d surfaces.
def SUST_P_1D_B8_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", []>;
def SUST_P_1D_B16_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", []>;
def SUST_P_1D_B32_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
    "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", []>;
def SUST_P_1D_V2B8_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", []>;
def SUST_P_1D_V2B16_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", []>;
def SUST_P_1D_V2B32_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
    "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", []>;
def SUST_P_1D_V4B8_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", []>;
def SUST_P_1D_V4B16_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", []>;
def SUST_P_1D_V4B32_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", []>;


// sust.p .trap -- a1d surfaces.
def SUST_P_1D_ARRAY_B8_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
    "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", []>;
def SUST_P_1D_ARRAY_B16_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
    "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", []>;
def SUST_P_1D_ARRAY_B32_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
    "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", []>;
def SUST_P_1D_ARRAY_V2B8_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", []>;
def SUST_P_1D_ARRAY_V2B16_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", []>;
def SUST_P_1D_ARRAY_V2B32_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", []>;
def SUST_P_1D_ARRAY_V4B8_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
def SUST_P_1D_ARRAY_V4B16_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
def SUST_P_1D_ARRAY_V4B32_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};", []>;


// sust.p .trap -- 2d surfaces.
def SUST_P_2D_B8_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", []>;
def SUST_P_2D_B16_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", []>;
def SUST_P_2D_B32_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
    "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", []>;
def SUST_P_2D_V2B8_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", []>;
def SUST_P_2D_V2B16_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", []>;
def SUST_P_2D_V2B32_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", []>;
def SUST_P_2D_V4B8_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
def SUST_P_2D_V4B16_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
def SUST_P_2D_V4B32_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};", []>;


// sust.p .trap -- a2d surfaces ($y is printed twice in the coordinate list).
def SUST_P_2D_ARRAY_B8_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", []>;
def SUST_P_2D_ARRAY_B16_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", []>;
def SUST_P_2D_ARRAY_B32_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", []>;
def SUST_P_2D_ARRAY_V2B8_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};", []>;
def SUST_P_2D_ARRAY_V2B16_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};", []>;
def SUST_P_2D_ARRAY_V2B32_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};", []>;
def SUST_P_2D_ARRAY_V4B8_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
def SUST_P_2D_ARRAY_V4B16_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
def SUST_P_2D_ARRAY_V4B32_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};", []>;


// sust.p .trap -- 3d surfaces ($z is printed twice in the coordinate list).
def SUST_P_3D_B8_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", []>;
def SUST_P_3D_B16_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", []>;
def SUST_P_3D_B32_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r),
    "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", []>;
def SUST_P_3D_V2B8_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};", []>;
def SUST_P_3D_V2B16_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};", []>;
def SUST_P_3D_V2B32_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};", []>;
def SUST_P_3D_V4B8_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
def SUST_P_3D_V4B16_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
def SUST_P_3D_V4B32_TRAP : NVPTXInst<(outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};", []>;
}

// Selection patterns for the surface-store intrinsics.
// They live in standalone Pat records rather than in the instruction
// definitions because putting them there makes TableGen report type errors;
// the underlying reason is not known.
// Each pattern forwards the intrinsic operands to the instruction unchanged
// and in the same order; $l here lands in the instruction's $idx position.

// Patterns for the .clamp instructions -- 1d.
def : Pat<
    (int_nvvm_sust_b_1d_i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
    (SUST_B_1D_B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_i64_clamp
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
    (SUST_B_1D_B64_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_V2B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_V2B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
    (SUST_B_1D_V2B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i64_clamp
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
    (SUST_B_1D_V2B64_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v4i8_clamp
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_V4B8_CLAMP
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_1d_v4i16_clamp
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_V4B16_CLAMP
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_1d_v4i32_clamp
        Int64Regs:$s, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (SUST_B_1D_V4B32_CLAMP
        Int64Regs:$s, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// .clamp patterns -- 1d array.
def : Pat<
    (int_nvvm_sust_b_1d_array_i8_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_ARRAY_B8_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_i16_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_ARRAY_B16_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_i32_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
    (SUST_B_1D_ARRAY_B32_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_i64_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
    (SUST_B_1D_ARRAY_B64_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v2i8_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_ARRAY_V2B8_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v2i16_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_ARRAY_V2B16_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v2i32_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g),
    (SUST_B_1D_ARRAY_V2B32_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v2i64_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int64Regs:$r, Int64Regs:$g),
    (SUST_B_1D_ARRAY_V2B64_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v4i8_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_ARRAY_V4B8_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v4i16_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_ARRAY_V4B16_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_v4i32_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (SUST_B_1D_ARRAY_V4B32_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// .clamp patterns -- 2d.
def : Pat<
    (int_nvvm_sust_b_2d_i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    (SUST_B_2D_B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
    (SUST_B_2D_B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
    (SUST_B_2D_B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_i64_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
    (SUST_B_2D_B64_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_v2i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g),
    (SUST_B_2D_V2B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_v2i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g),
    (SUST_B_2D_V2B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_v2i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g),
    (SUST_B_2D_V2B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_v2i64_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int64Regs:$r, Int64Regs:$g),
    (SUST_B_2D_V2B64_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_v4i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_2D_V4B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_2d_v4i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_2D_V4B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_2d_v4i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (SUST_B_2D_V4B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// .clamp patterns -- 2d array.
def : Pat<
    (int_nvvm_sust_b_2d_array_i8_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r),
    (SUST_B_2D_ARRAY_B8_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_i16_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r),
    (SUST_B_2D_ARRAY_B16_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_i32_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r),
    (SUST_B_2D_ARRAY_B32_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_i64_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int64Regs:$r),
    (SUST_B_2D_ARRAY_B64_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int64Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v2i8_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g),
    (SUST_B_2D_ARRAY_V2B8_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v2i16_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g),
    (SUST_B_2D_ARRAY_V2B16_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v2i32_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g),
    (SUST_B_2D_ARRAY_V2B32_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v2i64_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int64Regs:$r, Int64Regs:$g),
    (SUST_B_2D_ARRAY_V2B64_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v4i8_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_2D_ARRAY_V4B8_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v4i16_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_2D_ARRAY_V4B16_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_2d_array_v4i32_clamp
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (SUST_B_2D_ARRAY_V4B32_CLAMP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// .clamp patterns -- 3d.
def : Pat<
    (int_nvvm_sust_b_3d_i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r),
    (SUST_B_3D_B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_3d_i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r),
    (SUST_B_3D_B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_3d_i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int32Regs:$r),
    (SUST_B_3D_B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_3d_i64_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int64Regs:$r),
    (SUST_B_3D_B64_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int64Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_3d_v2i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g),
    (SUST_B_3D_V2B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_3d_v2i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g),
    (SUST_B_3D_V2B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_3d_v2i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int32Regs:$r, Int32Regs:$g),
    (SUST_B_3D_V2B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_3d_v2i64_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int64Regs:$r, Int64Regs:$g),
    (SUST_B_3D_V2B64_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_3d_v4i8_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_3D_V4B8_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_3d_v4i16_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_3D_V4B16_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_3d_v4i32_clamp
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (SUST_B_3D_V4B32_CLAMP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// Patterns for the .trap instructions -- 1d.
def : Pat<
    (int_nvvm_sust_b_1d_i8_trap
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_B8_TRAP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_i16_trap
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_B16_TRAP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_i32_trap
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
    (SUST_B_1D_B32_TRAP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_i64_trap
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
    (SUST_B_1D_B64_TRAP
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i8_trap
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_V2B8_TRAP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i16_trap
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    (SUST_B_1D_V2B16_TRAP
        Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i32_trap
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
    (SUST_B_1D_V2B32_TRAP
        Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v2i64_trap
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
    (SUST_B_1D_V2B64_TRAP
        Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
    (int_nvvm_sust_b_1d_v4i8_trap
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_V4B8_TRAP
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_1d_v4i16_trap
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (SUST_B_1D_V4B16_TRAP
        Int64Regs:$s, Int32Regs:$x,
        Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
    (int_nvvm_sust_b_1d_v4i32_trap
        Int64Regs:$s, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (SUST_B_1D_V4B32_TRAP
        Int64Regs:$s, Int32Regs:$x,
        Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// .trap patterns -- 1d array.
def : Pat<
    (int_nvvm_sust_b_1d_array_i8_trap
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_ARRAY_B8_TRAP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_i16_trap
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
    (SUST_B_1D_ARRAY_B16_TRAP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_i32_trap
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
    (SUST_B_1D_ARRAY_B32_TRAP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
    (int_nvvm_sust_b_1d_array_i64_trap
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
    (SUST_B_1D_ARRAY_B64_TRAP
        Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
6622 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6623 (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6624 Int16Regs:$r, Int16Regs:$g)>; 6625 6626def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap 6627 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6628 (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6629 Int16Regs:$r, Int16Regs:$g)>; 6630 6631def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap 6632 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6633 (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6634 Int32Regs:$r, Int32Regs:$g)>; 6635 6636def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap 6637 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 6638 (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6639 Int64Regs:$r, Int64Regs:$g)>; 6640 6641def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap 6642 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6643 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6644 (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6645 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6646 6647def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap 6648 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6649 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6650 (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6651 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6652 6653def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap 6654 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6655 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6656 (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6657 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6658 6659 6660 6661def : Pat<(int_nvvm_sust_b_2d_i8_trap 6662 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6663 (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, 
Int32Regs:$y, 6664 Int16Regs:$r)>; 6665 6666def : Pat<(int_nvvm_sust_b_2d_i16_trap 6667 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6668 (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6669 Int16Regs:$r)>; 6670 6671def : Pat<(int_nvvm_sust_b_2d_i32_trap 6672 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6673 (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6674 Int32Regs:$r)>; 6675 6676def : Pat<(int_nvvm_sust_b_2d_i64_trap 6677 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 6678 (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6679 Int64Regs:$r)>; 6680 6681def : Pat<(int_nvvm_sust_b_2d_v2i8_trap 6682 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6683 (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6684 Int16Regs:$r, Int16Regs:$g)>; 6685 6686def : Pat<(int_nvvm_sust_b_2d_v2i16_trap 6687 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6688 (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6689 Int16Regs:$r, Int16Regs:$g)>; 6690 6691def : Pat<(int_nvvm_sust_b_2d_v2i32_trap 6692 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 6693 (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6694 Int32Regs:$r, Int32Regs:$g)>; 6695 6696def : Pat<(int_nvvm_sust_b_2d_v2i64_trap 6697 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), 6698 (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6699 Int64Regs:$r, Int64Regs:$g)>; 6700 6701def : Pat<(int_nvvm_sust_b_2d_v4i8_trap 6702 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6703 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6704 (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6705 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6706 6707def : Pat<(int_nvvm_sust_b_2d_v4i16_trap 6708 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6709 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6710 
(SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6711 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6712 6713def : Pat<(int_nvvm_sust_b_2d_v4i32_trap 6714 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6715 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6716 (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6717 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6718 6719 6720 6721def : Pat<(int_nvvm_sust_b_2d_array_i8_trap 6722 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6723 (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s, 6724 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6725 Int16Regs:$r)>; 6726 6727def : Pat<(int_nvvm_sust_b_2d_array_i16_trap 6728 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6729 (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s, 6730 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6731 Int16Regs:$r)>; 6732 6733def : Pat<(int_nvvm_sust_b_2d_array_i32_trap 6734 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6735 (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s, 6736 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6737 Int32Regs:$r)>; 6738 6739def : Pat<(int_nvvm_sust_b_2d_array_i64_trap 6740 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 6741 (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s, 6742 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6743 Int64Regs:$r)>; 6744 6745def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap 6746 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6747 Int16Regs:$r, Int16Regs:$g), 6748 (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, 6749 Int32Regs:$x, Int32Regs:$y, 6750 Int16Regs:$r, Int16Regs:$g)>; 6751 6752def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap 6753 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6754 Int16Regs:$r, Int16Regs:$g), 6755 (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, 6756 Int32Regs:$x, Int32Regs:$y, 6757 Int16Regs:$r, Int16Regs:$g)>; 6758 6759def : 
Pat<(int_nvvm_sust_b_2d_array_v2i32_trap 6760 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 6761 Int32Regs:$g), 6762 (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, 6763 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 6764 6765def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap 6766 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 6767 Int64Regs:$g), 6768 (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, 6769 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 6770 6771def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap 6772 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6773 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6774 (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s, 6775 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6776 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6777 6778def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap 6779 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6780 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6781 (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s, 6782 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6783 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6784 6785def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap 6786 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6787 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6788 (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, 6789 Int32Regs:$x, Int32Regs:$y, 6790 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6791 6792 6793 6794def : Pat<(int_nvvm_sust_b_3d_i8_trap 6795 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6796 Int16Regs:$r), 6797 (SUST_B_3D_B8_TRAP Int64Regs:$s, 6798 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6799 Int16Regs:$r)>; 6800 6801def : Pat<(int_nvvm_sust_b_3d_i16_trap 6802 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6803 Int16Regs:$r), 6804 (SUST_B_3D_B16_TRAP Int64Regs:$s, 6805 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 
6806 Int16Regs:$r)>; 6807 6808def : Pat<(int_nvvm_sust_b_3d_i32_trap 6809 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6810 Int32Regs:$r), 6811 (SUST_B_3D_B32_TRAP Int64Regs:$s, 6812 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6813 Int32Regs:$r)>; 6814 6815def : Pat<(int_nvvm_sust_b_3d_i64_trap 6816 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6817 Int64Regs:$r), 6818 (SUST_B_3D_B64_TRAP Int64Regs:$s, 6819 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6820 Int64Regs:$r)>; 6821 6822def : Pat<(int_nvvm_sust_b_3d_v2i8_trap 6823 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6824 Int16Regs:$r, Int16Regs:$g), 6825 (SUST_B_3D_V2B8_TRAP Int64Regs:$s, 6826 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6827 Int16Regs:$r, Int16Regs:$g)>; 6828 6829def : Pat<(int_nvvm_sust_b_3d_v2i16_trap 6830 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6831 Int16Regs:$r, Int16Regs:$g), 6832 (SUST_B_3D_V2B16_TRAP Int64Regs:$s, 6833 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6834 Int16Regs:$r, Int16Regs:$g)>; 6835 6836def : Pat<(int_nvvm_sust_b_3d_v2i32_trap 6837 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6838 Int32Regs:$r, Int32Regs:$g), 6839 (SUST_B_3D_V2B32_TRAP Int64Regs:$s, 6840 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6841 Int32Regs:$r, Int32Regs:$g)>; 6842 6843def : Pat<(int_nvvm_sust_b_3d_v2i64_trap 6844 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6845 Int64Regs:$r, Int64Regs:$g), 6846 (SUST_B_3D_V2B64_TRAP Int64Regs:$s, 6847 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6848 Int64Regs:$r, Int64Regs:$g)>; 6849 6850def : Pat<(int_nvvm_sust_b_3d_v4i8_trap 6851 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6852 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6853 (SUST_B_3D_V4B8_TRAP Int64Regs:$s, 6854 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6855 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6856 6857def : Pat<(int_nvvm_sust_b_3d_v4i16_trap 6858 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 
6859 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6860 (SUST_B_3D_V4B16_TRAP Int64Regs:$s, 6861 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6862 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6863 6864def : Pat<(int_nvvm_sust_b_3d_v4i32_trap 6865 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6866 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6867 (SUST_B_3D_V4B32_TRAP Int64Regs:$s, 6868 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6869 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6870 6871 6872// .zero variant 6873def : Pat<(int_nvvm_sust_b_1d_i8_zero 6874 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6875 (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6876 6877def : Pat<(int_nvvm_sust_b_1d_i16_zero 6878 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6879 (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6880 6881def : Pat<(int_nvvm_sust_b_1d_i32_zero 6882 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 6883 (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 6884 6885def : Pat<(int_nvvm_sust_b_1d_i64_zero 6886 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 6887 (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; 6888 6889def : Pat<(int_nvvm_sust_b_1d_v2i8_zero 6890 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6891 (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, 6892 Int16Regs:$r, Int16Regs:$g)>; 6893 6894def : Pat<(int_nvvm_sust_b_1d_v2i16_zero 6895 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6896 (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, 6897 Int16Regs:$r, Int16Regs:$g)>; 6898 6899def : Pat<(int_nvvm_sust_b_1d_v2i32_zero 6900 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6901 (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, 6902 Int32Regs:$r, Int32Regs:$g)>; 6903 6904def : Pat<(int_nvvm_sust_b_1d_v2i64_zero 6905 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 6906 (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, 6907 
Int64Regs:$r, Int64Regs:$g)>; 6908 6909def : Pat<(int_nvvm_sust_b_1d_v4i8_zero 6910 Int64Regs:$s, Int32Regs:$x, 6911 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6912 (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, 6913 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6914 6915def : Pat<(int_nvvm_sust_b_1d_v4i16_zero 6916 Int64Regs:$s, Int32Regs:$x, 6917 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6918 (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, 6919 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6920 6921def : Pat<(int_nvvm_sust_b_1d_v4i32_zero 6922 Int64Regs:$s, Int32Regs:$x, 6923 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6924 (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, 6925 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6926 6927 6928 6929def : Pat<(int_nvvm_sust_b_1d_array_i8_zero 6930 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 6931 (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6932 Int16Regs:$r)>; 6933 6934def : Pat<(int_nvvm_sust_b_1d_array_i16_zero 6935 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 6936 (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6937 Int16Regs:$r)>; 6938 6939def : Pat<(int_nvvm_sust_b_1d_array_i32_zero 6940 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 6941 (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6942 Int32Regs:$r)>; 6943 6944def : Pat<(int_nvvm_sust_b_1d_array_i64_zero 6945 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), 6946 (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6947 Int64Regs:$r)>; 6948 6949def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero 6950 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6951 (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6952 Int16Regs:$r, Int16Regs:$g)>; 6953 6954def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero 6955 Int64Regs:$s, Int32Regs:$l, 
Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6956 (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6957 Int16Regs:$r, Int16Regs:$g)>; 6958 6959def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero 6960 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6961 (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6962 Int32Regs:$r, Int32Regs:$g)>; 6963 6964def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero 6965 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 6966 (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6967 Int64Regs:$r, Int64Regs:$g)>; 6968 6969def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero 6970 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6971 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6972 (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6973 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6974 6975def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero 6976 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6977 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6978 (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6979 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6980 6981def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero 6982 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6983 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6984 (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6985 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6986 6987 6988 6989def : Pat<(int_nvvm_sust_b_2d_i8_zero 6990 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6991 (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6992 Int16Regs:$r)>; 6993 6994def : Pat<(int_nvvm_sust_b_2d_i16_zero 6995 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6996 (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6997 Int16Regs:$r)>; 6998 6999def : Pat<(int_nvvm_sust_b_2d_i32_zero 
7000 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 7001 (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7002 Int32Regs:$r)>; 7003 7004def : Pat<(int_nvvm_sust_b_2d_i64_zero 7005 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 7006 (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7007 Int64Regs:$r)>; 7008 7009def : Pat<(int_nvvm_sust_b_2d_v2i8_zero 7010 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 7011 (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7012 Int16Regs:$r, Int16Regs:$g)>; 7013 7014def : Pat<(int_nvvm_sust_b_2d_v2i16_zero 7015 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 7016 (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7017 Int16Regs:$r, Int16Regs:$g)>; 7018 7019def : Pat<(int_nvvm_sust_b_2d_v2i32_zero 7020 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 7021 (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7022 Int32Regs:$r, Int32Regs:$g)>; 7023 7024def : Pat<(int_nvvm_sust_b_2d_v2i64_zero 7025 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), 7026 (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7027 Int64Regs:$r, Int64Regs:$g)>; 7028 7029def : Pat<(int_nvvm_sust_b_2d_v4i8_zero 7030 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7031 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7032 (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7033 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7034 7035def : Pat<(int_nvvm_sust_b_2d_v4i16_zero 7036 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7037 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7038 (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7039 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7040 7041def : Pat<(int_nvvm_sust_b_2d_v4i32_zero 7042 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7043 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, 
Int32Regs:$a), 7044 (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7045 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 7046 7047 7048 7049def : Pat<(int_nvvm_sust_b_2d_array_i8_zero 7050 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 7051 (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s, 7052 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7053 Int16Regs:$r)>; 7054 7055def : Pat<(int_nvvm_sust_b_2d_array_i16_zero 7056 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 7057 (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s, 7058 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7059 Int16Regs:$r)>; 7060 7061def : Pat<(int_nvvm_sust_b_2d_array_i32_zero 7062 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 7063 (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s, 7064 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7065 Int32Regs:$r)>; 7066 7067def : Pat<(int_nvvm_sust_b_2d_array_i64_zero 7068 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 7069 (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s, 7070 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7071 Int64Regs:$r)>; 7072 7073def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero 7074 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7075 Int16Regs:$r, Int16Regs:$g), 7076 (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, 7077 Int32Regs:$x, Int32Regs:$y, 7078 Int16Regs:$r, Int16Regs:$g)>; 7079 7080def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero 7081 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7082 Int16Regs:$r, Int16Regs:$g), 7083 (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, 7084 Int32Regs:$x, Int32Regs:$y, 7085 Int16Regs:$r, Int16Regs:$g)>; 7086 7087def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero 7088 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 7089 Int32Regs:$g), 7090 (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, 7091 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 7092 7093def : 
Pat<(int_nvvm_sust_b_2d_array_v2i64_zero 7094 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 7095 Int64Regs:$g), 7096 (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, 7097 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 7098 7099def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero 7100 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7101 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7102 (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s, 7103 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7104 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7105 7106def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero 7107 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7108 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7109 (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s, 7110 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7111 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7112 7113def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero 7114 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7115 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 7116 (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, 7117 Int32Regs:$x, Int32Regs:$y, 7118 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 7119 7120 7121 7122def : Pat<(int_nvvm_sust_b_3d_i8_zero 7123 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7124 Int16Regs:$r), 7125 (SUST_B_3D_B8_ZERO Int64Regs:$s, 7126 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7127 Int16Regs:$r)>; 7128 7129def : Pat<(int_nvvm_sust_b_3d_i16_zero 7130 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7131 Int16Regs:$r), 7132 (SUST_B_3D_B16_ZERO Int64Regs:$s, 7133 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7134 Int16Regs:$r)>; 7135 7136def : Pat<(int_nvvm_sust_b_3d_i32_zero 7137 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7138 Int32Regs:$r), 7139 (SUST_B_3D_B32_ZERO Int64Regs:$s, 7140 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7141 Int32Regs:$r)>; 7142 7143def : 
Pat<(int_nvvm_sust_b_3d_i64_zero 7144 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7145 Int64Regs:$r), 7146 (SUST_B_3D_B64_ZERO Int64Regs:$s, 7147 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7148 Int64Regs:$r)>; 7149 7150def : Pat<(int_nvvm_sust_b_3d_v2i8_zero 7151 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7152 Int16Regs:$r, Int16Regs:$g), 7153 (SUST_B_3D_V2B8_ZERO Int64Regs:$s, 7154 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7155 Int16Regs:$r, Int16Regs:$g)>; 7156 7157def : Pat<(int_nvvm_sust_b_3d_v2i16_zero 7158 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7159 Int16Regs:$r, Int16Regs:$g), 7160 (SUST_B_3D_V2B16_ZERO Int64Regs:$s, 7161 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7162 Int16Regs:$r, Int16Regs:$g)>; 7163 7164def : Pat<(int_nvvm_sust_b_3d_v2i32_zero 7165 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7166 Int32Regs:$r, Int32Regs:$g), 7167 (SUST_B_3D_V2B32_ZERO Int64Regs:$s, 7168 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7169 Int32Regs:$r, Int32Regs:$g)>; 7170 7171def : Pat<(int_nvvm_sust_b_3d_v2i64_zero 7172 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7173 Int64Regs:$r, Int64Regs:$g), 7174 (SUST_B_3D_V2B64_ZERO Int64Regs:$s, 7175 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7176 Int64Regs:$r, Int64Regs:$g)>; 7177 7178def : Pat<(int_nvvm_sust_b_3d_v4i8_zero 7179 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7180 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7181 (SUST_B_3D_V4B8_ZERO Int64Regs:$s, 7182 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7183 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7184 7185def : Pat<(int_nvvm_sust_b_3d_v4i16_zero 7186 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7187 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7188 (SUST_B_3D_V4B16_ZERO Int64Regs:$s, 7189 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7190 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7191 7192def : Pat<(int_nvvm_sust_b_3d_v4i32_zero 7193 
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7194 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 7195 (SUST_B_3D_V4B32_ZERO Int64Regs:$s, 7196 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7197 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 7198 7199 7200 7201 7202def : Pat<(int_nvvm_sust_p_1d_i8_trap 7203 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 7204 (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 7205 7206def : Pat<(int_nvvm_sust_p_1d_i16_trap 7207 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 7208 (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 7209 7210def : Pat<(int_nvvm_sust_p_1d_i32_trap 7211 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 7212 (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 7213 7214def : Pat<(int_nvvm_sust_p_1d_v2i8_trap 7215 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 7216 (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, 7217 Int16Regs:$r, Int16Regs:$g)>; 7218 7219def : Pat<(int_nvvm_sust_p_1d_v2i16_trap 7220 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 7221 (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, 7222 Int16Regs:$r, Int16Regs:$g)>; 7223 7224def : Pat<(int_nvvm_sust_p_1d_v2i32_trap 7225 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 7226 (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, 7227 Int32Regs:$r, Int32Regs:$g)>; 7228 7229def : Pat<(int_nvvm_sust_p_1d_v4i8_trap 7230 Int64Regs:$s, Int32Regs:$x, 7231 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7232 (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, 7233 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7234 7235def : Pat<(int_nvvm_sust_p_1d_v4i16_trap 7236 Int64Regs:$s, Int32Regs:$x, 7237 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7238 (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, 7239 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7240 7241def : Pat<(int_nvvm_sust_p_1d_v4i32_trap 7242 Int64Regs:$s, Int32Regs:$x, 7243 
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 7244 (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, 7245 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 7246 7247 7248 7249def : Pat<(int_nvvm_sust_p_1d_array_i8_trap 7250 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 7251 (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7252 Int16Regs:$r)>; 7253 7254def : Pat<(int_nvvm_sust_p_1d_array_i16_trap 7255 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 7256 (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7257 Int16Regs:$r)>; 7258 7259def : Pat<(int_nvvm_sust_p_1d_array_i32_trap 7260 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 7261 (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7262 Int32Regs:$r)>; 7263 7264def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap 7265 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 7266 (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7267 Int16Regs:$r, Int16Regs:$g)>; 7268 7269def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap 7270 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 7271 (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7272 Int16Regs:$r, Int16Regs:$g)>; 7273 7274def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap 7275 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 7276 (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7277 Int32Regs:$r, Int32Regs:$g)>; 7278 7279def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap 7280 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7281 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7282 (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7283 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7284 7285def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap 7286 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7287 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, 
Int16Regs:$a), 7288 (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7289 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7290 7291def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap 7292 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7293 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 7294 (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7295 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 7296 7297 7298 7299def : Pat<(int_nvvm_sust_p_2d_i8_trap 7300 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 7301 (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7302 Int16Regs:$r)>; 7303 7304def : Pat<(int_nvvm_sust_p_2d_i16_trap 7305 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 7306 (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7307 Int16Regs:$r)>; 7308 7309def : Pat<(int_nvvm_sust_p_2d_i32_trap 7310 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 7311 (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7312 Int32Regs:$r)>; 7313 7314def : Pat<(int_nvvm_sust_p_2d_v2i8_trap 7315 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 7316 (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7317 Int16Regs:$r, Int16Regs:$g)>; 7318 7319def : Pat<(int_nvvm_sust_p_2d_v2i16_trap 7320 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 7321 (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7322 Int16Regs:$r, Int16Regs:$g)>; 7323 7324def : Pat<(int_nvvm_sust_p_2d_v2i32_trap 7325 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 7326 (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7327 Int32Regs:$r, Int32Regs:$g)>; 7328 7329def : Pat<(int_nvvm_sust_p_2d_v4i8_trap 7330 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7331 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7332 (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7333 Int16Regs:$r, 
Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7334 7335def : Pat<(int_nvvm_sust_p_2d_v4i16_trap 7336 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7337 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7338 (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7339 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7340 7341def : Pat<(int_nvvm_sust_p_2d_v4i32_trap 7342 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7343 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 7344 (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7345 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 7346 7347 7348 7349def : Pat<(int_nvvm_sust_p_2d_array_i8_trap 7350 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 7351 (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s, 7352 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7353 Int16Regs:$r)>; 7354 7355def : Pat<(int_nvvm_sust_p_2d_array_i16_trap 7356 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 7357 (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s, 7358 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7359 Int16Regs:$r)>; 7360 7361def : Pat<(int_nvvm_sust_p_2d_array_i32_trap 7362 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 7363 (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s, 7364 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7365 Int32Regs:$r)>; 7366 7367def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap 7368 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7369 Int16Regs:$r, Int16Regs:$g), 7370 (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, 7371 Int32Regs:$x, Int32Regs:$y, 7372 Int16Regs:$r, Int16Regs:$g)>; 7373 7374def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap 7375 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7376 Int16Regs:$r, Int16Regs:$g), 7377 (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, 7378 Int32Regs:$x, Int32Regs:$y, 7379 Int16Regs:$r, Int16Regs:$g)>; 7380 7381def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap 7382 Int64Regs:$s, 
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 7383 Int32Regs:$g), 7384 (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, 7385 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 7386 7387def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap 7388 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7389 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7390 (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s, 7391 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7392 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7393 7394def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap 7395 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7396 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7397 (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s, 7398 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7399 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7400 7401def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap 7402 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7403 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 7404 (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, 7405 Int32Regs:$x, Int32Regs:$y, 7406 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 7407 7408 7409 7410def : Pat<(int_nvvm_sust_p_3d_i8_trap 7411 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7412 Int16Regs:$r), 7413 (SUST_P_3D_B8_TRAP Int64Regs:$s, 7414 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7415 Int16Regs:$r)>; 7416 7417def : Pat<(int_nvvm_sust_p_3d_i16_trap 7418 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7419 Int16Regs:$r), 7420 (SUST_P_3D_B16_TRAP Int64Regs:$s, 7421 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7422 Int16Regs:$r)>; 7423 7424def : Pat<(int_nvvm_sust_p_3d_i32_trap 7425 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7426 Int32Regs:$r), 7427 (SUST_P_3D_B32_TRAP Int64Regs:$s, 7428 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7429 Int32Regs:$r)>; 7430 7431def : Pat<(int_nvvm_sust_p_3d_v2i8_trap 7432 Int64Regs:$s, 
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7433 Int16Regs:$r, Int16Regs:$g), 7434 (SUST_P_3D_V2B8_TRAP Int64Regs:$s, 7435 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7436 Int16Regs:$r, Int16Regs:$g)>; 7437 7438def : Pat<(int_nvvm_sust_p_3d_v2i16_trap 7439 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7440 Int16Regs:$r, Int16Regs:$g), 7441 (SUST_P_3D_V2B16_TRAP Int64Regs:$s, 7442 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7443 Int16Regs:$r, Int16Regs:$g)>; 7444 7445def : Pat<(int_nvvm_sust_p_3d_v2i32_trap 7446 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7447 Int32Regs:$r, Int32Regs:$g), 7448 (SUST_P_3D_V2B32_TRAP Int64Regs:$s, 7449 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7450 Int32Regs:$r, Int32Regs:$g)>; 7451 7452def : Pat<(int_nvvm_sust_p_3d_v4i8_trap 7453 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7454 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7455 (SUST_P_3D_V4B8_TRAP Int64Regs:$s, 7456 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7457 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7458 7459def : Pat<(int_nvvm_sust_p_3d_v4i16_trap 7460 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7461 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7462 (SUST_P_3D_V4B16_TRAP Int64Regs:$s, 7463 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7464 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7465 7466def : Pat<(int_nvvm_sust_p_3d_v4i32_trap 7467 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7468 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 7469 (SUST_P_3D_V4B32_TRAP Int64Regs:$s, 7470 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7471 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 7472 7473//----------------------------------- 7474// Read Special Registers 7475//----------------------------------- 7476 7477class PTX_READ_SREG_R64<string regname, Intrinsic intop> 7478 : NVPTXInst<(outs Int64Regs:$d), (ins), 7479 !strconcat("mov.u64 \t$d, %", regname, ";"), 7480 [(set 
Int64Regs:$d, (intop))]>; 7481 7482class PTX_READ_SREG_R32<string regname, Intrinsic intop> 7483 : NVPTXInst<(outs Int32Regs:$d), (ins), 7484 !strconcat("mov.u32 \t$d, %", regname, ";"), 7485 [(set Int32Regs:$d, (intop))]>; 7486 7487// TODO Add read vector-version of special registers 7488 7489def INT_PTX_SREG_TID_X : 7490 PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>; 7491def INT_PTX_SREG_TID_Y : 7492 PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>; 7493def INT_PTX_SREG_TID_Z : 7494 PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>; 7495def INT_PTX_SREG_TID_W : 7496 PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>; 7497 7498def INT_PTX_SREG_NTID_X : 7499 PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>; 7500def INT_PTX_SREG_NTID_Y : 7501 PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>; 7502def INT_PTX_SREG_NTID_Z : 7503 PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>; 7504def INT_PTX_SREG_NTID_W : 7505 PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>; 7506 7507def INT_PTX_SREG_LANEID : 7508 PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>; 7509def INT_PTX_SREG_WARPID : 7510 PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>; 7511def INT_PTX_SREG_NWARPID : 7512 PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>; 7513 7514def INT_PTX_SREG_CTAID_X : 7515 PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>; 7516def INT_PTX_SREG_CTAID_Y : 7517 PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>; 7518def INT_PTX_SREG_CTAID_Z : 7519 PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>; 7520def INT_PTX_SREG_CTAID_W : 7521 PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>; 7522 7523def INT_PTX_SREG_NCTAID_X : 7524 PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>; 7525def INT_PTX_SREG_NCTAID_Y : 7526 PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>; 7527def INT_PTX_SREG_NCTAID_Z : 7528 
PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>; 7529def INT_PTX_SREG_NCTAID_W : 7530 PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>; 7531 7532def INT_PTX_SREG_SMID : 7533 PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>; 7534def INT_PTX_SREG_NSMID : 7535 PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>; 7536def INT_PTX_SREG_GRIDID : 7537 PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>; 7538 7539def INT_PTX_SREG_LANEMASK_EQ : 7540 PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>; 7541def INT_PTX_SREG_LANEMASK_LE : 7542 PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>; 7543def INT_PTX_SREG_LANEMASK_LT : 7544 PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>; 7545def INT_PTX_SREG_LANEMASK_GE : 7546 PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>; 7547def INT_PTX_SREG_LANEMASK_GT : 7548 PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>; 7549 7550def INT_PTX_SREG_CLOCK : 7551 PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>; 7552def INT_PTX_SREG_CLOCK64 : 7553 PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>; 7554 7555def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>; 7556def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>; 7557def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>; 7558def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>; 7559 7560// TODO: It would be nice to use PTX_READ_SREG here, but it doesn't 7561// handle the constant. 7562def INT_PTX_SREG_WARPSIZE : 7563 NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;", 7564 [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>; 7565 7566// Helper class that represents a 'fragment' of an NVPTX *MMA instruction. 
// In addition to target-independent fields provided by WMMA_REGS, it adds
// the fields commonly used to implement specific PTX instruction -- register
// types and names, constraints, parts of assembly, etc.
//
// Parameters:
//   r  - fragment description to extend; its geom/frag/ptx_elt_type are
//        forwarded to the WMMA_REGS base class.
//   op - name of the instruction family the fragment is used with. Within this
//        class it is consulted only when selecting Predicates.
class WMMA_REGINFO<WMMA_REGS r, string op>
      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
  // NVPTX register types used to carry fragment data.
  // Only f16/f32/f64 get floating-point register classes; every other element
  // type listed here is carried in Int32Regs.
  NVPTXRegClass regclass = !cond(
    !eq(ptx_elt_type, "f16") : Float16x2Regs,
    !eq(ptx_elt_type, "f32") : Float32Regs,
    !eq(ptx_elt_type, "f64") : Float64Regs,
    !eq(ptx_elt_type, "bf16") : Int32Regs,
    !eq(ptx_elt_type, "tf32") : Int32Regs,
    !eq(ptx_elt_type, "s32") : Int32Regs,
    !eq(ptx_elt_type, "b16") : Int32Regs,
    !eq(ptx_elt_type, "s8") : Int32Regs,
    !eq(ptx_elt_type, "u8") : Int32Regs,
    !eq(ptx_elt_type, "s4") : Int32Regs,
    !eq(ptx_elt_type, "u4") : Int32Regs,
    !eq(ptx_elt_type, "b1") : Int32Regs);

  // Instruction input/output arguments for the fragment: `regclass` repeated
  // once per element of `regs` (a field not defined in this class --
  // presumably inherited from WMMA_REGS; confirm there).
  list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));

  // List of register names for the fragment -- ["ra0", "ra1",...]
  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;

  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
  string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";

  // Predicates for particular fragment variant. Technically those are
  // per-instruction predicates, but currently all fragments that can be used in
  // a given instruction are subject to the same constraints, so an instruction
  // can use predicates from any of its fragments. If/when this is no
  // longer the case, we can concat all per-fragment predicates to enforce that
  // all fragments of the instruction are viable.
  //
  // NOTE: !cond yields the value of the FIRST condition that holds, so the
  // order of the clauses below matters wherever conditions overlap
  // (e.g. f32 @ m16n16k16 is claimed by the first clause).
  list<Predicate> Predicates = !cond(
    // fp16 -> fp16/fp32 @ m16n16k16
    !and(!eq(geom, "m16n16k16"),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],

    // f64 @ m8n8k4. This is the only f64 clause needed: a second, identical
    // clause further down could never be selected and has been removed.
    !and(!eq(geom,"m8n8k4"),
         !eq(ptx_elt_type, "f64")) : [hasSM80, hasPTX70],

    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
    !and(!or(!eq(geom, "m8n32k16"),
             !eq(geom, "m32n8k16")),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],

    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !or(!eq(ptx_elt_type, "u8"),
             !eq(ptx_elt_type, "s8"),
             !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],

    // bf16 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !eq(ptx_elt_type, "bf16")) : [hasSM80, hasPTX70],

    // tf32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "tf32")) : [hasSM80, hasPTX70],

    // f32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "f32")) : [hasSM80, hasPTX70],

    // b1 -> s32 @ m8n8k128(b1), for every op other than "mma"
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k128")) : [hasSM75, hasPTX63],

    // u4/s4 -> s32 @ m8n8k32 (u4/s4), for every op other than "mma"
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63],

    // m16n8k8 / m8n8k16, regardless of op and element type
    !or(!eq(geom,"m16n8k8"),
        !eq(geom,"m8n8k16")) : [hasSM75, hasPTX65],

    // non-f64 @ m8n8k4
    !and(!ne(ptx_elt_type,"f64"),
         !eq(geom, "m8n8k4")) : [hasSM70, hasPTX64],

    // mma m8n8k32 requires higher PTX version
    !and(!eq(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX65],

    // Geometries handled here only for the "mma" op (m8n8k128 for other ops
    // is claimed by the b1 clause above).
    !and(!eq(op,"mma"),
         !or(!eq(geom, "m16n8k16"),
             !eq(geom, "m16n8k4"),
             !eq(geom, "m16n8k32"),
             !eq(geom, "m16n8k64"),
             !eq(geom, "m8n8k128"),
             !eq(geom, "m16n8k128"),
             !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70],

    // ldmatrix b16 @ m8n8
    !and(!eq(op,"ldmatrix"),
         !eq(ptx_elt_type,"b16"),
         !eq(geom, "m8n8")) : [hasSM75, hasPTX65]);

  // template DAGs for instruction inputs/output.
  dag Outs = !dag(outs, ptx_regs, reg_names);
  dag Ins = !dag(ins, ptx_regs, reg_names);
}

// Convert dag of arguments into a dag to match given intrinsic.
class BuildPatternI<Intrinsic Intr, dag Ins> {
  // Build a dag pattern that matches the intrinsic call: the `ins` operator is
  // replaced with the intrinsic, and the memory operand kinds
  // (imem/MEMri/MEMri64) with the corresponding address patterns
  // (ADDRvar/ADDRri/ADDRri64).
  dag ret = !foreach(tmp, Ins,
                          !subst(imem, ADDRvar,
                          !subst(MEMri64, ADDRri64,
                          !subst(MEMri, ADDRri,
                          !subst(ins, Intr, tmp)))));
}

// Same as above, but uses PatFrag instead of an Intrinsic.
class BuildPatternPF<PatFrag Intr, dag Ins> {
  // Build a dag pattern that matches the PatFrag, applying the same operand
  // substitutions as BuildPatternI.
  dag ret = !foreach(tmp, Ins,
                          !subst(imem, ADDRvar,
                          !subst(MEMri64, ADDRri64,
                          !subst(MEMri, ADDRri,
                          !subst(ins, Intr, tmp)))));
}

// Common WMMA-related fields used for building patterns for all MMA instructions.
//   _Intr - name of the intrinsic record the instruction is selected from.
//   _Args - list of (ins ...) dags; they are concatenated, in order, into Args.
// The operand lists and the asm string are placeholders here; subclasses
// override them with `let`.
class WMMA_INSTR<string _Intr, list<dag> _Args>
  : NVPTXInst<(outs), (ins), "?", []> {
  Intrinsic Intr = !cast<Intrinsic>(_Intr);
  // Concatenate all arguments into a single dag.
  dag Args = !foldl((ins), _Args, a, b, !con(a,b));
  // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
}

//
// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
// The geometry and element type in the line above are examples only; the asm
// string below takes them from Frag.geom and Frag.ptx_elt_type.
//   Frag       - fragment being loaded; provides the output registers.
//   Layout     - layout string spliced into the asm ("row"/"col").
//   Space      - address-space suffix: ".global", ".shared" or "" (generic).
//   WithStride - when set, an extra Int32Regs:$ldm stride operand is added.
//   SrcOp      - operand kind used for the source address.
class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
                DAGOperand SrcOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
                              [!con((ins SrcOp:$src),
                                    !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {
  // Load/store intrinsics are overloaded on pointer's address space.
  // To match the right intrinsic, we need to build AS-constrained PatFrag.
  // PFOperands is a dag equivalent in shape to Args, but using
  // (ops node:$name, .....).
  dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
  dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
  // Build PatFrag that only matches particular address space.
  // Any Space other than ".shared"/".global" falls back to the generic check.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             PFOperandsIntr,
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   !eq(Space, ".global"): AS_match.global,
                                   true: AS_match.generic)>;
  // Build AS-constrained pattern.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  // $ptx is an extra operand that is not part of the intrinsic's arguments;
  // it is referenced by the asm string as "${ptx:aligned}".
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "wmma.load."
                  # Frag.frag
                  # ".sync"
                  # "${ptx:aligned}"
                  # "." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type # " \t"
                  # Frag.regstring
                  # ", [$src]"
                  # !if(WithStride, ", $ldm", "")
                  # ";";
}

//
// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
// As for WMMA_LOAD, geometry and element type come from Frag.
//   Frag       - fragment being stored; its registers are instruction inputs.
//   Layout     - layout string spliced into the asm ("row"/"col").
//   Space      - address-space suffix: ".global", ".shared" or "" (generic).
//   WithStride - when set, an extra Int32Regs:$ldm stride operand is added.
//   DstOp      - operand kind used for the destination address.
class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
                   bit WithStride, DAGOperand DstOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
               [!con((ins DstOp:$dst),
                     Frag.Ins,
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {

  // Load/store intrinsics are overloaded on pointer's address space.
  // To match the right intrinsic, we need to build AS-constrained PatFrag.
  // PFOperands is a dag equivalent in shape to Args, but using
  // (ops node:$name, .....).
  dag PFOperands = !con((ops node:$dst),
                        !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
                        !if(WithStride, (ops node:$ldm), (ops)));
  // Build PatFrag that only matches particular address space.
  // The fragment body is PFOperands with `ops` replaced by the intrinsic.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   !eq(Space, ".global"): AS_match.global,
                                   true: AS_match.generic)>;
  // Build AS-constrained pattern.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = (outs);
  let AsmString = "wmma.store.d.sync"
                  # "${ptx:aligned}"
                  # "." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " \t[$dst],"
                  # Frag.regstring
                  # !if(WithStride, ", $ldm", "")
                  # ";";
}

// Create all load/store variants: one instruction per combination of layout,
// stride, address space, address operand kind and fragment that
// NVVM_WMMA_LDST_SUPPORTED accepts. The records are collected in MMA_LDSTs.
defset list<WMMA_INSTR> MMA_LDSTs  = {
  foreach layout = ["row", "col"] in {
    foreach stride = [false, true] in {
      foreach space = [".global", ".shared", ""] in {
        foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
          foreach frag = NVVM_MMA_OPS.all_ld_ops in
            if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
              def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
          foreach frag = NVVM_MMA_OPS.all_st_ops in
            if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
              def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
        } // addr
      } // space
    } // stride
  } // layout
} // defset

// B1 instruction variants need extra constraints.
// `ret` is FragA's predicate list, extended with [hasSM80, hasPTX71] when
// b1op is ".and.popc". Op and Frag merely record the template arguments.
class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  string Op = b1op;
  WMMA_REGINFO Frag = FragA;
  list<Predicate> ret = !listconcat(
    FragA.Predicates,
    !if(!eq(b1op, ".and.popc"), [hasSM80,hasPTX71],[])
  );
}
// WMMA.MMA
//   FragA/FragB/FragC - input fragments; their registers form the inputs.
//   FragD             - result fragment; its registers form the outputs.
//   ALayout/BLayout   - layout strings spliced into the asm ("row"/"col").
//   Satfinite         - non-zero appends ".satfinite".
//   rnd               - rounding-mode suffix without the dot, "" for none.
//   b1op              - suffix inserted right after "wmma.mma" ("" for none).
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
  : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
                         [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // Type suffix of the instruction: f16 inputs only spell out the D and C
  // types, everything else lists D, A, B and C.
  string TypeList = !cond(
    !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type
                                     # "." # FragC.ptx_elt_type,
    true: "." # FragD.ptx_elt_type
          # "." # FragA.ptx_elt_type
          # "." # FragB.ptx_elt_type
          # "." # FragC.ptx_elt_type);
  let AsmString = "wmma.mma"
                  # b1op
                  # ".sync"
                  # "${ptx:aligned}"
                  # "." # ALayout
                  # "." # BLayout
                  # "." # FragA.geom
                  # !if(!ne(rnd, ""), !strconcat(".", rnd), "")
                  # TypeList
                  # !if(Satfinite, ".satfinite", "") # "\n\t\t"
                  # FragD.regstring # ",\n\t\t"
                  # FragA.regstring # ",\n\t\t"
                  # FragB.regstring # ",\n\t\t"
                  # FragC.regstring # ";";
}

// Instantiate WMMA_MMA for every layout/satfinite/rounding/op/b1op combination
// accepted by NVVM_WMMA_SUPPORTED. The records are collected in WMMAs.
defset list<WMMA_INSTR> WMMAs  = {
  foreach layout_a = ["row", "col"] in {
    foreach layout_b = ["row", "col"] in {
      foreach satf = [0, 1] in {
        foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
          foreach op = NVVM_MMA_OPS.all_wmma_ops in {
            foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
              if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
                def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
                              WMMA_REGINFO<op[1], "wmma.mma">,
                              WMMA_REGINFO<op[2], "wmma.mma">,
                              WMMA_REGINFO<op[3], "wmma.mma">,
                              layout_a, layout_b, satf, rnd, b1op>;
              }
            } // b1op
          } // op
        } // rnd
      } // satf
    } // layout_b
  } // layout_a
} // defset

// MMA
// Same parameters as WMMA_MMA minus the rounding mode. Differences visible in
// the asm string: ".aligned" is always emitted, the geometry precedes the
// layouts, all four fragment types are always listed, and b1op comes last.
class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string b1op>
  : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
                        [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  string TypeList = "." # FragD.ptx_elt_type
                    # "." # FragA.ptx_elt_type
                    # "." # FragB.ptx_elt_type
                    # "." # FragC.ptx_elt_type;
  let AsmString = "mma.sync.aligned."
                  # FragA.geom
                  # "." # ALayout
                  # "." # BLayout
                  # !if(Satfinite, ".satfinite", "")
                  # TypeList
                  # b1op # "\n\t\t"
                  # FragD.regstring # ",\n\t\t"
                  # FragA.regstring # ",\n\t\t"
                  # FragB.regstring # ",\n\t\t"
                  # FragC.regstring # ";";
}

// Instantiate MMA for every layout/satfinite/op/b1op combination accepted by
// NVVM_MMA_SUPPORTED. The records are collected in MMAs.
defset list<WMMA_INSTR> MMAs  = {
  foreach layout_a = ["row", "col"] in {
    foreach layout_b = ["row", "col"] in {
      foreach satf = [0, 1] in {
        foreach op = NVVM_MMA_OPS.all_mma_ops in {
          foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
            if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
              def : MMA<WMMA_REGINFO<op[0], "mma">,
                        WMMA_REGINFO<op[1], "mma">,
                        WMMA_REGINFO<op[2], "mma">,
                        WMMA_REGINFO<op[3], "mma">,
                        layout_a, layout_b, satf, b1op>;
            }
          } // b1op
        } // op
      } // satf
    } // layout_b
  } // layout_a
} // defset

//
// ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16
//
//   Frag       - fragment being loaded; provides the output registers.
//   Transposed - when set, ".trans" is added to the asm.
//   Space      - address-space suffix: ".shared" or "" (generic).
//   SrcOp      - operand kind used for the source address.
class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
               DAGOperand SrcOp>
  : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
    Requires<Frag.Predicates> {
  // Build PatFrag that only matches particular address space.
  // Any Space other than ".shared" falls back to the generic check.
  PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   true: AS_match.generic)>;
  // Build AS-constrained pattern.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "ldmatrix.sync.aligned."
                  # Frag.geom
                  # "." # Frag.frag
                  # !if(Transposed, ".trans", "")
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " " # Frag.regstring # ", [$src];";
}

// Create all ldmatrix variants
defset list<WMMA_INSTR> LDMATRIXs  = {
  foreach transposed = [false, true] in {
    foreach space = [".shared", ""] in {
      foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
        foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
          if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
            def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
                            addr>;
      } // addr
    } // space
  } // transposed
} // defset

// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
// the instruction record.
//
// The resulting pattern maps wi.IntrinsicPattern to
// (wi <wi.Args operands...>, ptx.version) and is guarded by wi.Predicates.
class MMA_PAT<WMMA_INSTR wi>
      : Pat<wi.IntrinsicPattern,
            !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
                 (wi ptx.version))>,
        Requires<wi.Predicates>;

// Build intrinsic->instruction patterns for all MMA instructions.
foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
  def : MMA_PAT<mma>;