//===-- SIInstrInfo.td -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Wavefront-size predicates. The Predicate string is evaluated against the
// subtarget during instruction selection; the AssemblerPredicate gates
// assembling/disassembling on the matching subtarget feature.
def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
  AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
  AssemblerPredicate <(all_of FeatureWavefrontSize64)>;

// Base class supplying per-generation assembler predicates to definitions
// that use PredicateControl.
class GCNPredicateControl : PredicateControl {
  Predicate SIAssemblerPredicate = isGFX6GFX7;
  Predicate VIAssemblerPredicate = isGFX8GFX9;
}

// Except for the NONE field, this must be kept in sync with the
// SIEncodingFamily enum in SIInstrInfo.cpp and the columns of the
// getMCOpcodeGen table.
def SIEncodingFamily {
  int NONE = -1;
  int SI = 0;
  int VI = 1;
  int SDWA = 2;
  int SDWA9 = 3;
  int GFX80 = 4;
  int GFX9 = 5;
  int GFX10 = 6;
  int SDWA10 = 7;
  int GFX90A = 8;
  int GFX940 = 9;
  int GFX11 = 10;
}

//===----------------------------------------------------------------------===//
// SI DAG Nodes
//===----------------------------------------------------------------------===//

def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;

def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
  SDTypeProfile<1, 3, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
  [SDNPMayLoad, SDNPMemOperand]
>;

def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
  SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
>;

def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;

def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;

// Binary atomic with floating-point data: result and data operand share the
// same FP type; operand 1 is the pointer.
def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
  SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
]>;

def SIatomic_fmin : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMIN", SDTAtomic2_f32,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;

def SIatomic_fmax : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;

// load_d16_{lo|hi} ptr, tied_input
def SIload_d16 : SDTypeProfile<1, 2, [
  SDTCisPtrTy<1>,
  SDTCisSameAs<0, 2>
]>;


def SDTtbuffer_load : SDTypeProfile<1, 8,
  [                     // vdata
   SDTCisVT<1, v4i32>,  // rsrc
   SDTCisVT<2, i32>,    // vindex(VGPR)
   SDTCisVT<3, i32>,    // voffset(VGPR)
   SDTCisVT<4, i32>,    // soffset(SGPR)
   SDTCisVT<5, i32>,    // offset(imm)
   SDTCisVT<6, i32>,    // format(imm)
   SDTCisVT<7, i32>,    // cachepolicy, swizzled buffer(imm)
   SDTCisVT<8, i1>      // idxen(imm)
  ]>;

def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
                            [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16",
                                SDTtbuffer_load,
                                [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SDTtbuffer_store : SDTypeProfile<0, 9,
  [                     // vdata
   SDTCisVT<1, v4i32>,  // rsrc
   SDTCisVT<2, i32>,    // vindex(VGPR)
   SDTCisVT<3, i32>,    // voffset(VGPR)
   SDTCisVT<4, i32>,    // soffset(SGPR)
   SDTCisVT<5, i32>,    // offset(imm)
   SDTCisVT<6, i32>,    // format(imm)
   SDTCisVT<7, i32>,    // cachepolicy, swizzled buffer(imm)
   SDTCisVT<8, i1>      // idxen(imm)
  ]>;

def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
                             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
                                 SDTtbuffer_store,
                                 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

def SDTBufferLoad : SDTypeProfile<1, 7,
  [                    // vdata
   SDTCisVT<1, v4i32>, // rsrc
   SDTCisVT<2, i32>,   // vindex(VGPR)
   SDTCisVT<3, i32>,   // voffset(VGPR)
   SDTCisVT<4, i32>,   // soffset(SGPR)
   SDTCisVT<5, i32>,   // offset(imm)
   SDTCisVT<6, i32>,   // cachepolicy, swizzled buffer(imm)
   SDTCisVT<7, i1>]>;  // idxen(imm)

def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ubyte : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ushort : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_byte : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_short : SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
                            SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;

def SDTBufferStore : SDTypeProfile<0, 8,
  [                    // vdata
   SDTCisVT<1, v4i32>, // rsrc
   SDTCisVT<2, i32>,   // vindex(VGPR)
   SDTCisVT<3, i32>,   // voffset(VGPR)
   SDTCisVT<4, i32>,   // soffset(SGPR)
   SDTCisVT<5, i32>,   // offset(imm)
   SDTCisVT<6, i32>,   // cachepolicy, swizzled buffer(imm)
   SDTCisVT<7, i1>]>;  // idxen(imm)

def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
                             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_byte : SDNode <"AMDGPUISD::BUFFER_STORE_BYTE",
                                  SDTBufferStore,
                                  [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_short : SDNode <"AMDGPUISD::BUFFER_STORE_SHORT",
                                   SDTBufferStore,
                                   [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT",
                                    SDTBufferStore,
                                    [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
                                        SDTBufferStore,
                                        [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

// Common profile for the buffer atomic nodes below: one result, plus
// (vdata_in, rsrc, vindex, voffset, soffset, offset, cachepolicy, idxen).
class SDBufferAtomic<string opcode> : SDNode <opcode,
  SDTypeProfile<1, 8,
      [SDTCisVT<2, v4i32>, // rsrc
       SDTCisVT<3, i32>,   // vindex(VGPR)
       SDTCisVT<4, i32>,   // voffset(VGPR)
       SDTCisVT<5, i32>,   // soffset(SGPR)
       SDTCisVT<6, i32>,   // offset(imm)
       SDTCisVT<7, i32>,   // cachepolicy(imm)
       SDTCisVT<8, i1>]>,  // idxen(imm)
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;

def SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
def SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
def SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
def SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
def SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
def SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
def SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
def SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">;
def SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">;
def SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">;

// For each buffer atomic SDNode, create "_ret" / "_noret" PatFrag variants
// distinguished by whether the atomic's result value has uses.
multiclass SDBufferAtomicRetNoRet {
  // Matches only when the returned value is used.
  def "_ret" : PatFrag<
    (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
     node:$offset, node:$cachepolicy, node:$idxen),
    (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex,
     node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
     node:$idxen)> {
    let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }];
    let GISelPredicateCode = [{ return true; }];
  }

  // Matches only when the returned value is dead.
  def "_noret" : PatFrag<
    (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
     node:$offset, node:$cachepolicy, node:$idxen),
    (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex,
     node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
     node:$idxen)> {
    let PredicateCode = [{ return SDValue(N, 0).use_empty(); }];
    let GISelPredicateCode = [{ return false; }];
  }
}

defm SIbuffer_atomic_swap : SDBufferAtomicRetNoRet;
defm SIbuffer_atomic_add : SDBufferAtomicRetNoRet;
defm SIbuffer_atomic_sub : SDBufferAtomicRetNoRet;
defm SIbuffer_atomic_smin : SDBufferAtomicRetNoRet;
defm SIbuffer_atomic_umin : SDBufferAtomicRetNoRet;
defm SIbuffer_atomic_smax : SDBufferAtomicRetNoRet;
defm SIbuffer_atomic_umax : SDBufferAtomicRetNoRet;
defm SIbuffer_atomic_and : SDBufferAtomicRetNoRet;
defm SIbuffer_atomic_or : SDBufferAtomicRetNoRet;
defm SIbuffer_atomic_xor : SDBufferAtomicRetNoRet;
defm SIbuffer_atomic_inc : SDBufferAtomicRetNoRet;
defm SIbuffer_atomic_dec : SDBufferAtomicRetNoRet;
defm SIbuffer_atomic_fadd : SDBufferAtomicRetNoRet;
defm SIbuffer_atomic_fmin : SDBufferAtomicRetNoRet;
defm SIbuffer_atomic_fmax : SDBufferAtomicRetNoRet;

def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
  SDTypeProfile<1, 9,
    [SDTCisVT<0, i32>,   // dst
     SDTCisVT<1, i32>,   // src
     SDTCisVT<2, i32>,   // cmp
     SDTCisVT<3, v4i32>, // rsrc
     SDTCisVT<4, i32>,   // vindex(VGPR)
     SDTCisVT<5, i32>,   // voffset(VGPR)
     SDTCisVT<6, i32>,   // soffset(SGPR)
     SDTCisVT<7, i32>,   // offset(imm)
     SDTCisVT<8, i32>,   // cachepolicy(imm)
     SDTCisVT<9, i1>]>,  // idxen(imm)
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;

// cmpswap variants of the _ret/_noret split above (extra cmp operand).
def SIbuffer_atomic_cmpswap_ret : PatFrag<
  (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset,
   node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
  (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex,
   node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
   node:$idxen)> {
  let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }];
  let GISelPredicateCode = [{ return true; }];
}

def SIbuffer_atomic_cmpswap_noret : PatFrag<
  (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset,
   node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
  (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex,
   node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
   node:$idxen)> {
  let PredicateCode = [{ return SDValue(N, 0).use_empty(); }];
  let GISelPredicateCode = [{ return false; }];
}

// Global atomic with no returned value: (vaddr, vdata).
class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
  SDTypeProfile<0, 2,
      [SDTCisPtrTy<0>,     // vaddr
       SDTCisVT<1, ty>]>,  // vdata
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;

def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
  SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
>;

def SIlds : SDNode<"AMDGPUISD::LDS",
  SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
>;

def SIload_d16_lo : SDNode<"AMDGPUISD::LOAD_D16_LO",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_lo_u8 : SDNode<"AMDGPUISD::LOAD_D16_LO_U8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_lo_i8 : SDNode<"AMDGPUISD::LOAD_D16_LO_I8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi : SDNode<"AMDGPUISD::LOAD_D16_HI",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi_u8 : SDNode<"AMDGPUISD::LOAD_D16_HI_U8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
  SDTypeProfile<0, 1, [SDTCisInt<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;

def SIfptrunc_round_upward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_UPWARD",
  SDTFPRoundOp
>;

def SIfptrunc_round_downward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_DOWNWARD",
  SDTFPRoundOp
>;

//===----------------------------------------------------------------------===//
// ValueType helpers
//===----------------------------------------------------------------------===//

// Returns 1 if the source arguments have modifiers, 0 if they do not.
class isFloatType<ValueType SrcVT> {
  bit ret = !or(!eq(SrcVT.Value, f16.Value),
                !eq(SrcVT.Value, f32.Value),
                !eq(SrcVT.Value, f64.Value),
                !eq(SrcVT.Value, v2f16.Value),
                !eq(SrcVT.Value, v4f16.Value),
                !eq(SrcVT.Value, v8f16.Value),
                !eq(SrcVT.Value, v16f16.Value),
                !eq(SrcVT.Value, v2f32.Value),
                !eq(SrcVT.Value, v4f32.Value),
                !eq(SrcVT.Value, v8f32.Value),
                !eq(SrcVT.Value, v2f64.Value),
                !eq(SrcVT.Value, v4f64.Value));
}

// XXX - do v2i16 instructions?
// Returns 1 if SrcVT is one of the integer (scalar or vector) types listed.
class isIntType<ValueType SrcVT> {
  bit ret = !or(!eq(SrcVT.Value, i16.Value),
                !eq(SrcVT.Value, i32.Value),
                !eq(SrcVT.Value, i64.Value),
                !eq(SrcVT.Value, v4i16.Value),
                !eq(SrcVT.Value, v8i16.Value),
                !eq(SrcVT.Value, v16i16.Value),
                !eq(SrcVT.Value, v2i32.Value),
                !eq(SrcVT.Value, v4i32.Value),
                !eq(SrcVT.Value, v8i32.Value));
}

// Returns 1 if SrcVT is one of the packed vector types listed.
class isPackedType<ValueType SrcVT> {
  bit ret = !or(!eq(SrcVT.Value, v2i16.Value),
                !eq(SrcVT.Value, v2f16.Value),
                !eq(SrcVT.Value, v4f16.Value),
                !eq(SrcVT.Value, v2i32.Value),
                !eq(SrcVT.Value, v2f32.Value),
                !eq(SrcVT.Value, v4i32.Value),
                !eq(SrcVT.Value, v4f32.Value),
                !eq(SrcVT.Value, v8i32.Value),
                !eq(SrcVT.Value, v8f32.Value));
}


//===----------------------------------------------------------------------===//
// PatFrags for global memory operations
//===----------------------------------------------------------------------===//

defm atomic_inc : binary_atomic_op_all_as<SIatomic_inc>;
defm atomic_dec : binary_atomic_op_all_as<SIatomic_dec>;
defm atomic_load_fmin : binary_atomic_op_all_as<SIatomic_fmin, 0>;
defm atomic_load_fmax : binary_atomic_op_all_as<SIatomic_fmax, 0>;

//===----------------------------------------------------------------------===//
// SDNodes PatFrags for loads/stores with a glue input.
// This is for SDNodes and PatFrag for local loads and stores to
// enable s_mov_b32 m0, -1 to be glued to the memory instructions.
//
// These mirror the regular load/store PatFrags and rely on special
// processing during Select() to add the glued copy.
//
//===----------------------------------------------------------------------===//

def AMDGPUld_glue : SDNode <"ISD::LOAD", SDTLoad,
  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
  let IsLoad = 1;
  let IsUnindexed = 1;
}

def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

// Glued atomic loads, by memory width.
def atomic_load_8_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_load_16_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_load_32_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}

// Glued extending loads, mirroring extload/sextload/zextload.
def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsAnyExtLoad = 1;
}

def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsSignExtLoad = 1;
}

def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsZeroExtLoad = 1;
}

def extloadi8_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def zextloadi8_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def extloadi16_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def zextloadi16_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}


// Glued loads restricted to the local address space.
let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
  let IsNonExtLoad = 1;
}

def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;

def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
} // End IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces

def load_align8_local_m0 : PatFrag<(ops node:$ptr),
                                   (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  int MinAlignment = 8;
}

def load_align16_local_m0 : PatFrag<(ops node:$ptr),
                                    (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  int MinAlignment = 16;
}

let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr),
                                     (atomic_load_8_glue node:$ptr)>;
def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_16_glue node:$ptr)>;
def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_32_glue node:$ptr)>;
def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_64_glue node:$ptr)>;
} // End let AddressSpaces = LoadAddress_local.AddrSpaces


def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;

def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;

def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr),
                                  (AMDGPUst_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsUnindexed = 1;
}

def store_glue : PatFrag<(ops node:$val, node:$ptr),
                         (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def truncstore_glue : PatFrag<(ops node:$val, node:$ptr),
                              (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 1;
}

def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr),
                                (truncstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i8;
  let IsTruncStore = 1;
}

def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr),
                                 (truncstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i16;
  let IsTruncStore = 1;
}

// Glued stores restricted to the local address space.
let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
def store_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                             (store_glue node:$val, node:$ptr)>;
def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                    (truncstorei8_glue node:$val, node:$ptr)>;
def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                     (truncstorei16_glue node:$val, node:$ptr)>;
}

def store_align8_local_m0 : PatFrag <(ops node:$value, node:$ptr),
                                     (store_local_m0 node:$value, node:$ptr)>,
                            Aligned<8> {
  let IsStore = 1;
}

def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr),
                                      (store_local_m0 node:$value, node:$ptr)>,
                             Aligned<16> {
  let IsStore = 1;
}

// Local loads/stores whose alignment is below four bytes.
let PredicateCode = [{return cast<MemSDNode>(N)->getAlignment() < 4;}],
    GISelPredicateCode = [{return (*MI.memoperands_begin())->getAlign() < 4;}],
    AddressSpaces = [ AddrSpaces.Local ] in {
def load_align_less_than_4_local : PatFrag<(ops node:$ptr),
                                           (load_local node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def load_align_less_than_4_local_m0 : PatFrag<(ops node:$ptr),
                                              (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def store_align_less_than_4_local : PatFrag <(ops node:$value, node:$ptr),
                                             (store_local node:$value, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def store_align_less_than_4_local_m0 : PatFrag <(ops node:$value, node:$ptr),
                                                (store_local_m0 node:$value, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}
}

// Glued atomic stores, by memory width. Operand order is (ptr, value).
def atomic_store_8_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_store_16_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_store_32_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_store_64_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}

let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
def atomic_store_8_local_m0 : PatFrag<(ops node:$ptr, node:$val),
                                      (atomic_store_8_glue node:$ptr, node:$val)>;
def atomic_store_16_local_m0 : PatFrag<(ops node:$ptr, node:$val),
                                       (atomic_store_16_glue node:$ptr, node:$val)>;
def atomic_store_32_local_m0 : PatFrag<(ops node:$ptr, node:$val),
                                       (atomic_store_32_glue node:$ptr, node:$val)>;
def atomic_store_64_local_m0 : PatFrag<(ops node:$ptr, node:$val),
                                       (atomic_store_64_glue node:$ptr, node:$val)>;
} // End let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces


// setcc known not to be divergent across the wave.
def si_setcc_uniform : PatFrag <
  (ops node:$lhs, node:$rhs, node:$cond),
  (setcc node:$lhs, node:$rhs, node:$cond), [{
  return !N->isDivergent();
}]>;

//===----------------------------------------------------------------------===//
// SDNodes PatFrags for a16 loads and stores with 3 components.
// v3f16/v3i16 is widened to v4f16/v4i16, so we need to match on the memory
// load/store size.
//===----------------------------------------------------------------------===//

class mubuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
   node:$auxiliary, node:$idxen),
  (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
   node:$auxiliary, node:$idxen)> {
  let IsLoad = 1;
  let MemoryVT = vt;
}

class mubuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
   node:$auxiliary, node:$idxen),
  (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
   node:$auxiliary, node:$idxen)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

class mtbuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
   node:$format, node:$auxiliary, node:$idxen),
  (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
   node:$format, node:$auxiliary, node:$idxen)> {
  let IsLoad = 1;
  let MemoryVT = vt;
}

class mtbuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
   node:$format, node:$auxiliary, node:$idxen),
  (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
   node:$format, node:$auxiliary, node:$idxen)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

//===----------------------------------------------------------------------===//
// SDNodes PatFrags for d16 loads
//===----------------------------------------------------------------------===//

// d16 load: (ptr, tied_input).
class LoadD16Frag <SDPatternOperator op> : PatFrag<
  (ops node:$ptr, node:$tied_in),
  (op node:$ptr, node:$tied_in)> {
  let IsLoad = 1;
}

// Instantiate the d16 load fragments for each address space in the list.
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

def load_d16_hi_#as : LoadD16Frag <SIload_d16_hi>;

def az_extloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_u8> {
  let MemoryVT = i8;
}

def sextloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_i8> {
  let MemoryVT = i8;
}

def load_d16_lo_#as : LoadD16Frag <SIload_d16_lo>;

def az_extloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_u8> {
  let MemoryVT = i8;
}

def sextloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_i8> {
  let MemoryVT = i8;
}

} // End let AddressSpaces = ...
} // End foreach AddrSpace

// Shift fragments with the source operands swapped (shift amount first).
def lshr_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (srl $src0, $src1)
>;

def ashr_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (sra $src0, $src1)
>;

def lshl_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (shl $src0, $src1)
>;

def add_ctpop : PatFrag <
  (ops node:$src0, node:$src1),
  (add (ctpop $src0), $src1)
>;

def xnor : PatFrag <
  (ops node:$src0, node:$src1),
  (not (xor $src0, $src1))
>;

// (src1 + (src0 << I)) for I in 1..4; only enabled for GlobalISel.
foreach I = 1-4 in {
def shl#I#_add : PatFrag <
  (ops node:$src0, node:$src1),
  (add (shl_oneuse $src0, (i32 I)), $src1)> {
  // FIXME: Poor substitute for disabling pattern in SelectionDAG
  let PredicateCode = [{return false;}];
  let GISelPredicateCode = [{return true;}];
}
}

// Create the glued atomic SDNode for one atomic operation, plus local/region
// m0 PatFrag variants of it.
multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
                            SDTypeProfile tc = SDTAtomic2,
                            bit IsInt = 1> {

  def _glue : SDNode <
    !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc,
    [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
  >;

  let AddressSpaces = StoreAddress_local.AddrSpaces in {
    defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
    defm _local_m0 : ret_noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"),
                                                 IsInt>;
  }

  let AddressSpaces = StoreAddress_region.AddrSpaces in {
    defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
    defm _region_m0 : ret_noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"),
                                                  IsInt>;
  }
}

defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
defm atomic_inc : SIAtomicM0Glue2 <"INC", 1>;
defm atomic_dec : SIAtomicM0Glue2 <"DEC", 1>;
defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
defm atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
defm atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
defm atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
defm atomic_swap : SIAtomicM0Glue2 <"SWAP">;
defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>;
defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>;

// SDNodeXForms converting (target) immediates to target constants of a
// specific width.
def as_i1timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
}]>;

def as_i8imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
}]>;

// NOTE(review): produces an MVT::i16 constant despite the i8 name — confirm
// this is intentional before changing it.
def as_i8timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i16imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i16timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i32imm: SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

def as_i32timm: SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

def as_i64imm: SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
}]>;

def cond_as_i32imm: SDNodeXForm<cond, [{
  return CurDAG->getTargetConstant(N->get(), SDLoc(N), MVT::i32);
}]>;

// Copied from the AArch64 backend:
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
  auto FI = cast<FrameIndexSDNode>(N);
  return
    CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
}]>;

// Copied from the AArch64 backend:
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;

// Extract bit 'bitnum' of the immediate as an i1 target constant.
class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
  uint64_t Imm = N->getZExtValue();
  unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
  return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
}]>;

def SIMM16bit : ImmLeaf <i32,
  [{return isInt<16>(Imm);}]
>;

def UIMM16bit : ImmLeaf <i32,
  [{return isUInt<16>(Imm);}]
>;

// i64 immediates whose value is fully represented by the low 32 bits.
def i64imm_32bit : ImmLeaf<i64, [{
  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;

def InlineImm16 : ImmLeaf<i16, [{
  return isInlineImmediate16(Imm);
}]>;

def InlineImm32 : ImmLeaf<i32, [{
  return isInlineImmediate32(Imm);
}]>;

def InlineImm64 : ImmLeaf<i64, [{
  return isInlineImmediate64(Imm);
}]>;

def InlineImmFP32 : FPImmLeaf<f32, [{
  return isInlineImmediate(Imm);
}]>;

def InlineImmFP64 : FPImmLeaf<f64, [{
  return isInlineImmediate(Imm);
}]>;


class VGPRImm <dag frag> : PatLeaf<frag, [{
  return isVGPRImm(N);
}]>;

def NegateImm : SDNodeXForm<imm, [{
  return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

// TODO: When FP inline imm values work?
// Immediates in (-64, -16] whose negation is a valid inline constant.
def NegSubInlineConst32 : ImmLeaf<i32, [{
  return Imm < -16 && Imm >= -64;
}], NegateImm>;

def NegSubInlineIntConst16 : ImmLeaf<i16, [{
  return Imm < -16 && Imm >= -64;
}], NegateImm>;

def ShiftAmt32Imm : ImmLeaf <i32, [{
  return Imm < 32;
}]>;

def getNegV2I16Imm : SDNodeXForm<build_vector, [{
  return SDValue(packNegConstantV2I16(N, *CurDAG), 0);
}]>;

// Matches a 2 x i16 build_vector whose non-zero/undef elements are
// negated inline immediates; transformed via getNegV2I16Imm.
def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
  assert(N->getNumOperands() == 2);
  assert(N->getOperand(0).getValueType().getSizeInBits() == 16);
  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  if (Src0 == Src1)
    return isNegInlineImmediate(Src0.getNode());

  return (isNullConstantOrUndef(Src0) && isNegInlineImmediate(Src1.getNode())) ||
         (isNullConstantOrUndef(Src1) && isNegInlineImmediate(Src0.getNode()));
}], getNegV2I16Imm>;


// f16 value in a VGPR produced by an opcode known to clear the high 16 bits.
def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{
  return fp16SrcZerosHighBits(N->getOpcode());
}]>;


//===----------------------------------------------------------------------===//
// MUBUF/SMEM Patterns
//===----------------------------------------------------------------------===//

// Masks a cache-policy immediate down to the bits in AMDGPU::CPol::ALL.
def extract_cpol : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & AMDGPU::CPol::ALL, SDLoc(N), MVT::i8);
}]>;

def extract_swz : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8);
}]>;

def set_glc : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() | AMDGPU::CPol::GLC, SDLoc(N), MVT::i8);
}]>;

//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//

def SoppBrTarget : AsmOperandClass {
  let Name = "SoppBrTarget";
  let ParserMethod = "parseSOppBrTarget";
}

def sopp_brtarget : Operand<OtherVT> {
  let EncoderMethod = "getSOPPBrEncoding";
  let DecoderMethod = "decodeSoppBrTarget";
  let OperandType = "OPERAND_PCREL";
  let ParserMatchClass = SoppBrTarget;
}

def si_ga : Operand<iPTR>;

def InterpSlotMatchClass : AsmOperandClass {
  let Name = "InterpSlot";
  let PredicateMethod = "isInterpSlot";
  let ParserMethod = "parseInterpSlot";
  let RenderMethod = "addImmOperands";
}

def InterpSlot : Operand<i32> {
  let PrintMethod = "printInterpSlot";
  let ParserMatchClass = InterpSlotMatchClass;
  let OperandType = "OPERAND_IMMEDIATE";
}

def AttrMatchClass : AsmOperandClass {
  let Name = "Attr";
  let PredicateMethod = "isInterpAttr";
  let ParserMethod = "parseInterpAttr";
  let RenderMethod = "addImmOperands";
}

// It appears to be necessary to create a separate operand for this to
// be able to parse attr<num> with no space.
def Attr : Operand<i32> {
  let PrintMethod = "printInterpAttr";
  let ParserMatchClass = AttrMatchClass;
  let OperandType = "OPERAND_IMMEDIATE";
}

def AttrChanMatchClass : AsmOperandClass {
  let Name = "AttrChan";
  let PredicateMethod = "isAttrChan";
  let RenderMethod = "addImmOperands";
}

def AttrChan : Operand<i32> {
  let PrintMethod = "printInterpAttrChan";
  let ParserMatchClass = AttrChanMatchClass;
  let OperandType = "OPERAND_IMMEDIATE";
}

def SendMsgMatchClass : AsmOperandClass {
  let Name = "SendMsg";
  let PredicateMethod = "isSendMsg";
  let ParserMethod = "parseSendMsgOp";
  let RenderMethod = "addImmOperands";
}

def SwizzleMatchClass : AsmOperandClass {
  let Name = "Swizzle";
  let PredicateMethod = "isSwizzle";
  let ParserMethod = "parseSwizzleOp";
  let RenderMethod = "addImmOperands";
  let IsOptional = 1;
}

def EndpgmMatchClass : AsmOperandClass {
  let Name = "EndpgmImm";
  let PredicateMethod = "isEndpgm";
  let ParserMethod = "parseEndpgmOp";
  let RenderMethod = "addImmOperands";
  let IsOptional = 1;
}

def ExpTgtMatchClass : AsmOperandClass {
  let Name = "ExpTgt";
  let PredicateMethod = "isExpTgt";
  let ParserMethod = "parseExpTgt";
  let RenderMethod = "printExpTgt";
}

def SWaitMatchClass : AsmOperandClass {
  let Name = "SWaitCnt";
  let RenderMethod = "addImmOperands";
  let ParserMethod = "parseSWaitCntOps";
}

def DepCtrMatchClass : AsmOperandClass {
  let Name = "DepCtr";
  let RenderMethod = "addImmOperands";
  let ParserMethod = "parseDepCtrOps";
}

def SDelayMatchClass : AsmOperandClass {
  let Name = "SDelayAlu";
  let RenderMethod = "addImmOperands";
  let ParserMethod = "parseSDelayAluOps";
}

def VReg32OrOffClass : AsmOperandClass {
  let Name = "VReg32OrOff";
  let ParserMethod = "parseVReg32OrOff";
}

let OperandType = "OPERAND_IMMEDIATE" in {

def SendMsgImm : Operand<i32> {
  let PrintMethod = "printSendMsg";
  let ParserMatchClass = SendMsgMatchClass;
}

def SwizzleImm : Operand<i16> {
  let PrintMethod = "printSwizzle";
  let ParserMatchClass = SwizzleMatchClass;
}

def EndpgmImm : Operand<i16> {
  let PrintMethod = "printEndpgm";
  let ParserMatchClass = EndpgmMatchClass;
}

def WAIT_FLAG : Operand <i32> {
  let ParserMatchClass = SWaitMatchClass;
  let PrintMethod = "printWaitFlag";
}

def DepCtrImm : Operand <i32> {
  let ParserMatchClass = DepCtrMatchClass;
  let PrintMethod = "printDepCtr";
}

def DELAY_FLAG : Operand <i32> {
  let ParserMatchClass = SDelayMatchClass;
  let PrintMethod = "printDelayFlag";
}

} // End OperandType = "OPERAND_IMMEDIATE"

include "SIInstrFormats.td"
include "VIInstrFormats.td"

def BoolReg : AsmOperandClass {
  let Name = "BoolReg";
  let ParserMethod = "parseBoolReg";
  let RenderMethod = "addRegOperands";
}

class BoolRC : RegisterOperand<SReg_1> {
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

def VOPDstS64orS32 : BoolRC {
  let PrintMethod = "printVOPDst";
}

// SCSrc_i1 is the operand for pseudo instructions only.
// Boolean immediates shall not be exposed to codegen instructions.
def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_IMM_INT32";
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

// ===----------------------------------------------------------------------===//
// ExpSrc* Special cases for exp src operands which are printed as
// "off" depending on en operand.
// ===----------------------------------------------------------------------===//

def ExpSrc0 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc0";
  let ParserMatchClass = VReg32OrOffClass;
}

def ExpSrc1 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc1";
  let ParserMatchClass = VReg32OrOffClass;
}

def ExpSrc2 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc2";
  let ParserMatchClass = VReg32OrOffClass;
}

def ExpSrc3 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc3";
  let ParserMatchClass = VReg32OrOffClass;
}

// SDWA source operand; encoder/decoder/operand-type names are derived
// from the value type's size and FP-ness.
class SDWASrc<ValueType vt> : RegisterOperand<VS_32> {
  let OperandNamespace = "AMDGPU";
  string Type = !if(isFloatType<vt>.ret, "FP", "INT");
  let OperandType = "OPERAND_REG_INLINE_C_"#Type#vt.Size;
  let DecoderMethod = "decodeSDWASrc"#vt.Size;
  let EncoderMethod = "getSDWASrcEncoding";
}

def SDWASrc_i32 : SDWASrc<i32>;
def SDWASrc_i16 : SDWASrc<i16>;
def SDWASrc_f32 : SDWASrc<f32>;
def SDWASrc_f16 : SDWASrc<f16>;

def SDWAVopcDst : BoolRC {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_SDWA_VOPC_DST";
  let EncoderMethod = "getSDWAVopcDstEncoding";
  let DecoderMethod = "decodeSDWAVopcDst";
  let PrintMethod = "printVOPDst";
}

// Asm match class whose parser/predicate/default method names are all
// derived from CName. Optional operands go through parseOptionalOperand.
class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
  let Name = "Imm"#CName;
  let PredicateMethod = "is"#CName;
  let ParserMethod = !if(Optional, "parseOptionalOperand", "parse"#CName);
  let RenderMethod = "addImmOperands";
  let IsOptional = Optional;
  let DefaultMethod = !if(Optional, "default"#CName, ?);
}

class NamedOperandBit<string Name, AsmOperandClass MatchClass> : Operand<i1> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandBit_0<string Name, AsmOperandClass MatchClass> :
  OperandWithDefaultOps<i1, (ops (i1 0))> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandBit_1<string Name, AsmOperandClass MatchClass> :
  OperandWithDefaultOps<i1, (ops (i1 1))> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandU8<string Name, AsmOperandClass MatchClass> : Operand<i8> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandU16<string Name, AsmOperandClass MatchClass> : Operand<i16> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

// NOTE(review): NamedOperandU32_0 and NamedOperandU32Default0 below are
// token-identical; presumably one is legacy — candidates for merging,
// but both names are referenced so both are kept.
class NamedOperandU32_0<string Name, AsmOperandClass MatchClass> :
  OperandWithDefaultOps<i32, (ops (i32 0))> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandU32Default0<string Name, AsmOperandClass MatchClass> :
  OperandWithDefaultOps<i32, (ops (i32 0))> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandU32Default1<string Name, AsmOperandClass MatchClass> :
  OperandWithDefaultOps<i32, (ops (i32 1))> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

let OperandType = "OPERAND_IMMEDIATE" in {

def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>;
def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>;

def flat_offset : NamedOperandU16<"FlatOffset", NamedMatchClass<"FlatOffset">>;
def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>;
def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>;
def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>;

def gds : NamedOperandBit<"GDS", NamedMatchClass<"GDS">>;

def omod : NamedOperandU32<"OModSI", NamedMatchClass<"OModSI">>;
def omod0 : NamedOperandU32_0<"OModSI", NamedMatchClass<"OModSI">>;

// We need to make the cases with a default of 0 distinct from no
// default to help deal with some cases where the operand appears
// before a mandatory operand.
def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
def clampmod0 : NamedOperandBit_0<"ClampSI", NamedMatchClass<"ClampSI">>;
def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;

def CPol : NamedOperandU32<"CPol", NamedMatchClass<"CPol">>;
def CPol_0 : NamedOperandU32Default0<"CPol", NamedMatchClass<"CPol">>;
def CPol_GLC1 : NamedOperandU32Default1<"CPol", NamedMatchClass<"CPol">>;

def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
def TFE_0 : NamedOperandBit_0<"TFE", NamedMatchClass<"TFE">>;
def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
def SWZ_0 : NamedOperandBit_0<"SWZ", NamedMatchClass<"SWZ">>;
def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
def GFX10A16 : NamedOperandBit<"GFX10A16", NamedMatchClass<"GFX10A16">>;
def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;

def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT", 0>>;

def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
def Dim : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>;

def dst_sel : NamedOperandU32<"SDWADstSel", NamedMatchClass<"SDWADstSel">>;
def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>;
def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused">>;

def op_sel0 : NamedOperandU32Default0<"OpSel", NamedMatchClass<"OpSel">>;
def op_sel_hi0 : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
def neg_lo0 : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
def neg_hi0 : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;

def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>;
def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;

def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>;
def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>;

def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;

def hwreg : NamedOperandU32<"Hwreg", NamedMatchClass<"Hwreg", 0>>;

def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {

}

def wait_vdst : NamedOperandU8<"WaitVDST", NamedMatchClass<"WaitVDST">>;
def wait_exp : NamedOperandU8<"WaitEXP", NamedMatchClass<"WaitEXP">>;

} // End OperandType = "OPERAND_IMMEDIATE"

class KImmMatchClass<int size> : AsmOperandClass {
  let Name = "KImmFP"#size;
  let PredicateMethod = "isKImmFP"#size;
  let ParserMethod = "parseImm";
  let RenderMethod = "addKImmFP"#size#"Operands";
}

class kimmOperand<ValueType vt> : Operand<vt> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_KIMM"#vt.Size;
  let PrintMethod = "printU"#vt.Size#"ImmOperand";
  let ParserMatchClass = !cast<AsmOperandClass>("KImmFP"#vt.Size#"MatchClass");
  let DecoderMethod = "decodeOperand_f"#vt.Size#"kimm";
}

// 32-bit VALU immediate operand that uses the constant bus.
def KImmFP32MatchClass : KImmMatchClass<32>;
def f32kimm : kimmOperand<i32>;

// 32-bit VALU immediate operand with a 16-bit value that uses the
// constant bus.
def KImmFP16MatchClass : KImmMatchClass<16>;
def f16kimm : kimmOperand<i16>;

class FPInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
  let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
}

class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
  let Name = "RegOrInlineImmWithFP"#opSize#"InputMods";
  let PredicateMethod = "isRegOrInlineImmWithFP"#opSize#"InputMods";
}

def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;

def FP16VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<16>;
def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>;

class InputMods <AsmOperandClass matchClass> : Operand <i32> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_INPUT_MODS";
  let ParserMatchClass = matchClass;
}

class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;

def FP16VCSrcInputMods : FPInputMods<FP16VCSrcInputModsMatchClass>;
def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;

class IntInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
  let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
}
class IntVCSrcInputModsMatchClass <int opSize> : IntInputModsMatchClass <opSize> {
  let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
  let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
}
def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;

class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;

class OpSelModsMatchClass : AsmOperandClass {
  let Name = "OpSelMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
}

def IntOpSelModsMatchClass : OpSelModsMatchClass;
def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;

class FPSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "SDWAWithFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
  let PredicateMethod = "isSDWAFP"#opSize#"Operand";
}

def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>;
def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>;

class FPSDWAInputMods <FPSDWAInputModsMatchClass matchClass> :
  InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16SDWAInputMods : FPSDWAInputMods<FP16SDWAInputModsMatchClass>;
def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;

def FPVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithFPInputMods";
  let ParserMethod = "parseRegWithFPInputMods";
  let PredicateMethod = "isVRegWithInputMods";
}

def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "SDWAWithInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
  let PredicateMethod = "isSDWAInt"#opSize#"Operand";
}

def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;

class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> :
  InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;

def IntVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithIntInputMods";
  let ParserMethod = "parseRegWithIntInputMods";
  let PredicateMethod = "isVRegWithInputMods";
}

def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

// Packed-operand match classes currently fall back to the generic
// reg-or-imm predicate; the dedicated predicates are kept commented out.
class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "PackedFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
//  let PredicateMethod = "isPackedFP"#opSize#"InputMods";
}

class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "PackedInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
//  let PredicateMethod = "isPackedInt"#opSize#"InputMods";
}

def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;

class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
//  let PrintMethod = "printPackedFPInputMods";
}

class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
  //let PrintMethod = "printPackedIntInputMods";
}

def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;

//===----------------------------------------------------------------------===//
// Complex patterns
//===----------------------------------------------------------------------===//

def DS1Addr1Offset : ComplexPattern<iPTR, 2, "SelectDS1Addr1Offset">;
def DS64Bit4ByteAligned : ComplexPattern<iPTR, 3, "SelectDS64Bit4ByteAligned">;
def DS128Bit8ByteAligned : ComplexPattern<iPTR, 3, "SelectDS128Bit8ByteAligned">;

def MOVRELOffset : ComplexPattern<iPTR, 2, "SelectMOVRELOffset">;

def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
// VOP3Mods, but the input source is known to never be NaN.
def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;

def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;

def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;

def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
def DotIUVOP3PMods : ComplexPattern<untyped, 1, "SelectDotIUVOP3PMods">;
def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;

def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;

def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;

def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;

def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;

//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//

def SIOperand {
  int ZERO = 0x80;
  int VCC = 0x6A;
  int FLAT_SCR = 0x68;
}

// This should be kept in sync with SISrcMods enum
def SRCMODS {
  int NONE = 0;
  int NEG = 1;
  int ABS = 2;
  int NEG_ABS = 3;

  int NEG_HI = ABS;
  int OP_SEL_0 = 4;
  int OP_SEL_1 = 8;
  int DST_OP_SEL = 8;
}

def DSTCLAMP {
  int NONE = 0;
  int ENABLE = 1;
}

def DSTOMOD {
  int NONE = 0;
}

def HWREG {
  int MODE = 1;
  int STATUS = 2;
  int TRAPSTS = 3;
  int HW_ID = 4;
  int GPR_ALLOC = 5;
  int LDS_ALLOC = 6;
  int IB_STS = 7;
  int MEM_BASES = 15;
  int TBA_LO = 16;
  int TBA_HI = 17;
  int TMA_LO = 18;
  int TMA_HI = 19;
  int FLAT_SCR_LO = 20;
  int FLAT_SCR_HI = 21;
  int XNACK_MASK = 22;
  int POPS_PACKER = 25;
  int SHADER_CYCLES = 29;
}

// Packs a hwreg id, bit offset, and field size into the 16-bit immediate:
// reg in bits [5:0], offset in [10:6], (size - 1) in [15:11].
class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
  int ret = !and(!or(Reg,
                     !shl(Offset, 6),
                     !shl(!add(Size, -1), 11)), 65535);
}

//===----------------------------------------------------------------------===//
//
// SI Instruction multiclass helpers.
//
// Instructions with _32 take 32-bit operands.
// Instructions with _64 take 64-bit operands.
//
// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
// encoding is the standard encoding, but instruction that make use of
// any of the instruction modifiers must use the 64-bit encoding.
//
// Instructions with _e32 use the 32-bit encoding.
// Instructions with _e64 use the 64-bit encoding.
//
//===----------------------------------------------------------------------===//

class SIMCInstr <string pseudo, int subtarget> {
  string PseudoInstr = pseudo;
  int Subtarget = subtarget;
}

//===----------------------------------------------------------------------===//
// Vector ALU classes
//===----------------------------------------------------------------------===//

// Counts trailing 'untyped' source types to classify VOP1/VOP2/VOP3 arity.
class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
  int ret =
    !if (!eq(Src0.Value, untyped.Value), 0,
      !if (!eq(Src1.Value, untyped.Value), 1,   // VOP1
        !if (!eq(Src2.Value, untyped.Value), 2, // VOP2
          3)));                                 // VOP3
}

// Returns the register class to use for the destination of VOP[123C]
// instructions for the given VT.
class getVALUDstForVT<ValueType VT> {
  RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
                          !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
                            !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
                              !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
                                VOPDstS64orS32)))); // else VT == i1
}

// Returns the register class to use for the destination of VOP[12C]
// instructions with SDWA extension
class getSDWADstForVT<ValueType VT> {
  RegisterOperand ret = !if(!eq(VT.Size, 1),
                            SDWAVopcDst,             // VOPC
                            VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst
}

// Returns the register class to use for source 0 of VOP[12C]
// instructions for the given VT.
class getVOPSrc0ForVT<ValueType VT> {
  bit isFP = isFloatType<VT>.ret;

  RegisterOperand ret =
    !if(isFP,
      !if(!eq(VT.Size, 64),
        VSrc_f64,
        !if(!eq(VT.Value, f16.Value),
          VSrc_f16,
          !if(!eq(VT.Value, v2f16.Value),
            VSrc_v2f16,
            !if(!eq(VT.Value, v4f16.Value),
              AVSrc_64,
              VSrc_f32
            )
          )
        )
      ),
      !if(!eq(VT.Size, 64),
        VSrc_b64,
        !if(!eq(VT.Value, i16.Value),
          VSrc_b16,
          !if(!eq(VT.Value, v2i16.Value),
            VSrc_v2b16,
            VSrc_b32
          )
        )
      )
    );
}

class getSOPSrcForVT<ValueType VT> {
  RegisterOperand ret = !if(!eq(VT.Size, 64), SSrc_b64, SSrc_b32);
}

// Returns the vreg register class to use for source operand given VT
class getVregSrcForVT<ValueType VT> {
  RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
                        !if(!eq(VT.Size, 96), VReg_96,
                          !if(!eq(VT.Size, 64), VReg_64,
                            !if(!eq(VT.Size, 48), VReg_64,
                              VGPR_32))));
}

class getSDWASrcForVT <ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
  RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
  RegisterOperand ret = !if(isFP, retFlt, retInt);
}

// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
class getVOP3SrcForVT<ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  RegisterOperand ret =
    !if(!eq(VT.Size, 128),
      VSrc_128,
      !if(!eq(VT.Size, 64),
        !if(isFP,
          !if(!eq(VT.Value, v2f32.Value),
            VSrc_v2f32,
            VSrc_f64),
          !if(!eq(VT.Value, v2i32.Value),
            VSrc_v2b32,
            VSrc_b64)),
        !if(!eq(VT.Value, i1.Value),
          SSrc_i1,
          !if(isFP,
            !if(!eq(VT.Value, f16.Value),
              VSrc_f16,
              !if(!eq(VT.Value, v2f16.Value),
                VSrc_v2f16,
                !if(!eq(VT.Value, v4f16.Value),
                  AVSrc_64,
                  VSrc_f32
                )
              )
            ),
            !if(!eq(VT.Value, i16.Value),
              VSrc_b16,
              !if(!eq(VT.Value, v2i16.Value),
                VSrc_v2b16,
                VSrc_b32
              )
            )
          )
        )
      )
    );
}

// Src2 of VOP3 DPP instructions cannot be a literal
class getVOP3DPPSrcForVT<ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  RegisterOperand ret =
      !if (!eq(VT.Value, i1.Value), SSrc_i1,
           !if (isFP,
                !if (!eq(VT.Value, f16.Value), VCSrc_f16,
                     !if (!eq(VT.Value, v2f16.Value), VCSrc_v2f16, VCSrc_f32)),
                !if (!eq(VT.Value, i16.Value), VCSrc_b16,
                     !if (!eq(VT.Value, v2i16.Value), VCSrc_v2b16,
                          VCSrc_b32))));
}

// Float or packed int
class isModifierType<ValueType SrcVT> {
  bit ret = !or(!eq(SrcVT.Value, f16.Value),
                !eq(SrcVT.Value, f32.Value),
                !eq(SrcVT.Value, f64.Value),
                !eq(SrcVT.Value, v2f16.Value),
                !eq(SrcVT.Value, v2i16.Value),
                !eq(SrcVT.Value, v2f32.Value),
                !eq(SrcVT.Value, v2i32.Value),
                !eq(SrcVT.Value, v4f16.Value),
                !eq(SrcVT.Value, v4i16.Value),
                !eq(SrcVT.Value, v4f32.Value),
                !eq(SrcVT.Value, v4i32.Value),
                !eq(SrcVT.Value, v8f16.Value),
                !eq(SrcVT.Value, v8i16.Value),
                !eq(SrcVT.Value, v8f32.Value),
                !eq(SrcVT.Value, v8i32.Value),
                !eq(SrcVT.Value, v16f16.Value),
                !eq(SrcVT.Value, v16i16.Value));
}

// Return type of input modifiers operand for specified input operand
class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
  bit isFP = isFloatType<VT>.ret;
  bit isPacked = isPackedType<VT>.ret;
  Operand ret = !if(!eq(VT.Size, 64),
                    !if(isFP, FP64InputMods, Int64InputMods),
                    !if(isFP,
                        !if(!eq(VT.Value, f16.Value),
                            FP16InputMods,
                            FP32InputMods
                        ),
                        !if(EnableF32SrcMods, FP32InputMods, Int32InputMods))
                   );
}

class getOpSelMod <ValueType VT> {
  Operand ret = !if(!eq(VT.Value, f16.Value), FP16InputMods, IntOpSelMods);
}

// Return type of input modifiers operand specified input operand for DPP
class getSrcModDPP <ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
}

// Return type of input modifiers operand for specified input operand for DPP
class getSrcModVOP3DPP <ValueType VT, bit EnableF32SrcMods> {
  bit isFP = isFloatType<VT>.ret;
  bit isPacked = isPackedType<VT>.ret;
  Operand ret =
      !if (isFP,
           !if (!eq(VT.Value, f16.Value), FP16VCSrcInputMods,
                FP32VCSrcInputMods),
           !if (EnableF32SrcMods, FP32VCSrcInputMods, Int32VCSrcInputMods));
}

// Return type of input modifiers operand specified input operand for SDWA
class getSrcModSDWA <ValueType VT> {
  Operand ret = !if(!eq(VT.Value, f16.Value), FP16SDWAInputMods,
                !if(!eq(VT.Value, f32.Value), FP32SDWAInputMods,
                !if(!eq(VT.Value, i16.Value), Int16SDWAInputMods,
                                              Int32SDWAInputMods)));
}

// Returns the input arguments for VOP[12C] instructions for the given SrcVT.
class getIns32 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs> {
  dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0),               // VOP1
            !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2
                                    (ins)));
}

// Returns the input arguments for VOP3 instructions for the given SrcVT.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
                RegisterOperand Src2RC, int NumSrcArgs,
                bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {

  dag ret =
    !if (!eq(NumSrcArgs, 0),
      // VOP1 without input operands (V_NOP, V_CLREXCP)
      (ins),
    /* else */
      !if (!eq(NumSrcArgs, 1),
        !if (HasModifiers,
          // VOP1 with modifiers
          !if(HasOMod,
            (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                 clampmod0:$clamp, omod0:$omod),
            (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                 clampmod0:$clamp))
        /* else */,
          // VOP1 without modifiers
          !if (HasClamp,
            (ins Src0RC:$src0, clampmod0:$clamp),
            (ins Src0RC:$src0))
        /* endif */ ),
        !if (!eq(NumSrcArgs, 2),
          !if (HasModifiers,
            // VOP 2 with modifiers
            !if(HasOMod,
              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                   Src1Mod:$src1_modifiers, Src1RC:$src1,
                   clampmod0:$clamp, omod0:$omod),
              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                   Src1Mod:$src1_modifiers, Src1RC:$src1,
                   clampmod0:$clamp))
          /* else */,
            // VOP2 without modifiers
            !if (HasClamp,
              (ins Src0RC:$src0, Src1RC:$src1, clampmod0:$clamp),
              (ins Src0RC:$src0, Src1RC:$src1))

          /* endif */ )
        /* NumSrcArgs == 3 */,
          !if (HasModifiers,
            !if (HasSrc2Mods,
              // VOP3 with modifiers
              !if (HasOMod,
                (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                     Src1Mod:$src1_modifiers, Src1RC:$src1,
                     Src2Mod:$src2_modifiers, Src2RC:$src2,
                     clampmod0:$clamp, omod0:$omod),
                !if (HasClamp,
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       Src1Mod:$src1_modifiers, Src1RC:$src1,
                       Src2Mod:$src2_modifiers, Src2RC:$src2,
                       clampmod0:$clamp),
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       Src1Mod:$src1_modifiers, Src1RC:$src1,
                       Src2Mod:$src2_modifiers, Src2RC:$src2))),
              // VOP3 with modifiers except src2
              !if (HasOMod,
                (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                     Src1Mod:$src1_modifiers, Src1RC:$src1,
                     Src2RC:$src2, clampmod0:$clamp, omod0:$omod),
                !if (HasClamp,
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       Src1Mod:$src1_modifiers, Src1RC:$src1,
                       Src2RC:$src2, clampmod0:$clamp),
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       Src1Mod:$src1_modifiers, Src1RC:$src1,
                       Src2RC:$src2))))
          /* else */,
            // VOP3 without modifiers
            !if (HasClamp,
              (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, clampmod0:$clamp),
              (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2))
          /* endif */ ))));
}

class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
                     RegisterOperand Src2RC, int NumSrcArgs,
                     bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                     Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel,
                     bit IsVOP3P> {
  // getIns64 handles clamp and omod. Implicit mutex between vop3p and omod.
  dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs,
                       HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  dag opsel = (ins op_sel0:$op_sel);
  dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
  dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi));

  dag ret = !con(base,
                 !if(HasOpSel, opsel,(ins)),
                 !if(IsVOP3P, vop3pFields,(ins)));
}

class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
                   RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
                           HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
                           0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod,
                           HasOpSel, 1/*IsVOP3P*/>.ret;
}

class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
                       RegisterOperand Src2RC, int NumSrcArgs,
                       bit HasClamp, bit HasOMod,
                       Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag ret = getInsVOP3Base<Src0RC, Src1RC,
                           Src2RC, NumSrcArgs,
                           HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, HasOMod,
                           Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/, 0>.ret;
}

class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
                     RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
                     Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {

  dag ret = !if (!eq(NumSrcArgs, 0),
                 // VOP1 without input operands (V_NOP)
                 (ins ),
             !if (!eq(NumSrcArgs, 1),
               !if (HasModifiers,
                 // VOP1_DPP with modifiers
                 (ins OldRC:$old, Src0Mod:$src0_modifiers,
                      Src0RC:$src0)
               /* else */,
                 // VOP1_DPP without modifiers
                 (ins OldRC:$old, Src0RC:$src0)
               /* endif */),
               !if (!eq(NumSrcArgs, 2),
                 !if (HasModifiers,
                   // VOP2_DPP with modifiers
                   (ins OldRC:$old,
Src0Mod:$src0_modifiers, Src0RC:$src0, 1941 Src1Mod:$src1_modifiers, Src1RC:$src1) 1942 /* else */, 1943 // VOP2_DPP without modifiers 1944 (ins OldRC:$old, 1945 Src0RC:$src0, Src1RC:$src1) 1946 ) 1947 /* NumSrcArgs == 3, VOP3 */, 1948 !if (HasModifiers, 1949 // VOP3_DPP with modifiers 1950 (ins OldRC:$old, 1951 Src0Mod:$src0_modifiers, Src0RC:$src0, 1952 Src1Mod:$src1_modifiers, Src1RC:$src1, 1953 Src2Mod:$src2_modifiers, Src2RC:$src2) 1954 /* else */, 1955 // VOP3_DPP without modifiers 1956 (ins OldRC:$old, 1957 Src0RC:$src0, Src1RC:$src1, 1958 Src2RC:$src2) 1959 ) 1960 /* endif */))); 1961} 1962 1963class getInsDPP <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, 1964 RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, 1965 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { 1966 dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, 1967 HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret, 1968 (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, 1969 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)); 1970} 1971 1972class getInsDPP16 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, 1973 RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, 1974 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { 1975 dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, 1976 HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret, 1977 (ins FI:$fi)); 1978} 1979 1980class getInsDPP8 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, 1981 RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, 1982 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { 1983 dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, 1984 HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret, 1985 (ins dpp8:$dpp8, FI:$fi)); 1986} 1987 1988class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> { 1989 dag old = ( ins OldRC:$old ); 1990 dag base = VOP3Base; 1991 dag ret = !con( 1992 !if(!ne(NumSrcArgs, 0), old, 
(ins)), 1993 base 1994 ); 1995} 1996 1997class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> { 1998 dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs>.ret, 1999 (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, 2000 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)); 2001} 2002 2003class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> { 2004 dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs>.ret, 2005 (ins FI:$fi)); 2006} 2007 2008class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> { 2009 dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs>.ret, 2010 (ins dpp8:$dpp8, FI:$fi)); 2011} 2012 2013// Ins for SDWA 2014class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs, 2015 bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod, 2016 ValueType DstVT> { 2017 2018 dag ret = !if(!eq(NumSrcArgs, 0), 2019 // VOP1 without input operands (V_NOP) 2020 (ins), 2021 !if(!eq(NumSrcArgs, 1), 2022 // VOP1 2023 !if(!not(HasSDWAOMod), 2024 // VOP1_SDWA without omod 2025 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 2026 clampmod:$clamp, 2027 dst_sel:$dst_sel, dst_unused:$dst_unused, 2028 src0_sel:$src0_sel), 2029 // VOP1_SDWA with omod 2030 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 2031 clampmod:$clamp, omod:$omod, 2032 dst_sel:$dst_sel, dst_unused:$dst_unused, 2033 src0_sel:$src0_sel)), 2034 !if(!eq(NumSrcArgs, 2), 2035 !if(!eq(DstVT.Size, 1), 2036 // VOPC_SDWA 2037 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 2038 Src1Mod:$src1_modifiers, Src1RC:$src1, 2039 clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel), 2040 // VOP2_SDWA 2041 !if(!not(HasSDWAOMod), 2042 // VOP2_SDWA without omod 2043 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 2044 Src1Mod:$src1_modifiers, Src1RC:$src1, 2045 clampmod:$clamp, 2046 dst_sel:$dst_sel, dst_unused:$dst_unused, 2047 src0_sel:$src0_sel, src1_sel:$src1_sel), 2048 // VOP2_SDWA with omod 2049 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 2050 
Src1Mod:$src1_modifiers, Src1RC:$src1, 2051 clampmod:$clamp, omod:$omod, 2052 dst_sel:$dst_sel, dst_unused:$dst_unused, 2053 src0_sel:$src0_sel, src1_sel:$src1_sel))), 2054 (ins)/* endif */))); 2055} 2056 2057// Outs for DPP 2058class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> { 2059 dag ret = !if(HasDst, 2060 !if(!eq(DstVT.Size, 1), 2061 (outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions 2062 (outs DstRCDPP:$vdst)), 2063 (outs)); // V_NOP 2064} 2065 2066// Outs for SDWA 2067class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> { 2068 dag ret = !if(HasDst, 2069 !if(!eq(DstVT.Size, 1), 2070 (outs DstRCSDWA:$sdst), 2071 (outs DstRCSDWA:$vdst)), 2072 (outs)); // V_NOP 2073} 2074 2075// Returns the assembly string for the inputs and outputs of a VOP[12C] 2076// instruction. This does not add the _e32 suffix, so it can be reused 2077// by getAsm64. 2078class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> { 2079 string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC 2080 string src0 = ", $src0"; 2081 string src1 = ", $src1"; 2082 string src2 = ", $src2"; 2083 string ret = !if(HasDst, dst, "") # 2084 !if(!eq(NumSrcArgs, 1), src0, "") # 2085 !if(!eq(NumSrcArgs, 2), src0#src1, "") # 2086 !if(!eq(NumSrcArgs, 3), src0#src1#src2, ""); 2087} 2088 2089class getAsmVOPDPart <int NumSrcArgs, string XorY> { 2090 string dst = "$vdst" # XorY; 2091 string src0 = ", $src0" # XorY; 2092 string src1 = ", $vsrc1" # XorY; 2093 string ret = dst # 2094 !if(!ge(NumSrcArgs, 1), src0, "") # 2095 !if(!ge(NumSrcArgs, 2), src1, ""); 2096} 2097 2098// Returns the assembly string for the inputs and outputs of a VOP3 2099// instruction. 
class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,
                bit HasOMod, ValueType DstVT = i32> {
  string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
  string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
  string src1 = !if(!eq(NumSrcArgs, 1), "",
                   !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
                                           " $src1_modifiers,"));
  string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
  string iclamp = !if(HasIntClamp, "$clamp", "");
  string ret =
    // Without float modifiers fall back to the VOP[12C] operand layout,
    // keeping only the integer clamp; with modifiers print the full
    // src*_modifiers / clamp / omod form.
    !if(!not(HasModifiers),
        getAsm32<HasDst, NumSrcArgs, DstVT>.ret # iclamp,
        dst#", "#src0#src1#src2#"$clamp"#!if(HasOMod, "$omod", ""));
}

// Returns the assembly string for the inputs and outputs of a VOP3P
// instruction.
class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
                   bit HasClamp, bit HasOpSel> {
  string dst = "$vdst";
  string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
  string src1 = !if(!eq(NumSrcArgs, 1), "",
                   !if(!eq(NumSrcArgs, 2), " $src1",
                                           " $src1,"));
  string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");

  string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
  string clamp = !if(HasClamp, "$clamp", "");
  string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", "");

  // Each modifier is printed as an array of bits for each operand, so
  // all operands are printed as part of src0_modifiers.
  string ret = dst#", "#src0#src1#src2#opsel#mods#clamp;
}

// Assembly string for VOP3 instructions that use op_sel; each source is
// printed with modifiers only when that source actually has them.
class getAsmVOP3OpSel <int NumSrcArgs,
                       bit HasClamp,
                       bit Src0HasMods,
                       bit Src1HasMods,
                       bit Src2HasMods> {
  string dst = "$vdst";

  // Plain (integer-style) source strings, no modifier wrappers.
  string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
  string isrc1 = !if(!eq(NumSrcArgs, 1), "",
                    !if(!eq(NumSrcArgs, 2), " $src1",
                                            " $src1,"));
  string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");

  // Modifier-wrapped (float-style) source strings.
  string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
  string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
                    !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
                                            " $src1_modifiers,"));
  string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");

  string src0 = !if(Src0HasMods, fsrc0, isrc0);
  string src1 = !if(Src1HasMods, fsrc1, isrc1);
  string src2 = !if(Src2HasMods, fsrc2, isrc2);

  string clamp = !if(HasClamp, "$clamp", "");
  string omod = "";
  string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp#omod;
}

// Assembly string for the classic (row/bank mask) DPP encoding.
class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       "$sdst",
                       "$vdst"),
                   ""); // use $sdst for VOPC
  string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
  string src1 = !if(!eq(NumSrcArgs, 1), "",
                   !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
                                           " $src1_modifiers,"));
  string args = !if(!not(HasModifiers),
                    getAsm32<0, NumSrcArgs, DstVT>.ret,
                    ", "#src0#src1);
  string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
}

// DPP16: classic DPP string plus the FI operand.
class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
  string ret = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret#"$fi";
}

// DPP8: reuses the getAsmDPP dst/args pieces but with the dpp8 selector
// and FI instead of the row/bank mask controls.
class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32>
  : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>{
  let ret = dst#args#" $dpp8$fi";
}

// Assembly string shared by the VOP3-encoded DPP variants (operand part
// only; the DPP controls are appended by the wrappers below).
class getAsmVOP3DPPBase <int NumSrcArgs, bit HasDst, bit HasClamp,
                         bit HasOpSel, bit HasOMod, bit IsVOP3P,
                         bit HasModifiers, bit Src0HasMods,
                         bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       "$sdst",
                       "$vdst"),
                   ""); // use $sdst for VOPC
  string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
  string isrc1 = !if(!eq(NumSrcArgs, 1), "",
                    !if(!eq(NumSrcArgs, 2), " $src1",
                                            " $src1,"));
  string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");

  string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
  string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
                    !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
                                            " $src1_modifiers,"));
  string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");

  string src0 = !if(Src0HasMods, fsrc0, isrc0);
  string src1 = !if(Src1HasMods, fsrc1, isrc1);
  string src2 = !if(Src2HasMods, fsrc2, isrc2);
  string opsel = !if(HasOpSel, "$op_sel", "");
  // VOP3P-only pieces: op_sel_hi and the packed neg modifiers.
  string 3PMods = !if(IsVOP3P,
                      !if(HasOpSel, "$op_sel_hi", "")
                       #!if(HasModifiers, "$neg_lo$neg_hi", ""),
                      "");
  string clamp = !if(HasClamp, "$clamp", "");
  string omod = !if(HasOMod, "$omod", "");

  string ret = dst#", "#src0#src1#src2#opsel#3PMods#clamp#omod;

}

class getAsmVOP3DPP<string base> {
  string ret = base # " $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
}

class getAsmVOP3DPP16<string base> {
  string ret = getAsmVOP3DPP<base>.ret # "$fi";
}

class getAsmVOP3DPP8<string base> {
  string ret = base # " $dpp8$fi";
}


// Assembly string for the pre-GFX9 SDWA encoding.
class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       " vcc", // use vcc token as dst for VOPC instructions
                       "$vdst"),
                   "");
  string src0 = "$src0_modifiers";
  string src1 = "$src1_modifiers";
  string args = !if(!eq(NumSrcArgs, 0),
                    "",
                    !if(!eq(NumSrcArgs, 1),
                        ", "#src0#"$clamp",
                        ", "#src0#", "#src1#"$clamp"
                     )
                );
  string sdwa = !if(!eq(NumSrcArgs, 0),
                    "",
                    !if(!eq(NumSrcArgs, 1),
                        " $dst_sel $dst_unused $src0_sel",
                        !if(!eq(DstVT.Size, 1),
                            " $src0_sel $src1_sel", // No dst_sel and dst_unused for VOPC
                            " $dst_sel $dst_unused $src0_sel $src1_sel"
                        )
                    )
                );
  string ret = dst#args#sdwa;
}

// Assembly string for the GFX9+ SDWA encoding (explicit $sdst for VOPC
// and optional omod).
class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs,
                   ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       "$sdst", // VOPC
                       "$vdst"), // VOP1/2
                   "");
  string src0 = "$src0_modifiers";
  string src1 = "$src1_modifiers";
  string out_mods = !if(!not(HasOMod), "$clamp", "$clamp$omod");
  string args = !if(!eq(NumSrcArgs, 0), "",
                    !if(!eq(NumSrcArgs, 1),
                        ", "#src0,
                        ", "#src0#", "#src1
                    )
                );
  string sdwa = !if(!eq(NumSrcArgs, 0), "",
                    !if(!eq(NumSrcArgs, 1),
                        out_mods#" $dst_sel $dst_unused $src0_sel",
                        !if(!eq(DstVT.Size, 1),
                            " $src0_sel $src1_sel", // No dst_sel, dst_unused and output modifiers for VOPC
                            out_mods#" $dst_sel $dst_unused $src0_sel $src1_sel"
                        )
                    )
                );
  string ret = dst#args#sdwa;
}

// True when any of dst/src0/src1 is 64 bits wide (and the instruction is
// not VOP3, i.e. NumSrcArgs != 3).
class getHas64BitOps <int NumSrcArgs, ValueType DstVT, ValueType Src0VT,
                      ValueType Src1VT> {
  bit ret = !if(!eq(NumSrcArgs, 3),
                0,
                !if(!eq(DstVT.Size, 64),
                    1,
                    !if(!eq(Src0VT.Size, 64),
                        1,
                        !if(!eq(Src1VT.Size, 64),
                            1,
                            0
                        )
                    )
                )
            );
}

class getHasSDWA <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                  ValueType Src1VT = i32> {
  bit ret = !if(!eq(NumSrcArgs, 3),
                0, // NumSrcArgs == 3 - No SDWA for VOP3
                !if(!eq(DstVT.Size, 64),
                    0, // 64-bit dst - No SDWA for 64-bit operands
                    !if(!eq(Src0VT.Size, 64),
                        0, // 64-bit src0
                        !if(!eq(Src1VT.Size, 64),
                            0, // 64-bit src1
                            1
                        )
                    )
                )
            );
}

class getHasDPP <int NumSrcArgs> {
  bit ret = !if(!eq(NumSrcArgs, 3),
                0, // NumSrcArgs == 3 - No DPP for VOP3
                1);
}

// DPP support restricted to instructions with only 32-bit operands.
class getHasExt32BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                         ValueType Src1VT = i32> {
  bit ret = !and(getHasDPP<NumSrcArgs>.ret,
                 !not(getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret));
}

// DPP support for instructions that have at least one 64-bit operand.
class getHasExt64BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                         ValueType Src1VT = i32> {
  bit ret = !and(getHasDPP<NumSrcArgs>.ret,
                 getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}

// Function that checks if instruction supports DPP and SDWA
class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32> {
  bit ret = !or(getHasDPP<NumSrcArgs>.ret,
                getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}

// Return an AGPR+VGPR operand class for the given VGPR register class.
2353class getLdStRegisterOperand<RegisterClass RC> { 2354 RegisterOperand ret = 2355 !if(!eq(RC.Size, 32), AVLdSt_32, 2356 !if(!eq(RC.Size, 64), AVLdSt_64, 2357 !if(!eq(RC.Size, 96), AVLdSt_96, 2358 !if(!eq(RC.Size, 128), AVLdSt_128, 2359 !if(!eq(RC.Size, 160), AVLdSt_160, 2360 RegisterOperand<VReg_1> // invalid register 2361 ))))); 2362} 2363 2364class BitOr<bit a, bit b> { 2365 bit ret = !if(a, 1, !if(b, 1, 0)); 2366} 2367 2368class BitAnd<bit a, bit b> { 2369 bit ret = !if(a, !if(b, 1, 0), 0); 2370} 2371 2372class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32, 2373 ValueType Src1VT = i32, ValueType Src2VT = i32> { 2374 bit ret = !if(!eq(DstVT.Size, 64), 2375 0, // 64-bit dst No DPP for 64-bit operands 2376 !if(!eq(Src0VT.Size, 64), 2377 0, // 64-bit src0 2378 !if(!eq(Src1VT.Size, 64), 2379 0, // 64-bit src1 2380 !if(!eq(Src2VT.Size, 64), 2381 0, // 64-bit src2 2382 1 2383 ) 2384 ) 2385 ) 2386 ); 2387} 2388 2389 2390def PatGenMode { 2391 int NoPattern = 0; 2392 int Pattern = 1; 2393} 2394 2395class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0, 2396 bit _EnableClamp = 0> { 2397 2398 field list<ValueType> ArgVT = _ArgVT; 2399 field bit EnableF32SrcMods = _EnableF32SrcMods; 2400 field bit EnableClamp = _EnableClamp; 2401 2402 field ValueType DstVT = ArgVT[0]; 2403 field ValueType Src0VT = ArgVT[1]; 2404 field ValueType Src1VT = ArgVT[2]; 2405 field ValueType Src2VT = ArgVT[3]; 2406 field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret; 2407 field RegisterOperand DstRC64 = DstRC; 2408 field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret; 2409 field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret; 2410 field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret; 2411 field RegisterOperand Src1RC32 = RegisterOperand<getVregSrcForVT<Src1VT>.ret>; 2412 field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret; 2413 field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret; 2414 field RegisterOperand Src2RC64 = 
getVOP3SrcForVT<Src2VT>.ret; 2415 field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret; 2416 field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret; 2417 field RegisterClass Src2DPP = getVregSrcForVT<Src2VT>.ret; 2418 field RegisterOperand Src0VOP3DPP = VGPRSrc_32; 2419 field RegisterOperand Src1VOP3DPP = VGPRSrc_32; 2420 field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret; 2421 field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret; 2422 field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret; 2423 field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret; 2424 field Operand Src1Mod = getSrcMod<Src1VT, EnableF32SrcMods>.ret; 2425 field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret; 2426 field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret; 2427 field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret; 2428 field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret; 2429 field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, EnableF32SrcMods>.ret; 2430 field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret; 2431 field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret; 2432 2433 2434 field bit HasDst = !ne(DstVT.Value, untyped.Value); 2435 field bit HasDst32 = HasDst; 2436 field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case 2437 field bit EmitDstSel = EmitDst; 2438 field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret; 2439 field bit HasSrc0 = !ne(Src0VT.Value, untyped.Value); 2440 field bit HasSrc1 = !ne(Src1VT.Value, untyped.Value); 2441 field bit HasSrc2 = !ne(Src2VT.Value, untyped.Value); 2442 2443 // HasSrc*FloatMods affects the SDWA encoding. We ignore EnableF32SrcMods. 2444 field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret; 2445 field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret; 2446 field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret; 2447 2448 // HasSrc*IntMods affects the SDWA encoding. We ignore EnableF32SrcMods. 
2449 field bit HasSrc0IntMods = isIntType<Src0VT>.ret; 2450 field bit HasSrc1IntMods = isIntType<Src1VT>.ret; 2451 field bit HasSrc2IntMods = isIntType<Src2VT>.ret; 2452 2453 field bit HasClamp = !or(isModifierType<Src0VT>.ret, EnableClamp); 2454 field bit HasSDWAClamp = EmitDst; 2455 field bit HasFPClamp = !and(isFloatType<DstVT>.ret, HasClamp); 2456 field bit HasIntClamp = !if(isFloatType<DstVT>.ret, 0, HasClamp); 2457 field bit HasClampLo = HasClamp; 2458 field bit HasClampHi = !and(isPackedType<DstVT>.ret, HasClamp); 2459 field bit HasHigh = 0; 2460 2461 field bit IsPacked = isPackedType<Src0VT>.ret; 2462 field bit HasOpSel = IsPacked; 2463 field bit HasOMod = !if(HasOpSel, 0, isFloatType<DstVT>.ret); 2464 field bit HasSDWAOMod = isFloatType<DstVT>.ret; 2465 2466 field bit HasModifiers = !or(isModifierType<Src0VT>.ret, 2467 isModifierType<Src1VT>.ret, 2468 isModifierType<Src2VT>.ret, 2469 HasOMod, 2470 EnableF32SrcMods); 2471 2472 field bit HasSrc0Mods = HasModifiers; 2473 field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0); 2474 field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0); 2475 2476 field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; 2477 field bit HasExtVOP3DPP = getHasVOP3DPP<DstVT, Src0VT, Src1VT, Src2VT>.ret; 2478 field bit HasExtDPP = !if(!or(getHasDPP<NumSrcArgs>.ret, 2479 HasExtVOP3DPP), 1, 0); 2480 field bit HasExt32BitDPP = getHasExt32BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; 2481 field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; 2482 field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; 2483 field bit HasExtSDWA9 = HasExtSDWA; 2484 field int NeedPatGen = PatGenMode.NoPattern; 2485 2486 field bit IsMAI = 0; 2487 field bit IsVOP3P = 0; 2488 field bit IsDOT = 0; 2489 field bit IsSingle = 0; 2490 field bit IsWMMA = 0; 2491 2492 field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, 
PackedI16InputMods); 2493 field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods); 2494 field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods); 2495 2496 field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs)); 2497 2498 // VOP3b instructions are a special case with a second explicit 2499 // output. This is manually overridden for them. 2500 field dag Outs32 = Outs; 2501 field dag Outs64 = !if(HasDst,(outs DstRC64:$vdst),(outs)); 2502 field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret; 2503 field dag OutsDPP8 = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret; 2504 field dag OutsVOP3DPP = OutsDPP; 2505 field dag OutsVOP3DPP8 = OutsDPP8; 2506 field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret; 2507 2508 field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret; 2509 field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, 2510 HasIntClamp, HasModifiers, HasSrc2Mods, 2511 HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret; 2512 field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64, 2513 NumSrcArgs, HasClamp, HasOpSel, 2514 Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret; 2515 field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, 2516 NumSrcArgs, HasClamp, HasOMod, 2517 getOpSelMod<Src0VT>.ret, 2518 getOpSelMod<Src1VT>.ret, 2519 getOpSelMod<Src2VT>.ret>.ret; 2520 field dag InsDPP = !if(HasExtDPP, 2521 getInsDPP<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, 2522 HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret, 2523 (ins)); 2524 field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, 2525 HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret; 2526 field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, 2527 NumSrcArgs, HasModifiers, 2528 Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret; 2529 field dag InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, 2530 Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, 
HasSrc2Mods, HasOMod, 2531 Src0ModDPP, Src1ModDPP, Src2ModVOP3DPP, HasOpSel, IsVOP3P>.ret; 2532 field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCDPP, NumSrcArgs>.ret; 2533 field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCDPP, NumSrcArgs>.ret; 2534 field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCDPP, NumSrcArgs>.ret; 2535 field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs, 2536 HasSDWAOMod, Src0ModSDWA, Src1ModSDWA, 2537 DstVT>.ret; 2538 field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X); 2539 // It is a slight misnomer to use the deferred f32 operand type for non-float 2540 // operands, but this operand type will only be used if the other dual 2541 // component is FMAAK or FMAMK 2542 field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X); 2543 field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y); 2544 field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y); 2545 2546 2547 field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret; 2548 field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret; 2549 field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret; 2550 field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs, 2551 HasClamp, 2552 HasSrc0FloatMods, 2553 HasSrc1FloatMods, 2554 HasSrc2FloatMods>.ret; 2555 field string AsmDPP = !if(HasExtDPP, 2556 getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, ""); 2557 field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret; 2558 // DPP8 encoding has no fields for modifiers, and it is enforced by setting 2559 // the asm operand name via this HasModifiers flag 2560 field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret; 2561 field string AsmVOP3DPPBase = getAsmVOP3DPPBase<NumSrcArgs, HasDst, HasClamp, 2562 
HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasSrc0FloatMods, HasSrc1FloatMods, 2563 HasSrc2FloatMods, DstVT >.ret; 2564 field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3DPPBase>.ret; 2565 field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3DPPBase>.ret; 2566 field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3DPPBase>.ret; 2567 field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret; 2568 field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret; 2569 field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret; 2570 field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret; 2571 field string TieRegDPP = "$old"; 2572} 2573 2574 class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> { 2575 let HasExt = 0; 2576 let HasExtDPP = 0; 2577 let HasExtVOP3DPP = 0; 2578 let HasExt32BitDPP = 0; 2579 let HasExt64BitDPP = 0; 2580 let HasExtSDWA = 0; 2581 let HasExtSDWA9 = 0; 2582} 2583 2584class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.ArgVT> { 2585 let NeedPatGen = mode; 2586} 2587def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>; 2588def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>; 2589def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>; 2590def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>; 2591 2592def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>; 2593def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>; 2594def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>; 2595def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>; 2596def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], 0, /*EnableClamp=*/1>; 2597 2598def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>; 2599def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>; 2600 2601def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>; 2602def VOP_I32_I16 : VOPProfile <[i32, i16, untyped, untyped]>; 2603 2604def VOP_V2F16_V2F16_V2F16 : 
VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
// Profiles are named VOP_<Dst>_<Src0>[_<Src1>[_<Src2>]], matching the
// VOPProfile type list [dst, src0, src1, src2]; `untyped` marks an operand
// slot the instruction does not use.
def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;

def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;

def VOP_F16_V2F16_V2F16_F16 : VOPProfile <[f16, v2f16, v2f16, f16]>;
def VOP_I16_V2I16_V2I16_I16 : VOPProfile <[i16, v2i16, v2i16, i16]>;
def VOP_F32_V2I16_V2I16_F32 : VOPProfile <[f32, v2i16, v2i16, f32]>;

def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;

// Profile with no operands at all.
def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;

// Unary profiles.
def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
def VOP_I64_I64 : VOPProfile <[i64, i64, untyped, untyped]>;

// Binary profiles.
def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
// Integer-arithmetic variant with clamping enabled.
def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], 0, /*EnableClamp=*/1>;
def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;

def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;

// Ternary profiles.
def VOP_F16_F32_F16_F32 : VOPProfile <[f16, f32, f16, f32]>;
def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;

def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>;
def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>;

// Wide vector-accumulator profiles (MAI/WMMA-style [dst, srcA, srcB, acc]).
def VOP_V4F32_F32_F32_V4F32 : VOPProfile <[v4f32, f32, f32, v4f32]>;
def VOP_V16F32_F32_F32_V16F32 : VOPProfile <[v16f32, f32, f32, v16f32]>;
def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32f32, f32, f32, v32f32]>;
def VOP_V4F32_V4F16_V4F16_V4F32 : VOPProfile <[v4f32, v4f16, v4f16, v4f32]>;
def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>;
def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>;
def VOP_V4F32_V2I16_V2I16_V4F32 : VOPProfile <[v4f32, v2i16, v2i16, v4f32]>;
def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>;
def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>;
def VOP_V4I32_I32_I32_V4I32 : VOPProfile <[v4i32, i32, i32, v4i32]>;
def VOP_V16I32_I32_I32_V16I32 : VOPProfile <[v16i32, i32, i32, v16i32]>;
def VOP_V32I32_I32_I32_V32I32 : VOPProfile <[v32i32, i32, i32, v32i32]>;

def VOP_V4F64_F64_F64_V4F64 : VOPProfile <[v4f64, f64, f64, v4f64]>;
def VOP_V1F64_F64_F64_V1F64 : VOPProfile <[v1f64, f64, f64, v1f64]>;

def VOP_V2F32_V2F32_V2F32_V2F32 : VOPProfile <[v2f32, v2f32, v2f32, v2f32]>;
def VOP_V2F32_V2F32_V2F32 : VOPProfile <[v2f32, v2f32, v2f32, untyped]>;
def VOP_V2I32_V2I32_V2I32 : VOPProfile <[v2i32, v2i32, v2i32, untyped]>;
def VOP_V4F32_V4I16_V4I16_V4F32 : VOPProfile <[v4f32, v4i16, v4i16, v4f32]>;
def VOP_V16F32_V4I16_V4I16_V16F32 : VOPProfile <[v16f32, v4i16, v4i16, v16f32]>;
def VOP_V32F32_V4I16_V4I16_V32F32 : VOPProfile <[v32f32, v4i16, v4i16, v32f32]>;

def VOP_V4I32_I64_I64_V4I32 : VOPProfile <[v4i32, i64, i64, v4i32]>;
def VOP_V16I32_I64_I64_V16I32 : VOPProfile <[v16i32, i64, i64, v16i32]>;
def VOP_V4F32_V2F32_V2F32_V4F32 : VOPProfile <[v4f32, v2f32, v2f32, v4f32]>;
def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile <[v16f32, v2f32, v2f32, v16f32]>;

def VOP_V4F32_V4F16_V8F16_I32 : VOPProfile <[v4f32, v4f16, v8f16, i32]>;
def VOP_V16F32_V4F16_V8F16_I32 : VOPProfile <[v16f32, v4f16, v8f16, i32]>;
def VOP_V4F32_V4I16_V8I16_I32 : VOPProfile <[v4f32, v4i16, v8i16, i32]>;
def VOP_V16F32_V4I16_V8I16_I32 : VOPProfile <[v16f32, v4i16, v8i16, i32]>;
def VOP_V4I32_V2I32_V4I32_I32 : VOPProfile <[v4i32, v2i32, v4i32, i32]>;
def VOP_V16I32_V2I32_V4I32_I32 : VOPProfile <[v16i32, v2i32, v4i32, i32]>;

// Marker class pairing an instruction with its operand-commuted form; the
// RevOp string is the row key shared by both defs, and IsOrig distinguishes
// the original (1) from the commuted (0) variant. Consumed by the
// getCommuteOrig/getCommuteRev instruction mappings below.
class Commutable_REV <string revOp, bit isOrig> {
  string RevOp = revOp;
  bit IsOrig = isOrig;
}

// Marker class pairing an atomic opcode with its returnless variant; the
// NoRetOp string is the shared row key, and IsRet is 1 on the returning
// form, 0 on the returnless form. Consumed by getAtomicNoRetOp below.
class AtomicNoRet <string noRetOp, bit isRet> {
  string NoRetOp = noRetOp;
  bit IsRet = isRet;
}

//===----------------------------------------------------------------------===//
// Interpolation opcodes
//===----------------------------------------------------------------------===//

// Destination operand wrapper selecting the printVINTRPDst print method.
class VINTRPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVINTRPDst">;

// Codegen-only VINTRP pseudo. SIEncodingFamily.NONE is the pseudo's row key
// in the getMCOpcodeGen table, which resolves it to a real encoding.
class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
  VINTRPCommon <outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {
  let isPseudo = 1;
  let isCodeGenOnly = 1;
}

// Real VINTRP instruction using the SI encoding layout (VINTRPe). The
// encoding family is a parameter so this class is reused for GFX10 in
// VINTRP_m below.
// FIXME-GFX10: WIP.
class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
                      string asm, int encodingFamily> :
  VINTRPCommon <outs, ins, asm, []>,
  VINTRPe <op>,
  SIMCInstr<opName, encodingFamily> {
}

// Real VINTRP instruction using the VI (GFX8) encoding layout (VINTRPe_vi).
class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
                      string asm> :
  VINTRPCommon <outs, ins, asm, []>,
  VINTRPe_vi <op>,
  SIMCInstr<opName, SIEncodingFamily.VI> {
  let AssemblerPredicate = VIAssemblerPredicate;
  let DecoderNamespace = "GFX8";
}

// FIXME-GFX10: WIP.
// Expands a VINTRP opcode into its codegen pseudo plus the per-subtarget
// real encodings: _si (GFX6/GFX7), _vi (GFX8), and _gfx10.
multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
                     list<dag> pattern = []> {
  def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;

  let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
    def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>;
  } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

  def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;

  // GFX10 reuses the SI encoding class, distinguished by encoding family.
  let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
    def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
  } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
}

//===----------------------------------------------------------------------===//
// Vector instruction mappings
//===----------------------------------------------------------------------===//

// Maps an opcode in e32 form to its e64 equivalent
def getVOPe64 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["Size", "VOP3"];
  let KeyCol = ["4", "0"];
  let ValueCols = [["8", "1"]];
}

// Maps an opcode in e64 form to its e32 equivalent
def getVOPe32 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["Size", "VOP3"];
  let KeyCol = ["8", "1"];
  let ValueCols = [["4", "0"]];
}

// Maps ordinary instructions to their SDWA counterparts
def getSDWAOp : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["Default"];
  let ValueCols = [["SDWA"]];
}

// Maps SDWA instructions to their ordinary counterparts
def getBasicFromSDWAOp : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["SDWA"];
  let ValueCols = [["Default"]];
}

// Maps ordinary instructions to their DPP counterparts
def getDPPOp32 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["Default"];
  let ValueCols = [["DPP"]];
}

// Maps a commuted opcode to its original version
def getCommuteOrig : InstrMapping {
  let FilterClass = "Commutable_REV";
  let RowFields = ["RevOp"];
  let ColFields = ["IsOrig"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps an original opcode to its commuted version
def getCommuteRev : InstrMapping {
  let FilterClass = "Commutable_REV";
  let RowFields = ["RevOp"];
  let ColFields = ["IsOrig"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a pseudo (SIEncodingFamily.NONE) to its per-subtarget MC opcodes.
def getMCOpcodeGen : InstrMapping {
  let FilterClass = "SIMCInstr";
  let RowFields = ["PseudoInstr"];
  let ColFields = ["Subtarget"];
  let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
  // These columns must be kept in sync with the SIEncodingFamily enumeration.
  let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
                   [!cast<string>(SIEncodingFamily.VI)],
                   [!cast<string>(SIEncodingFamily.SDWA)],
                   [!cast<string>(SIEncodingFamily.SDWA9)],
                   // GFX80 encoding is added to work around a multiple matching
                   // issue for buffer instructions with unpacked d16 data. This
                   // does not actually change the encoding, and thus may be
                   // removed later.
                   [!cast<string>(SIEncodingFamily.GFX80)],
                   [!cast<string>(SIEncodingFamily.GFX9)],
                   [!cast<string>(SIEncodingFamily.GFX10)],
                   [!cast<string>(SIEncodingFamily.SDWA10)],
                   [!cast<string>(SIEncodingFamily.GFX90A)],
                   [!cast<string>(SIEncodingFamily.GFX940)],
                   [!cast<string>(SIEncodingFamily.GFX11)]];
}

// Get equivalent SOPK instruction.
def getSOPKOp : InstrMapping {
  let FilterClass = "SOPKInstTable";
  let RowFields = ["BaseCmpOp"];
  let ColFields = ["IsSOPK"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps a MUBUF opcode to its addr64 form.
def getAddr64Inst : InstrMapping {
  let FilterClass = "MUBUFAddr64Table";
  let RowFields = ["OpName"];
  let ColFields = ["IsAddr64"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Identity mapping over addr64 opcodes (KeyCol equals ValueCols); a hit
// means the queried opcode is already the addr64 form.
def getIfAddr64Inst : InstrMapping {
  let FilterClass = "MUBUFAddr64Table";
  let RowFields = ["OpName"];
  let ColFields = ["IsAddr64"];
  let KeyCol = ["1"];
  let ValueCols = [["1"]];
}

// Maps an atomic opcode to its returnless version.
def getAtomicNoRetOp : InstrMapping {
  let FilterClass = "AtomicNoRet";
  let RowFields = ["NoRetOp"];
  let ColFields = ["IsRet"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a GLOBAL to its SADDR form.
def getGlobalSaddrOp : InstrMapping {
  let FilterClass = "GlobalSaddrTable";
  let RowFields = ["SaddrOp"];
  let ColFields = ["IsSaddr"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps a GLOBAL SADDR to its VADDR form.
def getGlobalVaddrOp : InstrMapping {
  let FilterClass = "GlobalSaddrTable";
  let RowFields = ["SaddrOp"];
  let ColFields = ["IsSaddr"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a v_cmpx opcode with sdst to opcode without sdst.
def getVCMPXNoSDstOp : InstrMapping {
  let FilterClass = "VCMPXNoSDstTable";
  let RowFields = ["NoSDstOp"];
  let ColFields = ["HasSDst"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a SOPP to a SOPP with S_NOP
def getSOPPWithRelaxation : InstrMapping {
  let FilterClass = "SOPPRelaxTable";
  let RowFields = ["KeyName"];
  let ColFields = ["IsRelaxed"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps flat scratch opcodes by addressing modes

// SS form -> ST form.
def getFlatScratchInstSTfromSS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SS"];
  let ValueCols = [["ST"]];
}

// SV form -> SS form.
def getFlatScratchInstSSfromSV : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SV"];
  let ValueCols = [["SS"]];
}

// SVS form -> SV form.
def getFlatScratchInstSVfromSVS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SVS"];
  let ValueCols = [["SV"]];
}

// SS form -> SV form.
def getFlatScratchInstSVfromSS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SS"];
  let ValueCols = [["SV"]];
}

// Maps a MAC MFMA opcode (IsMac = 1) to its non-MAC equivalent.
def getMFMAEarlyClobberOp : InstrMapping {
  let FilterClass = "MFMATable";
  let RowFields = ["FMAOp"];
  let ColFields = ["IsMac"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a v_cmp instruction to its v_cmpx equivalent.
def getVCMPXOpFromVCMP : InstrMapping {
  let FilterClass = "VCMPVCMPXTable";
  let RowFields = ["VCMPOp"];
  let ColFields = ["IsVCMPX"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Instruction definition files, included after the profiles, classes, and
// mappings above so they can use them.
include "SIInstructions.td"

include "DSInstructions.td"
include "MIMGInstructions.td"