//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP1 Classes
//===----------------------------------------------------------------------===//

// 32-bit VOP1 encoding. Field layout:
//   Inst{8-0}   src0 (left unset when the profile has no src0)
//   Inst{16-9}  opcode
//   Inst{24-17} vdst (0 when the profile does not emit a destination)
//   Inst{31-25} fixed value 0x3f
class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;

  let Inst{8-0}   = !if(P.HasSrc0, src0{8-0}, ?);
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; //encoding
}

// SDWA variant of the VOP1 encoding: the src0 field holds the constant 0xf9;
// the remaining low-word fields match VOP1e.
class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// Same low-word layout as VOP1_SDWAe, but derived from VOP_SDWA9Ae.
class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// Pseudo instruction for the 32-bit VOP1 form.
//   opName   - mnemonic.
//   P        - operand/asm profile; Outs32, Ins32 and Asm32 are used.
//   pattern  - optional selection pattern.
//   VOP1Only - when set, the "_e32" suffix passed to VOP_Pseudo is omitted.
class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> :
  VOP_Pseudo <opName, !if(VOP1Only, "", "_e32"), P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  // The MODE register is read whenever the destination or src0 is a
  // floating-point type; such instructions are also assumed to be able to
  // raise FP exceptions unless a definition overrides it.
  let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);

  let mayRaiseFPException = ReadsModeReg;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

// Real (encodable) instruction built from a VOP1_Pseudo for a given encoding
// family. Operand lists come from the pseudo; the asm string is real_name
// followed by the pseudo's AsmOperands.
class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic > :
  VOP_Real <ps>,
  InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let VALU = 1;
  let VOP1 = 1;
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  // (Constraints and DisableEncoding used to be assigned twice with the same
  // value; each is now assigned once.)
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let AsmVariantName       = ps.AsmVariantName;
  let Constraints          = ps.Constraints;
  let DisableEncoding      = ps.DisableEncoding;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let TRANS                = ps.TRANS;
}

// VOP1_Real whose encoding family, assembler predicate and decoder namespace
// are all taken from a GFXGen record.
class VOP1_Real_Gen <VOP1_Pseudo ps, GFXGen Gen, string real_name = ps.Mnemonic> :
  VOP1_Real <ps, Gen.Subtarget, real_name> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let DecoderNamespace = Gen.DecoderNamespace;
}

// SDWA pseudo for VOP1; only adds the VOP1-specific asm match converter.
class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP1";
}

// DPP pseudo for VOP1; adds nothing over VOP_DPP_Pseudo.
class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}

// Builds the selection pattern (in `ret`) for a one-source node:
//   - with source modifiers when P.HasModifiers,
//   - otherwise with clamp/omod operands when P.HasOMod,
//   - otherwise a plain (node src0) pattern.
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret =
    !if(P.HasModifiers,
        [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers))))],
        !if(P.HasOMod,
            [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
                                                  i1:$clamp, i32:$omod))))],
            [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]
        )
    );
}

// Defines every pseudo form of a VOP1 instruction:
//   _e32 and _e64 always; _sdwa, _dpp and _e64_dpp only when the profile
//   enables the corresponding extension. Also defines mnemonic aliases that
//   map the suffixed spellings back to opName.
//   VOPDOp - when not -1, the _e32 form additionally becomes a VOPD component
//            with that opcode.
multiclass VOP1Inst <string opName, VOPProfile P,
                     SDPatternOperator node = null_frag, int VOPDOp = -1> {
  // We only want to set this on the basic, non-SDWA or DPP forms.
  defvar should_mov_imm = !or(!eq(opName, "v_mov_b32"),
                              !eq(opName, "v_mov_b64"));

  let isMoveImm = should_mov_imm in {
    if !eq(VOPDOp, -1) then
      def _e32 : VOP1_Pseudo <opName, P>;
    else
      // Only for V_MOV_B32
      def _e32 : VOP1_Pseudo <opName, P>, VOPD_Component<VOPDOp, opName>;
    def _e64 : VOP3InstBase <opName, P, node>;
  }

  if P.HasExtSDWA then
    def _sdwa : VOP1_SDWA_Pseudo <opName, P>;

  if P.HasExtDPP then
    def _dpp : VOP1_DPP_Pseudo <opName, P>;

  let SubtargetPredicate = isGFX11Plus in {
    if P.HasExtVOP3DPP then
      def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
  } // End SubtargetPredicate = isGFX11Plus

  def : MnemonicAlias<opName#"_e32", opName>, LetDummies;
  def : MnemonicAlias<opName#"_e64", opName>, LetDummies;

  if P.HasExtSDWA then
    def : MnemonicAlias<opName#"_sdwa", opName>, LetDummies;

  if P.HasExtDPP then
    def : MnemonicAlias<opName#"_dpp", opName, AMDGPUAsmVariants.DPP>, LetDummies;
}

// Defines two copies of a 16-bit instruction: NAME with profile P for
// subtargets with 16-bit instructions but without true16, and NAME_t16 with
// VOPProfile_Fake16<P> for subtargets with true16 instructions.
multiclass VOP1Inst_t16<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag> {
  let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts]  in {
    defm NAME : VOP1Inst<opName, P, node>;
  }
  let OtherPredicates = [HasTrue16BitInsts] in {
    defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_Fake16<P>, node>;
  }
}

// Special profile for instructions which have clamp
// and output modifiers (but have no input modifiers)
class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
  let InsVOP3Base = (ins Src0VOP3DPP:$src0, clampmod:$clamp, omod:$omod);
  let AsmVOP3Base = "$vdst, $src0$clamp$omod";

  let HasModifiers = 0;
  let HasClamp = 1;
}

// Same overrides as VOPProfileI2F, applied on top of a Fake16 profile.
class VOPProfileI2F_True16<ValueType dstVt, ValueType srcVt> :
  VOPProfile_Fake16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {

  let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
  let InsVOP3Base = (ins Src0VOP3DPP:$src0, clampmod:$clamp, omod:$omod);
  let AsmVOP3Base = "$vdst, $src0$clamp$omod";

  let HasModifiers = 0;
  let HasClamp = 1;
}

def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
def VOP1_F16_I16_t16 : VOPProfileI2F_True16 <f16, i16>;

// Profile with no typed operands; the VOP3 DPP extension is disabled.
def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{
  let HasExtVOP3DPP = 0;
}

// OMod clears exceptions when set. OMod was always an operand, but it's
// now explicitly set.
class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let HasOMod = 1;
}
def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_Fake16<VOP_I16_F16> {
  let HasOMod = 1;
}

//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//

let VOPAsmPrefer32Bit = 1 in {
defm V_NOP : VOP1Inst <"v_nop", VOP_NOP_PROFILE>;
}

// i32 -> i32 move profile that also provides the VOPD X/Y operand lists.
def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
  let InsVOPDX = (ins Src0RC32:$src0X);
  let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X);
  let InsVOPDY = (ins Src0RC32:$src0Y);
  let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y);
}

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;

let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
} // End isReMaterializable = 1, isAsCheapAsAMove = 1

// FIXME: Specify
// SchedRW for READFIRSTLANE_B32
// TODO: Make profile for this, there is VOP3 encoding also
// Hand-written VOP1 instruction with an SReg_32 destination. The encoding is
// spelled out below with opcode 0x2 instead of going through VOP1e.
def V_READFIRSTLANE_B32 :
  InstSI <(outs SReg_32:$vdst),
    (ins VRegOrLdsSrc_32:$src0),
    "v_readfirstlane_b32 $vdst, $src0",
    [(set i32:$vdst, (int_amdgcn_readfirstlane (i32 VRegOrLdsSrc_32:$src0)))]>,
  Enc32 {

  let isCodeGenOnly = 0;
  let UseNamedOperandTable = 1;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = [EXEC];
  let isConvergent = 1;

  bits<8> vdst;
  bits<9> src0;

  let Inst{8-0}   = src0;
  let Inst{16-9}  = 0x2;
  let Inst{24-17} = vdst;
  let Inst{31-25} = 0x3f; //encoding
}

let isReMaterializable = 1 in {
let SchedRW = [WriteDoubleCvt] in {
// OMod clears exceptions when set in this instruction
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64_SPECIAL_OMOD,  fp_to_sint>;

let mayRaiseFPException = 0 in {
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
}

defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, any_fpextend>;
// OMod clears exceptions when set in this instruction
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64_SPECIAL_OMOD,  fp_to_uint>;

let mayRaiseFPException = 0 in {
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
}

} // End SchedRW = [WriteDoubleCvt]

let SchedRW = [WriteFloatCvt] in {

// XXX: Does this really not raise exceptions? The manual claims the
// 16-bit ones can.
let mayRaiseFPException = 0 in {
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
}

// OMod clears exceptions when set in these 2 instructions
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32_SPECIAL_OMOD,  fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32_SPECIAL_OMOD,  fp_to_sint>;
// The f32 -> f16 conversions opt out of the surrounding isReMaterializable.
let FPDPRounding = 1, isReMaterializable = 0 in {
  let OtherPredicates = [NotHasTrue16BitInsts] in
  defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
  let OtherPredicates = [HasTrue16BitInsts] in
  defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
} // End FPDPRounding = 1, isReMaterializable = 0

let OtherPredicates = [NotHasTrue16BitInsts] in
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
let OtherPredicates = [HasTrue16BitInsts] in
defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst  <"v_cvt_off_f32_i4", VOP1_F32_I32>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0
} // End SchedRW = [WriteFloatCvt]

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0

defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, froundeven>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;

let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, AMDGPUexp>;
defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, AMDGPUlog>;
defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, int_amdgcn_sqrt>;
} // End TRANS = 1, SchedRW = [WriteTrans32]

let TRANS = 1, SchedRW = [WriteTrans64] in {
defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, int_amdgcn_sqrt>;
} // End TRANS = 1, SchedRW = [WriteTrans64]

let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]

defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, DivergentUnaryFrag<bitreverse>>;
defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>;
defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32, AMDGPUffbl_b32>;
defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>;

let SchedRW = [WriteDoubleAdd] in {
defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
let FPDPRounding = 1 in {
defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
} // End FPDPRounding = 1
} // End SchedRW = [WriteDoubleAdd]

defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>;
} // End isReMaterializable = 1

let VOPAsmPrefer32Bit = 1 in {
defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
}

// Restrict src0 to be VGPR
def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
  let Src0RC32 = VRegSrc_32;
  let Src0RC64 = VRegSrc_32;
}

// Special case because there are no true output operands.  Hack vdst
// to be a src operand. The custom inserter must add a tied implicit
// def and use of the super register since there seems to be no way to
// add an implicit def of a virtual register in tablegen.
// Profile for the movrel instructions: there are no declared outputs (Outs is
// empty and HasDst is 0), vdst is listed among the inputs, and EmitDst is
// forced so the vdst field is still encoded.
//   Src1RC - register operand class used for $src0.
class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, untyped]> {
  let Src0RC32 = VOPDstOperand<VGPR_32>;
  let Src0RC64 = VOPDstOperand<VGPR_32>;

  let Outs = (outs);
  let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
  let Ins64 = (ins Src0RC64:$vdst, Src1RC:$src0);
  let Asm32 = getAsm32<1, 1>.ret;

  let OutsSDWA = (outs Src0RC32:$vdst);
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel);
  let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;

  let OutsDPP = (outs Src0RC32:$vdst);
  let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0,
                      dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
  let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
  let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
  let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;

  let OutsVOP3DPP = (outs Src0RC64:$vdst);
  let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
  let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
  let InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;

  let AsmVOP3Base =
    getAsmVOP3Base<NumSrcArgs, 1 /* HasDst */, HasClamp,
                   HasOpSel, HasOMod, IsVOP3P, HasModifiers,
                   HasModifiers, HasModifiers, HasModifiers>.ret;

  let HasDst = 0;
  let EmitDst = 1; // force vdst emission
}

def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32>;

let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
 // v_movreld_b32 is a special case because the destination output
 // register is really a source. It isn't actually read (but may be
 // written), and is only to provide the base register to start
 // indexing from. Tablegen seems to not let you define an implicit
 // virtual register output for the super register being written into,
 // so this must have an implicit def of the register added to it.
defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_MOVRELS>;
defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>;
} // End SubtargetPredicate = HasMovrel, Uses = [M0, EXEC]

let isReMaterializable = 1 in {
let SubtargetPredicate = isGFX6GFX7 in {
  let TRANS = 1, SchedRW = [WriteTrans32] in {
    defm V_LOG_CLAMP_F32 :
      VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
    defm V_RCP_CLAMP_F32 :
      VOP1Inst<"v_rcp_clamp_f32", VOP_F32_F32>;
    defm V_RCP_LEGACY_F32 :
      VOP1Inst<"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
    defm V_RSQ_CLAMP_F32 :
      VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
    defm V_RSQ_LEGACY_F32 :
      VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, int_amdgcn_rsq_legacy>;
  } // End TRANS = 1, SchedRW = [WriteTrans32]

  // NOTE(review): unlike the other WriteTrans32/WriteTrans64 groups in this
  // file, this group does not set TRANS = 1 -- confirm that is intentional.
  let SchedRW = [WriteTrans64] in {
    defm V_RCP_CLAMP_F64 :
      VOP1Inst<"v_rcp_clamp_f64", VOP_F64_F64>;
    defm V_RSQ_CLAMP_F64 :
      VOP1Inst<"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
  } // End SchedRW = [WriteTrans64]
} // End SubtargetPredicate = isGFX6GFX7

let SubtargetPredicate = isGFX7GFX8GFX9 in {
  let TRANS = 1, SchedRW = [WriteTrans32] in {
    defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>;
    defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>;
  } // End TRANS = 1, SchedRW = [WriteTrans32]
} // End SubtargetPredicate = isGFX7GFX8GFX9

let SubtargetPredicate = isGFX7Plus in {
  let SchedRW = [WriteDoubleAdd] in {
    defm V_TRUNC_F64 : VOP1Inst<"v_trunc_f64", VOP_F64_F64, ftrunc>;
    defm V_CEIL_F64 : VOP1Inst<"v_ceil_f64", VOP_F64_F64, fceil>;
    defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, froundeven>;
    defm V_FLOOR_F64 : VOP1Inst<"v_floor_f64", VOP_F64_F64, ffloor>;
  } // End SchedRW = [WriteDoubleAdd]
} // End SubtargetPredicate = isGFX7Plus
} // End isReMaterializable = 1

let FPDPRounding = 1 in {
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
}
let OtherPredicates = [HasTrue16BitInsts] in {
defm V_CVT_F16_U16_t16 : VOP1Inst <"v_cvt_f16_u16_t16", VOP1_F16_I16_t16, uint_to_fp>;
defm V_CVT_F16_I16_t16 : VOP1Inst <"v_cvt_f16_i16_t16", VOP1_F16_I16_t16, sint_to_fp>;
}
} // End FPDPRounding = 1
// OMod clears exceptions when set in these two instructions
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_sint>;
}
let OtherPredicates = [HasTrue16BitInsts] in {
defm V_CVT_U16_F16_t16 : VOP1Inst <"v_cvt_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_uint>;
defm V_CVT_I16_F16_t16 : VOP1Inst <"v_cvt_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_sint>;
}
let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
defm V_RSQ_F16 : VOP1Inst_t16 <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
defm V_LOG_F16 : VOP1Inst_t16 <"v_log_f16", VOP_F16_F16, AMDGPUlogf16>;
defm V_EXP_F16 : VOP1Inst_t16 <"v_exp_f16", VOP_F16_F16, AMDGPUexpf16>;
defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
}
let OtherPredicates = [HasTrue16BitInsts] in {
defm V_FREXP_EXP_I16_F16_t16 : VOP1Inst <"v_frexp_exp_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, int_amdgcn_frexp_exp>;
}
defm V_FLOOR_F16 : VOP1Inst_t16 <"v_floor_f16", VOP_F16_F16, ffloor>;
defm V_CEIL_F16 : VOP1Inst_t16 <"v_ceil_f16", VOP_F16_F16, fceil>;
defm V_TRUNC_F16 : VOP1Inst_t16 <"v_trunc_f16", VOP_F16_F16, ftrunc>;
defm V_RNDNE_F16 : VOP1Inst_t16 <"v_rndne_f16", VOP_F16_F16, froundeven>;
let FPDPRounding = 1 in {
defm V_FRACT_F16 : VOP1Inst_t16 <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
} // End FPDPRounding = 1

// Select f16_to_fp / AMDGPUfp_to_f16 on i16 values to the e32 conversion
// instructions; the true16 subtargets use the _t16 variants.
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
def : GCNPat<
    (f32 (f16_to_fp i16:$src)),
    (V_CVT_F32_F16_e32 $src)
>;
def : GCNPat<
    (i16 (AMDGPUfp_to_f16 f32:$src)),
    (V_CVT_F16_F32_e32 $src)
>;
}
let OtherPredicates = [HasTrue16BitInsts] in {
def : GCNPat<
    (f32 (f16_to_fp i16:$src)),
    (V_CVT_F32_F16_t16_e32 $src)
>;
def : GCNPat<
    (i16 (AMDGPUfp_to_f16 f32:$src)),
    (V_CVT_F16_F32_t16_e32 $src)
>;
}

// Swap profile: two outputs and two inputs, although only $vdst and $src0
// appear in the asm string.
def VOP_SWAP_I32 : VOPProfile<[i32, i32, untyped, untyped]> {
  let Outs32 = (outs VGPR_32:$vdst, VRegSrc_32:$vdst1);
  let Ins32 = (ins VRegSrc_32:$src0, VGPR_32:$src1);
  let Asm32 = " $vdst, $src0";
}

let SubtargetPredicate = isGFX9Plus in {
  def V_SWAP_B32 : VOP1_Pseudo<"v_swap_b32", VOP_SWAP_I32, [], 1> {
    // Each output is tied to the opposite input; the tied duplicates are
    // excluded from the encoding.
    let Constraints = "$vdst = $src1, $vdst1 = $src0";
    let DisableEncoding = "$vdst1,$src1";
    let SchedRW = [Write64Bit, Write64Bit];
  }

  let isReMaterializable = 1 in
  defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;

  let mayRaiseFPException = 0 in {
    let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
      defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>;
      defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>;
    }
    let OtherPredicates = [HasTrue16BitInsts] in {
      defm V_CVT_NORM_I16_F16_t16 : VOP1Inst<"v_cvt_norm_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
      defm V_CVT_NORM_U16_F16_t16 : VOP1Inst<"v_cvt_norm_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
    }
  } // End mayRaiseFPException = 0
} // End SubtargetPredicate = isGFX9Plus

let SubtargetPredicate = isGFX9Only in {
  defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
} // End SubtargetPredicate = isGFX9Only

// Base profile for the FP8/BF8 -> f32 conversions. SDWA is enabled with a
// custom operand list and asm string that have no dst_sel (EmitDstSel = 0).
//   vt - destination value type.
class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> {
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 1;
  let HasExt = 1;
  let DstRCSDWA = getVALUDstForVT<vt>.ret;
  let InsSDWA = (ins Bin32SDWAInputMods:$src0_modifiers, Src0SDWA:$src0,
                     clampmod:$clamp, omod:$omod, src0_sel:$src0_sel);
  let AsmSDWA = "$vdst, $src0_modifiers$clamp$omod $src0_sel"; // No dst_sel
  let AsmSDWA9 = AsmSDWA;
  let EmitDstSel = 0;
}

def VOPProfileCVT_F32_F8 : VOPProfile_Base_CVT_F32_F8 <f32>;
def VOPProfileCVT_PK_F32_F8 : VOPProfile_Base_CVT_F32_F8 <v2f32>;

let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0,
    SchedRW = [WriteFloatCvt] in {
  defm V_CVT_F32_FP8 : VOP1Inst<"v_cvt_f32_fp8", VOPProfileCVT_F32_F8>;
  defm V_CVT_F32_BF8 : VOP1Inst<"v_cvt_f32_bf8", VOPProfileCVT_F32_F8>;
  defm V_CVT_PK_F32_FP8 : VOP1Inst<"v_cvt_pk_f32_fp8", VOPProfileCVT_PK_F32_F8>;
  defm V_CVT_PK_F32_BF8 : VOP1Inst<"v_cvt_pk_f32_bf8", VOPProfileCVT_PK_F32_F8>;
}

// Selects (node $src, index) to the SDWA form, passing `index` as the last
// operand of the SDWA instruction.
class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
    VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
    (f32 (node i32:$src, index)),
    (inst_sdwa 0, $src, 0, 0, index)
>;

// Index 0: SDWA form on subtargets with HasCvtFP8VOP1Bug, e32 form otherwise.
let OtherPredicates = [HasCvtFP8VOP1Bug] in {
  def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
               (V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>;
  def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
               (V_CVT_F32_BF8_sdwa 0, $src, 0, 0, 0)>;
}

let OtherPredicates = [HasNoCvtFP8VOP1Bug] in {
  def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
               (V_CVT_F32_FP8_e32 $src)>;
  def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
               (V_CVT_F32_BF8_e32 $src)>;
}

foreach Index = [1, 2, 3] in {
  def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>;
  def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>;
}

// Packed conversion pattern: a non-zero index selects the SDWA form with
// SDWA.WORD_1 as its last operand, index 0 selects the plain e32 form.
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
    VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
    (v2f32 (node i32:$src, index)),
    !if (index,
         (inst_sdwa 0, $src, 0, 0, SDWA.WORD_1),
         (inst_e32 $src))
>;

foreach Index = [0, -1] in {
  def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
                          V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
  def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
                          V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
}

let SubtargetPredicate = isGFX10Plus in {
  defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT<VOP_NONE>>;

  let Uses = [M0] in {
    defm V_MOVRELSD_2_B32 :
      VOP1Inst<"v_movrelsd_2_b32", VOP_MOVRELSD>;

    def V_SWAPREL_B32 : VOP1_Pseudo<"v_swaprel_b32", VOP_SWAP_I32, [], 1> {
      let Constraints = "$vdst = $src1, $vdst1 = $src0";
      let DisableEncoding = "$vdst1,$src1";
      let SchedRW = [Write64Bit, Write64Bit];
    }
  } // End Uses = [M0]
} // End SubtargetPredicate = isGFX10Plus

// i32 move profile whose destination is an AGPR_32 operand and whose src0
// class is ARegSrc_32.
def VOPProfileAccMov : VOP_NO_EXT<VOP_I32_I32> {
  let DstRC = RegisterOperand<AGPR_32>;
  let Src0RC32 = ARegSrc_32;
  let Asm32 = " $vdst, $src0";
}

def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 1> {
  let SubtargetPredicate = isGFX90APlus;
  let isReMaterializable = 1;
  let isAsCheapAsAMove = 1;
}

let SubtargetPredicate = isGFX11Plus in {
  // Restrict src0 to be VGPR
  def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
                                      getVOP1Pat64<int_amdgcn_permlane64,
                                                   VOP_MOVRELS>.ret,
                                      /*VOP1Only=*/ 1>;
  defm V_MOV_B16_t16    : VOP1Inst<"v_mov_b16_t16", VOPProfile_True16<VOP_I16_I16>>;
  defm V_NOT_B16        : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>;
  defm V_CVT_I32_I16    : VOP1Inst_t16<"v_cvt_i32_i16", VOP_I32_I16>;
  defm V_CVT_U32_U16    : VOP1Inst_t16<"v_cvt_u32_u16", VOP_I32_I16>;
} // End SubtargetPredicate = isGFX11Plus

//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//

// DPP encoding of a VOP1 instruction. The src0 field holds the constant 0xfa;
// opcode, vdst and the 0x3f field use the same bit positions as VOP1e.
// Side-effect, Defs/Uses, scheduling and TRANS flags come from the pseudo.
class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> :
    VOP_DPP<ps.OpName, p, isDPP16> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;
  let TRANS = ps.TRANS;

  bits<8> vdst;
  let Inst{8-0}   = 0xfa;
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;
}

// DPP16 real instruction for an explicit subtarget encoding family.
class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, int subtarget, VOPProfile p = ps.Pfl> :
    VOP1_DPP<op, ps, p, 1>,
    SIMCInstr <ps.PseudoInstr, subtarget> {
  let AssemblerPredicate = HasDPP16;
  let SubtargetPredicate = HasDPP16;
}

// DPP16 real instruction parameterised by a GFXGen record; the assembler
// predicate set in VOP1_DPP16 is replaced by the generation's predicate.
class VOP1_DPP16_Gen<bits<8> op, VOP1_DPP_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> :
    VOP1_DPP16 <op, ps, Gen.Subtarget, p> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let DecoderNamespace = "DPP"#Gen.DecoderNamespace;
}


// DPP8 encoding of a VOP1 instruction. The src0 field is filled from `fi`,
// which is not declared in this class (presumably provided by VOP_DPP8).
// NOTE(review): unlike VOP1_DPP, TRANS is not copied from the pseudo here --
// confirm whether that is intentional.
class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
    VOP_DPP8<ps.OpName, p> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  let Inst{8-0}   = fi;
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;
}

// DPP8 real instruction parameterised by a GFXGen record.
class VOP1_DPP8_Gen<bits<8> op, VOP1_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> :
    VOP1_DPP8<op, ps, p> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let DecoderNamespace = "DPP8"#Gen.DecoderNamespace;
}

//===----------------------------------------------------------------------===//
// GFX11, GFX12
//===----------------------------------------------------------------------===//

// Real for a pseudo that was defined with VOP1Only (no "_e32" suffix in its
// record name). Only the low 8 bits of `op` are encoded.
multiclass VOP1Only_Real<GFXGen Gen, bits<9> op> {
  let IsSingle = 1 in
    def Gen.Suffix :
      VOP1_Real_Gen<!cast<VOP1_Pseudo>(NAME), Gen>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
}

// Real for the _e32 pseudo of opName.
multiclass VOP1_Real_e32<GFXGen Gen, bits<9> op, string opName = NAME> {
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  def _e32#Gen.Suffix :
    VOP1_Real_Gen<ps, Gen>,
    VOP1e<op{7-0}, ps.Pfl>;
}

// As VOP1_Real_e32, but the asm string uses asmName instead of the pseudo's
// mnemonic.
multiclass VOP1_Real_e32_with_name<GFXGen Gen, bits<9> op, string opName,
                                   string asmName> {
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  let AsmString = asmName # ps.AsmOperands in {
    defm NAME : VOP1_Real_e32<Gen, op, opName>;
  }
}

// Real for the _e64 pseudo; the VOP3 opcode is {0, 1, 1, op{6-0}}.
multiclass VOP1_Real_e64<GFXGen Gen, bits<9> op> {
  def _e64#Gen.Suffix :
    VOP3_Real_Gen<!cast<VOP3_Pseudo>(NAME#"_e64"), Gen>,
    VOP3e_gfx11_gfx12<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}

// Real for the _dpp pseudo of opName (DPP16 encoding).
multiclass VOP1_Real_dpp<GFXGen Gen, bits<9> op, string opName = NAME> {
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  def _dpp#Gen.Suffix : VOP1_DPP16_Gen<op{7-0}, !cast<VOP1_DPP_Pseudo>(opName#"_dpp"), Gen>;
}

// As VOP1_Real_dpp, with asmName substituted into the DPP16 asm string.
multiclass VOP1_Real_dpp_with_name<GFXGen Gen, bits<9> op, string opName,
                                   string asmName> {
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  let AsmString = asmName # ps.Pfl.AsmDPP16 in {
    defm NAME : VOP1_Real_dpp<Gen, op, opName>;
  }
}

// DPP8 real, built from the _e32 pseudo of opName.
multiclass VOP1_Real_dpp8<GFXGen Gen, bits<9> op, string opName = NAME> {
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  def _dpp8#Gen.Suffix : VOP1_DPP8_Gen<op{7-0}, ps, Gen>;
}

// As VOP1_Real_dpp8, with asmName substituted into the DPP8 asm string.
multiclass VOP1_Real_dpp8_with_name<GFXGen Gen, bits<9> op, string opName,
                                    string asmName> {
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  let AsmString = asmName # ps.Pfl.AsmDPP8 in {
    defm NAME : VOP1_Real_dpp8<Gen, op, opName>;
  }
}

// VOP3 reals via VOP3_Realtriple, using VOP3 opcode {0, 1, 1, op{6-0}}.
multiclass VOP1_Realtriple_e64<GFXGen Gen, bits<9> op> {
  defm NAME : VOP3_Realtriple<Gen, {0, 1, 1, op{6-0}}, /*isSingle=*/ 0, NAME>;
}

multiclass VOP1_Realtriple_e64_with_name<GFXGen Gen, bits<9> op, string opName,
                                         string asmName> {
  defm NAME : VOP3_Realtriple_with_name<Gen, {0, 1, 1, op{6-0}}, opName,
                                        asmName>;
}

// e32 + VOP3 triple + DPP16 + DPP8 reals for one generation.
multiclass VOP1_Real_FULL<GFXGen Gen, bits<9> op> :
  VOP1_Real_e32<Gen, op>, VOP1_Realtriple_e64<Gen, op>,
  VOP1_Real_dpp<Gen, op>, VOP1_Real_dpp8<Gen, op>;

// GFX11 e32/DPP16/DPP8 reals under a new asm name, plus a mnemonic alias from
// the pseudo's mnemonic to asmName.
multiclass VOP1_Real_NO_VOP3_with_name_gfx11<bits<9> op, string opName,
                                             string asmName> {
  defm NAME : VOP1_Real_e32_with_name<GFX11Gen, op, opName, asmName>,
              VOP1_Real_dpp_with_name<GFX11Gen, op, opName, asmName>,
              VOP1_Real_dpp8_with_name<GFX11Gen, op, opName, asmName>;
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  def gfx11_alias : MnemonicAlias<ps.Mnemonic, asmName>,
                    Requires<[isGFX11Plus]>;
}

// GFX12 counterpart of the multiclass above, without the mnemonic alias.
multiclass VOP1_Real_NO_VOP3_with_name_gfx12<bits<9> op, string opName,
                                             string asmName> {
  defm NAME : VOP1_Real_e32_with_name<GFX12Gen, op, opName, asmName>,
              VOP1_Real_dpp_with_name<GFX12Gen, op, opName, asmName>,
              VOP1_Real_dpp8_with_name<GFX12Gen, op, opName, asmName>;
}

// All real forms for one generation, under a new asm name.
multiclass VOP1_Real_FULL_with_name<GFXGen Gen, bits<9> op, string opName,
                                    string asmName> :
  VOP1_Real_e32_with_name<Gen, op, opName, asmName>,
  VOP1_Real_dpp_with_name<Gen, op, opName, asmName>,
  VOP1_Real_dpp8_with_name<Gen, op, opName, asmName>,
  VOP1_Realtriple_e64_with_name<Gen, op, opName, asmName>;

// e32 + e64 reals only (no DPP forms).
multiclass VOP1_Real_NO_DPP<GFXGen Gen, bits<9> op> :
  VOP1_Real_e32<Gen, op>, VOP1_Real_e64<Gen, op>;

// Convenience wrappers instantiating the above for both GFX11 and GFX12.
multiclass VOP1_Real_FULL_t16_gfx11_gfx12<bits<9> op, string asmName,
                                          string opName = NAME> :
  VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
  VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;

multiclass VOP1_Real_FULL_with_name_gfx11_gfx12<bits<9> op, string opName,
                                                string asmName> :
  VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
  VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;

multiclass VOP1Only_Real_gfx11_gfx12<bits<9> op> :
  VOP1Only_Real<GFX11Gen, op>, VOP1Only_Real<GFX12Gen, op>;

multiclass VOP1_Real_FULL_gfx11_gfx12<bits<9> op> :
  VOP1_Real_FULL<GFX11Gen, op>, VOP1_Real_FULL<GFX12Gen, op>;

// e32 real plus a single VOP3 real, both under a new asm name.
multiclass VOP1_Real_NO_DPP_OP_SEL_with_name<GFXGen Gen, bits<9> op,
                                             string opName, string asmName> :
  VOP1_Real_e32_with_name<Gen, op, opName, asmName>,
  VOP3_Real_with_name<Gen, {0, 1, 1, op{6-0}}, opName, asmName>;


defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00c,
  "V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">;
defm V_CVT_FLOOR_I32_F32   : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00d,
  "V_CVT_FLR_I32_F32", "v_cvt_floor_i32_f32">;
defm V_CLZ_I32_U32         : VOP1_Real_FULL_with_name_gfx11_gfx12<0x039,
  "V_FFBH_U32", "v_clz_i32_u32">;
defm V_CTZ_I32_B32         : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03a,
  "V_FFBL_B32", "v_ctz_i32_b32">;
defm V_CLS_I32             : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b,
  "V_FFBH_I32", "v_cls_i32">;
defm V_PERMLANE64_B32      : VOP1Only_Real_gfx11_gfx12<0x067>;
defm V_MOV_B16_t16         : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">;
defm V_NOT_B16_t16         : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">;
defm V_CVT_I32_I16_t16     : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
defm V_CVT_U32_U16_t16     : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;

defm V_CVT_F16_U16_t16       : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
defm V_CVT_F16_I16_t16       : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
defm V_CVT_U16_F16_t16       : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
defm V_CVT_I16_F16_t16       : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
defm V_RCP_F16_t16           : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
defm V_SQRT_F16_t16          : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
defm V_RSQ_F16_t16           : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">;
defm V_LOG_F16_t16           : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">;
defm V_EXP_F16_t16           : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
defm V_FREXP_MANT_F16_t16    : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
defm V_FLOOR_F16_t16         : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
defm V_CEIL_F16_t16          : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
defm V_TRUNC_F16_t16         : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">;
defm V_RNDNE_F16_t16         : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">;
defm V_FRACT_F16_t16         : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">;
defm V_SIN_F16_t16           : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">;
defm V_COS_F16_t16           : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
defm V_SAT_PK_U8_I16_t16     : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
defm V_CVT_NORM_I16_F16_t16  : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
defm V_CVT_NORM_U16_F16_t16  : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;

defm V_CVT_F16_F32_t16       : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
defm V_CVT_F32_F16_t16       : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;

886//===----------------------------------------------------------------------===// 887// GFX10. 888//===----------------------------------------------------------------------===// 889 890let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { 891 multiclass VOP1Only_Real_gfx10<bits<9> op> { 892 def _gfx10 : 893 VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX10>, 894 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>; 895 } 896 multiclass VOP1_Real_e32_gfx10<bits<9> op> { 897 def _e32_gfx10 : 898 VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>, 899 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>; 900 } 901 multiclass VOP1_Real_e64_gfx10<bits<9> op> { 902 def _e64_gfx10 : 903 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>, 904 VOP3e_gfx10<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 905 } 906 multiclass VOP1_Real_sdwa_gfx10<bits<9> op> { 907 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then 908 def _sdwa_gfx10 : 909 VOP_SDWA10_Real<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, 910 VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> { 911 let DecoderNamespace = "SDWA10"; 912 } 913 } 914 multiclass VOP1_Real_dpp_gfx10<bits<9> op> { 915 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then 916 def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10> { 917 let DecoderNamespace = "SDWA10"; 918 } 919 } 920 multiclass VOP1_Real_dpp8_gfx10<bits<9> op> { 921 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then 922 def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> { 923 let DecoderNamespace = "DPP8"; 924 } 925 } 926} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" 927 928multiclass VOP1_Real_gfx10<bits<9> op> : 929 VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>, 930 VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>, 931 VOP1_Real_dpp8_gfx10<op>; 932 933multiclass 
VOP1_Real_gfx10_FULL_gfx11_gfx12<bits<9> op> : 934 VOP1_Real_gfx10<op>, 935 VOP1_Real_FULL<GFX11Gen, op>, 936 VOP1_Real_FULL<GFX12Gen, op>; 937 938multiclass VOP1_Real_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> : 939 VOP1_Real_gfx10<op>, 940 VOP1_Real_NO_DPP<GFX11Gen, op>, 941 VOP1_Real_NO_DPP<GFX12Gen, op>; 942 943multiclass VOP1Only_Real_gfx10_gfx11_gfx12<bits<9> op> : 944 VOP1Only_Real_gfx10<op>, 945 VOP1Only_Real<GFX11Gen, op>, 946 VOP1Only_Real<GFX12Gen, op>; 947 948defm V_PIPEFLUSH : VOP1_Real_gfx10_NO_DPP_gfx11_gfx12<0x01b>; 949defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10_FULL_gfx11_gfx12<0x048>; 950defm V_CVT_F16_U16 : VOP1_Real_gfx10<0x050>; 951defm V_CVT_F16_I16 : VOP1_Real_gfx10<0x051>; 952defm V_CVT_U16_F16 : VOP1_Real_gfx10<0x052>; 953defm V_CVT_I16_F16 : VOP1_Real_gfx10<0x053>; 954defm V_RCP_F16 : VOP1_Real_gfx10<0x054>; 955defm V_SQRT_F16 : VOP1_Real_gfx10<0x055>; 956defm V_RSQ_F16 : VOP1_Real_gfx10<0x056>; 957defm V_LOG_F16 : VOP1_Real_gfx10<0x057>; 958defm V_EXP_F16 : VOP1_Real_gfx10<0x058>; 959defm V_FREXP_MANT_F16 : VOP1_Real_gfx10<0x059>; 960defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>; 961defm V_FLOOR_F16 : VOP1_Real_gfx10<0x05b>; 962defm V_CEIL_F16 : VOP1_Real_gfx10<0x05c>; 963defm V_TRUNC_F16 : VOP1_Real_gfx10<0x05d>; 964defm V_RNDNE_F16 : VOP1_Real_gfx10<0x05e>; 965defm V_FRACT_F16 : VOP1_Real_gfx10<0x05f>; 966defm V_SIN_F16 : VOP1_Real_gfx10<0x060>; 967defm V_COS_F16 : VOP1_Real_gfx10<0x061>; 968defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10<0x062>; 969defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10<0x063>; 970defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10<0x064>; 971 972defm V_SWAP_B32 : VOP1Only_Real_gfx10_gfx11_gfx12<0x065>; 973defm V_SWAPREL_B32 : VOP1Only_Real_gfx10_gfx11_gfx12<0x068>; 974 975//===----------------------------------------------------------------------===// 976// GFX7, GFX10, GFX11, GFX12 977//===----------------------------------------------------------------------===// 978 979let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in { 
980 multiclass VOP1_Real_e32_gfx7<bits<9> op> { 981 def _e32_gfx7 : 982 VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>, 983 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>; 984 } 985 multiclass VOP1_Real_e64_gfx7<bits<9> op> { 986 def _e64_gfx7 : 987 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>, 988 VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 989 } 990} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" 991 992multiclass VOP1_Real_gfx7<bits<9> op> : 993 VOP1_Real_e32_gfx7<op>, VOP1_Real_e64_gfx7<op>; 994 995multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> : 996 VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>, 997 VOP1_Real_NO_DPP<GFX12Gen, op>; 998 999defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>; 1000defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>; 1001 1002defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x017>; 1003defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x018>; 1004defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x019>; 1005defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x01a>; 1006 1007//===----------------------------------------------------------------------===// 1008// GFX6, GFX7, GFX10, GFX11, GFX12 1009//===----------------------------------------------------------------------===// 1010 1011let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { 1012 multiclass VOP1_Real_e32_gfx6_gfx7<bits<9> op> { 1013 def _e32_gfx6_gfx7 : 1014 VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>, 1015 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>; 1016 } 1017 multiclass VOP1_Real_e64_gfx6_gfx7<bits<9> op> { 1018 def _e64_gfx6_gfx7 : 1019 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>, 1020 VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 1021 } 1022} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" 1023 
// e32 and e64 reals shared by GFX6 and GFX7.
multiclass VOP1_Real_gfx6_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx6_gfx7<op>, VOP1_Real_e64_gfx6_gfx7<op>;

// GFX6/GFX7 reals plus all GFX10 forms, with the same opcode.
multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> :
  VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10<op>;

// GFX6/GFX7/GFX10 reals plus the full GFX11 and GFX12 sets.
multiclass VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<bits<9> op> :
  VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_FULL<GFX11Gen, op>,
  VOP1_Real_FULL<GFX12Gen, op>;

// GFX6/GFX7/GFX10 reals plus e32/e64-only reals for GFX11 and GFX12.
multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> :
  VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>,
  VOP1_Real_NO_DPP<GFX12Gen, op>;

// Instructions that get GFX6/GFX7 reals only in this section.
defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>;
defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>;
defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>;
defm V_RSQ_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x02c>;
defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>;
defm V_RCP_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x030>;
defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>;

// Opcode table for instructions sharing one opcode from GFX6 through GFX10
// and, for the *_gfx11_gfx12 multiclasses, through GFX12. Entries that use
// plain VOP1_Real_gfx6_gfx7_gfx10 get no GFX11/GFX12 real here.
defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x000>;
defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x001>;
defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x003>;
defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x004>;
defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x005>;
defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x006>;
defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x007>;
defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x008>;
defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00a>;
defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>;
defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>;
defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x00e>;
defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x00f>;
defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x010>;
defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x011>;
defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x012>;
defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x013>;
defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x014>;
defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x015>;
defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x016>;
defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x020>;
defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x021>;
defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x022>;
defm V_RNDNE_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x023>;
defm V_FLOOR_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x024>;
defm V_EXP_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x025>;
defm V_LOG_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x027>;
defm V_RCP_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x02a>;
defm V_RCP_IFLAG_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x02b>;
defm V_RSQ_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x02e>;
defm V_RCP_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x02f>;
defm V_RSQ_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x031>;
defm V_SQRT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x033>;
defm V_SQRT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x034>;
defm V_SIN_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x035>;
defm V_COS_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x036>;
defm V_NOT_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x037>;
defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x038>;
defm V_FFBH_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x039>;
defm V_FFBL_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x03a>;
defm V_FFBH_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x03b>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03c>;
defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03d>;
defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03e>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x03f>;
defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x040>;
defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x042>;
defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x043>;
defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x044>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//

// DPP encoding of a VOP1 instruction: the src0 field (bits 8-0) holds the
// fixed value 0xfa, bits 16-9 hold the opcode, bits 24-17 hold vdst (or 0
// when the profile does not emit a destination), and bits 31-25 are 0x3f.
class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPPe <P> {
  bits<8> vdst;
  let Inst{8-0} = 0xfa; // dpp
  let Inst{16-9} = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; //encoding
}

// Real for a pseudo that is looked up as NAME itself (no "_e32" suffix),
// with predicate isGFX8GFX9 and decoder namespace "GFX8".
multiclass VOP1Only_Real_vi <bits<10> op> {
  let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
    def _vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
}

// e32 and e64 reals for VI. The e32 opcode is the low 8 bits of op; the
// VOP3 opcode of the e64 form is 0x140 + op.
multiclass VOP1_Real_e32e64_vi <bits<10> op> {
  let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
    def _e32_vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
    def _e64_vi :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
      VOP3e_vi <!add(0x140, op), !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
}

// All VI forms: e32/e64 always, and one real each for SDWA (_sdwa_vi),
// SDWA9 (_sdwa_gfx9) and DPP (_dpp_vi) when the e32 pseudo's profile has
// HasExtSDWA, HasExtSDWA9 and HasExtDPP respectively.
multiclass VOP1_Real_vi <bits<10> op> {
  defm NAME : VOP1_Real_e32e64_vi <op>;

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA then
  def _sdwa_vi :
    VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
  def _dpp_vi :
    VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
    VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}

// VI opcode table. These opcodes differ from the GFX6/GFX7/GFX10 table
// above for many instructions (e.g. V_FRACT_F32 is 0x1b here, 0x020 there).
defm V_NOP : VOP1_Real_vi <0x0>;
defm V_MOV_B32 : VOP1_Real_vi <0x1>;
defm V_CVT_I32_F64 : VOP1_Real_vi <0x3>;
defm V_CVT_F64_I32 : VOP1_Real_vi <0x4>;
defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>;
defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>;
defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>;
defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>;
defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>;
defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>;
defm V_CVT_FLR_I32_F32 : VOP1_Real_vi <0xd>;
defm V_CVT_OFF_F32_I4 : VOP1_Real_vi <0xe>;
defm V_CVT_F32_F64 : VOP1_Real_vi <0xf>;
defm V_CVT_F64_F32 : VOP1_Real_vi <0x10>;
defm V_CVT_F32_UBYTE0 : VOP1_Real_vi <0x11>;
defm V_CVT_F32_UBYTE1 : VOP1_Real_vi <0x12>;
defm V_CVT_F32_UBYTE2 : VOP1_Real_vi <0x13>;
defm V_CVT_F32_UBYTE3 : VOP1_Real_vi <0x14>;
defm V_CVT_U32_F64 : VOP1_Real_vi <0x15>;
defm V_CVT_F64_U32 : VOP1_Real_vi <0x16>;
defm V_FRACT_F32 : VOP1_Real_vi <0x1b>;
defm V_TRUNC_F32 : VOP1_Real_vi <0x1c>;
defm V_CEIL_F32 : VOP1_Real_vi <0x1d>;
defm V_RNDNE_F32 : VOP1_Real_vi <0x1e>;
defm V_FLOOR_F32 : VOP1_Real_vi <0x1f>;
defm V_EXP_F32 : VOP1_Real_vi <0x20>;
defm V_LOG_F32 : VOP1_Real_vi <0x21>;
defm V_RCP_F32 : VOP1_Real_vi <0x22>;
defm V_RCP_IFLAG_F32 : VOP1_Real_vi <0x23>;
defm V_RSQ_F32 : VOP1_Real_vi <0x24>;
defm V_RCP_F64 : VOP1_Real_vi <0x25>;
defm V_RSQ_F64 : VOP1_Real_vi <0x26>;
defm V_SQRT_F32 : VOP1_Real_vi <0x27>;
defm V_SQRT_F64 : VOP1_Real_vi <0x28>;
defm V_SIN_F32 : VOP1_Real_vi <0x29>;
defm V_COS_F32 : VOP1_Real_vi <0x2a>;
defm V_NOT_B32 : VOP1_Real_vi <0x2b>;
defm V_BFREV_B32 : VOP1_Real_vi <0x2c>;
defm V_FFBH_U32 : VOP1_Real_vi <0x2d>;
defm V_FFBL_B32 : VOP1_Real_vi <0x2e>;
defm V_FFBH_I32 : VOP1_Real_vi <0x2f>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>;
defm V_FREXP_MANT_F64 : VOP1_Real_vi <0x31>;
defm V_FRACT_F64 : VOP1_Real_vi <0x32>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>;
defm V_FREXP_MANT_F32 : VOP1_Real_vi <0x34>;
defm V_CLREXCP : VOP1_Real_vi <0x35>;
// The three MOVREL instructions get e32/e64 reals only (no SDWA/DPP reals).
defm V_MOVRELD_B32 : VOP1_Real_e32e64_vi <0x36>;
defm V_MOVRELS_B32 : VOP1_Real_e32e64_vi <0x37>;
defm V_MOVRELSD_B32 : VOP1_Real_e32e64_vi <0x38>;
defm V_TRUNC_F64 : VOP1_Real_vi <0x17>;
defm V_CEIL_F64 : VOP1_Real_vi <0x18>;
defm V_FLOOR_F64 : VOP1_Real_vi <0x1A>;
defm V_RNDNE_F64 : VOP1_Real_vi <0x19>;
defm V_LOG_LEGACY_F32 : VOP1_Real_vi <0x4c>;
defm V_EXP_LEGACY_F32 : VOP1_Real_vi <0x4b>;
defm V_CVT_F16_U16 : VOP1_Real_vi <0x39>;
defm V_CVT_F16_I16 : VOP1_Real_vi <0x3a>;
defm V_CVT_U16_F16 : VOP1_Real_vi <0x3b>;
defm V_CVT_I16_F16 : VOP1_Real_vi <0x3c>;
defm V_RCP_F16 : VOP1_Real_vi <0x3d>;
defm V_SQRT_F16 : VOP1_Real_vi <0x3e>;
defm V_RSQ_F16 : VOP1_Real_vi <0x3f>;
defm V_LOG_F16 : VOP1_Real_vi <0x40>;
defm V_EXP_F16 : VOP1_Real_vi <0x41>;
defm V_FREXP_MANT_F16 : VOP1_Real_vi <0x42>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>;
defm V_FLOOR_F16 : VOP1_Real_vi <0x44>;
defm V_CEIL_F16 : VOP1_Real_vi <0x45>;
defm V_TRUNC_F16 : VOP1_Real_vi <0x46>;
defm V_RNDNE_F16 : VOP1_Real_vi <0x47>;
defm V_FRACT_F16 : VOP1_Real_vi <0x48>;
defm V_SIN_F16 : VOP1_Real_vi <0x49>;
defm V_COS_F16 : VOP1_Real_vi <0x4a>;
defm V_SWAP_B32 : VOP1Only_Real_vi <0x51>;

defm V_SAT_PK_U8_I16 : VOP1_Real_vi<0x4f>;
defm V_CVT_NORM_I16_F16 : VOP1_Real_vi<0x4d>;
defm V_CVT_NORM_U16_F16 : VOP1_Real_vi<0x4e>;

defm V_ACCVGPR_MOV_B32 : VOP1Only_Real_vi<0x52>;

// Pseudos below are flagged VOP1, are available only on GFX8/GFX9, use EXEC
// and M0, and take their Size from V_MOV_B32_e32. Both expand to
// V_MOV_B32_e32_vi.
let VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0], Size = V_MOV_B32_e32.Size in {

// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
// indexing mode. vdst can't be treated as a def for codegen purposes,
// and an implicit use and def of the super register should be added.
def V_MOV_B32_indirect_write : VPseudoInstSI<(outs),
  (ins getVALUDstForVT<i32>.ret:$vdst, getVOPSrc0ForVT<i32, 0>.ret:$src0)>,
  PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                                        getVOPSrc0ForVT<i32, 0>.ret:$src0)>;

// Copy of v_mov_b32 for use with VGPR indexing mode. An implicit use of the
// super register should be added.
def V_MOV_B32_indirect_read : VPseudoInstSI<
  (outs getVALUDstForVT<i32>.ret:$vdst),
  (ins getVOPSrc0ForVT<i32, 0>.ret:$src0)>,
  PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                                        getVOPSrc0ForVT<i32, 0>.ret:$src0)>;

} // End VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0], Size = V_MOV_B32_e32.Size

let OtherPredicates = [isGFX8Plus] in {

// Selects int_amdgcn_mov_dpp to V_MOV_B32_dpp. $src is passed for both of
// the first two operands; in UpdateDPPPat below the first of these is $old.
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask,
                           timm:$bank_mask, timm:$bound_ctrl)),
  (V_MOV_B32_dpp VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp_ctrl),
                 (as_i32timm $row_mask), (as_i32timm $bank_mask),
                 (as_i1timm $bound_ctrl))
>;

// Selects int_amdgcn_update_dpp on value type vt to V_MOV_B32_dpp, with a
// separate $old operand. Instantiated below for i32, f32, v2i16 and v2f16.
class UpdateDPPPat<ValueType vt> : GCNPat <
  (vt (int_amdgcn_update_dpp vt:$old, vt:$src, timm:$dpp_ctrl,
                             timm:$row_mask, timm:$bank_mask,
                             timm:$bound_ctrl)),
  (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
                 (as_i32timm $row_mask), (as_i32timm $bank_mask),
                 (as_i1timm $bound_ctrl))
>;

def : UpdateDPPPat<i32>;
def : UpdateDPPPat<f32>;
def : UpdateDPPPat<v2i16>;
def : UpdateDPPPat<v2f16>;

} // End OtherPredicates = [isGFX8Plus]

let OtherPredicates = [isGFX8Plus] in {
// anyext i16 -> i32 is a plain COPY.
def : GCNPat<
  (i32 (anyext i16:$src)),
  (COPY $src)
>;

// anyext i16 -> i64: sub0 is a COPY of the source, sub1 is a V_MOV_B32_e32
// of the constant 0.
def : GCNPat<
  (i64 (anyext i16:$src)),
  (REG_SEQUENCE VReg_64,
    (i32 (COPY $src)), sub0,
    (V_MOV_B32_e32 (i32 0)), sub1)
>;

// trunc i32 -> i16 is a plain COPY.
def : GCNPat<
  (i16 (trunc i32:$src)),
  (COPY $src)
>;

// trunc i64 -> i16 extracts the sub0 subregister.
def : GCNPat <
  (i16 (trunc i64:$src)),
  (EXTRACT_SUBREG $src, sub0)
>;

} // End OtherPredicates = [isGFX8Plus]

//===----------------------------------------------------------------------===//
// GFX9
//===----------------------------------------------------------------------===//

// GFX9-only instruction: e32/e64 reals created through VOP1_Real_e32e64_vi
// inside an isGFX9Only / "GFX9" let, plus an SDWA9 real and a DPP real
// (GFX9 encoding family) when the profile has HasExtSDWA9 / HasExtDPP.
multiclass VOP1_Real_gfx9 <bits<10> op> {
  let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
    defm NAME : VOP1_Real_e32e64_vi <op>;
  }

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
  def _dpp_gfx9 :
    VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
    VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;

}

// Same as VOP1_Real_gfx9 except that the SDWA9 real forces encoding bits
// 42-40 to the constant 6.
// NOTE(review): going by the multiclass name, bits 42-40 are presumably the
// SDWA dst_sel field — confirm against the VOP_SDWA9Ae layout.
multiclass VOP1_Real_NoDstSel_SDWA_gfx9 <bits<10> op> {
  let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
    defm NAME : VOP1_Real_e32e64_vi <op>;
  }

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
      let Inst{42-40} = 6;
    }

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
  def _dpp_gfx9 :
    VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
    VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}

defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;

// V_MOV_B64 is defined under an isGFX940Plus assembler predicate.
let AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9" in
defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>;

// FP8/BF8 conversions, additionally gated on HasFP8Insts.
let OtherPredicates = [HasFP8Insts] in {
defm V_CVT_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x54>;
defm V_CVT_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>;
defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>;
defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
}

//===----------------------------------------------------------------------===//
// GFX10
//===----------------------------------------------------------------------===//

// Selects int_amdgcn_mov_dpp8 to the GFX10 DPP8 real of V_MOV_B32. $src is
// passed for both of the first two operands and the last operand is
// DPP8Mode.FI_0.
let OtherPredicates = [isGFX10Only] in {
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
  (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src,
                        (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX10Only]

//===----------------------------------------------------------------------===//
// GFX11
//===----------------------------------------------------------------------===//

// Same selection as the GFX10 pattern, targeting the GFX11 DPP8 real.
let OtherPredicates = [isGFX11Only] in {
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
  (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, VGPR_32:$src,
                        (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX11Only]

//===----------------------------------------------------------------------===//
// GFX12
//===----------------------------------------------------------------------===//

// Same selection as the GFX10 pattern, targeting the GFX12 DPP8 real.
let OtherPredicates = [isGFX12Only] in {
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
  (V_MOV_B32_dpp8_gfx12 VGPR_32:$src, VGPR_32:$src,
                        (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX12Only]