//===-- VOP1Instructions.td - Vector Instruction Defintions ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP1 Classes
//===----------------------------------------------------------------------===//

// 32-bit VOP1 encoding: src0 in bits 8-0, opcode in 16-9, vdst in 24-17,
// fixed 0x3f marker in 31-25.
class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;

  let Inst{8-0}   = !if(P.HasSrc0, src0{8-0}, 0);
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; //encoding
}

// SDWA variant: src0 field carries the 0xf9 SDWA marker instead of a register.
class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// GFX9 SDWA variant; identical VOP1 word layout, different SDWA dword base.
class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// Generic VOP1 pseudo. VOP1Only = 1 suppresses the "_e32" suffix for
// instructions that have no VOP3 counterpart (e.g. v_swap_b32).
class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> :
  VOP_Pseudo <opName, !if(VOP1Only, "", "_e32"), P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let SubtargetPredicate = isGCN;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = [EXEC];

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

// Real (encoded) instruction tied to a pseudo for a given encoding family.
class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let AsmVariantName       = ps.AsmVariantName;
  let Constraints          = ps.Constraints;
  let DisableEncoding      = ps.DisableEncoding;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
}

class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP1";
}

// Selection pattern for the VOP3 (_e64) form: wrap the source in input
// modifiers, output-only modifiers, or nothing, per the profile's flags.
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret =
    !if(P.HasModifiers,
        [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
                                              i32:$src0_modifiers,
                                              i1:$clamp, i32:$omod))))],
        !if(P.HasOMod,
            [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
                                                  i1:$clamp, i32:$omod))))],
            [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]
        )
    );
}

// Standard trio of pseudos for a VOP1 instruction: e32, e64 and SDWA.
multiclass VOP1Inst <string opName, VOPProfile P,
                     SDPatternOperator node = null_frag> {
  def _e32  : VOP1_Pseudo <opName, P>;
  def _e64  : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
  def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
}

// Special profile for instructions which have clamp
// and output modifiers (but have no input modifiers)
class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
  let Asm64 = "$vdst, $src0$clamp$omod";

  let HasModifiers = 0;
  let HasClamp = 1;
  let HasOMod = 1;
}

def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;

//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//

let VOPAsmPrefer32Bit = 1 in {
defm V_NOP : VOP1Inst <"v_nop", VOP_NONE>;
}

let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
} // End isMoveImm = 1

// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: Make profile for this, there is VOP3 encoding also
def V_READFIRSTLANE_B32 :
  InstSI <(outs SReg_32:$vdst),
    (ins VGPR_32:$src0),
    "v_readfirstlane_b32 $vdst, $src0",
    [(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]>,
  Enc32 {

  let isCodeGenOnly = 0;
  let UseNamedOperandTable = 1;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let SubtargetPredicate = isGCN;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = [EXEC];
  let isConvergent = 1;

  bits<8> vdst;
  bits<9> src0;

  // Hand-rolled VOP1 encoding (opcode 0x2) since this writes an SGPR
  // destination and does not fit the common VOP1 profile machinery.
  let Inst{8-0} = src0;
  let Inst{16-9} = 0x2;
  let Inst{24-17} = vdst;
  let Inst{31-25} = 0x3f; //encoding
}

let SchedRW = [WriteQuarterRate32] in {
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
} // End SchedRW = [WriteQuarterRate32]

defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;

let SchedRW = [WriteQuarterRate32] in {
defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;
defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, fsqrt>;
} // End SchedRW = [WriteQuarterRate32]

// Merged from two adjacent identical SchedRW scopes.
let SchedRW = [WriteDouble] in {
defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, fsqrt>;
} // End SchedRW = [WriteDouble]

let SchedRW = [WriteQuarterRate32] in {
defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
} // End SchedRW = [WriteQuarterRate32]

defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32>;
defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32>;
defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>;
defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32>;

let SchedRW = [WriteDoubleAdd] in {
defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
} // End SchedRW = [WriteDoubleAdd]

defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>;

let VOPAsmPrefer32Bit = 1 in {
defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
}

// Restrict src0 to be VGPR
def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> {
  let Src0RC32 = VRegSrc_32;
  let Src0RC64 = VRegSrc_32;

  let HasExt = 0;
  let HasSDWA9 = 0;
}

// Special case because there are no true output operands.  Hack vdst
// to be a src operand. The custom inserter must add a tied implicit
// def and use of the super register since there seems to be no way to
// add an implicit def of a virtual register in tablegen.
// Profile for v_movreld_b32: the "destination" is really a tied source used
// as the indexing base, so Outs is empty and vdst appears in the ins lists.
def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
  let Src0RC32 = VOPDstOperand<VGPR_32>;
  let Src0RC64 = VOPDstOperand<VGPR_32>;

  let Outs = (outs);
  let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0);
  let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0);
  let InsDPP = (ins DstRC:$vdst, DstRC:$old, Src0RC32:$src0,
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);

  let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel);

  let Asm32 = getAsm32<1, 1>.ret;
  let Asm64 = getAsm64<1, 1, 0, 0, 1>.ret;
  let AsmDPP = getAsmDPP<1, 1, 0>.ret;
  let AsmSDWA = getAsmSDWA<1, 1>.ret;
  let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;

  let HasExt = 0;
  let HasSDWA9 = 0;
  let HasDst = 0;
  let EmitDst = 1; // force vdst emission
}

let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
// v_movreld_b32 is a special case because the destination output
// register is really a source. It isn't actually read (but may be
// written), and is only to provide the base register to start
// indexing from. Tablegen seems to not let you define an implicit
// virtual register output for the super register being written into,
// so this must have an implicit def of the register added to it.
defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_I32_VI32_NO_EXT>;
defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
} // End SubtargetPredicate = HasMovrel, Uses = [M0, EXEC]

defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;

// These instructions only exist on SI and CI
let SubtargetPredicate = isSICI in {

let SchedRW = [WriteQuarterRate32] in {
defm V_LOG_CLAMP_F32 : VOP1Inst <"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
defm V_RCP_CLAMP_F32 : VOP1Inst <"v_rcp_clamp_f32", VOP_F32_F32>;
defm V_RCP_LEGACY_F32 : VOP1Inst <"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
defm V_RSQ_CLAMP_F32 : VOP1Inst <"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
defm V_RSQ_LEGACY_F32 : VOP1Inst <"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>;
} // End SchedRW = [WriteQuarterRate32]

let SchedRW = [WriteDouble] in {
defm V_RCP_CLAMP_F64 : VOP1Inst <"v_rcp_clamp_f64", VOP_F64_F64>;
defm V_RSQ_CLAMP_F64 : VOP1Inst <"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
} // End SchedRW = [WriteDouble]

} // End SubtargetPredicate = isSICI


let SubtargetPredicate = isCIVI in {

let SchedRW = [WriteDoubleAdd] in {
defm V_TRUNC_F64 : VOP1Inst <"v_trunc_f64", VOP_F64_F64, ftrunc>;
defm V_CEIL_F64 : VOP1Inst <"v_ceil_f64", VOP_F64_F64, fceil>;
defm V_FLOOR_F64 : VOP1Inst <"v_floor_f64", VOP_F64_F64, ffloor>;
defm V_RNDNE_F64 : VOP1Inst <"v_rndne_f64", VOP_F64_F64, frint>;
} // End SchedRW = [WriteDoubleAdd]

let SchedRW = [WriteQuarterRate32] in {
defm V_LOG_LEGACY_F32 : VOP1Inst <"v_log_legacy_f32", VOP_F32_F32>;
defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>;
} // End SchedRW = [WriteQuarterRate32]

} // End SubtargetPredicate = isCIVI


let SubtargetPredicate = Has16BitInsts in {

defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
let SchedRW = [WriteQuarterRate32] in {
defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, fsqrt>;
defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>;
defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>;
defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
} // End SchedRW = [WriteQuarterRate32]
defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16, int_amdgcn_frexp_exp>;
defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>;
defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16, fceil>;
defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>;
defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16, frint>;
defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;

} // End SubtargetPredicate = Has16BitInsts

let OtherPredicates = [Has16BitInsts] in {

def : GCNPat<
    (f32 (f16_to_fp i16:$src)),
    (V_CVT_F32_F16_e32 $src)
>;

def : GCNPat<
    (i16 (AMDGPUfp_to_f16 f32:$src)),
    (V_CVT_F16_F32_e32 $src)
>;

} // End OtherPredicates = [Has16BitInsts]

// Two VGPR destinations / two VGPR sources; VOP3 form intentionally empty.
def VOP_SWAP_I32 : VOPProfile<[i32, i32, i32, untyped]> {
  let Outs32 = (outs VGPR_32:$vdst, VGPR_32:$vdst1);
  let Ins32 = (ins VGPR_32:$src0, VGPR_32:$src1);
  let Outs64 = Outs32;
  let Asm32 = " $vdst, $src0";
  let Asm64 = "";
  let Ins64 = (ins);
}

let SubtargetPredicate = isGFX9 in {
  let Constraints = "$vdst = $src1, $vdst1 = $src0",
      DisableEncoding="$vdst1,$src1",
      SchedRW = [Write64Bit, Write64Bit] in {
// Never VOP3. Takes as long as 2 v_mov_b32s
def V_SWAP_B32 : VOP1_Pseudo <"v_swap_b32", VOP_SWAP_I32, [], 1>;
}

defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;

defm V_SAT_PK_U8_I16    : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>;
defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>;
defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>;

} // End SubtargetPredicate = isGFX9

//===----------------------------------------------------------------------===//
// Target
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// SI
//===----------------------------------------------------------------------===//

// Real e32/e64 encodings for SI; the VOP3 opcode is the VOP1 opcode with the
// {1, 1} prefix bits prepended.
multiclass VOP1_Real_si <bits<9> op> {
  let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
    def _e32_si :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
    def _e64_si :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_si <{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
}

defm V_NOP               : VOP1_Real_si <0x0>;
defm V_MOV_B32           : VOP1_Real_si <0x1>;
defm V_CVT_I32_F64       : VOP1_Real_si <0x3>;
defm V_CVT_F64_I32       : VOP1_Real_si <0x4>;
defm V_CVT_F32_I32       : VOP1_Real_si <0x5>;
defm V_CVT_F32_U32       : VOP1_Real_si <0x6>;
defm V_CVT_U32_F32       : VOP1_Real_si <0x7>;
defm V_CVT_I32_F32       : VOP1_Real_si <0x8>;
defm V_MOV_FED_B32       : VOP1_Real_si <0x9>;
defm V_CVT_F16_F32       : VOP1_Real_si <0xa>;
defm V_CVT_F32_F16       : VOP1_Real_si <0xb>;
defm V_CVT_RPI_I32_F32   : VOP1_Real_si <0xc>;
defm V_CVT_FLR_I32_F32   : VOP1_Real_si <0xd>;
defm V_CVT_OFF_F32_I4    : VOP1_Real_si <0xe>;
defm V_CVT_F32_F64       : VOP1_Real_si <0xf>;
defm V_CVT_F64_F32       : VOP1_Real_si <0x10>;
defm V_CVT_F32_UBYTE0    : VOP1_Real_si <0x11>;
defm V_CVT_F32_UBYTE1    : VOP1_Real_si <0x12>;
defm V_CVT_F32_UBYTE2    : VOP1_Real_si <0x13>;
defm V_CVT_F32_UBYTE3    : VOP1_Real_si <0x14>;
defm V_CVT_U32_F64       : VOP1_Real_si <0x15>;
defm V_CVT_F64_U32       : VOP1_Real_si <0x16>;
defm V_FRACT_F32         : VOP1_Real_si <0x20>;
defm V_TRUNC_F32         : VOP1_Real_si <0x21>;
defm V_CEIL_F32          : VOP1_Real_si <0x22>;
defm V_RNDNE_F32         : VOP1_Real_si <0x23>;
defm V_FLOOR_F32         : VOP1_Real_si <0x24>;
defm V_EXP_F32           : VOP1_Real_si <0x25>;
defm V_LOG_CLAMP_F32     : VOP1_Real_si <0x26>;
defm V_LOG_F32           : VOP1_Real_si <0x27>;
defm V_RCP_CLAMP_F32     : VOP1_Real_si <0x28>;
defm V_RCP_LEGACY_F32    : VOP1_Real_si <0x29>;
defm V_RCP_F32           : VOP1_Real_si <0x2a>;
defm V_RCP_IFLAG_F32     : VOP1_Real_si <0x2b>;
defm V_RSQ_CLAMP_F32     : VOP1_Real_si <0x2c>;
defm V_RSQ_LEGACY_F32    : VOP1_Real_si <0x2d>;
defm V_RSQ_F32           : VOP1_Real_si <0x2e>;
defm V_RCP_F64           : VOP1_Real_si <0x2f>;
defm V_RCP_CLAMP_F64     : VOP1_Real_si <0x30>;
defm V_RSQ_F64           : VOP1_Real_si <0x31>;
defm V_RSQ_CLAMP_F64     : VOP1_Real_si <0x32>;
defm V_SQRT_F32          : VOP1_Real_si <0x33>;
defm V_SQRT_F64          : VOP1_Real_si <0x34>;
defm V_SIN_F32           : VOP1_Real_si <0x35>;
defm V_COS_F32           : VOP1_Real_si <0x36>;
defm V_NOT_B32           : VOP1_Real_si <0x37>;
defm V_BFREV_B32         : VOP1_Real_si <0x38>;
defm V_FFBH_U32          : VOP1_Real_si <0x39>;
defm V_FFBL_B32          : VOP1_Real_si <0x3a>;
defm V_FFBH_I32          : VOP1_Real_si <0x3b>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_si <0x3c>;
defm V_FREXP_MANT_F64    : VOP1_Real_si <0x3d>;
defm V_FRACT_F64         : VOP1_Real_si <0x3e>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_si <0x3f>;
defm V_FREXP_MANT_F32    : VOP1_Real_si <0x40>;
defm V_CLREXCP           : VOP1_Real_si <0x41>;
defm V_MOVRELD_B32       : VOP1_Real_si <0x42>;
defm V_MOVRELS_B32       : VOP1_Real_si <0x43>;
defm V_MOVRELSD_B32      : VOP1_Real_si <0x44>;

//===----------------------------------------------------------------------===//
// CI
//===----------------------------------------------------------------------===//

// CI-only instructions reuse the SI encoding family.
multiclass VOP1_Real_ci <bits<9> op> {
  let AssemblerPredicates = [isCIOnly], DecoderNamespace = "CI" in {
    def _e32_ci :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
    def _e64_ci :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_si <{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
}

defm V_TRUNC_F64      : VOP1_Real_ci <0x17>;
defm V_CEIL_F64       : VOP1_Real_ci <0x18>;
defm V_FLOOR_F64      : VOP1_Real_ci <0x1A>;
defm V_RNDNE_F64      : VOP1_Real_ci <0x19>;
defm V_LOG_LEGACY_F32 : VOP1_Real_ci <0x45>;
defm V_EXP_LEGACY_F32 : VOP1_Real_ci <0x46>;

//===----------------------------------------------------------------------===//
// VI
//===----------------------------------------------------------------------===//

// DPP variant: src0 field carries the 0xfa DPP marker instead of a register.
class VOP1_DPP <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPP <ps.OpName, P> {
  let Defs = ps.Defs;
  let Uses = ps.Uses;
  let SchedRW = ps.SchedRW;
  let hasSideEffects = ps.hasSideEffects;

  bits<8> vdst;
  let Inst{8-0}   = 0xfa; // dpp
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; //encoding
}

// For pseudos defined with VOP1Only = 1 (no _e32/_e64/_sdwa siblings).
multiclass VOP1Only_Real_vi <bits<10> op> {
  let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
    def _vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
}

// e32/e64 only (no SDWA/DPP); VI VOP3 opcodes are the VOP1 opcode + 0x140.
multiclass VOP1_Real_e32e64_vi <bits<10> op> {
  let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
    def _e32_vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
    def _e64_vi :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
      VOP3e_vi <!add(0x140, op), !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
}

multiclass VOP1_Real_vi <bits<10> op> {
  defm NAME : VOP1_Real_e32e64_vi <op>;

  def _sdwa_vi :
    VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  // For now left dpp only for asm/dasm
  // TODO: add corresponding pseudo
  def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
}

defm V_NOP               : VOP1_Real_vi <0x0>;
defm V_MOV_B32           : VOP1_Real_vi <0x1>;
defm V_CVT_I32_F64       : VOP1_Real_vi <0x3>;
defm V_CVT_F64_I32       : VOP1_Real_vi <0x4>;
defm V_CVT_F32_I32       : VOP1_Real_vi <0x5>;
defm V_CVT_F32_U32       : VOP1_Real_vi <0x6>;
defm V_CVT_U32_F32       : VOP1_Real_vi <0x7>;
defm V_CVT_I32_F32       : VOP1_Real_vi <0x8>;
defm V_MOV_FED_B32       : VOP1_Real_vi <0x9>;
defm V_CVT_F16_F32       : VOP1_Real_vi <0xa>;
defm V_CVT_F32_F16       : VOP1_Real_vi <0xb>;
defm V_CVT_RPI_I32_F32   : VOP1_Real_vi <0xc>;
defm V_CVT_FLR_I32_F32   : VOP1_Real_vi <0xd>;
defm V_CVT_OFF_F32_I4    : VOP1_Real_vi <0xe>;
defm V_CVT_F32_F64       : VOP1_Real_vi <0xf>;
defm V_CVT_F64_F32       : VOP1_Real_vi <0x10>;
defm V_CVT_F32_UBYTE0    : VOP1_Real_vi <0x11>;
defm V_CVT_F32_UBYTE1    : VOP1_Real_vi <0x12>;
defm V_CVT_F32_UBYTE2    : VOP1_Real_vi <0x13>;
defm V_CVT_F32_UBYTE3    : VOP1_Real_vi <0x14>;
defm V_CVT_U32_F64       : VOP1_Real_vi <0x15>;
defm V_CVT_F64_U32       : VOP1_Real_vi <0x16>;
defm V_FRACT_F32         : VOP1_Real_vi <0x1b>;
defm V_TRUNC_F32         : VOP1_Real_vi <0x1c>;
defm V_CEIL_F32          : VOP1_Real_vi <0x1d>;
defm V_RNDNE_F32         : VOP1_Real_vi <0x1e>;
defm V_FLOOR_F32         : VOP1_Real_vi <0x1f>;
defm V_EXP_F32           : VOP1_Real_vi <0x20>;
defm V_LOG_F32           : VOP1_Real_vi <0x21>;
defm V_RCP_F32           : VOP1_Real_vi <0x22>;
defm V_RCP_IFLAG_F32     : VOP1_Real_vi <0x23>;
defm V_RSQ_F32           : VOP1_Real_vi <0x24>;
defm V_RCP_F64           : VOP1_Real_vi <0x25>;
defm V_RSQ_F64           : VOP1_Real_vi <0x26>;
defm V_SQRT_F32          : VOP1_Real_vi <0x27>;
defm V_SQRT_F64          : VOP1_Real_vi <0x28>;
defm V_SIN_F32           : VOP1_Real_vi <0x29>;
defm V_COS_F32           : VOP1_Real_vi <0x2a>;
defm V_NOT_B32           : VOP1_Real_vi <0x2b>;
defm V_BFREV_B32         : VOP1_Real_vi <0x2c>;
defm V_FFBH_U32          : VOP1_Real_vi <0x2d>;
defm V_FFBL_B32          : VOP1_Real_vi <0x2e>;
defm V_FFBH_I32          : VOP1_Real_vi <0x2f>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>;
defm V_FREXP_MANT_F64    : VOP1_Real_vi <0x31>;
defm V_FRACT_F64         : VOP1_Real_vi <0x32>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>;
defm V_FREXP_MANT_F32    : VOP1_Real_vi <0x34>;
defm V_CLREXCP           : VOP1_Real_vi <0x35>;
// movrel instructions have no SDWA/DPP forms on VI: e32/e64 only.
defm V_MOVRELD_B32       : VOP1_Real_e32e64_vi <0x36>;
defm V_MOVRELS_B32       : VOP1_Real_e32e64_vi <0x37>;
defm V_MOVRELSD_B32      : VOP1_Real_e32e64_vi <0x38>;
defm V_TRUNC_F64         : VOP1_Real_vi <0x17>;
defm V_CEIL_F64          : VOP1_Real_vi <0x18>;
defm V_FLOOR_F64         : VOP1_Real_vi <0x1A>;
defm V_RNDNE_F64         : VOP1_Real_vi <0x19>;
defm V_LOG_LEGACY_F32    : VOP1_Real_vi <0x4c>;
defm V_EXP_LEGACY_F32    : VOP1_Real_vi <0x4b>;
defm V_CVT_F16_U16       : VOP1_Real_vi <0x39>;
defm V_CVT_F16_I16       : VOP1_Real_vi <0x3a>;
defm V_CVT_U16_F16       : VOP1_Real_vi <0x3b>;
defm V_CVT_I16_F16       : VOP1_Real_vi <0x3c>;
defm V_RCP_F16           : VOP1_Real_vi <0x3d>;
defm V_SQRT_F16          : VOP1_Real_vi <0x3e>;
defm V_RSQ_F16           : VOP1_Real_vi <0x3f>;
defm V_LOG_F16           : VOP1_Real_vi <0x40>;
defm V_EXP_F16           : VOP1_Real_vi <0x41>;
defm V_FREXP_MANT_F16    : VOP1_Real_vi <0x42>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>;
defm V_FLOOR_F16         : VOP1_Real_vi <0x44>;
defm V_CEIL_F16          : VOP1_Real_vi <0x45>;
defm V_TRUNC_F16         : VOP1_Real_vi <0x46>;
defm V_RNDNE_F16         : VOP1_Real_vi <0x47>;
defm V_FRACT_F16         : VOP1_Real_vi <0x48>;
defm V_SIN_F16           : VOP1_Real_vi <0x49>;
defm V_COS_F16           : VOP1_Real_vi <0x4a>;
defm V_SWAP_B32          : VOP1Only_Real_vi <0x51>;

defm V_SAT_PK_U8_I16     : VOP1_Real_vi<0x4f>;
defm V_CVT_NORM_I16_F16  : VOP1_Real_vi<0x4d>;
defm V_CVT_NORM_U16_F16  : VOP1_Real_vi<0x4e>;

// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
// indexing mode. vdst can't be treated as a def for codegen purposes,
// and an implicit use and def of the super register should be added.
def V_MOV_B32_indirect : VPseudoInstSI<(outs),
  (ins getVALUDstForVT<i32>.ret:$vdst, getVOPSrc0ForVT<i32>.ret:$src0)>,
  PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                       getVOPSrc0ForVT<i32>.ret:$src0)> {
  let VOP1 = 1;
  let SubtargetPredicate = isVI;
}

// This is a pseudo variant of the v_movreld_b32 instruction in which the
// vector operand appears only twice, once as def and once as use. Using this
// pseudo avoids problems with the Two Address instructions pass.
// Pseudo movreld with a tied def/use of the full vector register and an
// immediate subregister offset; expanded by a custom inserter.
class V_MOVRELD_B32_pseudo<RegisterClass rc> : VPseudoInstSI <
  (outs rc:$vdst),
  (ins rc:$vsrc, VSrc_b32:$val, i32imm:$offset)> {
  let VOP1 = 1;

  let Constraints = "$vsrc = $vdst";
  let Uses = [M0, EXEC];

  let SubtargetPredicate = HasMovrel;
}

// One variant per vector width (1/2/4/8/16 x 32-bit).
def V_MOVRELD_B32_V1  : V_MOVRELD_B32_pseudo<VGPR_32>;
def V_MOVRELD_B32_V2  : V_MOVRELD_B32_pseudo<VReg_64>;
def V_MOVRELD_B32_V4  : V_MOVRELD_B32_pseudo<VReg_128>;
def V_MOVRELD_B32_V8  : V_MOVRELD_B32_pseudo<VReg_256>;
def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;

let OtherPredicates = [isVI] in {

// mov_dpp: old and src are the same register.
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
                           imm:$bound_ctrl)),
  (V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl),
                 (as_i32imm $row_mask), (as_i32imm $bank_mask),
                 (as_i1imm $bound_ctrl))
>;

// update_dpp: distinct old and src operands.
def : GCNPat <
  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
                              imm:$bank_mask, imm:$bound_ctrl)),
  (V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl),
                 (as_i32imm $row_mask), (as_i32imm $bank_mask),
                 (as_i1imm $bound_ctrl))
>;

// i16 <-> i32 extensions/truncations are register-class-only operations.
def : GCNPat<
  (i32 (anyext i16:$src)),
  (COPY $src)
>;

def : GCNPat<
  (i64 (anyext i16:$src)),
  (REG_SEQUENCE VReg_64,
                (i32 (COPY $src)), sub0,
                (V_MOV_B32_e32 (i32 0)), sub1)
>;

def : GCNPat<
  (i16 (trunc i32:$src)),
  (COPY $src)
>;

def : GCNPat <
  (i16 (trunc i64:$src)),
  (EXTRACT_SUBREG $src, sub0)
>;

} // End OtherPredicates = [isVI]

//===----------------------------------------------------------------------===//
// GFX9
//===----------------------------------------------------------------------===//

multiclass VOP1_Real_gfx9 <bits<10> op> {
  let AssemblerPredicates = [isGFX9], DecoderNamespace = "GFX9" in {
    defm NAME : VOP1_Real_e32e64_vi <op>;
  }

  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  // For now left dpp only for asm/dasm
  // TODO: add corresponding pseudo
  def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
}

defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;