//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains DAG node definitions for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AMDGPU DAG Profiles
//
// In SDTypeProfile<NumResults, NumOperands, [...]> the constraint indices
// number the results first and the operands after them.
//===----------------------------------------------------------------------===//

// One integer result; operands 1 and 2 have the result type, operand 3 is an
// integer of any width.
def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
]>;

// FP result, first operand of the same type, second operand an integer.
def AMDGPUTrigPreOp : SDTypeProfile<1, 2,
  [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
>;

// FP result, first operand of the same type, second operand an integer.
def AMDGPULdExpOp : SDTypeProfile<1, 2,
  [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
>;

// Integer result, FP first operand, integer second operand.
def AMDGPUFPClassOp : SDTypeProfile<1, 2,
  [SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>]
>;

// Two FP operands of the same type; the result type is left unconstrained.
def AMDGPUFPPackOp : SDTypeProfile<1, 2,
  [SDTCisFP<1>, SDTCisSameAs<1, 2>]
>;

// Two integer operands of the same type; the result type is left
// unconstrained.
def AMDGPUIntPackOp : SDTypeProfile<1, 2,
  [SDTCisInt<1>, SDTCisSameAs<1, 2>]
>;

// Two results (FP, integer) and three operands that all have the type of the
// FP result.
def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
  [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
   SDTCisSameAs<0, 4>]
>;

// float, float, float, vcc
def AMDGPUFmasOp : SDTypeProfile<1, 4,
  [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
   SDTCisInt<4>]
>;

// No result, one integer operand.
def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;

// i1 result, i1 first operand, OtherVT second operand.
def AMDGPUIfOp : SDTypeProfile<1, 2,
  [SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
>;

// i1 result, i1 first operand, OtherVT second operand.
def AMDGPUElseOp : SDTypeProfile<1, 2,
  [SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
>;

// No result; i1 first operand, OtherVT second operand.
def AMDGPULoopOp : SDTypeProfile<0, 2,
  [SDTCisVT<0, i1>, SDTCisVT<1, OtherVT>]
>;

// i1 result and two i1 operands.
def AMDGPUIfBreakOp : SDTypeProfile<1, 2,
  [SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, i1>]
>;

//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//===----------------------------------------------------------------------===//

def AMDGPUif : SDNode<"AMDGPUISD::IF", AMDGPUIfOp, [SDNPHasChain]>;
def AMDGPUelse : SDNode<"AMDGPUISD::ELSE", AMDGPUElseOp, [SDNPHasChain]>;
def AMDGPUloop : SDNode<"AMDGPUISD::LOOP", AMDGPULoopOp, [SDNPHasChain]>;

def callseq_start : SDNode<"ISD::CALLSEQ_START",
  SDCallSeqStart<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>,
  [SDNPHasChain, SDNPOutGlue]
>;

def callseq_end : SDNode<"ISD::CALLSEQ_END",
  SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;

// Variadic; operand 0 is pointer-typed.
def AMDGPUcall : SDNode<"AMDGPUISD::CALL",
  SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
   SDNPVariadic]
>;

// Operand 0 is pointer-typed.
def AMDGPUtc_return: SDNode<"AMDGPUISD::TC_RETURN",
  SDTypeProfile<0, 3, [SDTCisPtrTy<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;

// Variadic; operand 0 is an i16.
def AMDGPUtrap : SDNode<"AMDGPUISD::TRAP",
  SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>,
  [SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPInGlue]
>;

// One result and one operand, both pointer-typed. Index 0 is the result and
// index 1 the operand; the operand constraint used to repeat index 0, which
// left the operand unconstrained.
def AMDGPUconstdata_ptr : SDNode<
  "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, iPTR>,
                                                     SDTCisVT<1, iPTR>]>
>;

// The argument to this node is a dword address.
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;

// Chain-only node used to force dependencies for vector trunc stores.
def R600dummy_chain : SDNode<
  "AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]
>;

def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;

// out = a - floor(a)
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;

// out = 1.0 / a
def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;

// out = 1.0 / sqrt(a)
def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;

// Legacy counterparts of the RCP and RSQ nodes.
// NOTE(review): the previous comment gave "out = 1.0 / sqrt(a)" for both,
// which can only describe the RSQ form - confirm the exact legacy semantics.
def AMDGPUrcp_legacy : SDNode<"AMDGPUISD::RCP_LEGACY", SDTFPUnaryOp>;
def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;

def AMDGPUrcp_iflag : SDNode<"AMDGPUISD::RCP_IFLAG", SDTFPUnaryOp>;

// out = 1.0 / sqrt(a), with the result clamped to +/- max_float.
def AMDGPUrsq_clamp : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;

def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;

// Pack/convert nodes: two same-typed inputs, one result.
def AMDGPUpkrtz_f16_f32 : SDNode<
  "AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp
>;
def AMDGPUpknorm_i16_f32 : SDNode<
  "AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp
>;
def AMDGPUpknorm_u16_f32 : SDNode<
  "AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp
>;
def AMDGPUpk_i16_i32 : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>;
def AMDGPUpk_u16_u32 : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>;
def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16", SDTFPToIntOp>;
def AMDGPUfp16_zext : SDNode<"AMDGPUISD::FP16_ZEXT", SDTFPToIntOp>;

def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>;

// out = max(a, b) a and b are floats, where a nan comparison fails.
// This is not commutative because this gives the second operand:
//   x < nan ? x : nan -> nan
//   nan < x ? nan : x -> x
def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp>;

def AMDGPUfmul_legacy : SDNode<
  "AMDGPUISD::FMUL_LEGACY", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]
>;

// out = min(a, b) a and b are floats, where a nan comparison fails.
def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp>;

// FIXME: TableGen doesn't like commutative instructions with more
// than 2 operands, so none of the three-input min/max nodes below carry
// SDNPCommutative or SDNPAssociative.

// out = max(a, b, c) a, b and c are floats
def AMDGPUfmax3 : SDNode<"AMDGPUISD::FMAX3", SDTFPTernaryOp>;

// out = max(a, b, c) a, b and c are signed ints
def AMDGPUsmax3 : SDNode<"AMDGPUISD::SMAX3", AMDGPUDTIntTernaryOp>;

// out = max(a, b, c) a, b and c are unsigned ints
def AMDGPUumax3 : SDNode<"AMDGPUISD::UMAX3", AMDGPUDTIntTernaryOp>;

// out = min(a, b, c) a, b and c are floats
def AMDGPUfmin3 : SDNode<"AMDGPUISD::FMIN3", SDTFPTernaryOp>;

// out = min(a, b, c) a, b and c are signed ints
def AMDGPUsmin3 : SDNode<"AMDGPUISD::SMIN3", AMDGPUDTIntTernaryOp>;

// out = min(a, b, c) a, b and c are unsigned ints
def AMDGPUumin3 : SDNode<"AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp>;

// out = (src0 + src1 > 0xFFFFFFFF) ? 1 : 0
def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp>;

// out = (src1 > src0) ? 1 : 0
def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp>;

// setcc: integer result, two operands of matching type, and an OtherVT
// third operand.
def AMDGPUSetCCOp : SDTypeProfile<1, 3,
  [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>]
>;

def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;

// No result, two integer operands.
def AMDGPUSetRegOp : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisInt<1>]>;

def AMDGPUsetreg : SDNode<
  "AMDGPUISD::SETREG", AMDGPUSetRegOp,
  [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]
>;

// Chained and glued forms of FMA and FMUL.
def AMDGPUfma : SDNode<
  "AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;

def AMDGPUmul : SDNode<
  "AMDGPUISD::FMUL_W_CHAIN", SDTFPBinOp,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;

def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0", SDTIntToFPOp>;
def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1", SDTIntToFPOp>;
def AMDGPUcvt_f32_ubyte2 : SDNode<"AMDGPUISD::CVT_F32_UBYTE2", SDTIntToFPOp>;
def AMDGPUcvt_f32_ubyte3 : SDNode<"AMDGPUISD::CVT_F32_UBYTE3", SDTIntToFPOp>;


// urecip - This operation is a helper for integer division, it returns the
// result of 1 / a as a fractional unsigned integer.
// out = (2^32 / a) + e
// e is rounding error
def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;

// Special case divide preop and flags.
def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;

// Special case divide FMA with scale and flags (src0 = Quotient,
// src1 = Denominator, src2 = Numerator).
def AMDGPUdiv_fmas : SDNode<
  "AMDGPUISD::DIV_FMAS", AMDGPUFmasOp, [SDNPOptInGlue]
>;

// Single or double precision division fixup.
// Special case divide fixup and flags (src0 = Quotient, src1 =
// Denominator, src2 = Numerator).
def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;

def AMDGPUfmad_ftz : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;

// Look Up 2.0 / pi src0 with segment select src1[4:0]
def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;

// One result; operand 1 is pointer-typed and operand 2 is an integer.
def AMDGPUregister_load : SDNode<
  "AMDGPUISD::REGISTER_LOAD",
  SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
  [SDNPHasChain, SDNPMayLoad]
>;

// No result; operand 1 is pointer-typed and operand 2 is an integer.
def AMDGPUregister_store : SDNode<
  "AMDGPUISD::REGISTER_STORE",
  SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
  [SDNPHasChain, SDNPMayStore]
>;

// MSKOR instructions are atomic memory instructions used mainly for storing
// 8-bit and 16-bit values.  The definition is:
//
// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src)
//
// src0: vec4(src, 0, 0, mask)
// src1: dst - rat offset (aka pointer) in dwords
def AMDGPUstore_mskor : SDNode<
  "AMDGPUISD::STORE_MSKOR",
  SDTypeProfile<0, 2, []>,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand]
>;

// One result; operand 1 is pointer-typed and operand 2 is a vector.
def AMDGPUatomic_cmp_swap : SDNode<
  "AMDGPUISD::ATOMIC_CMP_SWAP",
  SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisVec<2>]>,
  [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]
>;

// Target-local name for the generic ISD::FROUND node: FP result with an
// operand of the same type.
def AMDGPUround : SDNode<
  "ISD::FROUND",
  SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>
>;

// Bitfield nodes.
def AMDGPUbfe_u32 : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;

def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>;
def AMDGPUffbh_i32 : SDNode<"AMDGPUISD::FFBH_I32", SDTIntUnaryOp>;

def AMDGPUffbl_b32 : SDNode<"AMDGPUISD::FFBL_B32", SDTIntUnaryOp>;

// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored
// when performing the multiply. The result is a 32-bit value.
def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
>;
def AMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

def AMDGPUmulhi_u24 : SDNode<"AMDGPUISD::MULHI_U24", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
>;
def AMDGPUmulhi_i24 : SDNode<"AMDGPUISD::MULHI_I24", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

// Three-operand integer nodes; none of them carry node properties.
def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp,
  []
>;
def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp,
  []
>;

def AMDGPUsmed3 : SDNode<"AMDGPUISD::SMED3", AMDGPUDTIntTernaryOp,
  []
>;

def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp,
  []
>;

def AMDGPUfmed3 : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>;

// FP result with four operands: operands 1 and 2 are vectors of the same
// type, operand 3 has the result type, and operand 4 is an integer.
def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2",
                  SDTypeProfile<1, 4, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>,
                                       SDTCisFP<0>, SDTCisVec<1>,
                                       SDTCisInt<4>]>,
                  []>;

def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;

def AMDGPUinit_exec : SDNode<"AMDGPUISD::INIT_EXEC",
                      SDTypeProfile<0, 1, [SDTCisInt<0>]>,
                      [SDNPHasChain, SDNPInGlue]>;

def AMDGPUinit_exec_from_input : SDNode<"AMDGPUISD::INIT_EXEC_FROM_INPUT",
                                 SDTypeProfile<0, 2,
                                 [SDTCisInt<0>, SDTCisInt<1>]>,
                                 [SDNPHasChain, SDNPInGlue]>;

def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
                    SDTypeProfile<0, 1, [SDTCisInt<0>]>,
                    [SDNPHasChain, SDNPInGlue]>;

def AMDGPUsendmsghalt : SDNode<"AMDGPUISD::SENDMSGHALT",
                        SDTypeProfile<0, 1, [SDTCisInt<0>]>,
                        [SDNPHasChain, SDNPInGlue]>;

// Interpolation nodes: each produces one FP result; only the operand count
// and glue properties differ.
def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
                        SDTypeProfile<1, 3, [SDTCisFP<0>]>,
                        [SDNPInGlue]>;

def AMDGPUinterp_p1 : SDNode<"AMDGPUISD::INTERP_P1",
                      SDTypeProfile<1, 3, [SDTCisFP<0>]>,
                      [SDNPInGlue, SDNPOutGlue]>;

def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2",
                      SDTypeProfile<1, 4, [SDTCisFP<0>]>,
                      [SDNPInGlue]>;

def AMDGPUinterp_p1ll_f16 : SDNode<"AMDGPUISD::INTERP_P1LL_F16",
                            SDTypeProfile<1, 7, [SDTCisFP<0>]>,
                            [SDNPInGlue, SDNPOutGlue]>;

def AMDGPUinterp_p1lv_f16 : SDNode<"AMDGPUISD::INTERP_P1LV_F16",
                            SDTypeProfile<1, 9, [SDTCisFP<0>]>,
                            [SDNPInGlue, SDNPOutGlue]>;

def AMDGPUinterp_p2_f16 : SDNode<"AMDGPUISD::INTERP_P2_F16",
                          SDTypeProfile<1, 8, [SDTCisFP<0>]>,
                          [SDNPInGlue]>;

def AMDGPUkill : SDNode<"AMDGPUISD::KILL", AMDGPUKillSDT,
  [SDNPHasChain, SDNPSideEffect]>;

// SI+ export
// Eight operands, no result. The final constraint covers the vm operand at
// index 7; it used to repeat index 1 (en), leaving vm unconstrained.
def AMDGPUExportOp : SDTypeProfile<0, 8, [
  SDTCisInt<0>,       // i8 tgt
  SDTCisInt<1>,       // i8 en
                      // i32 or f32 src0 (operand 2, unconstrained)
  SDTCisSameAs<3, 2>, // src1, same type as src0
  SDTCisSameAs<4, 2>, // src2, same type as src0
  SDTCisSameAs<5, 2>, // src3, same type as src0
  SDTCisInt<6>,       // i1 compr
  // skip done
  SDTCisInt<7>        // i1 vm
]>;

def AMDGPUexport: SDNode<"AMDGPUISD::EXPORT", AMDGPUExportOp,
  [SDNPHasChain, SDNPMayStore]>;

// Same profile as AMDGPUexport; additionally marked SDNPMayLoad.
def AMDGPUexport_done: SDNode<"AMDGPUISD::EXPORT_DONE", AMDGPUExportOp,
  [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;


// Seven operands, no result; operand 0 is FP and operand 1 is an integer.
def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;

def R600_EXPORT: SDNode<"AMDGPUISD::R600_EXPORT", R600ExportOp,
  [SDNPHasChain, SDNPSideEffect]>;

//===----------------------------------------------------------------------===//
// Flow Control Profile Types
//===----------------------------------------------------------------------===//
// Conditional branch profile: two operands and no result; operand 0 has type
// OtherVT.
// NOTE(review): operand 0 is presumably the destination basic block - confirm
// against the lowering code.
def SDTIL_BRCond : SDTypeProfile<0, 2, [
    SDTCisVT<0, OtherVT>
    ]>;

//===----------------------------------------------------------------------===//
// Flow Control DAG Nodes
//===----------------------------------------------------------------------===//
def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;

//===----------------------------------------------------------------------===//
// Call/Return DAG Nodes
//===----------------------------------------------------------------------===//

// Chained node with no typed operands and an optional incoming glue.
def AMDGPUendpgm : SDNode<
  "AMDGPUISD::ENDPGM", SDTNone, [SDNPHasChain, SDNPOptInGlue]
>;

// Like AMDGPUendpgm, but variadic.
def AMDGPUreturn_to_epilog : SDNode<
  "AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;

// Variadic; operand 0 is pointer-typed.
def AMDGPUret_flag : SDNode<
  "AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;