//===- IntrinsicsAMDGPU.td - Defines AMDGPU intrinsics -----*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the R600-specific intrinsics.
//
//===----------------------------------------------------------------------===//

// i32 read of a preload (ABI special) register; no memory effects.
class AMDGPUReadPreloadRegisterIntrinsic
  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;

// Same as above, but also exposed as a named clang builtin.
class AMDGPUReadPreloadRegisterIntrinsicNamed<string name>
  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<name>;

// Used to tag image and resource intrinsics with information used to generate
// mem operands.
class AMDGPURsrcIntrinsic<int rsrcarg, bit isimage = 0> {
  int RsrcArg = rsrcarg;
  bit IsImage = isimage;
}

let TargetPrefix = "r600" in {

multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz {
  def _x : AMDGPUReadPreloadRegisterIntrinsic;
  def _y : AMDGPUReadPreloadRegisterIntrinsic;
  def _z : AMDGPUReadPreloadRegisterIntrinsic;
}

multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz_named<string prefix> {
  def _x : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_x")>;
  def _y : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_y")>;
  def _z : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_z")>;
}

defm int_r600_read_global_size : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                                 <"__builtin_r600_read_global_size">;
defm int_r600_read_ngroups : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                             <"__builtin_r600_read_ngroups">;
defm int_r600_read_tgid : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                          <"__builtin_r600_read_tgid">;

defm int_r600_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz;

def int_r600_group_barrier : GCCBuiltin<"__builtin_r600_group_barrier">,
  Intrinsic<[], [], [IntrConvergent]>;

// AS 7 is PARAM_I_ADDRESS, used for kernel arguments
def int_r600_implicitarg_ptr :
  GCCBuiltin<"__builtin_r600_implicitarg_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [],
            [IntrNoMem, IntrSpeculatable]>;

def int_r600_rat_store_typed :
  // 1st parameter: Data
  // 2nd parameter: Index
  // 3rd parameter: Constant RAT ID
  Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], []>,
  GCCBuiltin<"__builtin_r600_rat_store_typed">;

def int_r600_recipsqrt_ieee :  Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_r600_recipsqrt_clamped : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_r600_cube : Intrinsic<
  [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable]
>;

def int_r600_store_stream_output : Intrinsic<
  [], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []
>;

class TextureIntrinsicFloatInput : Intrinsic<[llvm_v4f32_ty], [
  llvm_v4f32_ty, // Coord
  llvm_i32_ty,   // offset_x
  llvm_i32_ty,   // offset_y,
  llvm_i32_ty,   // offset_z,
  llvm_i32_ty,   // resource_id
  llvm_i32_ty,   // samplerid
  llvm_i32_ty,   // coord_type_x
  llvm_i32_ty,   // coord_type_y
  llvm_i32_ty,   // coord_type_z
  llvm_i32_ty],  // coord_type_w
  [IntrNoMem]
>;

class TextureIntrinsicInt32Input : Intrinsic<[llvm_v4i32_ty], [
  llvm_v4i32_ty, // Coord
  llvm_i32_ty,   // offset_x
  llvm_i32_ty,   // offset_y,
  llvm_i32_ty,   // offset_z,
  llvm_i32_ty,   // resource_id
  llvm_i32_ty,   // samplerid
  llvm_i32_ty,   // coord_type_x
  llvm_i32_ty,   // coord_type_y
  llvm_i32_ty,   // coord_type_z
  llvm_i32_ty],  // coord_type_w
  [IntrNoMem]
>;

def int_r600_store_swizzle :
  Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []
>;

def int_r600_tex : TextureIntrinsicFloatInput;
def int_r600_texc : TextureIntrinsicFloatInput;
def int_r600_txl : TextureIntrinsicFloatInput;
def int_r600_txlc : TextureIntrinsicFloatInput;
def int_r600_txb : TextureIntrinsicFloatInput;
def int_r600_txbc : TextureIntrinsicFloatInput;
def int_r600_txf : TextureIntrinsicInt32Input;
def int_r600_txq : TextureIntrinsicInt32Input;
def int_r600_ddx : TextureIntrinsicFloatInput;
def int_r600_ddy : TextureIntrinsicFloatInput;

def int_r600_dot4 : Intrinsic<[llvm_float_ty],
  [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable]
>;

def int_r600_kill : Intrinsic<[], [llvm_float_ty], []>;

} // End TargetPrefix = "r600"

let TargetPrefix = "amdgcn" in {

//===----------------------------------------------------------------------===//
// ABI Special Intrinsics
//===----------------------------------------------------------------------===//

defm int_amdgcn_workitem_id : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                               <"__builtin_amdgcn_workgroup_id">;

def int_amdgcn_dispatch_ptr :
  GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
            [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_queue_ptr :
  GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
            [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_kernarg_segment_ptr :
  GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
            [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_implicitarg_ptr :
  GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
            [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_groupstaticsize :
  GCCBuiltin<"__builtin_amdgcn_groupstaticsize">,
  Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_dispatch_id :
  GCCBuiltin<"__builtin_amdgcn_dispatch_id">,
  Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_implicit_buffer_ptr :
  GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
            [IntrNoMem, IntrSpeculatable]>;

// Set EXEC to the 64-bit value given.
// This is always moved to the beginning of the basic block.
// FIXME: Should be mangled for wave size.
def int_amdgcn_init_exec : Intrinsic<[],
  [llvm_i64_ty],      // 64-bit literal constant
  [IntrConvergent, ImmArg<0>]>;

// Set EXEC according to a thread count packed in an SGPR input:
//    thread_count = (input >> bitoffset) & 0x7f;
// This is always moved to the beginning of the basic block.
def int_amdgcn_init_exec_from_input : Intrinsic<[],
  [llvm_i32_ty,       // 32-bit SGPR input
   llvm_i32_ty],      // bit offset of the thread count
  [IntrConvergent, ImmArg<1>]>;

def int_amdgcn_wavefrontsize :
  GCCBuiltin<"__builtin_amdgcn_wavefrontsize">,
  Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;


//===----------------------------------------------------------------------===//
// Instruction Intrinsics
//===----------------------------------------------------------------------===//

// The first parameter is s_sendmsg immediate (i16),
// the second one is copied to m0
def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty],
  [ImmArg<0>, IntrNoMem, IntrHasSideEffects]>;
def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty],
  [ImmArg<0>, IntrNoMem, IntrHasSideEffects]>;

def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent]>;

def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
  Intrinsic<[], [], [IntrConvergent]>;

def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
  Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>;

def int_amdgcn_div_scale : Intrinsic<
  // 1st parameter: Numerator
  // 2nd parameter: Denominator
  // 3rd parameter: Constant to select between first and
  //                second. (0 = first, 1 = second).
  [llvm_anyfloat_ty, llvm_i1_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
  [IntrNoMem, IntrSpeculatable, ImmArg<2>]
>;

def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_div_fixup : Intrinsic<[llvm_anyfloat_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_trig_preop : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_sin : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cos : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_log_clamp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">,
  Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_rcp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_rcp_legacy : GCCBuiltin<"__builtin_amdgcn_rcp_legacy">,
  Intrinsic<[llvm_float_ty], [llvm_float_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_rsq :  Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_rsq_legacy :  GCCBuiltin<"__builtin_amdgcn_rsq_legacy">,
  Intrinsic<
  [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_rsq_clamp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_ldexp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_frexp_mant : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_frexp_exp : Intrinsic<
  [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable]
>;

// v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0
// and always uses rtz, so is not suitable for implementing the OpenCL
// fract function. It should be ok on VI.
def int_amdgcn_fract : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pkrtz : GCCBuiltin<"__builtin_amdgcn_cvt_pkrtz">,
  Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
            [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pknorm_i16 :
  GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">,
  Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
            [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pknorm_u16 :
  GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">,
  Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
            [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pk_i16 :
    GCCBuiltin<"__builtin_amdgcn_cvt_pk_i16">,
    Intrinsic<
  [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pk_u16 : GCCBuiltin<"__builtin_amdgcn_cvt_pk_u16">,
  Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
    [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_class : Intrinsic<
  [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_fmed3 : GCCBuiltin<"__builtin_amdgcn_fmed3">,
  Intrinsic<[llvm_anyfloat_ty],
    [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
    [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cubeid : GCCBuiltin<"__builtin_amdgcn_cubeid">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cubema : GCCBuiltin<"__builtin_amdgcn_cubema">,
  Intrinsic<[llvm_float_ty],
  [llvm_float_ty, llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cubesc : GCCBuiltin<"__builtin_amdgcn_cubesc">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable]
>;

// v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz
// should be used.
def int_amdgcn_sffbh :
  Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>],
  [IntrNoMem, IntrSpeculatable]
>;

// v_mad_f32|f16/v_mac_f32|f16, selected regardless of denorm support.
def int_amdgcn_fmad_ftz :
  Intrinsic<[llvm_anyfloat_ty],
            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
            [IntrNoMem, IntrSpeculatable]
>;

// Fields should mirror atomicrmw
class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
  [llvm_anyptr_ty,
  LLVMMatchType<0>,
  llvm_i32_ty, // ordering
  llvm_i32_ty, // scope
  llvm_i1_ty], // isVolatile
  [IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>], "",
  [SDNPMemOperand]
>;

def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;

// LDS float atomic (ds_fadd/ds_fmin/ds_fmax style) exposed as a clang builtin.
class AMDGPULDSF32Intrin<string clang_builtin> :
  GCCBuiltin<clang_builtin>,
  Intrinsic<[llvm_float_ty],
    [LLVMQualPointerType<llvm_float_ty, 3>,
    llvm_float_ty,
    llvm_i32_ty, // ordering
    llvm_i32_ty, // scope
    llvm_i1_ty], // isVolatile
    [IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>]
>;

// FIXME: The m0 argument should be moved after the normal arguments
class AMDGPUDSOrderedIntrinsic : Intrinsic<
  [llvm_i32_ty],
  // M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that
  // the bit packing can be optimized at the IR level.
  [LLVMQualPointerType<llvm_i32_ty, 2>, // IntToPtr(M0)
   llvm_i32_ty, // value to add or swap
   llvm_i32_ty, // ordering
   llvm_i32_ty, // scope
   llvm_i1_ty,  // isVolatile
   llvm_i32_ty, // ordered count index (OA index), also added to the address
                // gfx10: bits 24-27 indicate the number of active threads/dwords
   llvm_i1_ty,  // wave release, usually set to 1
   llvm_i1_ty], // wave done, set to 1 for the last ordered instruction
  [NoCapture<0>,
   ImmArg<2>, ImmArg<3>, ImmArg<4>,
   ImmArg<5>, ImmArg<6>, ImmArg<7>
  ]
>;

class AMDGPUDSAppendConsumedIntrinsic : Intrinsic<
  [llvm_i32_ty],
  [llvm_anyptr_ty, // LDS or GDS ptr
   llvm_i1_ty], // isVolatile
   [IntrConvergent, IntrArgMemOnly, NoCapture<0>, ImmArg<1>],
   "",
   [SDNPMemOperand]
>;

def int_amdgcn_ds_ordered_add : AMDGPUDSOrderedIntrinsic;
def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;

// The pointer argument is assumed to be dynamically uniform if a VGPR.
def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;

def int_amdgcn_ds_fadd : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_faddf">;
def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fminf">;
def int_amdgcn_ds_fmax : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmaxf">;

} // TargetPrefix = "amdgcn"

// New-style image intrinsics

//////////////////////////////////////////////////////////////////////////
// Dimension-aware image intrinsics framework
//////////////////////////////////////////////////////////////////////////

// Helper class to represent (type, name) combinations of arguments. The
// argument names are explanatory and used as DAG operand names for codegen
// pattern matching.
class AMDGPUArg<LLVMType ty, string name> {
  LLVMType Type = ty;
  string Name = name;
}

// Return [AMDGPUArg<basety, names[0]>, AMDGPUArg<LLVMMatchType<0>, names[1]>, ...]
class makeArgList<list<string> names, LLVMType basety> {
  list<AMDGPUArg> ret =
    !listconcat([AMDGPUArg<basety, names[0]>],
                !foreach(name, !tail(names), AMDGPUArg<LLVMMatchType<0>, name>));
}

// Return arglist, with LLVMMatchType's references shifted by 'shift'.
class arglistmatchshift<list<AMDGPUArg> arglist, int shift> {
  list<AMDGPUArg> ret =
    !foreach(arg, arglist,
             !if(!isa<LLVMMatchType>(arg.Type),
                 AMDGPUArg<LLVMMatchType<!add(!cast<LLVMMatchType>(arg.Type).Number, shift)>,
                           arg.Name>,
                 arg));
}

// Return the concatenation of the given arglists. LLVMMatchType's are adjusted
// accordingly, and shifted by an additional 'shift'.
class arglistconcat<list<list<AMDGPUArg>> arglists, int shift = 0> {
  list<AMDGPUArg> ret =
    !foldl([]<AMDGPUArg>, arglists, lhs, rhs,
           !listconcat(
             lhs,
             arglistmatchshift<rhs,
               !add(shift, !foldl(0, lhs, a, b,
                                  !add(a, b.Type.isAny)))>.ret));
}

// Represent texture/image types / dimensionality.
class AMDGPUDimProps<bits<3> enc, string name, string asmsuffix,
                     list<string> coord_names, list<string> slice_names> {
  AMDGPUDimProps Dim = !cast<AMDGPUDimProps>(NAME);
  string Name = name; // e.g. "2darraymsaa"
  string AsmSuffix = asmsuffix; // e.g. 2D_MSAA_ARRAY (used in assembly strings)
  bits<3> Encoding = enc;
  bit DA = 0; // DA bit in MIMG encoding

  list<AMDGPUArg> CoordSliceArgs =
    makeArgList<!listconcat(coord_names, slice_names), llvm_anyfloat_ty>.ret;
  list<AMDGPUArg> CoordSliceIntArgs =
    makeArgList<!listconcat(coord_names, slice_names), llvm_anyint_ty>.ret;
  list<AMDGPUArg> GradientArgs =
    makeArgList<!listconcat(!foreach(name, coord_names, "d" # name # "dh"),
                            !foreach(name, coord_names, "d" # name # "dv")),
                llvm_anyfloat_ty>.ret;

  bits<8> NumCoords = !size(CoordSliceArgs);
  bits<8> NumGradients = !size(GradientArgs);
}

def AMDGPUDim1D : AMDGPUDimProps<0x0, "1d", "1D", ["s"], []>;
def AMDGPUDim2D : AMDGPUDimProps<0x1, "2d", "2D", ["s", "t"], []>;
def AMDGPUDim3D : AMDGPUDimProps<0x2, "3d", "3D", ["s", "t", "r"], []>;
let DA = 1 in {
  def AMDGPUDimCube : AMDGPUDimProps<0x3, "cube", "CUBE", ["s", "t"], ["face"]>;
  def AMDGPUDim1DArray : AMDGPUDimProps<0x4, "1darray", "1D_ARRAY", ["s"], ["slice"]>;
  def AMDGPUDim2DArray : AMDGPUDimProps<0x5, "2darray", "2D_ARRAY", ["s", "t"], ["slice"]>;
}
def AMDGPUDim2DMsaa : AMDGPUDimProps<0x6, "2dmsaa", "2D_MSAA", ["s", "t"], ["fragid"]>;
let DA = 1 in {
  def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<0x7, "2darraymsaa", "2D_MSAA_ARRAY", ["s", "t"], ["slice", "fragid"]>;
}

def AMDGPUDims {
  list<AMDGPUDimProps> NoMsaa = [AMDGPUDim1D, AMDGPUDim2D, AMDGPUDim3D,
                                 AMDGPUDimCube, AMDGPUDim1DArray,
                                 AMDGPUDim2DArray];
  list<AMDGPUDimProps> Msaa = [AMDGPUDim2DMsaa, AMDGPUDim2DArrayMsaa];
  list<AMDGPUDimProps> All = !listconcat(NoMsaa, Msaa);
}

// Represent sample variants, i.e. _C, _O, _B, ... and combinations thereof.
class AMDGPUSampleVariant<string ucmod, string lcmod, list<AMDGPUArg> extra_addr> {
  string UpperCaseMod = ucmod;
  string LowerCaseMod = lcmod;

  // {offset} {bias} {z-compare}
  list<AMDGPUArg> ExtraAddrArgs = extra_addr;
  bit Gradients = 0;

  // Name of the {lod} or {clamp} argument that is appended to the coordinates,
  // if any.
  string LodOrClamp = "";
}

// AMDGPUSampleVariants: all variants supported by IMAGE_SAMPLE
// AMDGPUSampleVariantsNoGradients: variants supported by IMAGE_GATHER4
defset list<AMDGPUSampleVariant> AMDGPUSampleVariants = {
  multiclass AMDGPUSampleHelper_Offset<string ucmod, string lcmod,
                                       list<AMDGPUArg> extra_addr> {
    def NAME#lcmod : AMDGPUSampleVariant<ucmod, lcmod, extra_addr>;
    def NAME#lcmod#_o : AMDGPUSampleVariant<
        ucmod#"_O", lcmod#"_o", !listconcat([AMDGPUArg<llvm_i32_ty, "offset">], extra_addr)>;
  }

  multiclass AMDGPUSampleHelper_Compare<string ucmod, string lcmod,
                                        list<AMDGPUArg> extra_addr> {
    defm NAME : AMDGPUSampleHelper_Offset<ucmod, lcmod, extra_addr>;
    defm NAME : AMDGPUSampleHelper_Offset<
        "_C"#ucmod, "_c"#lcmod, !listconcat(extra_addr, [AMDGPUArg<llvm_float_ty, "zcompare">])>;
  }

  multiclass AMDGPUSampleHelper_Clamp<string ucmod, string lcmod,
                                      list<AMDGPUArg> extra_addr> {
    defm NAME : AMDGPUSampleHelper_Compare<ucmod, lcmod, extra_addr>;
    let LodOrClamp = "clamp" in
    defm NAME : AMDGPUSampleHelper_Compare<ucmod#"_CL", lcmod#"_cl", extra_addr>;
  }

  defset list<AMDGPUSampleVariant> AMDGPUSampleVariantsNoGradients = {
    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"", "", []>;
    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<
        "_B", "_b", [AMDGPUArg<llvm_anyfloat_ty, "bias">]>;
    let LodOrClamp = "lod" in
    defm AMDGPUSample : AMDGPUSampleHelper_Compare<"_L", "_l", []>;
    defm AMDGPUSample : AMDGPUSampleHelper_Compare<"_LZ", "_lz", []>;
  }

  let Gradients = 1 in {
    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_D", "_d", []>;
    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_CD", "_cd", []>;
  }
}

// Helper class to capture the profile of a dimension-aware image intrinsic.
// This information is used to generate the intrinsic's type and to inform
// codegen pattern matching.
class AMDGPUDimProfile<string opmod,
                       AMDGPUDimProps dim> {
  AMDGPUDimProps Dim = dim;
  string OpMod = opmod; // the corresponding instruction is named IMAGE_OpMod

  // These are intended to be overwritten by subclasses
  bit IsSample = 0;
  bit IsAtomic = 0;
  list<LLVMType> RetTypes = [];
  list<AMDGPUArg> DataArgs = [];
  list<AMDGPUArg> ExtraAddrArgs = [];
  bit Gradients = 0;
  string LodClampMip = "";

  int NumRetAndDataAnyTypes =
    !foldl(0, !listconcat(RetTypes, !foreach(arg, DataArgs, arg.Type)), a, b,
           !add(a, b.isAny));

  list<AMDGPUArg> AddrArgs =
    arglistconcat<[ExtraAddrArgs,
                   !if(Gradients, dim.GradientArgs, []),
                   !listconcat(!if(IsSample, dim.CoordSliceArgs, dim.CoordSliceIntArgs),
                               !if(!eq(LodClampMip, ""),
                                   []<AMDGPUArg>,
                                   [AMDGPUArg<LLVMMatchType<0>, LodClampMip>]))],
                  NumRetAndDataAnyTypes>.ret;
  list<LLVMType> AddrTypes = !foreach(arg, AddrArgs, arg.Type);
  list<AMDGPUArg> AddrDefaultArgs =
    !foreach(arg, AddrArgs,
             AMDGPUArg<!if(!or(arg.Type.isAny, !isa<LLVMMatchType>(arg.Type)),
                           !if(IsSample, llvm_float_ty, llvm_i32_ty), arg.Type),
                       arg.Name>);
  list<AMDGPUArg> AddrA16Args =
    !foreach(arg, AddrArgs,
             AMDGPUArg<!if(!or(arg.Type.isAny, !isa<LLVMMatchType>(arg.Type)),
                           !if(IsSample, llvm_half_ty, llvm_i16_ty), arg.Type),
                       arg.Name>);
}

class AMDGPUDimProfileCopy<AMDGPUDimProfile base> : AMDGPUDimProfile<base.OpMod, base.Dim> {
  let IsSample = base.IsSample;
  let IsAtomic = base.IsAtomic;
  let RetTypes = base.RetTypes;
  let DataArgs = base.DataArgs;
  let ExtraAddrArgs = base.ExtraAddrArgs;
  let Gradients = base.Gradients;
  let LodClampMip = base.LodClampMip;
}

class AMDGPUDimSampleProfile<string opmod,
                             AMDGPUDimProps dim,
                             AMDGPUSampleVariant sample> : AMDGPUDimProfile<opmod, dim> {
  let IsSample = 1;
  let RetTypes = [llvm_any_ty];
  let ExtraAddrArgs = sample.ExtraAddrArgs;
  let Gradients = sample.Gradients;
  let LodClampMip = sample.LodOrClamp;
}

class AMDGPUDimNoSampleProfile<string opmod,
                               AMDGPUDimProps dim,
                               list<LLVMType> retty,
                               list<AMDGPUArg> dataargs,
                               bit Mip = 0> : AMDGPUDimProfile<opmod, dim> {
  let RetTypes = retty;
  let DataArgs = dataargs;
  let LodClampMip = !if(Mip, "mip", "");
}

class AMDGPUDimAtomicProfile<string opmod,
                             AMDGPUDimProps dim,
                             list<AMDGPUArg> dataargs> : AMDGPUDimProfile<opmod, dim> {
  let RetTypes = [llvm_anyint_ty];
  let DataArgs = dataargs;
  let IsAtomic = 1;
}

class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RESINFO", dim> {
  let RetTypes = [llvm_anyfloat_ty];
  let DataArgs = [];
  let AddrArgs = [AMDGPUArg<llvm_anyint_ty, "mip">];
  let LodClampMip = "mip";
}

// Helper class for figuring out image intrinsic argument indexes.
class AMDGPUImageDimIntrinsicEval<AMDGPUDimProfile P_> {
  int NumDataArgs = !size(P_.DataArgs);
  int NumDmaskArgs = !if(P_.IsAtomic, 0, 1);
  int NumVAddrArgs = !size(P_.AddrArgs);
  int NumRSrcArgs = 1;
  int NumSampArgs = !if(P_.IsSample, 2, 0);
  int DmaskArgIndex = NumDataArgs;
  int UnormArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, 1);
  int TexFailCtrlArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, NumSampArgs);
  int CachePolicyArgIndex = !add(TexFailCtrlArgIndex, 1);
}

// All dimension-aware intrinsics are derived from this class.
class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
                              list<IntrinsicProperty> props,
                              list<SDNodeProperty> sdnodeprops> : Intrinsic<
    P_.RetTypes,        // vdata(VGPR) -- for load/atomic-with-return
    !listconcat(
      !foreach(arg, P_.DataArgs, arg.Type),      // vdata(VGPR) -- for store/atomic
      !if(P_.IsAtomic, [], [llvm_i32_ty]),       // dmask(imm)
      P_.AddrTypes,                              // vaddr(VGPR)
      [llvm_v8i32_ty],                           // rsrc(SGPR)
      !if(P_.IsSample, [llvm_v4i32_ty,           // samp(SGPR)
                        llvm_i1_ty], []),        // unorm(imm)
      [llvm_i32_ty,                              // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe)
       llvm_i32_ty]),                            // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc)
     !listconcat(props,
          !if(P_.IsAtomic, [], [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>]),
          !if(P_.IsSample, [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.UnormArgIndex>], []),
          [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.TexFailCtrlArgIndex>,
           ImmArg<AMDGPUImageDimIntrinsicEval<P_>.CachePolicyArgIndex>]),
      "", sdnodeprops>,
  AMDGPURsrcIntrinsic<!add(!size(P_.DataArgs), !size(P_.AddrTypes),
                           !if(P_.IsAtomic, 0, 1)), 1> {
  AMDGPUDimProfile P = P_;

  AMDGPUImageDimIntrinsic Intr = !cast<AMDGPUImageDimIntrinsic>(NAME);

  let TargetPrefix = "amdgcn";
}

// Marker class for intrinsics with a DMask that determines the returned
// channels.
717class AMDGPUImageDMaskIntrinsic; 718 719defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = { 720 721 ////////////////////////////////////////////////////////////////////////// 722 // Load and store intrinsics 723 ////////////////////////////////////////////////////////////////////////// 724 multiclass AMDGPUImageDimIntrinsicsNoMsaa<string opmod, 725 list<LLVMType> retty, 726 list<AMDGPUArg> dataargs, 727 list<IntrinsicProperty> props, 728 list<SDNodeProperty> sdnodeprops, 729 bit Mip = 0> { 730 foreach dim = AMDGPUDims.NoMsaa in { 731 def !strconcat(NAME, "_", dim.Name) 732 : AMDGPUImageDimIntrinsic< 733 AMDGPUDimNoSampleProfile<opmod, dim, retty, dataargs, Mip>, 734 props, sdnodeprops>; 735 } 736 } 737 738 multiclass AMDGPUImageDimIntrinsicsAll<string opmod, 739 list<LLVMType> retty, 740 list<AMDGPUArg> dataargs, 741 list<IntrinsicProperty> props, 742 list<SDNodeProperty> sdnodeprops, 743 bit Mip = 0> { 744 foreach dim = AMDGPUDims.All in { 745 def !strconcat(NAME, "_", dim.Name) 746 : AMDGPUImageDimIntrinsic< 747 AMDGPUDimNoSampleProfile<opmod, dim, retty, dataargs, Mip>, 748 props, sdnodeprops>; 749 } 750 } 751 752 defm int_amdgcn_image_load 753 : AMDGPUImageDimIntrinsicsAll<"LOAD", [llvm_any_ty], [], [IntrReadMem], 754 [SDNPMemOperand]>, 755 AMDGPUImageDMaskIntrinsic; 756 defm int_amdgcn_image_load_mip 757 : AMDGPUImageDimIntrinsicsNoMsaa<"LOAD_MIP", [llvm_any_ty], [], 758 [IntrReadMem], [SDNPMemOperand], 1>, 759 AMDGPUImageDMaskIntrinsic; 760 761 defm int_amdgcn_image_store : AMDGPUImageDimIntrinsicsAll< 762 "STORE", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">], 763 [IntrWriteMem], [SDNPMemOperand]>; 764 defm int_amdgcn_image_store_mip : AMDGPUImageDimIntrinsicsNoMsaa< 765 "STORE_MIP", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">], 766 [IntrWriteMem], [SDNPMemOperand], 1>; 767 768 ////////////////////////////////////////////////////////////////////////// 769 // sample and getlod intrinsics 770 
  //////////////////////////////////////////////////////////////////////////

  // Instantiates one sample-style intrinsic per non-MSAA dimension.
  // NoMem = 1 produces an IntrNoMem variant with no memory operand
  // (used for getlod, which reads no texel data).
  multiclass AMDGPUImageDimSampleDims<string opmod,
                                      AMDGPUSampleVariant sample,
                                      bit NoMem = 0> {
    foreach dim = AMDGPUDims.NoMsaa in {
      def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic<
          AMDGPUDimSampleProfile<opmod, dim, sample>,
          !if(NoMem, [IntrNoMem], [IntrReadMem]),
          !if(NoMem, [], [SDNPMemOperand])>;
    }
  }

  foreach sample = AMDGPUSampleVariants in {
    defm int_amdgcn_image_sample # sample.LowerCaseMod
      : AMDGPUImageDimSampleDims<"SAMPLE" # sample.UpperCaseMod, sample>,
        AMDGPUImageDMaskIntrinsic;
  }

  defm int_amdgcn_image_getlod
    : AMDGPUImageDimSampleDims<"GET_LOD", AMDGPUSample, 1>,
      AMDGPUImageDMaskIntrinsic;

  //////////////////////////////////////////////////////////////////////////
  // getresinfo intrinsics
  //////////////////////////////////////////////////////////////////////////
  foreach dim = AMDGPUDims.All in {
    def !strconcat("int_amdgcn_image_getresinfo_", dim.Name)
      : AMDGPUImageDimIntrinsic<AMDGPUDimGetResInfoProfile<dim>, [IntrNoMem], []>,
        AMDGPUImageDMaskIntrinsic;
  }

  //////////////////////////////////////////////////////////////////////////
  // gather4 intrinsics
  //////////////////////////////////////////////////////////////////////////
  // gather4 only exists for 2D, cube, and 2D-array dimensions, and only for
  // the sample variants without gradients.
  foreach sample = AMDGPUSampleVariantsNoGradients in {
    foreach dim = [AMDGPUDim2D, AMDGPUDimCube, AMDGPUDim2DArray] in {
      def int_amdgcn_image_gather4 # sample.LowerCaseMod # _ # dim.Name:
          AMDGPUImageDimIntrinsic<
              AMDGPUDimSampleProfile<"GATHER4" # sample.UpperCaseMod, dim, sample>,
              [IntrReadMem], [SDNPMemOperand]>;
    }
  }
}

//////////////////////////////////////////////////////////////////////////
// atomic intrinsics
//////////////////////////////////////////////////////////////////////////
defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
  // Instantiates one atomic image intrinsic per dimension with the given
  // explicit data argument list (used directly by cmpswap, which takes two
  // data operands).
  multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs> {
    foreach dim = AMDGPUDims.All in {
      def !strconcat(NAME, "_", dim.Name)
        : AMDGPUImageDimIntrinsic<
            AMDGPUDimAtomicProfile<opmod, dim, dataargs>,
            [], [SDNPMemOperand]>;
    }
  }

  // Common case: a single "vdata" operand matching the return type.
  multiclass AMDGPUImageDimAtomic<string opmod> {
    defm "" : AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">]>;
  }

  defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAtomic<"ATOMIC_SWAP">;
  defm int_amdgcn_image_atomic_add : AMDGPUImageDimAtomic<"ATOMIC_ADD">;
  defm int_amdgcn_image_atomic_sub : AMDGPUImageDimAtomic<"ATOMIC_SUB">;
  defm int_amdgcn_image_atomic_smin : AMDGPUImageDimAtomic<"ATOMIC_SMIN">;
  defm int_amdgcn_image_atomic_umin : AMDGPUImageDimAtomic<"ATOMIC_UMIN">;
  defm int_amdgcn_image_atomic_smax : AMDGPUImageDimAtomic<"ATOMIC_SMAX">;
  defm int_amdgcn_image_atomic_umax : AMDGPUImageDimAtomic<"ATOMIC_UMAX">;
  defm int_amdgcn_image_atomic_and : AMDGPUImageDimAtomic<"ATOMIC_AND">;
  defm int_amdgcn_image_atomic_or : AMDGPUImageDimAtomic<"ATOMIC_OR">;
  defm int_amdgcn_image_atomic_xor : AMDGPUImageDimAtomic<"ATOMIC_XOR">;
  defm int_amdgcn_image_atomic_inc : AMDGPUImageDimAtomic<"ATOMIC_INC">;
  defm int_amdgcn_image_atomic_dec : AMDGPUImageDimAtomic<"ATOMIC_DEC">;

  defm int_amdgcn_image_atomic_cmpswap :
      AMDGPUImageDimAtomicX<"ATOMIC_CMPSWAP", [AMDGPUArg<LLVMMatchType<0>, "src">,
                                               AMDGPUArg<LLVMMatchType<0>, "cmp">]>;
}

//////////////////////////////////////////////////////////////////////////
// Buffer intrinsics
//////////////////////////////////////////////////////////////////////////

let TargetPrefix = "amdgcn" in {

defset list<AMDGPURsrcIntrinsic> AMDGPUBufferIntrinsics = {

// Legacy buffer load; superseded by the raw/struct variants below.
class AMDGPUBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [data_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
def int_amdgcn_buffer_load_format : AMDGPUBufferLoad<llvm_anyfloat_ty>;
def int_amdgcn_buffer_load : AMDGPUBufferLoad;

def int_amdgcn_s_buffer_load : Intrinsic <
  [llvm_any_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // byte offset(SGPR/imm)
   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 2 = dlc)
  [IntrNoMem, ImmArg<2>]>,
  AMDGPURsrcIntrinsic<0>;

// Legacy buffer store; superseded by the raw/struct variants below.
class AMDGPUBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [],
  [data_ty,           // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;
def int_amdgcn_buffer_store_format : AMDGPUBufferStore<llvm_anyfloat_ty>;
def int_amdgcn_buffer_store : AMDGPUBufferStore;

// New buffer intrinsics with separate raw and struct variants. The raw
// variant never has an index. The struct variant always has an index, even if
// it is const 0. A struct intrinsic with constant 0 index is different to the
// corresponding raw intrinsic on gfx9+ because the behavior of bound checking
// and swizzling changes depending on whether idxen is set in the instruction.
// These new intrinsics also keep the offset and soffset arguments separate as
// they behave differently in bounds checking and swizzling.
class AMDGPURawBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [data_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // auxiliary data (imm, cachepolicy (bit 0 = glc,
                      //                                   bit 1 = slc,
                      //                                   bit 2 = dlc on gfx10+),
                      //                 swizzled buffer (bit 3 = swz))
  [IntrReadMem, ImmArg<3>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad<llvm_anyfloat_ty>;
def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad;

class AMDGPUStructBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [data_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // auxiliary data (imm, cachepolicy (bit 0 = glc,
                      //                                   bit 1 = slc,
                      //                                   bit 2 = dlc on gfx10+),
                      //                 swizzled buffer (bit 3 = swz))
  [IntrReadMem, ImmArg<4>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad<llvm_anyfloat_ty>;
def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;

class AMDGPURawBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [],
  [data_ty,           // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // auxiliary data (imm, cachepolicy (bit 0 = glc,
                      //                                   bit 1 = slc,
                      //                                   bit 2 = dlc on gfx10+),
                      //                 swizzled buffer (bit 3 = swz))
  [IntrWriteMem, ImmArg<4>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;
def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore<llvm_anyfloat_ty>;
def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore;

class AMDGPUStructBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [],
  [data_ty,           // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // auxiliary data (imm, cachepolicy (bit 0 = glc,
                      //                                   bit 1 = slc,
                      //                                   bit 2 = dlc on gfx10+),
                      //                 swizzled buffer (bit 3 = swz))
  [IntrWriteMem, ImmArg<5>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;
def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore<llvm_anyfloat_ty>;
def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore;

// Raw buffer atomics; note these have no IntrReadMem/IntrWriteMem property,
// so they are treated as having unknown memory effects.
class AMDGPURawBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [data_ty],
  [LLVMMatchType<0>,  // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
  [ImmArg<4>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1, 0>;
def int_amdgcn_raw_buffer_atomic_swap : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_sub : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_smin : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_umin : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_smax : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_umax : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_and : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_inc : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_dec : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
  [llvm_anyint_ty],
  [LLVMMatchType<0>,  // src(VGPR)
   LLVMMatchType<0>,  // cmp(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
  [ImmArg<5>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<2, 0>;

class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [data_ty],
  [LLVMMatchType<0>,  // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
  [ImmArg<5>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1, 0>;
def int_amdgcn_struct_buffer_atomic_swap : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_add : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_sub : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_smin : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_umin : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_smax : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_umax : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_and : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_or : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_xor : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_inc : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_dec : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
  [llvm_anyint_ty],
  [LLVMMatchType<0>,  // src(VGPR)
   LLVMMatchType<0>,  // cmp(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
  [ImmArg<6>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<2, 0>;

// Obsolescent tbuffer intrinsics.
def int_amdgcn_tbuffer_load : Intrinsic <
    [llvm_any_ty],    // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
    [llvm_v4i32_ty,   // rsrc(SGPR)
     llvm_i32_ty,     // vindex(VGPR)
     llvm_i32_ty,     // voffset(VGPR)
     llvm_i32_ty,     // soffset(SGPR)
     llvm_i32_ty,     // offset(imm)
     llvm_i32_ty,     // dfmt(imm)
     llvm_i32_ty,     // nfmt(imm)
     llvm_i1_ty,      // glc(imm)
     llvm_i1_ty],     // slc(imm)
    [IntrReadMem, ImmArg<4>, ImmArg<5>, ImmArg<6>,
     ImmArg<7>, ImmArg<8>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;

def int_amdgcn_tbuffer_store : Intrinsic <
    [],
    [llvm_any_ty,    // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
     llvm_v4i32_ty,  // rsrc(SGPR)
     llvm_i32_ty,    // vindex(VGPR)
     llvm_i32_ty,    // voffset(VGPR)
     llvm_i32_ty,    // soffset(SGPR)
     llvm_i32_ty,    // offset(imm)
     llvm_i32_ty,    // dfmt(imm)
     llvm_i32_ty,    // nfmt(imm)
     llvm_i1_ty,     // glc(imm)
     llvm_i1_ty],    // slc(imm)
    [IntrWriteMem, ImmArg<5>, ImmArg<6>, ImmArg<7>,
     ImmArg<8>, ImmArg<9>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;

// New tbuffer intrinsics, with:
// - raw and struct variants
// - joint format field
// - joint cachepolicy field
def int_amdgcn_raw_tbuffer_load : Intrinsic <
    [llvm_any_ty],    // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
    [llvm_v4i32_ty,   // rsrc(SGPR)
     llvm_i32_ty,     // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,     // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],    // auxiliary data (imm, cachepolicy (bit 0 = glc,
                      //                                   bit 1 = slc,
                      //                                   bit 2 = dlc on gfx10+),
                      //                 swizzled buffer (bit 3 = swz))
    [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;

def int_amdgcn_raw_tbuffer_store : Intrinsic <
    [],
    [llvm_any_ty,    // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
     llvm_v4i32_ty,  // rsrc(SGPR)
     llvm_i32_ty,    // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,    // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,    // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],   // auxiliary data (imm, cachepolicy (bit 0 = glc,
                     //                                   bit 1 = slc,
                     //                                   bit 2 = dlc on gfx10+),
                     //                 swizzled buffer (bit 3 = swz))
    [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;

def int_amdgcn_struct_tbuffer_load : Intrinsic <
    [llvm_any_ty],    // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
    [llvm_v4i32_ty,   // rsrc(SGPR)
     llvm_i32_ty,     // vindex(VGPR)
     llvm_i32_ty,     // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,     // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],    // auxiliary data (imm, cachepolicy (bit 0 = glc,
                      //                                   bit 1 = slc,
                      //                                   bit 2 = dlc on gfx10+),
                      //                 swizzled buffer (bit 3 = swz))
    [IntrReadMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;

def int_amdgcn_struct_tbuffer_store : Intrinsic <
    [],
    [llvm_any_ty,    // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
     llvm_v4i32_ty,  // rsrc(SGPR)
     llvm_i32_ty,    // vindex(VGPR)
     llvm_i32_ty,    // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,    // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,    // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],   // auxiliary data (imm, cachepolicy (bit 0 = glc,
                     //                                   bit 1 = slc,
                     //                                   bit 2 = dlc on gfx10+),
                     //                 swizzled buffer (bit 3 = swz))
    [IntrWriteMem, ImmArg<5>, ImmArg<6>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;

// Legacy buffer atomics; superseded by the raw/struct variants above.
class AMDGPUBufferAtomic : Intrinsic <
  [llvm_anyint_ty],
  [LLVMMatchType<0>,  // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty],       // slc(imm)
  [ImmArg<4>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1, 0>;
def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_and : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_or : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_xor : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
  [llvm_i32_ty],
  [llvm_i32_ty,       // src(VGPR)
   llvm_i32_ty,       // cmp(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty],       // slc(imm)
  [ImmArg<5>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<2, 0>;

} // defset AMDGPUBufferIntrinsics

// Uses that do not set the done bit should set IntrWriteMem on the
// call site.
// Export to a render target, Z, or the position/parameter caches.
def int_amdgcn_exp : Intrinsic <[], [
  llvm_i32_ty,       // tgt,
  llvm_i32_ty,       // en
  llvm_any_ty,       // src0 (f32 or i32)
  LLVMMatchType<0>,  // src1
  LLVMMatchType<0>,  // src2
  LLVMMatchType<0>,  // src3
  llvm_i1_ty,        // done
  llvm_i1_ty         // vm
  ],
  [ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>, IntrInaccessibleMemOnly]
>;

// exp with compr bit set.
def int_amdgcn_exp_compr : Intrinsic <[], [
  llvm_i32_ty,       // tgt,
  llvm_i32_ty,       // en
  llvm_anyvector_ty, // src0 (v2f16 or v2i16)
  LLVMMatchType<0>,  // src1
  llvm_i1_ty,        // done
  llvm_i1_ty],       // vm
  [ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>, IntrInaccessibleMemOnly]
>;

def int_amdgcn_buffer_wbinvl1_sc :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
  Intrinsic<[], [], []>;

def int_amdgcn_buffer_wbinvl1 :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">,
  Intrinsic<[], [], []>;

def int_amdgcn_s_dcache_inv :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">,
  Intrinsic<[], [], []>;

def int_amdgcn_s_memtime :
  GCCBuiltin<"__builtin_amdgcn_s_memtime">,
  Intrinsic<[llvm_i64_ty], []>;

def int_amdgcn_s_sleep :
  GCCBuiltin<"__builtin_amdgcn_s_sleep">,
  Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]> {
}

def int_amdgcn_s_incperflevel :
  GCCBuiltin<"__builtin_amdgcn_s_incperflevel">,
  Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]> {
}

def int_amdgcn_s_decperflevel :
  GCCBuiltin<"__builtin_amdgcn_s_decperflevel">,
  Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]> {
}

def int_amdgcn_s_getreg :
  GCCBuiltin<"__builtin_amdgcn_s_getreg">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
  [IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable, ImmArg<0>]
>;

// int_amdgcn_s_getpc is provided to allow a specific style of position
// independent code to determine the high part of its address when it is
// known
// (through convention) that the code and any data of interest does
// not cross a 4Gb address boundary. Use for any other purpose may not
// produce the desired results as optimizations may cause code movement,
// especially as we explicitly use IntrNoMem to allow optimizations.
def int_amdgcn_s_getpc :
  GCCBuiltin<"__builtin_amdgcn_s_getpc">,
  Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>;

// __builtin_amdgcn_interp_mov <param>, <attr_chan>, <attr>, <m0>
// param values: 0 = P10, 1 = P20, 2 = P0
def int_amdgcn_interp_mov :
  GCCBuiltin<"__builtin_amdgcn_interp_mov">,
  Intrinsic<[llvm_float_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>]>;

// __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
// This intrinsic reads from lds, but the memory values are constant,
// so it behaves like IntrNoMem.
def int_amdgcn_interp_p1 :
  GCCBuiltin<"__builtin_amdgcn_interp_p1">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>]>;

// __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
def int_amdgcn_interp_p2 :
  GCCBuiltin<"__builtin_amdgcn_interp_p2">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, ImmArg<2>, ImmArg<3>]>;
          // See int_amdgcn_v_interp_p1 for why this is IntrNoMem.

// __builtin_amdgcn_interp_p1_f16 <i>, <attr_chan>, <attr>, <high>, <m0>
def int_amdgcn_interp_p1_f16 :
  GCCBuiltin<"__builtin_amdgcn_interp_p1_f16">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>, ImmArg<3>]>;

// __builtin_amdgcn_interp_p2_f16 <p1>, <j>, <attr_chan>, <attr>, <high>, <m0>
def int_amdgcn_interp_p2_f16 :
  GCCBuiltin<"__builtin_amdgcn_interp_p2_f16">,
  Intrinsic<[llvm_half_ty],
            [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, ImmArg<2>, ImmArg<3>, ImmArg<4>]>;

// Pixel shaders only: whether the current pixel is live (i.e. not a helper
// invocation for derivative computation).
def int_amdgcn_ps_live : Intrinsic <
  [llvm_i1_ty],
  [],
  [IntrNoMem]>;

def int_amdgcn_mbcnt_lo :
  GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

def int_amdgcn_mbcnt_hi :
  GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

// llvm.amdgcn.ds.swizzle src offset
def int_amdgcn_ds_swizzle :
  GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, ImmArg<1>]>;

// Unsigned bitfield extract: <src>, <offset>, <width>.
def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty],
    [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
    [IntrNoMem, IntrSpeculatable]
>;

// Signed bitfield extract: <src>, <offset>, <width>.
def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty],
    [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
    [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_lerp :
  GCCBuiltin<"__builtin_amdgcn_lerp">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_sad_u8 :
  GCCBuiltin<"__builtin_amdgcn_sad_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_msad_u8 :
  GCCBuiltin<"__builtin_amdgcn_msad_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_sad_hi_u8 :
  GCCBuiltin<"__builtin_amdgcn_sad_hi_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_sad_u16 :
  GCCBuiltin<"__builtin_amdgcn_sad_u16">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_qsad_pk_u16_u8 :
  GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">,
  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_mqsad_pk_u16_u8 :
  GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">,
  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_mqsad_u32_u8 :
  GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
  Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pk_u8_f32 :
  GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
  Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

// Cross-lane integer compare; the final i32 operand is the condition code
// (imm).
def int_amdgcn_icmp :
  Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty, LLVMMatchType<1>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, ImmArg<2>]>;

// Cross-lane float compare; the final i32 operand is the condition code
// (imm).
def int_amdgcn_fcmp :
  Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, ImmArg<2>]>;

def int_amdgcn_readfirstlane :
  GCCBuiltin<"__builtin_amdgcn_readfirstlane">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

// The lane argument must be uniform across the currently active threads of the
// current wave. Otherwise, the result is undefined.
def int_amdgcn_readlane :
  GCCBuiltin<"__builtin_amdgcn_readlane">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

// The value to write and lane select arguments must be uniform across the
// currently active threads of the current wave. Otherwise, the result is
// undefined.
def int_amdgcn_writelane :
  GCCBuiltin<"__builtin_amdgcn_writelane">,
  Intrinsic<[llvm_i32_ty], [
    llvm_i32_ty,    // uniform value to write: returned by the selected lane
    llvm_i32_ty,    // uniform lane select
    llvm_i32_ty     // returned by all lanes other than the selected one
  ],
  [IntrNoMem, IntrConvergent]
>;

def int_amdgcn_alignbit :
  GCCBuiltin<"__builtin_amdgcn_alignbit">, Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_alignbyte : GCCBuiltin<"__builtin_amdgcn_alignbyte">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_mul_i24 : Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_mul_u24 : Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

// llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id)
//
// bar_val is the total number of waves that will wait on this
// barrier, minus 1.
def int_amdgcn_ds_gws_init :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_init">,
  Intrinsic<[],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrWriteMem, IntrInaccessibleMemOnly], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.barrier(i32 vsrc0, i32 resource_id)
// bar_val is the total number of waves that will wait on this
// barrier, minus 1.
// NOTE(review): the first operand is named vsrc0 above but described as
// bar_val below (likely copied from ds.gws.init) — confirm intended name.
def int_amdgcn_ds_gws_barrier :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_barrier">,
  Intrinsic<[],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.v(i32 resource_id)
def int_amdgcn_ds_gws_sema_v :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_v">,
  Intrinsic<[],
  [llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.br(i32 vsrc, i32 resource_id)
def int_amdgcn_ds_gws_sema_br :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_br">,
  Intrinsic<[],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.p(i32 resource_id)
def int_amdgcn_ds_gws_sema_p :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_p">,
  Intrinsic<[],
  [llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.release.all(i32 resource_id)
def int_amdgcn_ds_gws_sema_release_all :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_release_all">,
  Intrinsic<[],
  [llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly], "",
  [SDNPMemOperand]
>;


// Copies the source value to the destination value, with the guarantee that
// the source value is computed as if the entire program were executed in WQM.
def int_amdgcn_wqm : Intrinsic<[llvm_any_ty],
  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

// Copies the source value to the destination value, such that the source
// is computed as if the entire program were executed in WQM if any other
// program code executes in WQM.
def int_amdgcn_softwqm : Intrinsic<[llvm_any_ty],
  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

// Return true if at least one thread within the pixel quad passes true into
// the function.
def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty],
  [llvm_i1_ty], [IntrNoMem, IntrConvergent]
>;

// If false, set EXEC=0 for the current thread until the end of program.
def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>;

// Copies the active channels of the source value to the destination value,
// with the guarantee that the source value is computed as if the entire
// program were executed in Whole Wavefront Mode, i.e. with all channels
// enabled, with a few exceptions: - Phi nodes which require WWM return an
// undefined value.
def int_amdgcn_wwm : Intrinsic<[llvm_any_ty],
  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrConvergent]
>;

// Given a value, copies it while setting all the inactive lanes to a given
// value. Note that OpenGL helper lanes are considered active, so if the
// program ever uses WQM, then the instruction and the first source will be
// computed in WQM.
def int_amdgcn_set_inactive :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, // value to be copied
             LLVMMatchType<0>], // value for the inactive lanes to take
            [IntrNoMem, IntrConvergent]>;

// Return if the given flat pointer points to a local memory address.
1498def int_amdgcn_is_shared : GCCBuiltin<"__builtin_amdgcn_is_shared">, 1499 Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], 1500 [IntrNoMem, IntrSpeculatable, NoCapture<0>] 1501>; 1502 1503// Return if the given flat pointer points to a prvate memory address. 1504def int_amdgcn_is_private : GCCBuiltin<"__builtin_amdgcn_is_private">, 1505 Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], 1506 [IntrNoMem, IntrSpeculatable, NoCapture<0>] 1507>; 1508 1509//===----------------------------------------------------------------------===// 1510// CI+ Intrinsics 1511//===----------------------------------------------------------------------===// 1512 1513def int_amdgcn_s_dcache_inv_vol : 1514 GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">, 1515 Intrinsic<[], [], []>; 1516 1517def int_amdgcn_buffer_wbinvl1_vol : 1518 GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">, 1519 Intrinsic<[], [], []>; 1520 1521//===----------------------------------------------------------------------===// 1522// VI Intrinsics 1523//===----------------------------------------------------------------------===// 1524 1525// llvm.amdgcn.mov.dpp.i32 <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl> 1526def int_amdgcn_mov_dpp : 1527 Intrinsic<[llvm_anyint_ty], 1528 [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, 1529 llvm_i1_ty], [IntrNoMem, IntrConvergent, ImmArg<1>, 1530 ImmArg<2>, ImmArg<3>, ImmArg<4>]>; 1531 1532// llvm.amdgcn.update.dpp.i32 <old> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl> 1533// Should be equivalent to: 1534// v_mov_b32 <dest> <old> 1535// v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl> 1536def int_amdgcn_update_dpp : 1537 Intrinsic<[llvm_anyint_ty], 1538 [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, 1539 llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], 1540 [IntrNoMem, IntrConvergent, 1541 ImmArg<2>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; 1542 1543def int_amdgcn_s_dcache_wb : 1544 GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">, 1545 Intrinsic<[], [], []>; 1546 1547def 
int_amdgcn_s_dcache_wb_vol :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
  Intrinsic<[], [], []>;

// Returns an i64. Only the result-type list is given here; the parameter
// and attribute lists take TableGen defaults.
def int_amdgcn_s_memrealtime :
  GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
  Intrinsic<[llvm_i64_ty]>;

// llvm.amdgcn.ds.permute <index> <src>
def int_amdgcn_ds_permute :
  GCCBuiltin<"__builtin_amdgcn_ds_permute">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

// llvm.amdgcn.ds.bpermute <index> <src>
def int_amdgcn_ds_bpermute :
  GCCBuiltin<"__builtin_amdgcn_ds_bpermute">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

//===----------------------------------------------------------------------===//
// GFX10 Intrinsics
//===----------------------------------------------------------------------===//

// llvm.amdgcn.permlane16 <old> <src0> <src1> <src2> <fi> <bound_control>
// <fi> and <bound_control> must be immediates (ImmArg<4>, ImmArg<5>).
def int_amdgcn_permlane16 : GCCBuiltin<"__builtin_amdgcn_permlane16">,
  Intrinsic<[llvm_i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
            [IntrNoMem, IntrConvergent, ImmArg<4>, ImmArg<5>]>;

// llvm.amdgcn.permlanex16 <old> <src0> <src1> <src2> <fi> <bound_control>
def int_amdgcn_permlanex16 : GCCBuiltin<"__builtin_amdgcn_permlanex16">,
  Intrinsic<[llvm_i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
            [IntrNoMem, IntrConvergent, ImmArg<4>, ImmArg<5>]>;

// llvm.amdgcn.mov.dpp8.i32 <src> <sel>
// <sel> is a 32-bit constant whose high 8 bits must be zero which selects
// the lanes to read from.
def int_amdgcn_mov_dpp8 :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, ImmArg<1>]>;

// Reads a wave-state value; modeled as a read of inaccessible memory so it
// is not speculated or reordered freely.
def int_amdgcn_s_get_waveid_in_workgroup :
  GCCBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">,
  Intrinsic<[llvm_i32_ty], [], [IntrReadMem, IntrInaccessibleMemOnly]>;

//===----------------------------------------------------------------------===//
// Deep learning intrinsics.
//===----------------------------------------------------------------------===//

// f32 %r = llvm.amdgcn.fdot2(v2f16 %a, v2f16 %b, f32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_fdot2 :
  GCCBuiltin<"__builtin_amdgcn_fdot2">,
  Intrinsic<
    [llvm_float_ty], // %r
    [
      llvm_v2f16_ty, // %a
      llvm_v2f16_ty, // %b
      llvm_float_ty, // %c
      llvm_i1_ty     // %clamp (immediate: ImmArg<3>)
    ],
    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
  >;

// i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_sdot2 :
  GCCBuiltin<"__builtin_amdgcn_sdot2">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_v2i16_ty, // %a
      llvm_v2i16_ty, // %b
      llvm_i32_ty,   // %c
      llvm_i1_ty     // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
  >;

// u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_udot2 :
  GCCBuiltin<"__builtin_amdgcn_udot2">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_v2i16_ty, // %a
      llvm_v2i16_ty, // %b
      llvm_i32_ty,   // %c
      llvm_i1_ty     // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
  >;

// i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
def int_amdgcn_sdot4 :
  GCCBuiltin<"__builtin_amdgcn_sdot4">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_i32_ty, // %a (four packed i8 lanes)
      llvm_i32_ty, // %b (four packed i8 lanes)
      llvm_i32_ty, // %c
      llvm_i1_ty   // %clamp (immediate: ImmArg<3>)
    ],
    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
  >;

// u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
def int_amdgcn_udot4 :
  GCCBuiltin<"__builtin_amdgcn_udot4">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_i32_ty, // %a
      llvm_i32_ty, // %b
      llvm_i32_ty, // %c
      llvm_i1_ty   // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
  >;

// i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
//      %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
def int_amdgcn_sdot8 :
  GCCBuiltin<"__builtin_amdgcn_sdot8">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_i32_ty, // %a (eight packed i4 lanes)
      llvm_i32_ty, // %b (eight packed i4 lanes)
      llvm_i32_ty, // %c
      llvm_i1_ty   // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
  >;

// u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
//      %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
def int_amdgcn_udot8 :
  GCCBuiltin<"__builtin_amdgcn_udot8">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_i32_ty, // %a
      llvm_i32_ty, // %b
      llvm_i32_ty, // %c
      llvm_i1_ty   // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
  >;

//===----------------------------------------------------------------------===//
// gfx908 intrinsics
//===----------------------------------------------------------------------===//

// Buffer atomic with no return value; see the argument comments below.
class AMDGPUBufferAtomicNoRtn : Intrinsic <
  [],
  [llvm_anyfloat_ty, // vdata(VGPR)
   llvm_v4i32_ty,    // rsrc(SGPR)
   llvm_i32_ty,      // vindex(VGPR)
   llvm_i32_ty,      // offset(SGPR/VGPR/imm)
   llvm_i1_ty],      // slc(imm)
  [], "", [SDNPMemOperand]>,
  // RsrcArg = 1, IsImage = 0: operand 1 is the buffer resource descriptor.
  AMDGPURsrcIntrinsic<1, 0>;

// Global-memory atomic with no return value.
class AMDGPUGlobalAtomicNoRtn : Intrinsic <
  [],
  [llvm_anyptr_ty,    // vaddr
   llvm_anyfloat_ty], // vdata(VGPR)
  [IntrArgMemOnly, NoCapture<0>], "", [SDNPMemOperand]>;

def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicNoRtn;
def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicNoRtn;

// llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
// The three trailing i32 control operands (cbsz, abid, blgp) must be
// immediates (ImmArg<3>..ImmArg<5>) on every mfma variant below.
def int_amdgcn_mfma_f32_32x32x1f32 : Intrinsic<[llvm_v32f32_ty],
  [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

def int_amdgcn_mfma_f32_16x16x1f32 : Intrinsic<[llvm_v16f32_ty],
  [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

def int_amdgcn_mfma_f32_4x4x1f32 : Intrinsic<[llvm_v4f32_ty],
  [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

def int_amdgcn_mfma_f32_32x32x2f32 : Intrinsic<[llvm_v16f32_ty],
  [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

def int_amdgcn_mfma_f32_16x16x4f32 : Intrinsic<[llvm_v4f32_ty],
  [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

def int_amdgcn_mfma_f32_32x32x4f16 : Intrinsic<[llvm_v32f32_ty],
  [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty,
   llvm_i32_ty, llvm_i32_ty,
   llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

def int_amdgcn_mfma_f32_16x16x4f16 : Intrinsic<[llvm_v16f32_ty],
  [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

def int_amdgcn_mfma_f32_4x4x4f16 : Intrinsic<[llvm_v4f32_ty],
  [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

def int_amdgcn_mfma_f32_32x32x8f16 : Intrinsic<[llvm_v16f32_ty],
  [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

def int_amdgcn_mfma_f32_16x16x16f16 : Intrinsic<[llvm_v4f32_ty],
  [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

// Integer mfma variants: srcA/srcB are packed i8 lanes carried in i32.
def int_amdgcn_mfma_i32_32x32x4i8 : Intrinsic<[llvm_v32i32_ty],
  [llvm_i32_ty, llvm_i32_ty, llvm_v32i32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

def int_amdgcn_mfma_i32_16x16x4i8 : Intrinsic<[llvm_v16i32_ty],
  [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

def int_amdgcn_mfma_i32_4x4x4i8 : Intrinsic<[llvm_v4i32_ty],
  [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

def int_amdgcn_mfma_i32_32x32x8i8 : Intrinsic<[llvm_v16i32_ty],
  [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

def
int_amdgcn_mfma_i32_16x16x16i8 : Intrinsic<[llvm_v4i32_ty],
  [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

// bf16 mfma variants: srcA/srcB are bf16 pairs carried as v2i16 (there is
// no dedicated bf16 IR type here).
def int_amdgcn_mfma_f32_32x32x2bf16 : Intrinsic<[llvm_v32f32_ty],
  [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

def int_amdgcn_mfma_f32_16x16x2bf16 : Intrinsic<[llvm_v16f32_ty],
  [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

def int_amdgcn_mfma_f32_4x4x2bf16 : Intrinsic<[llvm_v4f32_ty],
  [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

def int_amdgcn_mfma_f32_32x32x4bf16 : Intrinsic<[llvm_v16f32_ty],
  [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

def int_amdgcn_mfma_f32_16x16x8bf16 : Intrinsic<[llvm_v4f32_ty],
  [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

//===----------------------------------------------------------------------===//
// Special Intrinsics for backend internal use only. No frontend
// should emit calls to these.
//===----------------------------------------------------------------------===//

// Structured control-flow helpers; all are marked IntrConvergent so they
// are not moved across points that change the set of active lanes.
def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
  [llvm_i1_ty], [IntrConvergent]
>;

def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
  [llvm_anyint_ty], [IntrConvergent]
>;

def int_amdgcn_if_break : Intrinsic<[llvm_anyint_ty],
  [llvm_i1_ty, llvm_anyint_ty], [IntrNoMem, IntrConvergent]
>;

def int_amdgcn_loop : Intrinsic<[llvm_i1_ty],
  [llvm_anyint_ty], [IntrConvergent]
>;

def int_amdgcn_end_cf : Intrinsic<[], [llvm_anyint_ty], [IntrConvergent]>;

// Represent unreachable in a divergent region.
def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>;

// Emit 2.5 ulp, no denormal division. Should only be inserted by
// pass based on !fpmath metadata.
def int_amdgcn_fdiv_fast : Intrinsic<
  [llvm_float_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable]
>;
} // closes the enclosing "let ... in {" block opened earlier in the file