1//===- IntrinsicsAMDGPU.td - Defines AMDGPU intrinsics -----*- tablegen -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines all of the R600-specific intrinsics. 10// 11//===----------------------------------------------------------------------===// 12 13class AMDGPUReadPreloadRegisterIntrinsic 14 : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; 15 16class AMDGPUReadPreloadRegisterIntrinsicNamed<string name> 17 : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<name>; 18 19// Used to tag image and resource intrinsics with information used to generate 20// mem operands. 21class AMDGPURsrcIntrinsic<int rsrcarg, bit isimage = 0> { 22 int RsrcArg = rsrcarg; 23 bit IsImage = isimage; 24} 25 26let TargetPrefix = "r600" in { 27 28multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz { 29 def _x : AMDGPUReadPreloadRegisterIntrinsic; 30 def _y : AMDGPUReadPreloadRegisterIntrinsic; 31 def _z : AMDGPUReadPreloadRegisterIntrinsic; 32} 33 34multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz_named<string prefix> { 35 def _x : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_x")>; 36 def _y : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_y")>; 37 def _z : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_z")>; 38} 39 40defm int_r600_read_global_size : AMDGPUReadPreloadRegisterIntrinsic_xyz_named 41 <"__builtin_r600_read_global_size">; 42defm int_r600_read_ngroups : AMDGPUReadPreloadRegisterIntrinsic_xyz_named 43 <"__builtin_r600_read_ngroups">; 44defm int_r600_read_tgid : AMDGPUReadPreloadRegisterIntrinsic_xyz_named 45 <"__builtin_r600_read_tgid">; 46 47defm int_r600_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz; 48defm 
int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz; 49 50def int_r600_group_barrier : GCCBuiltin<"__builtin_r600_group_barrier">, 51 Intrinsic<[], [], [IntrConvergent]>; 52 53// AS 7 is PARAM_I_ADDRESS, used for kernel arguments 54def int_r600_implicitarg_ptr : 55 GCCBuiltin<"__builtin_r600_implicitarg_ptr">, 56 Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [], 57 [IntrNoMem, IntrSpeculatable]>; 58 59def int_r600_rat_store_typed : 60 // 1st parameter: Data 61 // 2nd parameter: Index 62 // 3rd parameter: Constant RAT ID 63 Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], []>, 64 GCCBuiltin<"__builtin_r600_rat_store_typed">; 65 66def int_r600_recipsqrt_ieee : Intrinsic< 67 [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] 68>; 69 70def int_r600_recipsqrt_clamped : Intrinsic< 71 [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] 72>; 73 74def int_r600_cube : Intrinsic< 75 [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable] 76>; 77 78def int_r600_store_stream_output : Intrinsic< 79 [], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [] 80>; 81 82class TextureIntrinsicFloatInput : Intrinsic<[llvm_v4f32_ty], [ 83 llvm_v4f32_ty, // Coord 84 llvm_i32_ty, // offset_x 85 llvm_i32_ty, // offset_y, 86 llvm_i32_ty, // offset_z, 87 llvm_i32_ty, // resource_id 88 llvm_i32_ty, // samplerid 89 llvm_i32_ty, // coord_type_x 90 llvm_i32_ty, // coord_type_y 91 llvm_i32_ty, // coord_type_z 92 llvm_i32_ty], // coord_type_w 93 [IntrNoMem] 94>; 95 96class TextureIntrinsicInt32Input : Intrinsic<[llvm_v4i32_ty], [ 97 llvm_v4i32_ty, // Coord 98 llvm_i32_ty, // offset_x 99 llvm_i32_ty, // offset_y, 100 llvm_i32_ty, // offset_z, 101 llvm_i32_ty, // resource_id 102 llvm_i32_ty, // samplerid 103 llvm_i32_ty, // coord_type_x 104 llvm_i32_ty, // coord_type_y 105 llvm_i32_ty, // coord_type_z 106 llvm_i32_ty], // coord_type_w 107 [IntrNoMem] 108>; 109 110def int_r600_store_swizzle : 111 Intrinsic<[], [llvm_v4f32_ty, 
llvm_i32_ty, llvm_i32_ty], [] 112>; 113 114def int_r600_tex : TextureIntrinsicFloatInput; 115def int_r600_texc : TextureIntrinsicFloatInput; 116def int_r600_txl : TextureIntrinsicFloatInput; 117def int_r600_txlc : TextureIntrinsicFloatInput; 118def int_r600_txb : TextureIntrinsicFloatInput; 119def int_r600_txbc : TextureIntrinsicFloatInput; 120def int_r600_txf : TextureIntrinsicInt32Input; 121def int_r600_txq : TextureIntrinsicInt32Input; 122def int_r600_ddx : TextureIntrinsicFloatInput; 123def int_r600_ddy : TextureIntrinsicFloatInput; 124 125def int_r600_dot4 : Intrinsic<[llvm_float_ty], 126 [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable] 127>; 128 129def int_r600_kill : Intrinsic<[], [llvm_float_ty], []>; 130 131} // End TargetPrefix = "r600" 132 133let TargetPrefix = "amdgcn" in { 134 135//===----------------------------------------------------------------------===// 136// ABI Special Intrinsics 137//===----------------------------------------------------------------------===// 138 139defm int_amdgcn_workitem_id : AMDGPUReadPreloadRegisterIntrinsic_xyz; 140defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named 141 <"__builtin_amdgcn_workgroup_id">; 142 143def int_amdgcn_dispatch_ptr : 144 GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">, 145 Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [], 146 [IntrNoMem, IntrSpeculatable]>; 147 148def int_amdgcn_queue_ptr : 149 GCCBuiltin<"__builtin_amdgcn_queue_ptr">, 150 Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [], 151 [IntrNoMem, IntrSpeculatable]>; 152 153def int_amdgcn_kernarg_segment_ptr : 154 GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">, 155 Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [], 156 [IntrNoMem, IntrSpeculatable]>; 157 158def int_amdgcn_implicitarg_ptr : 159 GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">, 160 Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [], 161 [IntrNoMem, IntrSpeculatable]>; 162 163def int_amdgcn_groupstaticsize : 164 
GCCBuiltin<"__builtin_amdgcn_groupstaticsize">, 165 Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; 166 167def int_amdgcn_dispatch_id : 168 GCCBuiltin<"__builtin_amdgcn_dispatch_id">, 169 Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>; 170 171def int_amdgcn_implicit_buffer_ptr : 172 GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">, 173 Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [], 174 [IntrNoMem, IntrSpeculatable]>; 175 176// Set EXEC to the 64-bit value given. 177// This is always moved to the beginning of the basic block. 178def int_amdgcn_init_exec : Intrinsic<[], 179 [llvm_i64_ty], // 64-bit literal constant 180 [IntrConvergent, ImmArg<0>]>; 181 182// Set EXEC according to a thread count packed in an SGPR input: 183// thread_count = (input >> bitoffset) & 0x7f; 184// This is always moved to the beginning of the basic block. 185def int_amdgcn_init_exec_from_input : Intrinsic<[], 186 [llvm_i32_ty, // 32-bit SGPR input 187 llvm_i32_ty], // bit offset of the thread count 188 [IntrConvergent]>; 189 190def int_amdgcn_wavefrontsize : 191 GCCBuiltin<"__builtin_amdgcn_wavefrontsize">, 192 Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; 193 194 195//===----------------------------------------------------------------------===// 196// Instruction Intrinsics 197//===----------------------------------------------------------------------===// 198 199// The first parameter is s_sendmsg immediate (i16), 200// the second one is copied to m0 201def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">, 202 Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>; 203def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">, 204 Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>; 205 206def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">, 207 Intrinsic<[], [], [IntrConvergent]>; 208 209def int_amdgcn_wave_barrier : 
GCCBuiltin<"__builtin_amdgcn_wave_barrier">, 210 Intrinsic<[], [], [IntrConvergent]>; 211 212def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">, 213 Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>; 214 215def int_amdgcn_div_scale : Intrinsic< 216 // 1st parameter: Numerator 217 // 2nd parameter: Denominator 218 // 3rd parameter: Constant to select select between first and 219 // second. (0 = first, 1 = second). 220 [llvm_anyfloat_ty, llvm_i1_ty], 221 [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], 222 [IntrNoMem, IntrSpeculatable, ImmArg<2>] 223>; 224 225def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty], 226 [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], 227 [IntrNoMem, IntrSpeculatable] 228>; 229 230def int_amdgcn_div_fixup : Intrinsic<[llvm_anyfloat_ty], 231 [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], 232 [IntrNoMem, IntrSpeculatable] 233>; 234 235def int_amdgcn_trig_preop : Intrinsic< 236 [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], 237 [IntrNoMem, IntrSpeculatable] 238>; 239 240def int_amdgcn_sin : Intrinsic< 241 [llvm_anyfloat_ty], [LLVMMatchType<0>], 242 [IntrNoMem, IntrSpeculatable] 243>; 244 245def int_amdgcn_cos : Intrinsic< 246 [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] 247>; 248 249def int_amdgcn_log_clamp : Intrinsic< 250 [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] 251>; 252 253def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">, 254 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], 255 [IntrNoMem, IntrSpeculatable] 256>; 257 258def int_amdgcn_rcp : Intrinsic< 259 [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] 260>; 261 262def int_amdgcn_rcp_legacy : GCCBuiltin<"__builtin_amdgcn_rcp_legacy">, 263 Intrinsic<[llvm_float_ty], [llvm_float_ty], 264 [IntrNoMem, IntrSpeculatable] 265>; 266 267def int_amdgcn_rsq : Intrinsic< 268 [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] 
269>; 270 271def int_amdgcn_rsq_legacy : GCCBuiltin<"__builtin_amdgcn_rsq_legacy">, 272 Intrinsic< 273 [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable] 274>; 275 276def int_amdgcn_rsq_clamp : Intrinsic< 277 [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>; 278 279def int_amdgcn_ldexp : Intrinsic< 280 [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], 281 [IntrNoMem, IntrSpeculatable] 282>; 283 284def int_amdgcn_frexp_mant : Intrinsic< 285 [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] 286>; 287 288def int_amdgcn_frexp_exp : Intrinsic< 289 [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable] 290>; 291 292// v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0 293// and always uses rtz, so is not suitable for implementing the OpenCL 294// fract function. It should be ok on VI. 295def int_amdgcn_fract : Intrinsic< 296 [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] 297>; 298 299def int_amdgcn_cvt_pkrtz : GCCBuiltin<"__builtin_amdgcn_cvt_pkrtz">, 300 Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], 301 [IntrNoMem, IntrSpeculatable] 302>; 303 304def int_amdgcn_cvt_pknorm_i16 : 305 GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">, 306 Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], 307 [IntrNoMem, IntrSpeculatable] 308>; 309 310def int_amdgcn_cvt_pknorm_u16 : 311 GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">, 312 Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], 313 [IntrNoMem, IntrSpeculatable] 314>; 315 316def int_amdgcn_cvt_pk_i16 : 317 GCCBuiltin<"__builtin_amdgcn_cvt_pk_i16">, 318 Intrinsic< 319 [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty], 320 [IntrNoMem, IntrSpeculatable] 321>; 322 323def int_amdgcn_cvt_pk_u16 : GCCBuiltin<"__builtin_amdgcn_cvt_pk_u16">, 324 Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty], 325 [IntrNoMem, IntrSpeculatable] 326>; 327 328def int_amdgcn_class : Intrinsic< 329 [llvm_i1_ty], 
[llvm_anyfloat_ty, llvm_i32_ty], 330 [IntrNoMem, IntrSpeculatable] 331>; 332 333def int_amdgcn_fmed3 : GCCBuiltin<"__builtin_amdgcn_fmed3">, 334 Intrinsic<[llvm_anyfloat_ty], 335 [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], 336 [IntrNoMem, IntrSpeculatable] 337>; 338 339def int_amdgcn_cubeid : GCCBuiltin<"__builtin_amdgcn_cubeid">, 340 Intrinsic<[llvm_float_ty], 341 [llvm_float_ty, llvm_float_ty, llvm_float_ty], 342 [IntrNoMem, IntrSpeculatable] 343>; 344 345def int_amdgcn_cubema : GCCBuiltin<"__builtin_amdgcn_cubema">, 346 Intrinsic<[llvm_float_ty], 347 [llvm_float_ty, llvm_float_ty, llvm_float_ty], 348 [IntrNoMem, IntrSpeculatable] 349>; 350 351def int_amdgcn_cubesc : GCCBuiltin<"__builtin_amdgcn_cubesc">, 352 Intrinsic<[llvm_float_ty], 353 [llvm_float_ty, llvm_float_ty, llvm_float_ty], 354 [IntrNoMem, IntrSpeculatable] 355>; 356 357def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">, 358 Intrinsic<[llvm_float_ty], 359 [llvm_float_ty, llvm_float_ty, llvm_float_ty], 360 [IntrNoMem, IntrSpeculatable] 361>; 362 363// v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz 364// should be used. 365def int_amdgcn_sffbh : 366 Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], 367 [IntrNoMem, IntrSpeculatable] 368>; 369 370// v_mad_f32|f16/v_mac_f32|f16, selected regardless of denorm support. 
371def int_amdgcn_fmad_ftz : 372 Intrinsic<[llvm_anyfloat_ty], 373 [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], 374 [IntrNoMem, IntrSpeculatable] 375>; 376 377// Fields should mirror atomicrmw 378class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty], 379 [llvm_anyptr_ty, 380 LLVMMatchType<0>, 381 llvm_i32_ty, // ordering 382 llvm_i32_ty, // scope 383 llvm_i1_ty], // isVolatile 384 [IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>], "", 385 [SDNPMemOperand] 386>; 387 388def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin; 389def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin; 390 391class AMDGPULDSF32Intrin<string clang_builtin> : 392 GCCBuiltin<clang_builtin>, 393 Intrinsic<[llvm_float_ty], 394 [LLVMQualPointerType<llvm_float_ty, 3>, 395 llvm_float_ty, 396 llvm_i32_ty, // ordering 397 llvm_i32_ty, // scope 398 llvm_i1_ty], // isVolatile 399 [IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>] 400>; 401 402// FIXME: The m0 argument should be moved after the normal arguments 403class AMDGPUDSOrderedIntrinsic : Intrinsic< 404 [llvm_i32_ty], 405 // M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that 406 // the bit packing can be optimized at the IR level. 
407 [LLVMQualPointerType<llvm_i32_ty, 2>, // IntToPtr(M0) 408 llvm_i32_ty, // value to add or swap 409 llvm_i32_ty, // ordering 410 llvm_i32_ty, // scope 411 llvm_i1_ty, // isVolatile 412 llvm_i32_ty, // ordered count index (OA index), also added to the address 413 // gfx10: bits 24-27 indicate the number of active threads/dwords 414 llvm_i1_ty, // wave release, usually set to 1 415 llvm_i1_ty], // wave done, set to 1 for the last ordered instruction 416 [NoCapture<0>, 417 ImmArg<2>, ImmArg<3>, ImmArg<4>, 418 ImmArg<5>, ImmArg<6>, ImmArg<7> 419 ] 420>; 421 422class AMDGPUDSAppendConsumedIntrinsic : Intrinsic< 423 [llvm_i32_ty], 424 [llvm_anyptr_ty, // LDS or GDS ptr 425 llvm_i1_ty], // isVolatile 426 [IntrConvergent, IntrArgMemOnly, NoCapture<0>, ImmArg<1>], 427 "", 428 [SDNPMemOperand] 429>; 430 431def int_amdgcn_ds_ordered_add : AMDGPUDSOrderedIntrinsic; 432def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic; 433 434// The pointer argument is assumed to be dynamically uniform if a VGPR. 435def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic; 436def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic; 437 438def int_amdgcn_ds_fadd : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_faddf">; 439def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fminf">; 440def int_amdgcn_ds_fmax : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmaxf">; 441 442} // TargetPrefix = "amdgcn" 443 444// New-style image intrinsics 445 446////////////////////////////////////////////////////////////////////////// 447// Dimension-aware image intrinsics framework 448////////////////////////////////////////////////////////////////////////// 449 450// Helper class to represent (type, name) combinations of arguments. The 451// argument names are explanatory and used as DAG operand names for codegen 452// pattern matching. 
453class AMDGPUArg<LLVMType ty, string name> { 454 LLVMType Type = ty; 455 string Name = name; 456} 457 458// Return [AMDGPUArg<basety, names[0]>, AMDGPUArg<LLVMMatchType<0>, names[1]>, ...] 459class makeArgList<list<string> names, LLVMType basety> { 460 list<AMDGPUArg> ret = 461 !listconcat([AMDGPUArg<basety, names[0]>], 462 !foreach(name, !tail(names), AMDGPUArg<LLVMMatchType<0>, name>)); 463} 464 465// Return arglist, with LLVMMatchType's references shifted by 'shift'. 466class arglistmatchshift<list<AMDGPUArg> arglist, int shift> { 467 list<AMDGPUArg> ret = 468 !foreach(arg, arglist, 469 !if(!isa<LLVMMatchType>(arg.Type), 470 AMDGPUArg<LLVMMatchType<!add(!cast<LLVMMatchType>(arg.Type).Number, shift)>, 471 arg.Name>, 472 arg)); 473} 474 475// Return the concatenation of the given arglists. LLVMMatchType's are adjusted 476// accordingly, and shifted by an additional 'shift'. 477class arglistconcat<list<list<AMDGPUArg>> arglists, int shift = 0> { 478 list<AMDGPUArg> ret = 479 !foldl([]<AMDGPUArg>, arglists, lhs, rhs, 480 !listconcat( 481 lhs, 482 arglistmatchshift<rhs, 483 !add(shift, !foldl(0, lhs, a, b, 484 !add(a, b.Type.isAny)))>.ret)); 485} 486 487// Represent texture/image types / dimensionality. 488class AMDGPUDimProps<bits<3> enc, string name, string asmsuffix, 489 list<string> coord_names, list<string> slice_names> { 490 AMDGPUDimProps Dim = !cast<AMDGPUDimProps>(NAME); 491 string Name = name; // e.g. "2darraymsaa" 492 string AsmSuffix = asmsuffix; // e.g. 
2D_MSAA_ARRAY (used in assembly strings) 493 bits<3> Encoding = enc; 494 bit DA = 0; // DA bit in MIMG encoding 495 496 list<AMDGPUArg> CoordSliceArgs = 497 makeArgList<!listconcat(coord_names, slice_names), llvm_anyfloat_ty>.ret; 498 list<AMDGPUArg> CoordSliceIntArgs = 499 makeArgList<!listconcat(coord_names, slice_names), llvm_anyint_ty>.ret; 500 list<AMDGPUArg> GradientArgs = 501 makeArgList<!listconcat(!foreach(name, coord_names, "d" # name # "dh"), 502 !foreach(name, coord_names, "d" # name # "dv")), 503 llvm_anyfloat_ty>.ret; 504 505 bits<8> NumCoords = !size(CoordSliceArgs); 506 bits<8> NumGradients = !size(GradientArgs); 507} 508 509def AMDGPUDim1D : AMDGPUDimProps<0x0, "1d", "1D", ["s"], []>; 510def AMDGPUDim2D : AMDGPUDimProps<0x1, "2d", "2D", ["s", "t"], []>; 511def AMDGPUDim3D : AMDGPUDimProps<0x2, "3d", "3D", ["s", "t", "r"], []>; 512let DA = 1 in { 513 def AMDGPUDimCube : AMDGPUDimProps<0x3, "cube", "CUBE", ["s", "t"], ["face"]>; 514 def AMDGPUDim1DArray : AMDGPUDimProps<0x4, "1darray", "1D_ARRAY", ["s"], ["slice"]>; 515 def AMDGPUDim2DArray : AMDGPUDimProps<0x5, "2darray", "2D_ARRAY", ["s", "t"], ["slice"]>; 516} 517def AMDGPUDim2DMsaa : AMDGPUDimProps<0x6, "2dmsaa", "2D_MSAA", ["s", "t"], ["fragid"]>; 518let DA = 1 in { 519 def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<0x7, "2darraymsaa", "2D_MSAA_ARRAY", ["s", "t"], ["slice", "fragid"]>; 520} 521 522def AMDGPUDims { 523 list<AMDGPUDimProps> NoMsaa = [AMDGPUDim1D, AMDGPUDim2D, AMDGPUDim3D, 524 AMDGPUDimCube, AMDGPUDim1DArray, 525 AMDGPUDim2DArray]; 526 list<AMDGPUDimProps> Msaa = [AMDGPUDim2DMsaa, AMDGPUDim2DArrayMsaa]; 527 list<AMDGPUDimProps> All = !listconcat(NoMsaa, Msaa); 528} 529 530// Represent sample variants, i.e. _C, _O, _B, ... and combinations thereof. 
531class AMDGPUSampleVariant<string ucmod, string lcmod, list<AMDGPUArg> extra_addr> { 532 string UpperCaseMod = ucmod; 533 string LowerCaseMod = lcmod; 534 535 // {offset} {bias} {z-compare} 536 list<AMDGPUArg> ExtraAddrArgs = extra_addr; 537 bit Gradients = 0; 538 539 // Name of the {lod} or {clamp} argument that is appended to the coordinates, 540 // if any. 541 string LodOrClamp = ""; 542} 543 544// AMDGPUSampleVariants: all variants supported by IMAGE_SAMPLE 545// AMDGPUSampleVariantsNoGradients: variants supported by IMAGE_GATHER4 546defset list<AMDGPUSampleVariant> AMDGPUSampleVariants = { 547 multiclass AMDGPUSampleHelper_Offset<string ucmod, string lcmod, 548 list<AMDGPUArg> extra_addr> { 549 def NAME#lcmod : AMDGPUSampleVariant<ucmod, lcmod, extra_addr>; 550 def NAME#lcmod#_o : AMDGPUSampleVariant< 551 ucmod#"_O", lcmod#"_o", !listconcat([AMDGPUArg<llvm_i32_ty, "offset">], extra_addr)>; 552 } 553 554 multiclass AMDGPUSampleHelper_Compare<string ucmod, string lcmod, 555 list<AMDGPUArg> extra_addr> { 556 defm NAME : AMDGPUSampleHelper_Offset<ucmod, lcmod, extra_addr>; 557 defm NAME : AMDGPUSampleHelper_Offset< 558 "_C"#ucmod, "_c"#lcmod, !listconcat(extra_addr, [AMDGPUArg<llvm_float_ty, "zcompare">])>; 559 } 560 561 multiclass AMDGPUSampleHelper_Clamp<string ucmod, string lcmod, 562 list<AMDGPUArg> extra_addr> { 563 defm NAME : AMDGPUSampleHelper_Compare<ucmod, lcmod, extra_addr>; 564 let LodOrClamp = "clamp" in 565 defm NAME : AMDGPUSampleHelper_Compare<ucmod#"_CL", lcmod#"_cl", extra_addr>; 566 } 567 568 defset list<AMDGPUSampleVariant> AMDGPUSampleVariantsNoGradients = { 569 defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"", "", []>; 570 defm AMDGPUSample : AMDGPUSampleHelper_Clamp< 571 "_B", "_b", [AMDGPUArg<llvm_anyfloat_ty, "bias">]>; 572 let LodOrClamp = "lod" in 573 defm AMDGPUSample : AMDGPUSampleHelper_Compare<"_L", "_l", []>; 574 defm AMDGPUSample : AMDGPUSampleHelper_Compare<"_LZ", "_lz", []>; 575 } 576 577 let Gradients = 1 in { 578 defm 
AMDGPUSample : AMDGPUSampleHelper_Clamp<"_D", "_d", []>; 579 defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_CD", "_cd", []>; 580 } 581} 582 583// Helper class to capture the profile of a dimension-aware image intrinsic. 584// This information is used to generate the intrinsic's type and to inform 585// codegen pattern matching. 586class AMDGPUDimProfile<string opmod, 587 AMDGPUDimProps dim> { 588 AMDGPUDimProps Dim = dim; 589 string OpMod = opmod; // the corresponding instruction is named IMAGE_OpMod 590 591 // These are entended to be overwritten by subclasses 592 bit IsSample = 0; 593 bit IsAtomic = 0; 594 list<LLVMType> RetTypes = []; 595 list<AMDGPUArg> DataArgs = []; 596 list<AMDGPUArg> ExtraAddrArgs = []; 597 bit Gradients = 0; 598 string LodClampMip = ""; 599 600 int NumRetAndDataAnyTypes = 601 !foldl(0, !listconcat(RetTypes, !foreach(arg, DataArgs, arg.Type)), a, b, 602 !add(a, b.isAny)); 603 604 list<AMDGPUArg> AddrArgs = 605 arglistconcat<[ExtraAddrArgs, 606 !if(Gradients, dim.GradientArgs, []), 607 !listconcat(!if(IsSample, dim.CoordSliceArgs, dim.CoordSliceIntArgs), 608 !if(!eq(LodClampMip, ""), 609 []<AMDGPUArg>, 610 [AMDGPUArg<LLVMMatchType<0>, LodClampMip>]))], 611 NumRetAndDataAnyTypes>.ret; 612 list<LLVMType> AddrTypes = !foreach(arg, AddrArgs, arg.Type); 613 list<AMDGPUArg> AddrDefaultArgs = 614 !foreach(arg, AddrArgs, 615 AMDGPUArg<!if(!or(arg.Type.isAny, !isa<LLVMMatchType>(arg.Type)), 616 !if(IsSample, llvm_float_ty, llvm_i32_ty), arg.Type), 617 arg.Name>); 618 list<AMDGPUArg> AddrA16Args = 619 !foreach(arg, AddrArgs, 620 AMDGPUArg<!if(!or(arg.Type.isAny, !isa<LLVMMatchType>(arg.Type)), 621 !if(IsSample, llvm_half_ty, llvm_i16_ty), arg.Type), 622 arg.Name>); 623} 624 625class AMDGPUDimProfileCopy<AMDGPUDimProfile base> : AMDGPUDimProfile<base.OpMod, base.Dim> { 626 let IsSample = base.IsSample; 627 let IsAtomic = base.IsAtomic; 628 let RetTypes = base.RetTypes; 629 let DataArgs = base.DataArgs; 630 let ExtraAddrArgs = base.ExtraAddrArgs; 631 
let Gradients = base.Gradients; 632 let LodClampMip = base.LodClampMip; 633} 634 635class AMDGPUDimSampleProfile<string opmod, 636 AMDGPUDimProps dim, 637 AMDGPUSampleVariant sample> : AMDGPUDimProfile<opmod, dim> { 638 let IsSample = 1; 639 let RetTypes = [llvm_any_ty]; 640 let ExtraAddrArgs = sample.ExtraAddrArgs; 641 let Gradients = sample.Gradients; 642 let LodClampMip = sample.LodOrClamp; 643} 644 645class AMDGPUDimNoSampleProfile<string opmod, 646 AMDGPUDimProps dim, 647 list<LLVMType> retty, 648 list<AMDGPUArg> dataargs, 649 bit Mip = 0> : AMDGPUDimProfile<opmod, dim> { 650 let RetTypes = retty; 651 let DataArgs = dataargs; 652 let LodClampMip = !if(Mip, "mip", ""); 653} 654 655class AMDGPUDimAtomicProfile<string opmod, 656 AMDGPUDimProps dim, 657 list<AMDGPUArg> dataargs> : AMDGPUDimProfile<opmod, dim> { 658 let RetTypes = [llvm_anyint_ty]; 659 let DataArgs = dataargs; 660 let IsAtomic = 1; 661} 662 663class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RESINFO", dim> { 664 let RetTypes = [llvm_anyfloat_ty]; 665 let DataArgs = []; 666 let AddrArgs = [AMDGPUArg<llvm_anyint_ty, "mip">]; 667 let LodClampMip = "mip"; 668} 669 670// Helper class for figuring out image intrinsic argument indexes. 671class AMDGPUImageDimIntrinsicEval<AMDGPUDimProfile P_> { 672 int NumDataArgs = !size(P_.DataArgs); 673 int NumDmaskArgs = !if(P_.IsAtomic, 0, 1); 674 int NumVAddrArgs = !size(P_.AddrArgs); 675 int NumRSrcArgs = 1; 676 int NumSampArgs = !if(P_.IsSample, 2, 0); 677 int DmaskArgIndex = NumDataArgs; 678 int UnormArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, 1); 679 int TexFailCtrlArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, NumSampArgs); 680 int CachePolicyArgIndex = !add(TexFailCtrlArgIndex, 1); 681} 682 683// All dimension-aware intrinsics are derived from this class. 
684class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_, 685 list<IntrinsicProperty> props, 686 list<SDNodeProperty> sdnodeprops> : Intrinsic< 687 P_.RetTypes, // vdata(VGPR) -- for load/atomic-with-return 688 !listconcat( 689 !foreach(arg, P_.DataArgs, arg.Type), // vdata(VGPR) -- for store/atomic 690 !if(P_.IsAtomic, [], [llvm_i32_ty]), // dmask(imm) 691 P_.AddrTypes, // vaddr(VGPR) 692 [llvm_v8i32_ty], // rsrc(SGPR) 693 !if(P_.IsSample, [llvm_v4i32_ty, // samp(SGPR) 694 llvm_i1_ty], []), // unorm(imm) 695 [llvm_i32_ty, // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe) 696 llvm_i32_ty]), // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc) 697 !listconcat(props, 698 !if(P_.IsAtomic, [], [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>]), 699 !if(P_.IsSample, [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.UnormArgIndex>], []), 700 [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.TexFailCtrlArgIndex>, 701 ImmArg<AMDGPUImageDimIntrinsicEval<P_>.CachePolicyArgIndex>]), 702 "", sdnodeprops>, 703 AMDGPURsrcIntrinsic<!add(!size(P_.DataArgs), !size(P_.AddrTypes), 704 !if(P_.IsAtomic, 0, 1)), 1> { 705 AMDGPUDimProfile P = P_; 706 707 AMDGPUImageDimIntrinsic Intr = !cast<AMDGPUImageDimIntrinsic>(NAME); 708 709 let TargetPrefix = "amdgcn"; 710} 711 712// Marker class for intrinsics with a DMask that determines the returned 713// channels. 
class AMDGPUImageDMaskIntrinsic;

defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {

  //////////////////////////////////////////////////////////////////////////
  // Load and store intrinsics
  //////////////////////////////////////////////////////////////////////////
  multiclass AMDGPUImageDimIntrinsicsNoMsaa<string opmod,
                                            list<LLVMType> retty,
                                            list<AMDGPUArg> dataargs,
                                            list<IntrinsicProperty> props,
                                            list<SDNodeProperty> sdnodeprops,
                                            bit Mip = 0> {
    foreach dim = AMDGPUDims.NoMsaa in {
      def !strconcat(NAME, "_", dim.Name)
        : AMDGPUImageDimIntrinsic<
            AMDGPUDimNoSampleProfile<opmod, dim, retty, dataargs, Mip>,
            props, sdnodeprops>;
    }
  }

  multiclass AMDGPUImageDimIntrinsicsAll<string opmod,
                                         list<LLVMType> retty,
                                         list<AMDGPUArg> dataargs,
                                         list<IntrinsicProperty> props,
                                         list<SDNodeProperty> sdnodeprops,
                                         bit Mip = 0> {
    foreach dim = AMDGPUDims.All in {
      def !strconcat(NAME, "_", dim.Name)
        : AMDGPUImageDimIntrinsic<
            AMDGPUDimNoSampleProfile<opmod, dim, retty, dataargs, Mip>,
            props, sdnodeprops>;
    }
  }

  defm int_amdgcn_image_load
    : AMDGPUImageDimIntrinsicsAll<"LOAD", [llvm_any_ty], [], [IntrReadMem],
                                  [SDNPMemOperand]>,
      AMDGPUImageDMaskIntrinsic;
  defm int_amdgcn_image_load_mip
    : AMDGPUImageDimIntrinsicsNoMsaa<"LOAD_MIP", [llvm_any_ty], [],
                                     [IntrReadMem], [SDNPMemOperand], 1>,
      AMDGPUImageDMaskIntrinsic;

  defm int_amdgcn_image_store : AMDGPUImageDimIntrinsicsAll<
      "STORE", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
      [IntrWriteMem], [SDNPMemOperand]>;
  defm int_amdgcn_image_store_mip : AMDGPUImageDimIntrinsicsNoMsaa<
      "STORE_MIP", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
      [IntrWriteMem], [SDNPMemOperand], 1>;

  //////////////////////////////////////////////////////////////////////////
  // sample and getlod intrinsics
  //////////////////////////////////////////////////////////////////////////
  multiclass AMDGPUImageDimSampleDims<string opmod,
                                      AMDGPUSampleVariant sample,
                                      bit NoMem = 0> {
    foreach dim = AMDGPUDims.NoMsaa in {
      def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic<
          AMDGPUDimSampleProfile<opmod, dim, sample>,
          !if(NoMem, [IntrNoMem], [IntrReadMem]),
          !if(NoMem, [], [SDNPMemOperand])>;
    }
  }

  foreach sample = AMDGPUSampleVariants in {
    defm int_amdgcn_image_sample # sample.LowerCaseMod
      : AMDGPUImageDimSampleDims<"SAMPLE" # sample.UpperCaseMod, sample>,
        AMDGPUImageDMaskIntrinsic;
  }

  defm int_amdgcn_image_getlod
    : AMDGPUImageDimSampleDims<"GET_LOD", AMDGPUSample, 1>,
      AMDGPUImageDMaskIntrinsic;

  //////////////////////////////////////////////////////////////////////////
  // getresinfo intrinsics
  //////////////////////////////////////////////////////////////////////////
  foreach dim = AMDGPUDims.All in {
    def !strconcat("int_amdgcn_image_getresinfo_", dim.Name)
      : AMDGPUImageDimIntrinsic<AMDGPUDimGetResInfoProfile<dim>, [IntrNoMem], []>,
        AMDGPUImageDMaskIntrinsic;
  }

  //////////////////////////////////////////////////////////////////////////
  // gather4 intrinsics
  //////////////////////////////////////////////////////////////////////////
  foreach sample = AMDGPUSampleVariantsNoGradients in {
    foreach dim = [AMDGPUDim2D, AMDGPUDimCube, AMDGPUDim2DArray] in {
      def int_amdgcn_image_gather4 # sample.LowerCaseMod # _ # dim.Name:
          AMDGPUImageDimIntrinsic<
              AMDGPUDimSampleProfile<"GATHER4" # sample.UpperCaseMod, dim, sample>,
              [IntrReadMem], [SDNPMemOperand]>;
    }
  }
}

//////////////////////////////////////////////////////////////////////////
// atomic intrinsics
//////////////////////////////////////////////////////////////////////////
defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
  multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs> {
    foreach dim = AMDGPUDims.All in {
      def !strconcat(NAME, "_", dim.Name)
        : AMDGPUImageDimIntrinsic<
            AMDGPUDimAtomicProfile<opmod, dim, dataargs>,
            [], [SDNPMemOperand]>;
    }
  }

  multiclass AMDGPUImageDimAtomic<string opmod> {
    defm "" : AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">]>;
  }

  defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAtomic<"ATOMIC_SWAP">;
  defm int_amdgcn_image_atomic_add : AMDGPUImageDimAtomic<"ATOMIC_ADD">;
  defm int_amdgcn_image_atomic_sub : AMDGPUImageDimAtomic<"ATOMIC_SUB">;
  defm int_amdgcn_image_atomic_smin : AMDGPUImageDimAtomic<"ATOMIC_SMIN">;
  defm int_amdgcn_image_atomic_umin : AMDGPUImageDimAtomic<"ATOMIC_UMIN">;
  defm int_amdgcn_image_atomic_smax : AMDGPUImageDimAtomic<"ATOMIC_SMAX">;
  defm int_amdgcn_image_atomic_umax : AMDGPUImageDimAtomic<"ATOMIC_UMAX">;
  defm int_amdgcn_image_atomic_and : AMDGPUImageDimAtomic<"ATOMIC_AND">;
  defm int_amdgcn_image_atomic_or : AMDGPUImageDimAtomic<"ATOMIC_OR">;
  defm int_amdgcn_image_atomic_xor : AMDGPUImageDimAtomic<"ATOMIC_XOR">;

  // TODO: INC/DEC are weird: they seem to have a vdata argument in hardware,
  // even though it clearly shouldn't be needed
  defm int_amdgcn_image_atomic_inc : AMDGPUImageDimAtomic<"ATOMIC_INC">;
  defm int_amdgcn_image_atomic_dec : AMDGPUImageDimAtomic<"ATOMIC_DEC">;

  defm int_amdgcn_image_atomic_cmpswap :
      AMDGPUImageDimAtomicX<"ATOMIC_CMPSWAP", [AMDGPUArg<LLVMMatchType<0>, "src">,
                                               AMDGPUArg<LLVMMatchType<0>, "cmp">]>;
}

//////////////////////////////////////////////////////////////////////////
// Buffer intrinsics
//////////////////////////////////////////////////////////////////////////

let TargetPrefix = "amdgcn" in {

defset list<AMDGPURsrcIntrinsic> AMDGPUBufferIntrinsics = {

class AMDGPUBufferLoad : Intrinsic <
  [llvm_any_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
def int_amdgcn_buffer_load : AMDGPUBufferLoad;

def int_amdgcn_s_buffer_load : Intrinsic <
  [llvm_any_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // byte offset(SGPR/imm)
   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 2 = dlc)
  [IntrNoMem, ImmArg<2>]>,
  AMDGPURsrcIntrinsic<0>;

class AMDGPUBufferStore : Intrinsic <
  [],
  [llvm_any_ty,       // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;
def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
def int_amdgcn_buffer_store : AMDGPUBufferStore;

// New buffer intrinsics with separate raw and struct variants. The raw
// variant never has an index. The struct variant always has an index, even if
// it is const 0. A struct intrinsic with constant 0 index is different to the
// corresponding raw intrinsic on gfx9+ because the behavior of bound checking
// and swizzling changes depending on whether idxen is set in the instruction.
// These new instrinsics also keep the offset and soffset arguments separate as
// they behave differently in bounds checking and swizzling.
class AMDGPURawBufferLoad : Intrinsic <
  [llvm_any_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
  [IntrReadMem, ImmArg<3>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad;
def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad;

class AMDGPUStructBufferLoad : Intrinsic <
  [llvm_any_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
  [IntrReadMem, ImmArg<4>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad;
def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;

class AMDGPURawBufferStore : Intrinsic <
  [],
  [llvm_any_ty,       // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
  [IntrWriteMem, ImmArg<4>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;
def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore;
def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore;

class AMDGPUStructBufferStore : Intrinsic <
  [],
  [llvm_any_ty,       // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
  [IntrWriteMem, ImmArg<5>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;
def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore;
def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore;

class AMDGPURawBufferAtomic : Intrinsic <
  [llvm_anyint_ty],
  [LLVMMatchType<0>,  // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
  [ImmArg<4>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1, 0>;
def int_amdgcn_raw_buffer_atomic_swap : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_sub : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_smin : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_umin : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_smax : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_umax : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_and : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
  [llvm_anyint_ty],
  [LLVMMatchType<0>,  // src(VGPR)
   LLVMMatchType<0>,  // cmp(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
  [ImmArg<5>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<2, 0>;

class AMDGPUStructBufferAtomic : Intrinsic <
  [llvm_anyint_ty],
  [LLVMMatchType<0>,  // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
  [ImmArg<5>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1, 0>;
def int_amdgcn_struct_buffer_atomic_swap : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_add : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_sub : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_smin : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_umin : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_smax : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_umax : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_and : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_or : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_xor : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
  [llvm_anyint_ty],
  [LLVMMatchType<0>,  // src(VGPR)
   LLVMMatchType<0>,  // cmp(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
  [ImmArg<6>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<2, 0>;

// Obsolescent tbuffer intrinsics.
def int_amdgcn_tbuffer_load : Intrinsic <
    [llvm_any_ty],    // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
    [llvm_v4i32_ty,   // rsrc(SGPR)
     llvm_i32_ty,     // vindex(VGPR)
     llvm_i32_ty,     // voffset(VGPR)
     llvm_i32_ty,     // soffset(SGPR)
     llvm_i32_ty,     // offset(imm)
     llvm_i32_ty,     // dfmt(imm)
     llvm_i32_ty,     // nfmt(imm)
     llvm_i1_ty,      // glc(imm)
     llvm_i1_ty],     // slc(imm)
    [IntrReadMem, ImmArg<4>, ImmArg<5>, ImmArg<6>,
     ImmArg<7>, ImmArg<8>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;

def int_amdgcn_tbuffer_store : Intrinsic <
    [],
    [llvm_any_ty,    // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
     llvm_v4i32_ty,  // rsrc(SGPR)
     llvm_i32_ty,    // vindex(VGPR)
     llvm_i32_ty,    // voffset(VGPR)
     llvm_i32_ty,    // soffset(SGPR)
     llvm_i32_ty,    // offset(imm)
     llvm_i32_ty,    // dfmt(imm)
     llvm_i32_ty,    // nfmt(imm)
     llvm_i1_ty,     // glc(imm)
     llvm_i1_ty],    // slc(imm)
    [IntrWriteMem, ImmArg<5>, ImmArg<6>, ImmArg<7>,
     ImmArg<8>, ImmArg<9>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;

// New tbuffer intrinsics, with:
// - raw and struct variants
// - joint format field
// - joint cachepolicy field
def int_amdgcn_raw_tbuffer_load : Intrinsic <
    [llvm_any_ty],    // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
    [llvm_v4i32_ty,   // rsrc(SGPR)
     llvm_i32_ty,     // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,     // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],    // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
    [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;

def int_amdgcn_raw_tbuffer_store : Intrinsic <
    [],
    [llvm_any_ty,    // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
     llvm_v4i32_ty,  // rsrc(SGPR)
     llvm_i32_ty,    // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,    // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,    // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],   // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
    [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;

def int_amdgcn_struct_tbuffer_load : Intrinsic <
    [llvm_any_ty],    // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
    [llvm_v4i32_ty,   // rsrc(SGPR)
     llvm_i32_ty,     // vindex(VGPR)
     llvm_i32_ty,     // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,     // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],    // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
    [IntrReadMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;

def int_amdgcn_struct_tbuffer_store : Intrinsic <
    [],
    [llvm_any_ty,    // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
     llvm_v4i32_ty,  // rsrc(SGPR)
     llvm_i32_ty,    // vindex(VGPR)
     llvm_i32_ty,    // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,    // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,    // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],   // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
    [IntrWriteMem, ImmArg<5>, ImmArg<6>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;

class AMDGPUBufferAtomic : Intrinsic <
  [llvm_anyint_ty],
  [LLVMMatchType<0>,  // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty],       // slc(imm)
  [ImmArg<4>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1, 0>;
def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_and : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_or : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_xor : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
  [llvm_i32_ty],
  [llvm_i32_ty,       // src(VGPR)
   llvm_i32_ty,       // cmp(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty],       // slc(imm)
  [ImmArg<5>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<2, 0>;

} // defset AMDGPUBufferIntrinsics

// Uses that do not set the done bit should set IntrWriteMem on the
// call site.
def int_amdgcn_exp : Intrinsic <[], [
  llvm_i32_ty,       // tgt,
  llvm_i32_ty,       // en
  llvm_any_ty,       // src0 (f32 or i32)
  LLVMMatchType<0>,  // src1
  LLVMMatchType<0>,  // src2
  LLVMMatchType<0>,  // src3
  llvm_i1_ty,        // done
  llvm_i1_ty         // vm
  ],
  [ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>, IntrInaccessibleMemOnly]
>;

// exp with compr bit set.
def int_amdgcn_exp_compr : Intrinsic <[], [
  llvm_i32_ty,       // tgt,
  llvm_i32_ty,       // en
  llvm_anyvector_ty, // src0 (v2f16 or v2i16)
  LLVMMatchType<0>,  // src1
  llvm_i1_ty,        // done
  llvm_i1_ty],       // vm
  [ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>, IntrInaccessibleMemOnly]
>;

def int_amdgcn_buffer_wbinvl1_sc :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
  Intrinsic<[], [], []>;

def int_amdgcn_buffer_wbinvl1 :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">,
  Intrinsic<[], [], []>;

def int_amdgcn_s_dcache_inv :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">,
  Intrinsic<[], [], []>;

def int_amdgcn_s_memtime :
  GCCBuiltin<"__builtin_amdgcn_s_memtime">,
  Intrinsic<[llvm_i64_ty], []>;

// NOTE: the redundant empty record bodies ("{ }") previously attached to the
// next three defs were dropped; they added no fields or lets and were
// inconsistent with every neighboring definition.
def int_amdgcn_s_sleep :
  GCCBuiltin<"__builtin_amdgcn_s_sleep">,
  Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>;

def int_amdgcn_s_incperflevel :
  GCCBuiltin<"__builtin_amdgcn_s_incperflevel">,
  Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>;

def int_amdgcn_s_decperflevel :
  GCCBuiltin<"__builtin_amdgcn_s_decperflevel">,
  Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>;

def int_amdgcn_s_getreg :
  GCCBuiltin<"__builtin_amdgcn_s_getreg">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
  [IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable, ImmArg<0>]
>;

// int_amdgcn_s_getpc is provided to allow a specific style of position
// independent code to determine the high part of its address when it is
// known (through convention) that the code and any data of interest does
// not cross a 4Gb address boundary. Use for any other purpose may not
// produce the desired results as optimizations may cause code movement,
// especially as we explicitly use IntrNoMem to allow optimizations.
def int_amdgcn_s_getpc :
  GCCBuiltin<"__builtin_amdgcn_s_getpc">,
  Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>;

// __builtin_amdgcn_interp_mov <param>, <attr_chan>, <attr>, <m0>
// param values: 0 = P10, 1 = P20, 2 = P0
def int_amdgcn_interp_mov :
  GCCBuiltin<"__builtin_amdgcn_interp_mov">,
  Intrinsic<[llvm_float_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>]>;

// __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
// This intrinsic reads from lds, but the memory values are constant,
// so it behaves like IntrNoMem.
def int_amdgcn_interp_p1 :
  GCCBuiltin<"__builtin_amdgcn_interp_p1">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>]>;

// __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
def int_amdgcn_interp_p2 :
  GCCBuiltin<"__builtin_amdgcn_interp_p2">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, ImmArg<2>, ImmArg<3>]>;
  // See int_amdgcn_v_interp_p1 for why this is IntrNoMem.
// __builtin_amdgcn_interp_p1_f16 <i>, <attr_chan>, <attr>, <high>, <m0>
def int_amdgcn_interp_p1_f16 :
  GCCBuiltin<"__builtin_amdgcn_interp_p1_f16">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>, ImmArg<3>]>;

// __builtin_amdgcn_interp_p2_f16 <p1>, <j>, <attr_chan>, <attr>, <high>, <m0>
def int_amdgcn_interp_p2_f16 :
  GCCBuiltin<"__builtin_amdgcn_interp_p2_f16">,
  Intrinsic<[llvm_half_ty],
            [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, ImmArg<2>, ImmArg<3>, ImmArg<4>]>;

// Pixel shaders only: whether the current pixel is live (i.e. not a helper
// invocation for derivative computation).
def int_amdgcn_ps_live : Intrinsic <
  [llvm_i1_ty],
  [],
  [IntrNoMem]>;

def int_amdgcn_mbcnt_lo :
  GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

def int_amdgcn_mbcnt_hi :
  GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

// llvm.amdgcn.ds.swizzle src offset
def int_amdgcn_ds_swizzle :
  GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, ImmArg<1>]>;

def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty],
    [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
    [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty],
    [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
    [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_lerp :
  GCCBuiltin<"__builtin_amdgcn_lerp">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_sad_u8 :
  GCCBuiltin<"__builtin_amdgcn_sad_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_msad_u8 :
  GCCBuiltin<"__builtin_amdgcn_msad_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_sad_hi_u8 :
  GCCBuiltin<"__builtin_amdgcn_sad_hi_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_sad_u16 :
  GCCBuiltin<"__builtin_amdgcn_sad_u16">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_qsad_pk_u16_u8 :
  GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">,
  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_mqsad_pk_u16_u8 :
  GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">,
  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_mqsad_u32_u8 :
  GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
  Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pk_u8_f32 :
  GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
  Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_icmp :
  Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty, LLVMMatchType<1>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, ImmArg<2>]>;

def int_amdgcn_fcmp :
  Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, ImmArg<2>]>;

def int_amdgcn_readfirstlane :
  GCCBuiltin<"__builtin_amdgcn_readfirstlane">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

// The lane argument must be uniform across the currently active threads of the
// current wave. Otherwise, the result is undefined.
def int_amdgcn_readlane :
  GCCBuiltin<"__builtin_amdgcn_readlane">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

// The value to write and lane select arguments must be uniform across the
// currently active threads of the current wave. Otherwise, the result is
// undefined.
def int_amdgcn_writelane :
  GCCBuiltin<"__builtin_amdgcn_writelane">,
  Intrinsic<[llvm_i32_ty], [
    llvm_i32_ty,    // uniform value to write: returned by the selected lane
    llvm_i32_ty,    // uniform lane select
    llvm_i32_ty     // returned by all lanes other than the selected one
  ],
  [IntrNoMem, IntrConvergent]
>;

def int_amdgcn_alignbit :
  GCCBuiltin<"__builtin_amdgcn_alignbit">, Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_alignbyte : GCCBuiltin<"__builtin_amdgcn_alignbyte">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_mul_i24 : Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_mul_u24 : Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

// llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id)
//
// bar_val is the total number of waves that will wait on this
// barrier, minus 1.
def int_amdgcn_ds_gws_init :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_init">,
  Intrinsic<[],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrWriteMem, IntrInaccessibleMemOnly], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.barrier(i32 vsrc0, i32 resource_id)
// bar_val is the total number of waves that will wait on this
// barrier, minus 1.
def int_amdgcn_ds_gws_barrier :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_barrier">,
  Intrinsic<[],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.v(i32 resource_id)
def int_amdgcn_ds_gws_sema_v :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_v">,
  Intrinsic<[],
  [llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.br(i32 vsrc, i32 resource_id)
def int_amdgcn_ds_gws_sema_br :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_br">,
  Intrinsic<[],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.p(i32 resource_id)
def int_amdgcn_ds_gws_sema_p :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_p">,
  Intrinsic<[],
  [llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.release.all(i32 resource_id)
def int_amdgcn_ds_gws_sema_release_all :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_release_all">,
  Intrinsic<[],
  [llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly], "",
  [SDNPMemOperand]
>;


// Copies the source value to the destination value, with the guarantee that
// the source value is computed as if the entire program were executed in WQM.
def int_amdgcn_wqm : Intrinsic<[llvm_any_ty],
  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

// Return true if at least one thread within the pixel quad passes true into
// the function.
def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty],
  [llvm_i1_ty], [IntrNoMem, IntrConvergent]
>;

// If false, set EXEC=0 for the current thread until the end of program.
def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>;

// Copies the active channels of the source value to the destination value,
// with the guarantee that the source value is computed as if the entire
// program were executed in Whole Wavefront Mode, i.e. with all channels
// enabled, with a few exceptions: - Phi nodes with require WWM return an
// undefined value.
def int_amdgcn_wwm : Intrinsic<[llvm_any_ty],
  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrConvergent]
>;

// Given a value, copies it while setting all the inactive lanes to a given
// value. Note that OpenGL helper lanes are considered active, so if the
// program ever uses WQM, then the instruction and the first source will be
// computed in WQM.
def int_amdgcn_set_inactive :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>,  // value to be copied
             LLVMMatchType<0>], // value for the inactive lanes to take
            [IntrNoMem, IntrConvergent]>;

//===----------------------------------------------------------------------===//
// CI+ Intrinsics
//===----------------------------------------------------------------------===//

def int_amdgcn_s_dcache_inv_vol :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,
  Intrinsic<[], [], []>;

def int_amdgcn_buffer_wbinvl1_vol :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">,
  Intrinsic<[], [], []>;

//===----------------------------------------------------------------------===//
// VI Intrinsics
//===----------------------------------------------------------------------===//

// llvm.amdgcn.mov.dpp.i32 <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
def int_amdgcn_mov_dpp :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i1_ty], [IntrNoMem, IntrConvergent, ImmArg<1>,
                           ImmArg<2>, ImmArg<3>, ImmArg<4>]>;

// llvm.amdgcn.update.dpp.i32 <old> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
// Should be equivalent to:
// v_mov_b32 <dest> <old>
// v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
def int_amdgcn_update_dpp :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i1_ty],
            [IntrNoMem, IntrConvergent,
             ImmArg<2>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

def int_amdgcn_s_dcache_wb :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
  Intrinsic<[], [], []>;

def int_amdgcn_s_dcache_wb_vol :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
  Intrinsic<[], [], []>;

// Spell the empty argument list explicitly, for consistency with
// int_amdgcn_s_memtime above (previously relied on the defaulted parameter).
def int_amdgcn_s_memrealtime :
  GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
  Intrinsic<[llvm_i64_ty], []>;

// llvm.amdgcn.ds.permute <index> <src>
def int_amdgcn_ds_permute :
  GCCBuiltin<"__builtin_amdgcn_ds_permute">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

// llvm.amdgcn.ds.bpermute <index> <src>
def int_amdgcn_ds_bpermute :
  GCCBuiltin<"__builtin_amdgcn_ds_bpermute">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

//===----------------------------------------------------------------------===//
// GFX10 Intrinsics
//===----------------------------------------------------------------------===//

// llvm.amdgcn.permlane16 <old> <src0> <src1> <src2> <fi> <bound_control>
def int_amdgcn_permlane16 : GCCBuiltin<"__builtin_amdgcn_permlane16">,
  Intrinsic<[llvm_i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
            [IntrNoMem, IntrConvergent, ImmArg<4>, ImmArg<5>]>;

// llvm.amdgcn.permlanex16 <old> <src0> <src1> <src2> <fi> <bound_control>
def int_amdgcn_permlanex16 : GCCBuiltin<"__builtin_amdgcn_permlanex16">,
  Intrinsic<[llvm_i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
            [IntrNoMem, IntrConvergent, ImmArg<4>, ImmArg<5>]>;

// llvm.amdgcn.mov.dpp8.i32 <src> <sel>
// <sel> is a 32-bit constant whose high 8 bits must be zero which selects
// the lanes to read from.
def int_amdgcn_mov_dpp8 :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, ImmArg<1>]>;

def int_amdgcn_s_get_waveid_in_workgroup :
  GCCBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">,
  Intrinsic<[llvm_i32_ty], [], [IntrReadMem, IntrInaccessibleMemOnly]>;

//===----------------------------------------------------------------------===//
// Deep learning intrinsics.
//===----------------------------------------------------------------------===//

// Fixed-length dot products with accumulator.  All take an i1 <clamp>
// immediate (ImmArg<3>) selecting saturating accumulation.

// f32 %r = llvm.amdgcn.fdot2(v2f16 %a, v2f16 %b, f32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_fdot2 :
  GCCBuiltin<"__builtin_amdgcn_fdot2">,
  Intrinsic<
    [llvm_float_ty], // %r
    [
      llvm_v2f16_ty, // %a
      llvm_v2f16_ty, // %b
      llvm_float_ty, // %c
      llvm_i1_ty     // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
  >;

// i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_sdot2 :
  GCCBuiltin<"__builtin_amdgcn_sdot2">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_v2i16_ty, // %a
      llvm_v2i16_ty, // %b
      llvm_i32_ty,   // %c
      llvm_i1_ty     // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
  >;

// u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_udot2 :
  GCCBuiltin<"__builtin_amdgcn_udot2">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_v2i16_ty, // %a
      llvm_v2i16_ty, // %b
      llvm_i32_ty,   // %c
      llvm_i1_ty     // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
  >;

// i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
// The v4i8 vectors are passed packed in an i32.
def int_amdgcn_sdot4 :
  GCCBuiltin<"__builtin_amdgcn_sdot4">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_i32_ty, // %a
      llvm_i32_ty, // %b
      llvm_i32_ty, // %c
      llvm_i1_ty   // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
  >;

// u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
def int_amdgcn_udot4 :
  GCCBuiltin<"__builtin_amdgcn_udot4">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_i32_ty, // %a
      llvm_i32_ty, // %b
      llvm_i32_ty, // %c
      llvm_i1_ty   // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
  >;

// i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
//      %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
// The v8i4 vectors are passed packed in an i32.
def int_amdgcn_sdot8 :
  GCCBuiltin<"__builtin_amdgcn_sdot8">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_i32_ty, // %a
      llvm_i32_ty, // %b
      llvm_i32_ty, // %c
      llvm_i1_ty   // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
  >;

// u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
//      %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
def int_amdgcn_udot8 :
  GCCBuiltin<"__builtin_amdgcn_udot8">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_i32_ty, // %a
      llvm_i32_ty, // %b
      llvm_i32_ty, // %c
      llvm_i1_ty   // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
  >;

//===----------------------------------------------------------------------===//
// gfx908 intrinsics
// ===----------------------------------------------------------------------===//

// Buffer atomic with no return value (e.g. buffer_atomic_add_f32).
// NOTE(review): the slc operand is documented as (imm) but is not marked
// ImmArg<4> here — confirm whether the backend requires it to be constant.
class AMDGPUBufferAtomicNoRtn : Intrinsic <
  [],
  [llvm_anyfloat_ty, // vdata(VGPR)
   llvm_v4i32_ty,    // rsrc(SGPR)
   llvm_i32_ty,      // vindex(VGPR)
   llvm_i32_ty,      // offset(SGPR/VGPR/imm)
   llvm_i1_ty],      // slc(imm)
  [], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1, 0>;

// Global-memory atomic with no return value; only touches the pointed-to
// address (IntrArgMemOnly) and does not capture the pointer.
class AMDGPUGlobalAtomicNoRtn : Intrinsic <
  [],
  [llvm_anyptr_ty,    // vaddr
   llvm_anyfloat_ty], // vdata(VGPR)
  [IntrArgMemOnly, NoCapture<0>], "", [SDNPMemOperand]>;
// No-return floating-point add atomics (gfx908).
def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicNoRtn;
def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicNoRtn;

// llvm.amdgcn.mfma.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
//
// Every MFMA (matrix fused-multiply-accumulate) intrinsic shares the same
// shape: multiply srcA by srcB, accumulate into srcC, and take three
// immediate modifiers (cbsz, abid, blgp).  Factor that shape into a single
// class so each def below states only the destination/accumulator vector
// type and the srcA/srcB element type; the generated intrinsic records are
// identical to writing each Intrinsic<> out longhand.
class AMDGPUMfmaIntrinsic<LLVMType DestTy, LLVMType SrcABTy> :
  Intrinsic<[DestTy],
            [SrcABTy,      // srcA
             SrcABTy,      // srcB
             DestTy,       // srcC (accumulator)
             llvm_i32_ty,  // cbsz(imm)
             llvm_i32_ty,  // abid(imm)
             llvm_i32_ty], // blgp(imm)
            [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;

// f32 accumulation of f32 sources.
def int_amdgcn_mfma_f32_32x32x1f32  : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_float_ty>;
def int_amdgcn_mfma_f32_16x16x1f32  : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_float_ty>;
def int_amdgcn_mfma_f32_4x4x1f32    : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_float_ty>;
def int_amdgcn_mfma_f32_32x32x2f32  : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_float_ty>;
def int_amdgcn_mfma_f32_16x16x4f32  : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_float_ty>;

// f32 accumulation of packed-f16 sources.
def int_amdgcn_mfma_f32_32x32x4f16  : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_v4f16_ty>;
def int_amdgcn_mfma_f32_16x16x4f16  : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4f16_ty>;
def int_amdgcn_mfma_f32_4x4x4f16    : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_v4f16_ty>;
def int_amdgcn_mfma_f32_32x32x8f16  : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4f16_ty>;
def int_amdgcn_mfma_f32_16x16x16f16 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_v4f16_ty>;

// i32 accumulation of i8 sources (packed in an i32).
def int_amdgcn_mfma_i32_32x32x4i8   : AMDGPUMfmaIntrinsic<llvm_v32i32_ty, llvm_i32_ty>;
def int_amdgcn_mfma_i32_16x16x4i8   : AMDGPUMfmaIntrinsic<llvm_v16i32_ty, llvm_i32_ty>;
def int_amdgcn_mfma_i32_4x4x4i8     : AMDGPUMfmaIntrinsic<llvm_v4i32_ty,  llvm_i32_ty>;
def int_amdgcn_mfma_i32_32x32x8i8   : AMDGPUMfmaIntrinsic<llvm_v16i32_ty, llvm_i32_ty>;
def int_amdgcn_mfma_i32_16x16x16i8  : AMDGPUMfmaIntrinsic<llvm_v4i32_ty,  llvm_i32_ty>;

// f32 accumulation of bf16 sources (bf16 pairs are passed as v2i16).
def int_amdgcn_mfma_f32_32x32x2bf16 : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_v2i16_ty>;
def int_amdgcn_mfma_f32_16x16x2bf16 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v2i16_ty>;
def int_amdgcn_mfma_f32_4x4x2bf16   : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_v2i16_ty>;
def int_amdgcn_mfma_f32_32x32x4bf16 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v2i16_ty>;
def int_amdgcn_mfma_f32_16x16x8bf16 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_v2i16_ty>;

//===----------------------------------------------------------------------===//
// Special Intrinsics for backend internal use only. No frontend
// should emit calls to these.
// ===----------------------------------------------------------------------===//

// Structured control-flow markers inserted when lowering divergent branches;
// the anyint operand/result threads the saved exec mask between them.
def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
  [llvm_i1_ty], [IntrConvergent]
>;

def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
  [llvm_anyint_ty], [IntrConvergent]
>;

def int_amdgcn_if_break : Intrinsic<[llvm_anyint_ty],
  [llvm_i1_ty, llvm_anyint_ty], [IntrNoMem, IntrConvergent]
>;

def int_amdgcn_loop : Intrinsic<[llvm_i1_ty],
  [llvm_anyint_ty], [IntrConvergent]
>;

def int_amdgcn_end_cf : Intrinsic<[], [llvm_anyint_ty], [IntrConvergent]>;

// Represent unreachable in a divergent region.
def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>;

// Emit 2.5 ulp, no denormal division. Should only be inserted by
// pass based on !fpmath metadata.
def int_amdgcn_fdiv_fast : Intrinsic<
  [llvm_float_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable]
>;
}