1//===- IntrinsicsAMDGPU.td - Defines AMDGPU intrinsics -----*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This file defines all of the AMDGPU-specific intrinsics (both the legacy
// "r600" and the "amdgcn" target prefixes).
10//
11//===----------------------------------------------------------------------===//
12
// Intrinsic that reads a 32-bit value preloaded into a hardware register.
// Takes no operands; marked IntrNoMem/IntrSpeculatable since it has no side
// effects and always yields the same value within a function invocation.
class AMDGPUReadPreloadRegisterIntrinsic
  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

// Same as AMDGPUReadPreloadRegisterIntrinsic, but additionally exposed to the
// frontend as the GCC-style builtin with the given name.
class AMDGPUReadPreloadRegisterIntrinsicNamed<string name>
  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>, GCCBuiltin<name>;
18
// Used to tag image and resource intrinsics with information used to generate
// mem operands.
class AMDGPURsrcIntrinsic<int rsrcarg, bit isimage = 0> {
  int RsrcArg = rsrcarg; // Operand index of the resource-descriptor argument.
  bit IsImage = isimage; // Set for image (MIMG-style) intrinsics.
}
25
26let TargetPrefix = "r600" in {
27
// Defines the _x, _y and _z component variants of a preload-register
// intrinsic (e.g. int_..._read_tidig_x/_y/_z).
multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz {
  def _x : AMDGPUReadPreloadRegisterIntrinsic;
  def _y : AMDGPUReadPreloadRegisterIntrinsic;
  def _z : AMDGPUReadPreloadRegisterIntrinsic;
}

// As above, but each component is also exposed as a builtin named
// <prefix>_x, <prefix>_y and <prefix>_z respectively.
multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz_named<string prefix> {
  def _x : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_x")>;
  def _y : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_y")>;
  def _z : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_z")>;
}
39
// Per-component readers for preloaded dispatch state; the first three are
// also exposed as __builtin_r600_* builtins.
defm int_r600_read_global_size : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                                 <"__builtin_r600_read_global_size">;
defm int_r600_read_ngroups : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                             <"__builtin_r600_read_ngroups">;
defm int_r600_read_tgid : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                          <"__builtin_r600_read_tgid">;

// These two have no corresponding builtin name.
defm int_r600_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz;
49
// Work-group barrier; convergent because it must execute uniformly across
// the group.
def int_r600_group_barrier : GCCBuiltin<"__builtin_r600_group_barrier">,
  Intrinsic<[], [], [IntrConvergent, IntrWillReturn]>;

// AS 7 is PARAM_I_ADDRESS, used for kernel arguments
// Returns a pointer (in AS 7) to the implicit kernel arguments.
def int_r600_implicitarg_ptr :
  GCCBuiltin<"__builtin_r600_implicitarg_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
58
// Typed store through a RAT (Random Access Target) unit.
def int_r600_rat_store_typed :
  // 1st parameter: Data
  // 2nd parameter: Index
  // 3rd parameter: Constant RAT ID
  Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrWillReturn]>,
  GCCBuiltin<"__builtin_r600_rat_store_typed">;
65
// Reciprocal square root; result type matches the (any-float) operand type.
def int_r600_recipsqrt_ieee :  Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// As above; clamped variant.
def int_r600_recipsqrt_clamped : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Cube-map coordinate computation on a v4f32.
def int_r600_cube : Intrinsic<
  [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Stream-output store: v4f32 data plus three i32 control operands.
def int_r600_store_stream_output : Intrinsic<
  [], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrWillReturn]
>;
81
// Common profile for r600 texture intrinsics taking float coordinates.
// Returns a v4f32 texel; all other operands select/describe the access.
class TextureIntrinsicFloatInput : Intrinsic<[llvm_v4f32_ty], [
  llvm_v4f32_ty, // Coord
  llvm_i32_ty,   // offset_x
  llvm_i32_ty,   // offset_y
  llvm_i32_ty,   // offset_z
  llvm_i32_ty,   // resource_id
  llvm_i32_ty,   // samplerid
  llvm_i32_ty,   // coord_type_x
  llvm_i32_ty,   // coord_type_y
  llvm_i32_ty,   // coord_type_z
  llvm_i32_ty],  // coord_type_w
  [IntrNoMem, IntrWillReturn]
>;

// Same operand layout as TextureIntrinsicFloatInput, but with integer
// coordinates and an integer (v4i32) result.
class TextureIntrinsicInt32Input : Intrinsic<[llvm_v4i32_ty], [
    llvm_v4i32_ty, // Coord
    llvm_i32_ty,   // offset_x
    llvm_i32_ty,   // offset_y
    llvm_i32_ty,   // offset_z
    llvm_i32_ty,   // resource_id
    llvm_i32_ty,   // samplerid
    llvm_i32_ty,   // coord_type_x
    llvm_i32_ty,   // coord_type_y
    llvm_i32_ty,   // coord_type_z
    llvm_i32_ty],  // coord_type_w
    [IntrNoMem, IntrWillReturn]
>;
109
// Store with swizzle: v4f32 data plus two i32 control operands.
def int_r600_store_swizzle :
  Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrWillReturn]
>;

// Texture sample/fetch variants; see TextureIntrinsicFloatInput /
// TextureIntrinsicInt32Input for the shared operand layout. The suffixes
// presumably follow the usual texture naming (c = compare, l = lod,
// b = bias, f = fetch, q = query, ddx/ddy = derivatives) — the operand
// types are what distinguish them here.
def int_r600_tex : TextureIntrinsicFloatInput;
def int_r600_texc : TextureIntrinsicFloatInput;
def int_r600_txl : TextureIntrinsicFloatInput;
def int_r600_txlc : TextureIntrinsicFloatInput;
def int_r600_txb : TextureIntrinsicFloatInput;
def int_r600_txbc : TextureIntrinsicFloatInput;
def int_r600_txf : TextureIntrinsicInt32Input;
def int_r600_txq : TextureIntrinsicInt32Input;
def int_r600_ddx : TextureIntrinsicFloatInput;
def int_r600_ddy : TextureIntrinsicFloatInput;

// Four-component dot product of two v4f32, yielding a scalar float.
def int_r600_dot4 : Intrinsic<[llvm_float_ty],
  [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Pixel kill; takes a single float operand.
def int_r600_kill : Intrinsic<[], [llvm_float_ty], [IntrWillReturn]>;
130
131} // End TargetPrefix = "r600"
132
133let TargetPrefix = "amdgcn" in {
134
//===----------------------------------------------------------------------===//
// ABI Special Intrinsics
//===----------------------------------------------------------------------===//

// Work-item and work-group id readers (x/y/z components).
defm int_amdgcn_workitem_id : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                               <"__builtin_amdgcn_workgroup_id">;

// Pointer (AS 4) to the dispatch packet; known 4-byte aligned.
def int_amdgcn_dispatch_ptr :
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
  [Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

// Pointer (AS 4) to the queue; known 4-byte aligned.
def int_amdgcn_queue_ptr :
  GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
  [Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

// Pointer (AS 4) to the kernarg segment; known 4-byte aligned.
def int_amdgcn_kernarg_segment_ptr :
  GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
  [Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

// Pointer (AS 4) to the implicit kernel arguments; known 4-byte aligned.
def int_amdgcn_implicitarg_ptr :
  GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
  [Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

// Static LDS (group segment) size of the current kernel, in bytes —
// TODO confirm units; resolves to a compile-time-known i32.
def int_amdgcn_groupstaticsize :
  GCCBuiltin<"__builtin_amdgcn_groupstaticsize">,
  Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

// 64-bit dispatch id of the current kernel launch.
def int_amdgcn_dispatch_id :
  GCCBuiltin<"__builtin_amdgcn_dispatch_id">,
  Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

// Pointer (AS 4) to an implicit buffer descriptor; known 4-byte aligned.
def int_amdgcn_implicit_buffer_ptr :
  GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
  [Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
174
// Set EXEC to the 64-bit value given.
// This is always moved to the beginning of the basic block.
// FIXME: Should be mangled for wave size.
def int_amdgcn_init_exec : Intrinsic<[],
  [llvm_i64_ty],      // 64-bit literal constant
  [IntrConvergent, ImmArg<ArgIndex<0>>]>;

// Set EXEC according to a thread count packed in an SGPR input:
//    thread_count = (input >> bitoffset) & 0x7f;
// This is always moved to the beginning of the basic block.
// Only the bit offset (operand 1) must be an immediate.
def int_amdgcn_init_exec_from_input : Intrinsic<[],
  [llvm_i32_ty,       // 32-bit SGPR input
   llvm_i32_ty],      // bit offset of the thread count
  [IntrConvergent, ImmArg<ArgIndex<1>>]>;

// Wavefront size of the current target (constant-folded per subtarget).
def int_amdgcn_wavefrontsize :
  GCCBuiltin<"__builtin_amdgcn_wavefrontsize">,
  Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
193
194
//===----------------------------------------------------------------------===//
// Instruction Intrinsics
//===----------------------------------------------------------------------===//

// The first parameter is s_sendmsg immediate (i16),
// the second one is copied to m0
def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty],
  [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
// Halting variant of s_sendmsg; same operand layout.
def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty],
  [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;

// Work-group execution barrier (s_barrier).
def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn]>;

// Compiler-only barrier: prevents reordering across it within a wave.
def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn]>;

// s_waitcnt with an immediate encoded wait-count operand.
def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
  Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;
216
// Scale the numerator/denominator prior to the division sequence.
// Also returns an i1 flag consumed by div_fmas.
def int_amdgcn_div_scale : Intrinsic<
  // 1st parameter: Numerator
  // 2nd parameter: Denominator
  // 3rd parameter: Select quotient. Must equal Numerator or Denominator.
  //                (0 = Denominator, 1 = Numerator).
  [llvm_anyfloat_ty, llvm_i1_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
  [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>, IntrWillReturn]
>;

// Fused multiply-add step of the division sequence; the trailing i1 is the
// scale flag produced by div_scale.
def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Final fixup step of the division sequence (three matching float operands).
def int_amdgcn_div_fixup : Intrinsic<[llvm_anyfloat_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
236
// Look Up 2.0 / pi src0 with segment select src1[4:0]
def int_amdgcn_trig_preop : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// The following are pure, speculatable unary/binary float operations mapping
// to hardware instructions; each result type matches its operand type.

def int_amdgcn_sin : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cos : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_log_clamp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Legacy (pre-IEEE) multiply semantics; f32 only.
def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">,
  Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_rcp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Legacy reciprocal; f32 only.
def int_amdgcn_rcp_legacy : GCCBuiltin<"__builtin_amdgcn_rcp_legacy">,
  Intrinsic<[llvm_float_ty], [llvm_float_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_sqrt :  Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_rsq :  Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Legacy reciprocal square root; f32 only.
def int_amdgcn_rsq_legacy :  GCCBuiltin<"__builtin_amdgcn_rsq_legacy">,
  Intrinsic<
  [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// out = 1.0 / sqrt(a) result clamped to +/- max_float.
def int_amdgcn_rsq_clamp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

// Scale by a power of two: float operand plus i32 exponent.
def int_amdgcn_ldexp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Mantissa part of a float decomposition; same type as the operand.
def int_amdgcn_frexp_mant : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Exponent part of a float decomposition; integer result type is independent
// of the float operand type.
def int_amdgcn_frexp_exp : Intrinsic<
  [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
299
// v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0
// and always uses rtz, so is not suitable for implementing the OpenCL
// fract function. It should be ok on VI.
def int_amdgcn_fract : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Pack two f32 into a v2f16 using round-toward-zero.
def int_amdgcn_cvt_pkrtz : GCCBuiltin<"__builtin_amdgcn_cvt_pkrtz">,
  Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Pack two f32 into a v2i16, signed-normalized conversion.
def int_amdgcn_cvt_pknorm_i16 :
  GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">,
  Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Pack two f32 into a v2i16, unsigned-normalized conversion.
def int_amdgcn_cvt_pknorm_u16 :
  GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">,
  Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Pack two i32 into a v2i16 (signed saturation variant).
def int_amdgcn_cvt_pk_i16 :
    GCCBuiltin<"__builtin_amdgcn_cvt_pk_i16">,
    Intrinsic<
  [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Pack two i32 into a v2i16 (unsigned saturation variant).
def int_amdgcn_cvt_pk_u16 : GCCBuiltin<"__builtin_amdgcn_cvt_pk_u16">,
  Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Floating-point class test (v_cmp_class): returns i1, second operand is a
// class mask.
def int_amdgcn_class : Intrinsic<
  [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Median of three floats (v_med3).
def int_amdgcn_fmed3 : GCCBuiltin<"__builtin_amdgcn_fmed3">,
  Intrinsic<[llvm_anyfloat_ty],
    [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
346
// Cube-map helpers (v_cubeid/v_cubema/v_cubesc/v_cubetc): each takes the
// three f32 direction components and returns a single f32.
def int_amdgcn_cubeid : GCCBuiltin<"__builtin_amdgcn_cubeid">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cubema : GCCBuiltin<"__builtin_amdgcn_cubema">,
  Intrinsic<[llvm_float_ty],
  [llvm_float_ty, llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cubesc : GCCBuiltin<"__builtin_amdgcn_cubesc">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz
// should be used.
def int_amdgcn_sffbh :
  Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// v_mad_f32|f16/v_mac_f32|f16, selected regardless of denorm support.
def int_amdgcn_fmad_ftz :
  Intrinsic<[llvm_anyfloat_ty],
            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
384
// Fields should mirror atomicrmw
// Atomic increment/decrement-with-wrap profile. Returns the previous value.
// Ordering, scope and isVolatile must be immediates; only the pointed-to
// memory is accessed (IntrArgMemOnly) and the pointer does not escape.
class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
  [llvm_anyptr_ty,
  LLVMMatchType<0>,
  llvm_i32_ty, // ordering
  llvm_i32_ty, // scope
  llvm_i1_ty], // isVolatile
  [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>,
   ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "",
  [SDNPMemOperand]
>;

def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;
399
// Profile for f32 LDS (address space 3) atomic float operations, exposed
// under the given clang builtin name. Operand layout mirrors
// AMDGPUAtomicIncIntrin: pointer, value, then immediate ordering/scope/
// isVolatile controls.
class AMDGPULDSF32Intrin<string clang_builtin> :
  GCCBuiltin<clang_builtin>,
  Intrinsic<[llvm_float_ty],
    [LLVMQualPointerType<llvm_float_ty, 3>,
    llvm_float_ty,
    llvm_i32_ty, // ordering
    llvm_i32_ty, // scope
    llvm_i1_ty], // isVolatile
    [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>,
     ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]
>;
411
// FIXME: The m0 argument should be moved after the normal arguments
// Profile for DS ordered-count operations (ds_ordered_add/swap). All control
// operands (ordering, scope, isVolatile, index, release, done) must be
// immediates.
class AMDGPUDSOrderedIntrinsic : Intrinsic<
  [llvm_i32_ty],
  // M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that
  // the bit packing can be optimized at the IR level.
  [LLVMQualPointerType<llvm_i32_ty, 2>, // IntToPtr(M0)
   llvm_i32_ty, // value to add or swap
   llvm_i32_ty, // ordering
   llvm_i32_ty, // scope
   llvm_i1_ty,  // isVolatile
   llvm_i32_ty, // ordered count index (OA index), also added to the address
                // gfx10: bits 24-27 indicate the number of active threads/dwords
   llvm_i1_ty,  // wave release, usually set to 1
   llvm_i1_ty], // wave done, set to 1 for the last ordered instruction
  [IntrWillReturn, NoCapture<ArgIndex<0>>,
   ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>,
   ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>
  ]
>;
431
// Profile for ds_append/ds_consume: takes an LDS or GDS pointer plus an
// immediate isVolatile flag, returns the pre-op counter value.
class AMDGPUDSAppendConsumedIntrinsic : Intrinsic<
  [llvm_i32_ty],
  [llvm_anyptr_ty, // LDS or GDS ptr
   llvm_i1_ty], // isVolatile
   [IntrConvergent, IntrWillReturn, IntrArgMemOnly,
    NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<1>>],
   "",
   [SDNPMemOperand]
>;

def int_amdgcn_ds_ordered_add : AMDGPUDSOrderedIntrinsic;
def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;

// The pointer argument is assumed to be dynamically uniform if a VGPR.
def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;

// f32 LDS atomic add/min/max.
def int_amdgcn_ds_fadd : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_faddf">;
def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fminf">;
def int_amdgcn_ds_fmax : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmaxf">;
452
453} // TargetPrefix = "amdgcn"
454
455// New-style image intrinsics
456
457//////////////////////////////////////////////////////////////////////////
458// Dimension-aware image intrinsics framework
459//////////////////////////////////////////////////////////////////////////
460
// Helper class to represent (type, name) combinations of arguments. The
// argument names are explanatory and used as DAG operand names for codegen
// pattern matching.
class AMDGPUArg<LLVMType ty, string name> {
  LLVMType Type = ty;
  string Name = name;
}

// Return [AMDGPUArg<basety, names[0]>, AMDGPUArg<LLVMMatchType<0>, names[1]>, ...]
// i.e. the first argument carries the overloaded type and the rest must
// match it.
class makeArgList<list<string> names, LLVMType basety> {
  list<AMDGPUArg> ret =
    !listconcat([AMDGPUArg<basety, names[0]>],
                !foreach(name, !tail(names), AMDGPUArg<LLVMMatchType<0>, name>));
}
475
// Return arglist, with LLVMMatchType's references shifted by 'shift'.
// Non-match types are passed through unchanged.
class arglistmatchshift<list<AMDGPUArg> arglist, int shift> {
  list<AMDGPUArg> ret =
    !foreach(arg, arglist,
             !if(!isa<LLVMMatchType>(arg.Type),
                 AMDGPUArg<LLVMMatchType<!add(!cast<LLVMMatchType>(arg.Type).Number, shift)>,
                           arg.Name>,
                 arg));
}

// Return the concatenation of the given arglists. LLVMMatchType's are adjusted
// accordingly, and shifted by an additional 'shift'.
class arglistconcat<list<list<AMDGPUArg>> arglists, int shift = 0> {
  list<AMDGPUArg> ret =
    // Fold left over the sublists; each sublist's match references are
    // shifted by the number of 'any' types accumulated so far plus 'shift'.
    !foldl([]<AMDGPUArg>, arglists, lhs, rhs,
           !listconcat(
             lhs,
             arglistmatchshift<rhs,
                               !add(shift, !foldl(0, lhs, a, b,
                                                  !add(a, b.Type.isAny)))>.ret));
}
497
// Represent texture/image types / dimensionality.
class AMDGPUDimProps<bits<3> enc, string name, string asmsuffix,
                     list<string> coord_names, list<string> slice_names> {
  AMDGPUDimProps Dim = !cast<AMDGPUDimProps>(NAME); // self-reference for lookups
  string Name = name; // e.g. "2darraymsaa"
  string AsmSuffix = asmsuffix; // e.g. 2D_MSAA_ARRAY (used in assembly strings)
  bits<3> Encoding = enc;
  bit DA = 0; // DA bit in MIMG encoding

  // Coordinate + slice arguments in float and integer flavors; the first
  // argument carries the overload and the rest match it (see makeArgList).
  list<AMDGPUArg> CoordSliceArgs =
    makeArgList<!listconcat(coord_names, slice_names), llvm_anyfloat_ty>.ret;
  list<AMDGPUArg> CoordSliceIntArgs =
    makeArgList<!listconcat(coord_names, slice_names), llvm_anyint_ty>.ret;
  // Derivative arguments: d<coord>dh then d<coord>dv for each coordinate.
  list<AMDGPUArg> GradientArgs =
    makeArgList<!listconcat(!foreach(name, coord_names, "d" # name # "dh"),
                            !foreach(name, coord_names, "d" # name # "dv")),
                llvm_anyfloat_ty>.ret;

  bits<8> NumCoords = !size(CoordSliceArgs);
  bits<8> NumGradients = !size(GradientArgs);
}
519
// Concrete dimensionalities. Array-like and cube dims set the DA bit.
def AMDGPUDim1D : AMDGPUDimProps<0x0, "1d", "1D", ["s"], []>;
def AMDGPUDim2D : AMDGPUDimProps<0x1, "2d", "2D", ["s", "t"], []>;
def AMDGPUDim3D : AMDGPUDimProps<0x2, "3d", "3D", ["s", "t", "r"], []>;
let DA = 1 in {
  def AMDGPUDimCube : AMDGPUDimProps<0x3, "cube", "CUBE", ["s", "t"], ["face"]>;
  def AMDGPUDim1DArray : AMDGPUDimProps<0x4, "1darray", "1D_ARRAY", ["s"], ["slice"]>;
  def AMDGPUDim2DArray : AMDGPUDimProps<0x5, "2darray", "2D_ARRAY", ["s", "t"], ["slice"]>;
}
def AMDGPUDim2DMsaa : AMDGPUDimProps<0x6, "2dmsaa", "2D_MSAA", ["s", "t"], ["fragid"]>;
let DA = 1 in {
  def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<0x7, "2darraymsaa", "2D_MSAA_ARRAY", ["s", "t"], ["slice", "fragid"]>;
}

// Groupings of the dimensionalities used when instantiating intrinsics.
def AMDGPUDims {
  list<AMDGPUDimProps> NoMsaa = [AMDGPUDim1D, AMDGPUDim2D, AMDGPUDim3D,
                                 AMDGPUDimCube, AMDGPUDim1DArray,
                                 AMDGPUDim2DArray];
  list<AMDGPUDimProps> Msaa = [AMDGPUDim2DMsaa, AMDGPUDim2DArrayMsaa];
  list<AMDGPUDimProps> All = !listconcat(NoMsaa, Msaa);
}
540
// Represent sample variants, i.e. _C, _O, _B, ... and combinations thereof.
class AMDGPUSampleVariant<string ucmod, string lcmod, list<AMDGPUArg> extra_addr> {
  string UpperCaseMod = ucmod; // instruction-name suffix, e.g. "_C_O"
  string LowerCaseMod = lcmod; // intrinsic-name suffix, e.g. "_c_o"

  // {offset} {bias} {z-compare}
  list<AMDGPUArg> ExtraAddrArgs = extra_addr;
  bit Gradients = 0; // set for _D/_CD variants (explicit derivatives)

  // Name of the {lod} or {clamp} argument that is appended to the coordinates,
  // if any.
  string LodOrClamp = "";
}
554
// AMDGPUSampleVariants: all variants supported by IMAGE_SAMPLE
// AMDGPUSampleVariantsNoGradients: variants supported by IMAGE_GATHER4
defset list<AMDGPUSampleVariant> AMDGPUSampleVariants = {
  // Emits the base variant and its offset (_O) variant.
  multiclass AMDGPUSampleHelper_Offset<string ucmod, string lcmod,
                                       list<AMDGPUArg> extra_addr> {
    def NAME#lcmod : AMDGPUSampleVariant<ucmod, lcmod, extra_addr>;
    def NAME#lcmod#_o : AMDGPUSampleVariant<
        ucmod#"_O", lcmod#"_o", !listconcat([AMDGPUArg<llvm_i32_ty, "offset">], extra_addr)>;
  }

  // Adds the z-compare (_C) variants on top of the offset variants.
  multiclass AMDGPUSampleHelper_Compare<string ucmod, string lcmod,
                                        list<AMDGPUArg> extra_addr> {
    defm NAME : AMDGPUSampleHelper_Offset<ucmod, lcmod, extra_addr>;
    defm NAME : AMDGPUSampleHelper_Offset<
        "_C"#ucmod, "_c"#lcmod, !listconcat(extra_addr, [AMDGPUArg<llvm_float_ty, "zcompare">])>;
  }

  // Adds the LOD-clamp (_CL) variants on top of the compare variants.
  multiclass AMDGPUSampleHelper_Clamp<string ucmod, string lcmod,
                                      list<AMDGPUArg> extra_addr> {
    defm NAME : AMDGPUSampleHelper_Compare<ucmod, lcmod, extra_addr>;
    let LodOrClamp = "clamp" in
    defm NAME : AMDGPUSampleHelper_Compare<ucmod#"_CL", lcmod#"_cl", extra_addr>;
  }

  defset list<AMDGPUSampleVariant> AMDGPUSampleVariantsNoGradients = {
    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"", "", []>;
    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<
        "_B", "_b", [AMDGPUArg<llvm_anyfloat_ty, "bias">]>;
    let LodOrClamp = "lod" in
    defm AMDGPUSample : AMDGPUSampleHelper_Compare<"_L", "_l", []>;
    defm AMDGPUSample : AMDGPUSampleHelper_Compare<"_LZ", "_lz", []>;
  }

  // Derivative-taking variants are excluded from the NoGradients set.
  let Gradients = 1 in {
    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_D", "_d", []>;
    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_CD", "_cd", []>;
  }
}
593
// Helper class to capture the profile of a dimension-aware image intrinsic.
// This information is used to generate the intrinsic's type and to inform
// codegen pattern matching.
class AMDGPUDimProfile<string opmod,
                       AMDGPUDimProps dim> {
  AMDGPUDimProps Dim = dim;
  string OpMod = opmod; // the corresponding instruction is named IMAGE_OpMod

  // These are intended to be overwritten by subclasses
  bit IsSample = 0;
  bit IsAtomic = 0;
  list<LLVMType> RetTypes = [];
  list<AMDGPUArg> DataArgs = [];
  list<AMDGPUArg> ExtraAddrArgs = [];
  bit Gradients = 0;
  string LodClampMip = "";

  // Number of overloaded ('any') types among returns and data arguments;
  // used to shift LLVMMatchType references in the address arguments.
  int NumRetAndDataAnyTypes =
    !foldl(0, !listconcat(RetTypes, !foreach(arg, DataArgs, arg.Type)), a, b,
           !add(a, b.isAny));

  // Full address argument list: extra args, then gradients (if enabled),
  // then coordinates/slices (float for samples, int otherwise), then the
  // optional lod/clamp/mip argument.
  list<AMDGPUArg> AddrArgs =
    arglistconcat<[ExtraAddrArgs,
                   !if(Gradients, dim.GradientArgs, []),
                   !listconcat(!if(IsSample, dim.CoordSliceArgs, dim.CoordSliceIntArgs),
                               !if(!eq(LodClampMip, ""),
                                   []<AMDGPUArg>,
                                   [AMDGPUArg<LLVMMatchType<0>, LodClampMip>]))],
                  NumRetAndDataAnyTypes>.ret;
  list<LLVMType> AddrTypes = !foreach(arg, AddrArgs, arg.Type);
  // AddrArgs with overloaded/match types pinned to the 32-bit default
  // (f32 for samples, i32 otherwise).
  list<AMDGPUArg> AddrDefaultArgs =
    !foreach(arg, AddrArgs,
             AMDGPUArg<!if(!or(arg.Type.isAny, !isa<LLVMMatchType>(arg.Type)),
                           !if(IsSample, llvm_float_ty, llvm_i32_ty), arg.Type),
                       arg.Name>);
  // AddrArgs with overloaded/match types pinned to the 16-bit (A16) forms.
  list<AMDGPUArg> AddrA16Args =
    !foreach(arg, AddrArgs,
             AMDGPUArg<!if(!or(arg.Type.isAny, !isa<LLVMMatchType>(arg.Type)),
                           !if(IsSample, llvm_half_ty, llvm_i16_ty), arg.Type),
                       arg.Name>);
}
635
// Copy all overridable fields from an existing profile (useful as a base
// for profiles that tweak a single field).
class AMDGPUDimProfileCopy<AMDGPUDimProfile base> : AMDGPUDimProfile<base.OpMod, base.Dim> {
  let IsSample = base.IsSample;
  let IsAtomic = base.IsAtomic;
  let RetTypes = base.RetTypes;
  let DataArgs = base.DataArgs;
  let ExtraAddrArgs = base.ExtraAddrArgs;
  let Gradients = base.Gradients;
  let LodClampMip = base.LodClampMip;
}

// Profile for IMAGE_SAMPLE-style intrinsics; address/gradient layout comes
// from the sample variant.
class AMDGPUDimSampleProfile<string opmod,
                             AMDGPUDimProps dim,
                             AMDGPUSampleVariant sample> : AMDGPUDimProfile<opmod, dim> {
  let IsSample = 1;
  let RetTypes = [llvm_any_ty];
  let ExtraAddrArgs = sample.ExtraAddrArgs;
  let Gradients = sample.Gradients;
  let LodClampMip = sample.LodOrClamp;
}

// Profile for load/store-style intrinsics (no sampler); Mip enables the
// trailing mip-level address argument.
class AMDGPUDimNoSampleProfile<string opmod,
                               AMDGPUDimProps dim,
                               list<LLVMType> retty,
                               list<AMDGPUArg> dataargs,
                               bit Mip = 0> : AMDGPUDimProfile<opmod, dim> {
  let RetTypes = retty;
  let DataArgs = dataargs;
  let LodClampMip = !if(Mip, "mip", "");
}

// Profile for image atomics: integer result, no dmask operand.
class AMDGPUDimAtomicProfile<string opmod,
                             AMDGPUDimProps dim,
                             list<AMDGPUArg> dataargs> : AMDGPUDimProfile<opmod, dim> {
  let RetTypes = [llvm_anyint_ty];
  let DataArgs = dataargs;
  let IsAtomic = 1;
}

// Profile for IMAGE_GET_RESINFO: sole address argument is the mip level.
class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RESINFO", dim> {
  let RetTypes = [llvm_anyfloat_ty];
  let DataArgs = [];
  let AddrArgs = [AMDGPUArg<llvm_anyint_ty, "mip">];
  let LodClampMip = "mip";
}
680
// Helper class for figuring out image intrinsic argument indexes.
// Computes the flat operand index of each control operand from the profile's
// operand counts; see AMDGPUImageDimIntrinsic for the full operand order.
class AMDGPUImageDimIntrinsicEval<AMDGPUDimProfile P_> {
  int NumDataArgs = !size(P_.DataArgs);
  int NumDmaskArgs = !if(P_.IsAtomic, 0, 1); // atomics have no dmask
  int NumVAddrArgs = !size(P_.AddrArgs);
  int NumRSrcArgs = 1;
  int NumSampArgs = !if(P_.IsSample, 2, 0);  // sampler descriptor + unorm bit
  int DmaskArgIndex = NumDataArgs;
  // unorm immediately follows the sampler descriptor (hence the trailing +1).
  int UnormArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, 1);
  int TexFailCtrlArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, NumSampArgs);
  int CachePolicyArgIndex = !add(TexFailCtrlArgIndex, 1);
}
693
// All dimension-aware intrinsics are derived from this class.
// The operand list is assembled from the profile in the order:
//   [vdata] [dmask] vaddr rsrc [samp unorm] texfailctrl cachepolicy
// and the immediate-operand properties are computed to match via
// AMDGPUImageDimIntrinsicEval.
class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
                              list<IntrinsicProperty> props,
                              list<SDNodeProperty> sdnodeprops> : Intrinsic<
    P_.RetTypes,        // vdata(VGPR) -- for load/atomic-with-return
    !listconcat(
      !foreach(arg, P_.DataArgs, arg.Type),      // vdata(VGPR) -- for store/atomic
      !if(P_.IsAtomic, [], [llvm_i32_ty]),       // dmask(imm)
      P_.AddrTypes,                              // vaddr(VGPR)
      [llvm_v8i32_ty],                           // rsrc(SGPR)
      !if(P_.IsSample, [llvm_v4i32_ty,           // samp(SGPR)
                        llvm_i1_ty], []),        // unorm(imm)
      [llvm_i32_ty,                              // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe)
       llvm_i32_ty]),                            // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc)

     !listconcat(props,
          !if(P_.IsAtomic, [], [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>>]),
          !if(P_.IsSample, [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.UnormArgIndex>>], []),
          [IntrWillReturn],
          [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.TexFailCtrlArgIndex>>,
           ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.CachePolicyArgIndex>>]),


      "", sdnodeprops>,
  // Tag with the rsrc operand index (after data, dmask and vaddr operands).
  AMDGPURsrcIntrinsic<!add(!size(P_.DataArgs), !size(P_.AddrTypes),
                           !if(P_.IsAtomic, 0, 1)), 1> {
  AMDGPUDimProfile P = P_;

  AMDGPUImageDimIntrinsic Intr = !cast<AMDGPUImageDimIntrinsic>(NAME);

  let TargetPrefix = "amdgcn";
}

// Marker class for intrinsics with a DMask that determines the returned
// channels.
class AMDGPUImageDMaskIntrinsic;
730
731defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
732
733  //////////////////////////////////////////////////////////////////////////
734  // Load and store intrinsics
735  //////////////////////////////////////////////////////////////////////////
736  multiclass AMDGPUImageDimIntrinsicsNoMsaa<string opmod,
737                                            list<LLVMType> retty,
738                                            list<AMDGPUArg> dataargs,
739                                            list<IntrinsicProperty> props,
740                                            list<SDNodeProperty> sdnodeprops,
741                                            bit Mip = 0> {
742    foreach dim = AMDGPUDims.NoMsaa in {
743      def !strconcat(NAME, "_", dim.Name)
744        : AMDGPUImageDimIntrinsic<
745            AMDGPUDimNoSampleProfile<opmod, dim, retty, dataargs, Mip>,
746            props, sdnodeprops>;
747    }
748  }
749
  // Instantiate a no-sample image intrinsic for every dimension (including
  // MSAA ones).
  multiclass AMDGPUImageDimIntrinsicsAll<string opmod,
                                         list<LLVMType> retty,
                                         list<AMDGPUArg> dataargs,
                                         list<IntrinsicProperty> props,
                                         list<SDNodeProperty> sdnodeprops,
                                         bit Mip = 0> {
    foreach dim = AMDGPUDims.All in {
      def !strconcat(NAME, "_", dim.Name)
        : AMDGPUImageDimIntrinsic<
            AMDGPUDimNoSampleProfile<opmod, dim, retty, dataargs, Mip>,
            props, sdnodeprops>;
    }
  }
763
764  defm int_amdgcn_image_load
765    : AMDGPUImageDimIntrinsicsAll<"LOAD", [llvm_any_ty], [], [IntrReadMem],
766                                  [SDNPMemOperand]>,
767      AMDGPUImageDMaskIntrinsic;
768  defm int_amdgcn_image_load_mip
769    : AMDGPUImageDimIntrinsicsNoMsaa<"LOAD_MIP", [llvm_any_ty], [],
770                                     [IntrReadMem, IntrWillReturn], [SDNPMemOperand], 1>,
771      AMDGPUImageDMaskIntrinsic;
772
773  defm int_amdgcn_image_store : AMDGPUImageDimIntrinsicsAll<
774              "STORE", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
775              [IntrWriteMem, IntrWillReturn], [SDNPMemOperand]>;
776  defm int_amdgcn_image_store_mip : AMDGPUImageDimIntrinsicsNoMsaa<
777              "STORE_MIP", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
778              [IntrWriteMem, IntrWillReturn], [SDNPMemOperand], 1>;
779
780  defm int_amdgcn_image_msaa_load
781    : AMDGPUImageDimIntrinsicsAll<"MSAA_LOAD", [llvm_any_ty], [], [IntrReadMem],
782                                  [SDNPMemOperand]>,
783      AMDGPUImageDMaskIntrinsic;
784
785  //////////////////////////////////////////////////////////////////////////
786  // sample and getlod intrinsics
787  //////////////////////////////////////////////////////////////////////////
788  multiclass AMDGPUImageDimSampleDims<string opmod,
789                                      AMDGPUSampleVariant sample,
790                                      bit NoMem = 0> {
791    foreach dim = AMDGPUDims.NoMsaa in {
792      def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic<
793          AMDGPUDimSampleProfile<opmod, dim, sample>,
794          !if(NoMem, [IntrNoMem], [IntrReadMem]),
795          !if(NoMem, [], [SDNPMemOperand])>;
796    }
797  }
798
799  foreach sample = AMDGPUSampleVariants in {
800    defm int_amdgcn_image_sample # sample.LowerCaseMod
801      : AMDGPUImageDimSampleDims<"SAMPLE" # sample.UpperCaseMod, sample>,
802        AMDGPUImageDMaskIntrinsic;
803  }
804
805  defm int_amdgcn_image_getlod
806    : AMDGPUImageDimSampleDims<"GET_LOD", AMDGPUSample, 1>,
807      AMDGPUImageDMaskIntrinsic;
808
809  //////////////////////////////////////////////////////////////////////////
810  // getresinfo intrinsics
811  //////////////////////////////////////////////////////////////////////////
812  foreach dim = AMDGPUDims.All in {
813    def !strconcat("int_amdgcn_image_getresinfo_", dim.Name)
814      : AMDGPUImageDimIntrinsic<AMDGPUDimGetResInfoProfile<dim>, [IntrNoMem], []>,
815        AMDGPUImageDMaskIntrinsic;
816  }
817
818  //////////////////////////////////////////////////////////////////////////
819  // gather4 intrinsics
820  //////////////////////////////////////////////////////////////////////////
821  foreach sample = AMDGPUSampleVariantsNoGradients in {
822    foreach dim = [AMDGPUDim2D, AMDGPUDimCube, AMDGPUDim2DArray] in {
823      def int_amdgcn_image_gather4 # sample.LowerCaseMod # _ # dim.Name:
824          AMDGPUImageDimIntrinsic<
825              AMDGPUDimSampleProfile<"GATHER4" # sample.UpperCaseMod, dim, sample>,
826              [IntrReadMem], [SDNPMemOperand]>;
827    }
828  }
829}
830
831//////////////////////////////////////////////////////////////////////////
832// atomic intrinsics
833//////////////////////////////////////////////////////////////////////////
defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
  // One atomic image intrinsic per dimension; dataargs supplies the data
  // operand(s): one value for plain atomics, two (src + cmp) for cmpswap.
  multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs> {
    foreach dim = AMDGPUDims.All in {
      def !strconcat(NAME, "_", dim.Name)
        : AMDGPUImageDimIntrinsic<
            AMDGPUDimAtomicProfile<opmod, dim, dataargs>,
            [], [SDNPMemOperand]>;
    }
  }

  // Common case: a single "vdata" operand tied to overloaded type 0 via
  // LLVMMatchType<0>.
  multiclass AMDGPUImageDimAtomic<string opmod> {
    defm "" : AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">]>;
  }

  defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAtomic<"ATOMIC_SWAP">;
  defm int_amdgcn_image_atomic_add : AMDGPUImageDimAtomic<"ATOMIC_ADD">;
  defm int_amdgcn_image_atomic_sub : AMDGPUImageDimAtomic<"ATOMIC_SUB">;
  defm int_amdgcn_image_atomic_smin : AMDGPUImageDimAtomic<"ATOMIC_SMIN">;
  defm int_amdgcn_image_atomic_umin : AMDGPUImageDimAtomic<"ATOMIC_UMIN">;
  defm int_amdgcn_image_atomic_smax : AMDGPUImageDimAtomic<"ATOMIC_SMAX">;
  defm int_amdgcn_image_atomic_umax : AMDGPUImageDimAtomic<"ATOMIC_UMAX">;
  defm int_amdgcn_image_atomic_and : AMDGPUImageDimAtomic<"ATOMIC_AND">;
  defm int_amdgcn_image_atomic_or : AMDGPUImageDimAtomic<"ATOMIC_OR">;
  defm int_amdgcn_image_atomic_xor : AMDGPUImageDimAtomic<"ATOMIC_XOR">;
  defm int_amdgcn_image_atomic_inc : AMDGPUImageDimAtomic<"ATOMIC_INC">;
  defm int_amdgcn_image_atomic_dec : AMDGPUImageDimAtomic<"ATOMIC_DEC">;

  // Compare-and-swap takes two data operands instead of one.
  defm int_amdgcn_image_atomic_cmpswap :
      AMDGPUImageDimAtomicX<"ATOMIC_CMPSWAP", [AMDGPUArg<LLVMMatchType<0>, "src">,
                                               AMDGPUArg<LLVMMatchType<0>, "cmp">]>;
}
865
866//////////////////////////////////////////////////////////////////////////
867// Buffer intrinsics
868//////////////////////////////////////////////////////////////////////////
869
870let TargetPrefix = "amdgcn" in {
871
872defset list<AMDGPURsrcIntrinsic> AMDGPUBufferIntrinsics = {
873
// Legacy buffer load (predates the raw/struct split documented below).
// RsrcArg = 0: the resource descriptor is operand 0. glc and slc must be
// immediates (ImmArg on operands 3 and 4).
class AMDGPUBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [data_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrReadMem, IntrWillReturn,
   ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
// Format variant is restricted to float result types; plain load is fully
// overloaded (llvm_any_ty).
def int_amdgcn_buffer_load_format : AMDGPUBufferLoad<llvm_anyfloat_ty>;
def int_amdgcn_buffer_load : AMDGPUBufferLoad;
886
// Scalar buffer load. NOTE(review): modeled as IntrNoMem even though it
// reads through the descriptor -- presumably the loaded data is assumed
// invariant for the intrinsic's lifetime; confirm before relying on it.
def int_amdgcn_s_buffer_load : Intrinsic <
  [llvm_any_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // byte offset(SGPR/imm)
   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 2 = dlc)
  [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<2>>]>,
  AMDGPURsrcIntrinsic<0>;
894
// Legacy buffer store. RsrcArg = 1 because the data to store (vdata)
// precedes the resource descriptor. glc/slc are immediates (operands 4, 5).
class AMDGPUBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [],
  [data_ty,          // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrWriteMem, IntrWillReturn,
   ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;
def int_amdgcn_buffer_store_format : AMDGPUBufferStore<llvm_anyfloat_ty>;
def int_amdgcn_buffer_store : AMDGPUBufferStore;
908
909// New buffer intrinsics with separate raw and struct variants.  The raw
910// variant never has an index. The struct variant always has an index, even if
911// it is const 0. A struct intrinsic with constant 0 index is different to the
912// corresponding raw intrinsic on gfx9+ because the behavior of bound checking
913// and swizzling changes depending on whether idxen is set in the instruction.
// These new intrinsics also keep the offset and soffset arguments separate as
915// they behave differently in bounds checking and swizzling.
// Raw buffer load: no vindex operand (see the raw/struct note above).
// Auxiliary data (operand 3) must be an immediate.
class AMDGPURawBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [data_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                      //                                       bit 1 = slc,
                      //                                       bit 2 = dlc on gfx10+),
                      //                      swizzled buffer (bit 3 = swz))
  [IntrReadMem, IntrWillReturn, ImmArg<ArgIndex<3>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad<llvm_anyfloat_ty>;
def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad;
929
// Struct buffer load: always carries a vindex operand (operand 1), even when
// it is a constant 0 -- see the raw/struct note above for why that matters.
class AMDGPUStructBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [data_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                      //                                       bit 1 = slc,
                      //                                       bit 2 = dlc on gfx10+),
                      //                      swizzled buffer (bit 3 = swz))
  [IntrReadMem, IntrWillReturn, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
// NOTE(review): unlike the raw variant, the _format def here uses the
// fully-overloaded default type rather than llvm_anyfloat_ty -- confirm
// this asymmetry is intentional.
def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad;
def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;
944
// Raw buffer store: vdata first, then the descriptor (hence RsrcArg = 1);
// no vindex. Auxiliary data (operand 4) must be an immediate.
class AMDGPURawBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [],
  [data_ty,           // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                      //                                       bit 1 = slc,
                      //                                       bit 2 = dlc on gfx10+),
                      //                      swizzled buffer (bit 3 = swz))
  [IntrWriteMem, IntrWillReturn, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;
def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore<llvm_anyfloat_ty>;
def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore;
959
// Struct buffer store: like the raw variant but with a mandatory vindex
// operand (operand 2). Auxiliary data (operand 5) must be an immediate.
class AMDGPUStructBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [],
  [data_ty,           // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                      //                                       bit 1 = slc,
                      //                                       bit 2 = dlc on gfx10+),
                      //                      swizzled buffer (bit 3 = swz))
  [IntrWriteMem, IntrWillReturn, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;
def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore;
def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore;
975
// Raw buffer atomics. Note they carry neither IntrReadMem nor IntrWriteMem
// (presumably because they are read-modify-write operations); cachepolicy
// (operand 4) must be an immediate.
class AMDGPURawBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [data_ty],
  [LLVMMatchType<0>,  // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
  [ImmArg<ArgIndex<4>>, IntrWillReturn], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1, 0>;
// The operation applied is selected by the record name; all share the
// signature above.
def int_amdgcn_raw_buffer_atomic_swap : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_sub : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_smin : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_umin : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_smax : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_umax : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_and : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_inc : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_dec : AMDGPURawBufferAtomic;
// cmpswap has an extra cmp operand, shifting the descriptor to operand 2
// (RsrcArg = 2) and cachepolicy to operand 5.
def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
  [llvm_anyint_ty],
  [LLVMMatchType<0>,  // src(VGPR)
   LLVMMatchType<0>,  // cmp(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
  [ImmArg<ArgIndex<5>>, IntrWillReturn], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<2, 0>;
1007
// Struct buffer atomics: as the raw variants above, plus a mandatory vindex
// operand (operand 2); cachepolicy (operand 5) must be an immediate.
class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [data_ty],
  [LLVMMatchType<0>,  // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
  [ImmArg<ArgIndex<5>>, IntrWillReturn], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1, 0>;
def int_amdgcn_struct_buffer_atomic_swap : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_add : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_sub : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_smin : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_umin : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_smax : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_umax : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_and : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_or : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_xor : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_inc : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_dec : AMDGPUStructBufferAtomic;
// cmpswap: extra cmp operand shifts the descriptor to operand 2 and
// cachepolicy to operand 6.
def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
  [llvm_anyint_ty],
  [LLVMMatchType<0>,  // src(VGPR)
   LLVMMatchType<0>,  // cmp(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
  [ImmArg<ArgIndex<6>>, IntrWillReturn], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<2, 0>;
1041
// Obsolescent tbuffer intrinsics (superseded by the raw/struct variants
// below, which merge dfmt/nfmt into one format operand and glc/slc into one
// cachepolicy operand). Operands 4-8 must be immediates.
def int_amdgcn_tbuffer_load : Intrinsic <
    [llvm_any_ty],    // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
    [llvm_v4i32_ty,   // rsrc(SGPR)
     llvm_i32_ty,     // vindex(VGPR)
     llvm_i32_ty,     // voffset(VGPR)
     llvm_i32_ty,     // soffset(SGPR)
     llvm_i32_ty,     // offset(imm)
     llvm_i32_ty,     // dfmt(imm)
     llvm_i32_ty,     // nfmt(imm)
     llvm_i1_ty,     // glc(imm)
     llvm_i1_ty],    // slc(imm)
    [IntrReadMem, IntrWillReturn,
     ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>,
     ImmArg<ArgIndex<7>>, ImmArg<ArgIndex<8>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
1058
// Obsolescent tbuffer store; vdata precedes the descriptor (RsrcArg = 1).
// Operands 5-9 must be immediates.
def int_amdgcn_tbuffer_store : Intrinsic <
    [],
    [llvm_any_ty,    // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
     llvm_v4i32_ty,  // rsrc(SGPR)
     llvm_i32_ty,    // vindex(VGPR)
     llvm_i32_ty,    // voffset(VGPR)
     llvm_i32_ty,    // soffset(SGPR)
     llvm_i32_ty,    // offset(imm)
     llvm_i32_ty,    // dfmt(imm)
     llvm_i32_ty,    // nfmt(imm)
     llvm_i1_ty,     // glc(imm)
     llvm_i1_ty],    // slc(imm)
    [IntrWriteMem, IntrWillReturn, ImmArg<ArgIndex<5>>,
     ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>,
     ImmArg<ArgIndex<8>>, ImmArg<ArgIndex<9>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;
1075
// New tbuffer intrinsics, with:
// - raw and struct variants
// - joint format field
// - joint cachepolicy field
def int_amdgcn_raw_tbuffer_load : Intrinsic <
    [llvm_any_ty],    // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
    [llvm_v4i32_ty,   // rsrc(SGPR)
     llvm_i32_ty,     // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,     // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],    // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                      //                                       bit 1 = slc,
                      //                                       bit 2 = dlc on gfx10+),
                      //                      swizzled buffer (bit 3 = swz))
    [IntrReadMem, IntrWillReturn,
     ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
1093
// Raw tbuffer store; vdata first (RsrcArg = 1). format and aux (operands
// 4, 5) must be immediates.
def int_amdgcn_raw_tbuffer_store : Intrinsic <
    [],
    [llvm_any_ty,    // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
     llvm_v4i32_ty,  // rsrc(SGPR)
     llvm_i32_ty,    // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,    // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,    // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],   // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                     //                                       bit 1 = slc,
                     //                                       bit 2 = dlc on gfx10+),
                     //                      swizzled buffer (bit 3 = swz))
    [IntrWriteMem, IntrWillReturn,
     ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;
1108
// Struct tbuffer load: raw variant plus a mandatory vindex (operand 1).
// format and aux (operands 4, 5) must be immediates.
def int_amdgcn_struct_tbuffer_load : Intrinsic <
    [llvm_any_ty],    // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
    [llvm_v4i32_ty,   // rsrc(SGPR)
     llvm_i32_ty,     // vindex(VGPR)
     llvm_i32_ty,     // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,     // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],    // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                      //                                       bit 1 = slc,
                      //                                       bit 2 = dlc on gfx10+),
                      //                      swizzled buffer (bit 3 = swz))
    [IntrReadMem, IntrWillReturn,
     ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
1123
// Struct tbuffer store: vdata first (RsrcArg = 1) and a mandatory vindex
// (operand 2). format and aux (operands 5, 6) must be immediates.
def int_amdgcn_struct_tbuffer_store : Intrinsic <
    [],
    [llvm_any_ty,    // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
     llvm_v4i32_ty,  // rsrc(SGPR)
     llvm_i32_ty,    // vindex(VGPR)
     llvm_i32_ty,    // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,    // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,    // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],   // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                     //                                       bit 1 = slc,
                     //                                       bit 2 = dlc on gfx10+),
                     //                      swizzled buffer (bit 3 = swz))
    [IntrWriteMem, IntrWillReturn,
     ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;
1139
// Legacy buffer atomics (pre raw/struct split). slc (operand 4) must be an
// immediate; like the new atomics, no IntrReadMem/IntrWriteMem is claimed.
class AMDGPUBufferAtomic : Intrinsic <
  [llvm_anyint_ty],
  [LLVMMatchType<0>,       // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty],       // slc(imm)
  [ImmArg<ArgIndex<4>>, IntrWillReturn], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1, 0>;
def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_and : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_or : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_xor : AMDGPUBufferAtomic;
// cmpswap: extra cmp operand, fixed i32 (not overloaded); descriptor moves
// to operand 2 and slc to operand 5.
def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
  [llvm_i32_ty],
  [llvm_i32_ty,       // src(VGPR)
   llvm_i32_ty,       // cmp(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty],       // slc(imm)
  [ImmArg<ArgIndex<5>>, IntrWillReturn], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<2, 0>;

// Conditional-subtract atomic; reuses the common AMDGPUBufferAtomic
// signature. Presumably target-gated elsewhere -- not visible here.
def int_amdgcn_buffer_atomic_csub : AMDGPUBufferAtomic;
1171} // defset AMDGPUBufferIntrinsics
1172
// Uses that do not set the done bit should set IntrWriteMem on the
// call site.
// Export: tgt, en, done and vm (operands 0, 1, 6, 7) must be immediates;
// src0..src3 share one overloaded type (f32 or i32).
def int_amdgcn_exp : Intrinsic <[], [
  llvm_i32_ty,       // tgt
  llvm_i32_ty,       // en
  llvm_any_ty,       // src0 (f32 or i32)
  LLVMMatchType<0>,  // src1
  LLVMMatchType<0>,  // src2
  LLVMMatchType<0>,  // src3
  llvm_i1_ty,        // done
  llvm_i1_ty         // vm
  ],
  [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<6>>,
   ImmArg<ArgIndex<7>>, IntrWriteMem, IntrInaccessibleMemOnly,
   IntrWillReturn]
>;
1189
// exp with compr bit set. Packed sources (v2f16 or v2i16), so only two
// source operands; done/vm (operands 4, 5) must be immediates.
def int_amdgcn_exp_compr : Intrinsic <[], [
  llvm_i32_ty,       // tgt
  llvm_i32_ty,       // en
  llvm_anyvector_ty, // src0 (v2f16 or v2i16)
  LLVMMatchType<0>,  // src1
  llvm_i1_ty,        // done
  llvm_i1_ty],       // vm
  [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>,
   ImmArg<ArgIndex<5>>, IntrWriteMem, IntrInaccessibleMemOnly,
   IntrWillReturn]
>;
1202
// The next three are no-operand cache-control operations, modeled as
// IntrNoMem + IntrHasSideEffects so they are neither reordered against
// memory model assumptions nor deleted.
def int_amdgcn_buffer_wbinvl1_sc :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_buffer_wbinvl1 :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_s_dcache_inv :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

// Returns a 64-bit counter value; deliberately not IntrNoMem so repeated
// reads are not folded together.
def int_amdgcn_s_memtime :
  GCCBuiltin<"__builtin_amdgcn_s_memtime">,
  Intrinsic<[llvm_i64_ty], [], [IntrWillReturn]>;
1218
// The next three each take a single immediate i32 (ImmArg on operand 0).
// The trailing empty record bodies "{ }" are no-ops kept verbatim.
def int_amdgcn_s_sleep :
  GCCBuiltin<"__builtin_amdgcn_s_sleep">,
  Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
                                IntrHasSideEffects, IntrWillReturn]> {
}

def int_amdgcn_s_incperflevel :
  GCCBuiltin<"__builtin_amdgcn_s_incperflevel">,
  Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
                                IntrHasSideEffects, IntrWillReturn]> {
}

def int_amdgcn_s_decperflevel :
  GCCBuiltin<"__builtin_amdgcn_s_decperflevel">,
  Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
                                IntrHasSideEffects, IntrWillReturn]> {
}
1236
// Read a hardware register selected by an immediate (operand 0). Modeled as
// a read of inaccessible memory so it is not speculatively folded away from
// the register state it observes.
def int_amdgcn_s_getreg :
  GCCBuiltin<"__builtin_amdgcn_s_getreg">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
  [IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable,
   IntrWillReturn, ImmArg<ArgIndex<0>>]
>;
1243
// Note this can be used to set FP environment properties that are
// unsafe to change in non-strictfp functions. The register properties
// available (and value required to access them) may differ per
// subtarget. llvm.amdgcn.s.setreg(hwmode, value)
// NOTE(review): unlike s_getreg above, this carries no IntrWillReturn --
// confirm whether that omission is intentional.
def int_amdgcn_s_setreg :
  GCCBuiltin<"__builtin_amdgcn_s_setreg">,
  Intrinsic<[], [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]
>;
1253
// int_amdgcn_s_getpc is provided to allow a specific style of position
// independent code to determine the high part of its address when it is
// known (through convention) that the code and any data of interest does
// not cross a 4Gb address boundary. Use for any other purpose may not
// produce the desired results as optimizations may cause code movement,
// especially as we explicitly use IntrNoMem to allow optimizations.
def int_amdgcn_s_getpc :
  GCCBuiltin<"__builtin_amdgcn_s_getpc">,
  Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable,
                                IntrWillReturn]>;
1264
// Parameter-interpolation intrinsics. attr_chan/attr (and where present the
// param/high selectors) must be immediates; m0 is always the last operand.

// __builtin_amdgcn_interp_mov <param>, <attr_chan>, <attr>, <m0>
// param values: 0 = P10, 1 = P20, 2 = P0
def int_amdgcn_interp_mov :
  GCCBuiltin<"__builtin_amdgcn_interp_mov">,
  Intrinsic<[llvm_float_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn,
              ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;

// __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
// This intrinsic reads from lds, but the memory values are constant,
// so it behaves like IntrNoMem.
def int_amdgcn_interp_p1 :
  GCCBuiltin<"__builtin_amdgcn_interp_p1">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn,
             ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;

// __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
def int_amdgcn_interp_p2 :
  GCCBuiltin<"__builtin_amdgcn_interp_p2">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn,
             ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
          // See int_amdgcn_v_interp_p1 for why this is IntrNoMem.

// __builtin_amdgcn_interp_p1_f16 <i>, <attr_chan>, <attr>, <high>, <m0>
def int_amdgcn_interp_p1_f16 :
  GCCBuiltin<"__builtin_amdgcn_interp_p1_f16">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn,
             ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;

// __builtin_amdgcn_interp_p2_f16 <p1>, <j>, <attr_chan>, <attr>, <high>, <m0>
def int_amdgcn_interp_p2_f16 :
  GCCBuiltin<"__builtin_amdgcn_interp_p2_f16">,
  Intrinsic<[llvm_half_ty],
            [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn,
             ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
1308
// Pixel shaders only: whether the current pixel is live (i.e. not a helper
// invocation for derivative computation). No operands; returns i1.
def int_amdgcn_ps_live : Intrinsic <
  [llvm_i1_ty],
  [],
  [IntrNoMem, IntrWillReturn]>;
1315
// mbcnt: masked bit count over lanes; lo and hi halves share the
// (mask, add-in) i32 x i32 -> i32 signature.
def int_amdgcn_mbcnt_lo :
  GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
   [IntrNoMem, IntrWillReturn]>;

def int_amdgcn_mbcnt_hi :
  GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrWillReturn]>;
1325
// llvm.amdgcn.ds.swizzle src offset
// Cross-lane data exchange; offset (operand 1) must be an immediate.
// IntrConvergent: the result depends on which lanes execute together.
def int_amdgcn_ds_swizzle :
  GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<1>>]>;
1332
// Bitfield extract (unsigned / signed): (src, offset, width) with the
// source overloaded over integer types.
def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty],
    [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty],
    [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
1342
// Pure three-operand i32 ALU intrinsics (lerp and the sum-of-absolute-
// differences family); all are side-effect free and speculatable.
def int_amdgcn_lerp :
  GCCBuiltin<"__builtin_amdgcn_lerp">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_sad_u8 :
  GCCBuiltin<"__builtin_amdgcn_sad_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_msad_u8 :
  GCCBuiltin<"__builtin_amdgcn_msad_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_sad_hi_u8 :
  GCCBuiltin<"__builtin_amdgcn_sad_hi_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_sad_u16 :
  GCCBuiltin<"__builtin_amdgcn_sad_u16">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
1372
// Quad/masked quad SAD variants operating on packed 64-bit inputs; the
// mqsad_u32_u8 form accumulates into and returns a v4i32.
def int_amdgcn_qsad_pk_u16_u8 :
  GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">,
  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_mqsad_pk_u16_u8 :
  GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">,
  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_mqsad_u32_u8 :
  GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
  Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
1390
// Convert a float and pack it as a u8 byte into the given word
// (operands: src value, byte selector, word to merge into).
def int_amdgcn_cvt_pk_u8_f32 :
  GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
  Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
1396
// Wave-wide compares and ballot. The comparison code (operand 2) must be an
// immediate; the first overloaded type is the result mask width. All are
// IntrConvergent: results depend on the set of active lanes.
def int_amdgcn_icmp :
  Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty, LLVMMatchType<1>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<2>>]>;

def int_amdgcn_fcmp :
  Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<2>>]>;

def int_amdgcn_ballot :
  Intrinsic<[llvm_anyint_ty], [llvm_i1_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn]>;
1410
// Cross-lane read/write primitives; all IntrConvergent.
def int_amdgcn_readfirstlane :
  GCCBuiltin<"__builtin_amdgcn_readfirstlane">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn]>;

// The lane argument must be uniform across the currently active threads of the
// current wave. Otherwise, the result is undefined.
def int_amdgcn_readlane :
  GCCBuiltin<"__builtin_amdgcn_readlane">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn]>;

// The value to write and lane select arguments must be uniform across the
// currently active threads of the current wave. Otherwise, the result is
// undefined.
def int_amdgcn_writelane :
  GCCBuiltin<"__builtin_amdgcn_writelane">,
  Intrinsic<[llvm_i32_ty], [
    llvm_i32_ty,    // uniform value to write: returned by the selected lane
    llvm_i32_ty,    // uniform lane select
    llvm_i32_ty     // returned by all lanes other than the selected one
  ],
  [IntrNoMem, IntrConvergent, IntrWillReturn]
>;
1435
// i32 llvm.amdgcn.alignbit(i32 hi, i32 lo, i32 shift)
// FIXME: Deprecated. This is equivalent to llvm.fshr
def int_amdgcn_alignbit : Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// i32 llvm.amdgcn.alignbyte(i32 hi, i32 lo, i32 offset)
// Byte-granularity analogue of alignbit (v_alignbyte).
def int_amdgcn_alignbyte : GCCBuiltin<"__builtin_amdgcn_alignbyte">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// i32 llvm.amdgcn.mul.i24(i32 a, i32 b)
// Signed 24-bit multiply; presumably only the low 24 bits of each operand
// are significant -- confirm against the ISA description of v_mul_i32_i24.
def int_amdgcn_mul_i24 : Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// i32 llvm.amdgcn.mul.u24(i32 a, i32 b)
// Unsigned counterpart of llvm.amdgcn.mul.i24.
def int_amdgcn_mul_u24 : Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
1456
// GWS (global wave sync) intrinsics. All are modeled as touching only
// inaccessible memory (the GWS hardware resource) and carry SDNPMemOperand so
// the selected nodes get a machine mem operand.

// llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id)
//
// bar_val is the total number of waves that will wait on this
// barrier, minus 1.
def int_amdgcn_ds_gws_init :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_init">,
  Intrinsic<[],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrWriteMem,
   IntrInaccessibleMemOnly, IntrWillReturn], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.barrier(i32 vsrc0, i32 resource_id)
// vsrc0 is the total number of waves that will wait on this
// barrier, minus 1.
def int_amdgcn_ds_gws_barrier :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_barrier">,
  Intrinsic<[],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.v(i32 resource_id)
def int_amdgcn_ds_gws_sema_v :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_v">,
  Intrinsic<[],
  [llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.br(i32 vsrc, i32 resource_id)
def int_amdgcn_ds_gws_sema_br :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_br">,
  Intrinsic<[],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.p(i32 resource_id)
def int_amdgcn_ds_gws_sema_p :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_p">,
  Intrinsic<[],
  [llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.release.all(i32 resource_id)
def int_amdgcn_ds_gws_sema_release_all :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_release_all">,
  Intrinsic<[],
  [llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
  [SDNPMemOperand]
>;
1516
1517
// Copies the source value to the destination value, with the guarantee that
// the source value is computed as if the entire program were executed in WQM
// (whole quad mode).
def int_amdgcn_wqm : Intrinsic<[llvm_any_ty],
  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Copies the source value to the destination value, such that the source
// is computed as if the entire program were executed in WQM if any other
// program code executes in WQM.
def int_amdgcn_softwqm : Intrinsic<[llvm_any_ty],
  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Return true if at least one thread within the pixel quad passes true into
// the function.
def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty],
  [llvm_i1_ty], [IntrNoMem, IntrConvergent, IntrWillReturn]
>;

// If false, set EXEC=0 for the current thread until the end of program.
// FIXME: Should this be IntrNoMem, IntrHasSideEffects, or IntrWillReturn?
def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>;

// Copies the active channels of the source value to the destination value,
// with the guarantee that the source value is computed as if the entire
// program were executed in Whole Wavefront Mode, i.e. with all channels
// enabled, with a few exceptions: - Phi nodes which require WWM return an
// undefined value.
def int_amdgcn_wwm : Intrinsic<[llvm_any_ty],
  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable,
                       IntrConvergent, IntrWillReturn]
>;
1550
// Given a value, copies it while setting all the inactive lanes to a given
// value. Note that OpenGL helper lanes are considered active, so if the
// program ever uses WQM, then the instruction and the first source will be
// computed in WQM.
def int_amdgcn_set_inactive :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, // value to be copied
             LLVMMatchType<0>], // value for the inactive lanes to take
            [IntrNoMem, IntrConvergent, IntrWillReturn]>;

// Return true if the given flat pointer points to a local memory address.
def int_amdgcn_is_shared : GCCBuiltin<"__builtin_amdgcn_is_shared">,
  Intrinsic<[llvm_i1_ty], [llvm_ptr_ty],
  [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>, IntrWillReturn]
>;

// Return true if the given flat pointer points to a private memory address.
def int_amdgcn_is_private : GCCBuiltin<"__builtin_amdgcn_is_private">,
  Intrinsic<[llvm_i1_ty], [llvm_ptr_ty],
  [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>, IntrWillReturn]
>;
1572
1573//===----------------------------------------------------------------------===//
1574// CI+ Intrinsics
1575//===----------------------------------------------------------------------===//
1576
// Cache maintenance operations. Modeled with IntrHasSideEffects (rather than
// memory attributes) because the affected cache state is not IR-visible.
def int_amdgcn_s_dcache_inv_vol :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_buffer_wbinvl1_vol :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;
1584
1585//===----------------------------------------------------------------------===//
1586// VI Intrinsics
1587//===----------------------------------------------------------------------===//
1588
// llvm.amdgcn.mov.dpp.i32 <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
// All DPP control operands (args 1-4) must be immediates.
def int_amdgcn_mov_dpp :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i1_ty],
             [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;

// llvm.amdgcn.update.dpp.i32 <old> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
// All DPP control operands (args 2-5) must be immediates.
// Should be equivalent to:
// v_mov_b32 <dest> <old>
// v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
def int_amdgcn_update_dpp :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i1_ty],
             [IntrNoMem, IntrConvergent, IntrWillReturn,
              ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>,
              ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

// Scalar data-cache writeback; side-effect-only like the invalidates above.
def int_amdgcn_s_dcache_wb :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_s_dcache_wb_vol :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

// i64 llvm.amdgcn.s.memrealtime()
// Reads a time counter; deliberately carries no memory attributes, presumably
// so that distinct reads are not folded together.
def int_amdgcn_s_memrealtime :
  GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
  Intrinsic<[llvm_i64_ty], [], [IntrWillReturn]>;

// llvm.amdgcn.ds.permute <index> <src>
def int_amdgcn_ds_permute :
  GCCBuiltin<"__builtin_amdgcn_ds_permute">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
    [IntrNoMem, IntrConvergent, IntrWillReturn]>;

// llvm.amdgcn.ds.bpermute <index> <src>
def int_amdgcn_ds_bpermute :
  GCCBuiltin<"__builtin_amdgcn_ds_bpermute">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
     [IntrNoMem, IntrConvergent, IntrWillReturn]>;
1633
1634//===----------------------------------------------------------------------===//
1635// GFX10 Intrinsics
1636//===----------------------------------------------------------------------===//
1637
// llvm.amdgcn.permlane16 <old> <src0> <src1> <src2> <fi> <bound_control>
// <fi> and <bound_control> (args 4 and 5) must be immediates.
def int_amdgcn_permlane16 : GCCBuiltin<"__builtin_amdgcn_permlane16">,
  Intrinsic<[llvm_i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

// llvm.amdgcn.permlanex16 <old> <src0> <src1> <src2> <fi> <bound_control>
// <fi> and <bound_control> (args 4 and 5) must be immediates.
def int_amdgcn_permlanex16 : GCCBuiltin<"__builtin_amdgcn_permlanex16">,
  Intrinsic<[llvm_i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

// llvm.amdgcn.mov.dpp8.i32 <src> <sel>
// <sel> is a 32-bit constant whose high 8 bits must be zero which selects
// the lanes to read from.
def int_amdgcn_mov_dpp8 :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<1>>]>;

// i32 llvm.amdgcn.s.get.waveid.in.workgroup()
def int_amdgcn_s_get_waveid_in_workgroup :
  GCCBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">,
  Intrinsic<[llvm_i32_ty], [],
    [IntrReadMem, IntrInaccessibleMemOnly, IntrWillReturn]>;

// Global atomic that returns the pre/post-op value of type vt. Accesses only
// the pointed-to memory (IntrArgMemOnly) and does not capture the pointer.
class AMDGPUGlobalAtomicRtn<LLVMType vt> : Intrinsic <
  [vt],
  [llvm_anyptr_ty,    // vaddr
   vt],               // vdata(VGPR)
  [IntrArgMemOnly, NoCapture<ArgIndex<0>>], "", [SDNPMemOperand]>;

// i32 llvm.amdgcn.global.atomic.csub(ptr, i32)
def int_amdgcn_global_atomic_csub : AMDGPUGlobalAtomicRtn<llvm_i32_ty>;
1673
1674//===----------------------------------------------------------------------===//
1675// Deep learning intrinsics.
1676//===----------------------------------------------------------------------===//
1677
// Dot-product intrinsics. In each of these, %clamp (arg 3) must be an
// immediate (ImmArg).

// f32 %r = llvm.amdgcn.fdot2(v2f16 %a, v2f16 %b, f32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_fdot2 :
  GCCBuiltin<"__builtin_amdgcn_fdot2">,
  Intrinsic<
    [llvm_float_ty], // %r
    [
      llvm_v2f16_ty, // %a
      llvm_v2f16_ty, // %b
      llvm_float_ty, // %c
      llvm_i1_ty     // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;

// i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_sdot2 :
  GCCBuiltin<"__builtin_amdgcn_sdot2">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_v2i16_ty, // %a
      llvm_v2i16_ty, // %b
      llvm_i32_ty,   // %c
      llvm_i1_ty     // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;

// u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_udot2 :
  GCCBuiltin<"__builtin_amdgcn_udot2">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_v2i16_ty, // %a
      llvm_v2i16_ty, // %b
      llvm_i32_ty,   // %c
      llvm_i1_ty     // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;

// i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
def int_amdgcn_sdot4 :
  GCCBuiltin<"__builtin_amdgcn_sdot4">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_i32_ty, // %a
      llvm_i32_ty, // %b
      llvm_i32_ty, // %c
      llvm_i1_ty   // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;

// u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
def int_amdgcn_udot4 :
  GCCBuiltin<"__builtin_amdgcn_udot4">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_i32_ty, // %a
      llvm_i32_ty, // %b
      llvm_i32_ty, // %c
      llvm_i1_ty   // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;

// i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
//        %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
def int_amdgcn_sdot8 :
  GCCBuiltin<"__builtin_amdgcn_sdot8">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_i32_ty, // %a
      llvm_i32_ty, // %b
      llvm_i32_ty, // %c
      llvm_i1_ty   // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;

// u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
//        %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
def int_amdgcn_udot8 :
  GCCBuiltin<"__builtin_amdgcn_udot8">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_i32_ty, // %a
      llvm_i32_ty, // %b
      llvm_i32_ty, // %c
      llvm_i1_ty   // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;
1784
1785//===----------------------------------------------------------------------===//
1786// gfx908 intrinsics
1787// ===----------------------------------------------------------------------===//
1788
// Buffer atomic with no returned value; the slc operand (arg 4) must be an
// immediate. RsrcArg = 1 (the v4i32 resource descriptor).
class AMDGPUBufferAtomicNoRtn : Intrinsic <
  [],
  [llvm_anyfloat_ty,  // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty],       // slc(imm)
  [ImmArg<ArgIndex<4>>, IntrWillReturn], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1, 0>;

// Global (flat-address) atomic with no returned value; accesses only the
// pointed-to memory and does not capture the pointer.
class AMDGPUGlobalAtomicNoRtn : Intrinsic <
  [],
  [llvm_anyptr_ty,    // vaddr
   llvm_anyfloat_ty],               // vdata(VGPR)
  [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>], "",
  [SDNPMemOperand]>;

// Non-returning floating-point add atomics (gfx908).
def int_amdgcn_buffer_atomic_fadd    : AMDGPUBufferAtomicNoRtn;
def int_amdgcn_global_atomic_fadd    : AMDGPUGlobalAtomicNoRtn;
1808
// llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
// Matrix fused-multiply-add intrinsics. In every variant below, the three
// trailing control operands (cbsz, abid, blgp -- args 3, 4, 5) must be
// immediates. The name encodes result type and tile shape, e.g.
// f32_32x32x1f32 accumulates f32 inputs into a v32f32 accumulator.
def int_amdgcn_mfma_f32_32x32x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x1f32">,
  Intrinsic<[llvm_v32f32_ty],
            [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_16x16x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x1f32">,
  Intrinsic<[llvm_v16f32_ty],
            [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_4x4x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x1f32">,
  Intrinsic<[llvm_v4f32_ty],
            [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_32x32x2f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x2f32">,
  Intrinsic<[llvm_v16f32_ty],
            [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_16x16x4f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4f32">,
  Intrinsic<[llvm_v4f32_ty],
            [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

// f16-input variants (v4f16 srcA/srcB).
def int_amdgcn_mfma_f32_32x32x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4f16">,
  Intrinsic<[llvm_v32f32_ty],
            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_16x16x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4f16">,
  Intrinsic<[llvm_v16f32_ty],
            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_4x4x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x4f16">,
  Intrinsic<[llvm_v4f32_ty],
            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_32x32x8f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x8f16">,
  Intrinsic<[llvm_v16f32_ty],
            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_16x16x16f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x16f16">,
  Intrinsic<[llvm_v4f32_ty],
            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

// i8-input variants (packed into i32 srcA/srcB, integer accumulators).
def int_amdgcn_mfma_i32_32x32x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_32x32x4i8">,
  Intrinsic<[llvm_v32i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_v32i32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_i32_16x16x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_16x16x4i8">,
  Intrinsic<[llvm_v16i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_i32_4x4x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_4x4x4i8">,
  Intrinsic<[llvm_v4i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_i32_32x32x8i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_32x32x8i8">,
  Intrinsic<[llvm_v16i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_i32_16x16x16i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_16x16x16i8">,
  Intrinsic<[llvm_v4i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

// bf16-input variants (bf16 pairs carried as v2i16).
def int_amdgcn_mfma_f32_32x32x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x2bf16">,
  Intrinsic<[llvm_v32f32_ty],
            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_16x16x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x2bf16">,
  Intrinsic<[llvm_v16f32_ty],
            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_4x4x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x2bf16">,
  Intrinsic<[llvm_v4f32_ty],
            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_32x32x4bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4bf16">,
  Intrinsic<[llvm_v16f32_ty],
            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_16x16x8bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x8bf16">,
  Intrinsic<[llvm_v4f32_ty],
            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
            llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1949
1950//===----------------------------------------------------------------------===//
1951// Special Intrinsics for backend internal use only. No frontend
1952// should emit calls to these.
1953// ===----------------------------------------------------------------------===//
// Divergent control-flow markers. The overloaded integer type presumably
// carries the saved exec mask (i32/i64 depending on wave size) -- NOTE(review):
// confirm against the backend's control-flow lowering.
def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
  [llvm_i1_ty], [IntrConvergent, IntrWillReturn]
>;

def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
  [llvm_anyint_ty], [IntrConvergent, IntrWillReturn]
>;

def int_amdgcn_if_break : Intrinsic<[llvm_anyint_ty],
  [llvm_i1_ty, LLVMMatchType<0>],
  [IntrNoMem, IntrConvergent, IntrWillReturn]
>;

def int_amdgcn_loop : Intrinsic<[llvm_i1_ty],
  [llvm_anyint_ty], [IntrConvergent, IntrWillReturn]
>;

def int_amdgcn_end_cf : Intrinsic<[], [llvm_anyint_ty],
  [IntrConvergent, IntrWillReturn]>;

// Represent unreachable in a divergent region.
def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>;

// Emit 2.5 ulp, no denormal division. Should only be inserted by
// pass based on !fpmath metadata.
def int_amdgcn_fdiv_fast : Intrinsic<
  [llvm_float_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Represent a relocation constant.
def int_amdgcn_reloc_constant : Intrinsic<
  [llvm_i32_ty], [llvm_metadata_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
1989}
1990