1//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This file contains DAG node definitions for the AMDGPU target.
10//
11//===----------------------------------------------------------------------===//
12
13//===----------------------------------------------------------------------===//
14// AMDGPU DAG Profiles
15//===----------------------------------------------------------------------===//
16
// Integer ternary profile: one result (index 0) and three operands
// (indices 1-3).  The result and the first two operands share one integer
// type; the third operand must be an integer but is not tied to that type.
def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3,
  [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>]
>;
20
// One FP result whose type matches the first operand, plus an integer second
// operand.
def AMDGPUTrigPreOp : SDTypeProfile<1, 2, [
  SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>
]>;

// Same shape as AMDGPUTrigPreOp: FP result matching the first operand, and an
// integer second operand.
def AMDGPULdExpOp : SDTypeProfile<1, 2, [
  SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>
]>;
28
// Integer result; FP first operand and integer second operand.
def AMDGPUFPClassOp : SDTypeProfile<1, 2, [
  SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>
]>;
32
// Pack of two FP operands of the same type.  The result type is left
// unconstrained by this profile.
def AMDGPUFPPackOp : SDTypeProfile<1, 2, [
  SDTCisFP<1>, SDTCisSameAs<1, 2>
]>;

// Pack of two integer operands of the same type.  The result type is left
// unconstrained by this profile.
def AMDGPUIntPackOp : SDTypeProfile<1, 2, [
  SDTCisInt<1>, SDTCisSameAs<1, 2>
]>;
40
// Two results (index 0 is FP, index 1 is integer) and three operands
// (indices 2-4), all of which share the type of the first result.
def AMDGPUDivScaleOp : SDTypeProfile<2, 3, [
  SDTCisFP<0>, SDTCisInt<1>,
  SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>
]>;

// Operands: float, float, float, vcc.  The three FP operands share the type of
// the FP result; the fourth operand is an integer.
def AMDGPUFmasOp : SDTypeProfile<1, 4, [
  SDTCisFP<0>,
  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
  SDTCisInt<4>
]>;
49
// No result; a single integer operand.
def AMDGPUKillSDT : SDTypeProfile<0, 1,
  [SDTCisInt<0>]
>;
51
// Control-flow profiles.  In each one an OtherVT operand sits alongside i1
// values.

// i1 result; operands are an i1 value and an OtherVT value.
def AMDGPUIfOp : SDTypeProfile<1, 2, [
  SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>
]>;

// Same shape as AMDGPUIfOp.
def AMDGPUElseOp : SDTypeProfile<1, 2, [
  SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>
]>;

// No result; operands are an i1 value and an OtherVT value.
def AMDGPULoopOp : SDTypeProfile<0, 2, [
  SDTCisVT<0, i1>, SDTCisVT<1, OtherVT>
]>;

// i1 result computed from two i1 operands.
def AMDGPUIfBreakOp : SDTypeProfile<1, 2, [
  SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, i1>
]>;
67
68//===----------------------------------------------------------------------===//
69// AMDGPU DAG Nodes
//===----------------------------------------------------------------------===//
71
// Chained control-flow nodes built on the If/Else/Loop profiles.
def AMDGPUif   : SDNode<"AMDGPUISD::IF",   AMDGPUIfOp,   [SDNPHasChain]>;
def AMDGPUelse : SDNode<"AMDGPUISD::ELSE", AMDGPUElseOp, [SDNPHasChain]>;
def AMDGPUloop : SDNode<"AMDGPUISD::LOOP", AMDGPULoopOp, [SDNPHasChain]>;
75
// Start of a call sequence; indices 0 and 1 are constrained to i32.  Chained,
// and produces glue.
def callseq_start : SDNode<
  "ISD::CALLSEQ_START",
  SDCallSeqStart<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>,
  [SDNPHasChain, SDNPOutGlue]
>;

// End of a call sequence; indices 0 and 1 are constrained to i32.  Chained,
// optionally consumes glue, and produces glue.
def callseq_end : SDNode<
  "ISD::CALLSEQ_END",
  SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;
85
// Call node: no results and a variable operand count (-1), with a
// pointer-typed first operand.
def AMDGPUcall : SDNode<
  "AMDGPUISD::CALL",
  SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]
>;

// Tail-call return: no results and three profiled operands, the first of
// which is pointer-typed.  Marked variadic.
def AMDGPUtc_return : SDNode<
  "AMDGPUISD::TC_RETURN",
  SDTypeProfile<0, 3, [SDTCisPtrTy<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;
96
// Trap node: no results and a variable operand count, with an i16 first
// operand.  Chained, has side effects, and requires incoming glue.
def AMDGPUtrap : SDNode<
  "AMDGPUISD::TRAP",
  SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>,
  [SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPInGlue]
>;
101
// Pointer to constant data: one result and one operand, both of pointer type.
// In SDTypeProfile numbering, index 0 is the result and index 1 is the
// operand, so each needs its own iPTR constraint.
def AMDGPUconstdata_ptr : SDNode<
  "AMDGPUISD::CONST_DATA_PTR",
  SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>
>;
106
// The argument to this node is a dword address.
def AMDGPUdwordaddr : SDNode<
  "AMDGPUISD::DWORDADDR", SDTIntUnaryOp
>;

// Force dependencies for vector trunc stores.
def R600dummy_chain : SDNode<
  "AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]
>;
112
// Unary FP nodes for COS_HW and SIN_HW.
def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;

// Fractional part: out = a - floor(a)
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;

// Reciprocal: out = 1.0 / a
def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;

// Reciprocal square root: out = 1.0 / sqrt(a)
def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;

// Legacy variants of the two nodes above.
// NOTE(review): presumably out = 1.0 / a for RCP_LEGACY and
// out = 1.0 / sqrt(a) for RSQ_LEGACY; confirm the exact legacy semantics.
def AMDGPUrcp_legacy : SDNode<"AMDGPUISD::RCP_LEGACY", SDTFPUnaryOp>;
def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;

// Unary FP node for RCP_IFLAG.
def AMDGPUrcp_iflag : SDNode<"AMDGPUISD::RCP_IFLAG", SDTFPUnaryOp>;

// out = 1.0 / sqrt(a), with the result clamped to +/- max_float.
def AMDGPUrsq_clamp : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;
133
// FP value combined with an integer operand (see AMDGPULdExpOp).
def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;

// Two-operand pack/convert nodes taking FP sources.
def AMDGPUpkrtz_f16_f32 : SDNode<
  "AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp
>;
def AMDGPUpknorm_i16_f32 : SDNode<
  "AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp
>;
def AMDGPUpknorm_u16_f32 : SDNode<
  "AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp
>;

// Two-operand pack/convert nodes taking integer sources.
def AMDGPUpk_i16_i32 : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>;
def AMDGPUpk_u16_u32 : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>;

// Both of these use the generic FP-to-integer profile.
def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16", SDTFPToIntOp>;
def AMDGPUfp16_zext : SDNode<"AMDGPUISD::FP16_ZEXT", SDTFPToIntOp>;

// FP class test: integer result from an FP value and an integer operand.
def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>;
146
// out = max(a, b), where a and b are floats and a comparison against nan
// fails.  Deliberately not marked commutative, because a failed comparison
// yields the second operand:
//   x < nan ? x : nan -> nan
//   nan < x ? nan : x -> x
def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp, []>;
154
// Legacy FP multiply.  Operand order does not matter, so the node is
// commutative.  It is not marked SDNPAssociative: floating-point
// multiplication rounds after every step, so (a * b) * c and a * (b * c) can
// differ, and TableGen must not generate reassociated pattern variants.
def AMDGPUfmul_legacy : SDNode<"AMDGPUISD::FMUL_LEGACY", SDTFPBinOp,
  [SDNPCommutative]
>;
158
// out = min(a, b), where a and b are floats and a comparison against nan
// fails.  No node properties are set.
def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp, []>;
163
// Three-input min/max nodes.
//
// FIXME: TableGen doesn't like commutative instructions with more
// than 2 operands, so the commutative/associative properties stay commented
// out on every node in this group.

// out = max(a, b, c); a, b and c are floats.
def AMDGPUfmax3 : SDNode<
  "AMDGPUISD::FMAX3", SDTFPTernaryOp, [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = max(a, b, c); a, b and c are signed ints.
def AMDGPUsmax3 : SDNode<
  "AMDGPUISD::SMAX3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = max(a, b, c); a, b and c are unsigned ints.
def AMDGPUumax3 : SDNode<
  "AMDGPUISD::UMAX3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = min(a, b, c); a, b and c are floats.
def AMDGPUfmin3 : SDNode<
  "AMDGPUISD::FMIN3", SDTFPTernaryOp, [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = min(a, b, c); a, b and c are signed ints.
def AMDGPUsmin3 : SDNode<
  "AMDGPUISD::SMIN3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = min(a, b, c); a, b and c are unsigned ints.
def AMDGPUumin3 : SDNode<
  "AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;
195
// Carry-out of an unsigned 32-bit add:
//   out = (src0 + src1 > 0xFFFFFFFF) ? 1 : 0
def AMDGPUcarry : SDNode<
  "AMDGPUISD::CARRY", SDTIntBinOp, []
>;

// Borrow of an unsigned subtract:
//   out = (src1 > src0) ? 1 : 0
def AMDGPUborrow : SDNode<
  "AMDGPUISD::BORROW", SDTIntBinOp, []
>;
201
// setcc profile: integer result, two compared operands of the same type, and
// an OtherVT third operand.
def AMDGPUSetCCOp : SDTypeProfile<1, 3,
  [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>]
>;

def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;

// setreg profile: no result, two integer operands.
def AMDGPUSetRegOp : SDTypeProfile<0, 2,
  [SDTCisInt<0>, SDTCisInt<1>]
>;

// Chained, side-effecting node; optionally consumes glue and produces glue.
def AMDGPUsetreg : SDNode<
  "AMDGPUISD::SETREG", AMDGPUSetRegOp,
  [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]
>;
214
// FMA and FMUL variants that carry a chain, optionally consume glue, and
// produce glue.
def AMDGPUfma : SDNode<
  "AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;

def AMDGPUmul : SDNode<
  "AMDGPUISD::FMUL_W_CHAIN", SDTFPBinOp,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;
220
// Integer-to-FP conversion nodes, one for each of CVT_F32_UBYTE0..3.
def AMDGPUcvt_f32_ubyte0 : SDNode<
  "AMDGPUISD::CVT_F32_UBYTE0", SDTIntToFPOp, []
>;
def AMDGPUcvt_f32_ubyte1 : SDNode<
  "AMDGPUISD::CVT_F32_UBYTE1", SDTIntToFPOp, []
>;
def AMDGPUcvt_f32_ubyte2 : SDNode<
  "AMDGPUISD::CVT_F32_UBYTE2", SDTIntToFPOp, []
>;
def AMDGPUcvt_f32_ubyte3 : SDNode<
  "AMDGPUISD::CVT_F32_UBYTE3", SDTIntToFPOp, []
>;
229
230
// urecip - Helper for integer division.  It returns the result of 1 / a as a
// fractional unsigned integer:
//   out = (2^32 / a) + e
// where e is the rounding error.
def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;

// Special case divide preop and flags.
def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;

// Special case divide FMA with scale and flags
// (src0 = Quotient, src1 = Denominator, src2 = Numerator).
def AMDGPUdiv_fmas : SDNode<
  "AMDGPUISD::DIV_FMAS", AMDGPUFmasOp, [SDNPOptInGlue]
>;

// Single or double precision division fixup.
// Special case divide fixup and flags
// (src0 = Quotient, src1 = Denominator, src2 = Numerator).
def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;

// Ternary FP node for FMAD_FTZ.
def AMDGPUfmad_ftz : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;

// Look Up 2.0 / pi src0 with segment select src1[4:0]
def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;
254
// Chained load: one result and two operands, a pointer followed by an integer.
def AMDGPUregister_load : SDNode<
  "AMDGPUISD::REGISTER_LOAD",
  SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
  [SDNPHasChain, SDNPMayLoad]
>;

// Chained store: no result and three operands.  Index 1 is a pointer and
// index 2 an integer; index 0 is unconstrained.
def AMDGPUregister_store : SDNode<
  "AMDGPUISD::REGISTER_STORE",
  SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
  [SDNPHasChain, SDNPMayStore]
>;
262
// MSKOR instructions are atomic memory instructions used mainly for storing
// 8-bit and 16-bit values.  They are defined as:
//
//   MSKOR(dst, mask, src): MEM[dst] = ((MEM[dst] & ~mask) | src)
//
// Operands:
//   src0: vec4(src, 0, 0, mask)
//   src1: dst - rat offset (aka pointer) in dwords
def AMDGPUstore_mskor : SDNode<
  "AMDGPUISD::STORE_MSKOR",
  SDTypeProfile<0, 2, []>,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand]
>;

// Compare-and-swap: one result, a pointer operand and a vector operand.  May
// both load and store, and carries a memory operand.
def AMDGPUatomic_cmp_swap : SDNode<
  "AMDGPUISD::ATOMIC_CMP_SWAP",
  SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisVec<2>]>,
  [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]
>;
278
// Maps the generic ISD::FROUND opcode: FP result with the same type as the
// operand.
def AMDGPUround : SDNode<
  "ISD::FROUND", SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>
>;

// Bitfield nodes: BFE_U32, BFE_I32 and BFI are ternary, BFM is binary.
def AMDGPUbfe_u32 : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfi     : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
def AMDGPUbfm     : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;

// Unary integer nodes for FFBH_U32, FFBH_I32 and FFBL_B32.
def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>;
def AMDGPUffbh_i32 : SDNode<"AMDGPUISD::FFBH_I32", SDTIntUnaryOp>;
def AMDGPUffbl_b32 : SDNode<"AMDGPUISD::FFBL_B32", SDTIntUnaryOp>;
291
// Signed and unsigned 24-bit multiply.  The highest 8 bits of each operand are
// ignored when performing the multiply.  The result is a 32-bit value.
//
// NOTE(review): SDNPAssociative looks questionable on the nodes below, since
// an intermediate product can exceed 24 bits before it is fed back in as an
// operand; confirm whether the property is intended.
def AMDGPUmul_u24 : SDNode<
  "AMDGPUISD::MUL_U24", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]
>;
def AMDGPUmul_i24 : SDNode<
  "AMDGPUISD::MUL_I24", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]
>;

// MULHI_U24 / MULHI_I24 companions to the 24-bit multiplies above.
def AMDGPUmulhi_u24 : SDNode<
  "AMDGPUISD::MULHI_U24", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]
>;
def AMDGPUmulhi_i24 : SDNode<
  "AMDGPUISD::MULHI_I24", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]
>;
307
// Integer ternary nodes for MAD_U24 and MAD_I24; no properties set.
def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp, []>;
def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp, []>;

// Three-input SMED3 / UMED3 (integer) and FMED3 (FP) nodes; no properties
// set.
def AMDGPUsmed3 : SDNode<"AMDGPUISD::SMED3", AMDGPUDTIntTernaryOp, []>;
def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp, []>;
def AMDGPUfmed3 : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>;
324
// FDOT2: FP result and four operands.  The first two operands are vectors of
// the same type, the third operand has the result's type, and the fourth is
// an integer.
def AMDGPUfdot2 : SDNode<
  "AMDGPUISD::FDOT2",
  SDTypeProfile<1, 4, [
    SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>,
    SDTCisFP<0>, SDTCisVec<1>,
    SDTCisInt<4>
  ]>,
  []
>;

// Integer ternary node for PERM; no properties set.
def AMDGPUperm : SDNode<
  "AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []
>;
332
// All four nodes below are chained, require incoming glue, and produce no
// result.

// Single integer operand.
def AMDGPUinit_exec : SDNode<
  "AMDGPUISD::INIT_EXEC",
  SDTypeProfile<0, 1, [SDTCisInt<0>]>,
  [SDNPHasChain, SDNPInGlue]
>;

// Two integer operands.
def AMDGPUinit_exec_from_input : SDNode<
  "AMDGPUISD::INIT_EXEC_FROM_INPUT",
  SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisInt<1>]>,
  [SDNPHasChain, SDNPInGlue]
>;

// Single integer operand.
def AMDGPUsendmsg : SDNode<
  "AMDGPUISD::SENDMSG",
  SDTypeProfile<0, 1, [SDTCisInt<0>]>,
  [SDNPHasChain, SDNPInGlue]
>;

// Single integer operand.
def AMDGPUsendmsghalt : SDNode<
  "AMDGPUISD::SENDMSGHALT",
  SDTypeProfile<0, 1, [SDTCisInt<0>]>,
  [SDNPHasChain, SDNPInGlue]
>;
349
// Interpolation nodes.  Each produces one FP result and requires incoming
// glue; the INTERP_P1* nodes also produce glue.  Only the result type is
// constrained, and the operand count differs from node to node.

// Three operands.
def AMDGPUinterp_mov : SDNode<
  "AMDGPUISD::INTERP_MOV", SDTypeProfile<1, 3, [SDTCisFP<0>]>, [SDNPInGlue]
>;

// Three operands.
def AMDGPUinterp_p1 : SDNode<
  "AMDGPUISD::INTERP_P1", SDTypeProfile<1, 3, [SDTCisFP<0>]>,
  [SDNPInGlue, SDNPOutGlue]
>;

// Four operands.
def AMDGPUinterp_p2 : SDNode<
  "AMDGPUISD::INTERP_P2", SDTypeProfile<1, 4, [SDTCisFP<0>]>, [SDNPInGlue]
>;

// Seven operands.
def AMDGPUinterp_p1ll_f16 : SDNode<
  "AMDGPUISD::INTERP_P1LL_F16", SDTypeProfile<1, 7, [SDTCisFP<0>]>,
  [SDNPInGlue, SDNPOutGlue]
>;

// Nine operands.
def AMDGPUinterp_p1lv_f16 : SDNode<
  "AMDGPUISD::INTERP_P1LV_F16", SDTypeProfile<1, 9, [SDTCisFP<0>]>,
  [SDNPInGlue, SDNPOutGlue]
>;

// Eight operands.
def AMDGPUinterp_p2_f16 : SDNode<
  "AMDGPUISD::INTERP_P2_F16", SDTypeProfile<1, 8, [SDTCisFP<0>]>,
  [SDNPInGlue]
>;
373
// Chained, side-effecting node taking the single integer operand described by
// AMDGPUKillSDT.
def AMDGPUkill : SDNode<
  "AMDGPUISD::KILL", AMDGPUKillSDT, [SDNPHasChain, SDNPSideEffect]
>;
376
// SI+ export.  No results and eight operands (indices 0-7):
//   0: tgt, 1: en, 2-5: src0-src3, 6: compr, 7: vm
// src0 (index 2) carries no constraint of its own; src1-src3 are tied to its
// type.
def AMDGPUExportOp : SDTypeProfile<0, 8, [
  SDTCisInt<0>,       // i8 tgt
  SDTCisInt<1>,       // i8 en
                      // i32 or f32 src0
  SDTCisSameAs<3, 2>, // f32 src1
  SDTCisSameAs<4, 2>, // f32 src2
  SDTCisSameAs<5, 2>, // f32 src3
  SDTCisInt<6>,       // i1 compr
  // skip done
  SDTCisInt<7>        // i1 vm (last operand; index 1 is already covered by en)
]>;
390
// Export nodes sharing AMDGPUExportOp.  Both are chained and may store;
// EXPORT_DONE is additionally marked as possibly loading.
def AMDGPUexport : SDNode<
  "AMDGPUISD::EXPORT", AMDGPUExportOp,
  [SDNPHasChain, SDNPMayStore]
>;

def AMDGPUexport_done : SDNode<
  "AMDGPUISD::EXPORT_DONE", AMDGPUExportOp,
  [SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
396
397
// R600 export profile: no result and seven operands.  The first is FP and the
// second an integer; the rest are unconstrained.
def R600ExportOp : SDTypeProfile<0, 7,
  [SDTCisFP<0>, SDTCisInt<1>]
>;

// Chained, side-effecting R600 export node.
def R600_EXPORT : SDNode<
  "AMDGPUISD::R600_EXPORT", R600ExportOp, [SDNPHasChain, SDNPSideEffect]
>;
402
403//===----------------------------------------------------------------------===//
404// Flow Control Profile Types
405//===----------------------------------------------------------------------===//
// Conditional branch profile: no result and two operands.  The first operand
// is OtherVT; the second is left unconstrained.
def SDTIL_BRCond : SDTypeProfile<0, 2,
  [SDTCisVT<0, OtherVT>]
>;
410
411//===----------------------------------------------------------------------===//
412// Flow Control DAG Nodes
413//===----------------------------------------------------------------------===//
// Chained conditional-branch node using the SDTIL_BRCond profile.
def IL_brcond : SDNode<
  "AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]
>;
415
416//===----------------------------------------------------------------------===//
417// Call/Return DAG Nodes
418//===----------------------------------------------------------------------===//
// End-of-program node: no profiled operands; chained with optional incoming
// glue.
def AMDGPUendpgm : SDNode<
  "AMDGPUISD::ENDPGM", SDTNone,
  [SDNPHasChain, SDNPOptInGlue]
>;

// Return-to-epilog node: no profiled operands, but variadic; chained with
// optional incoming glue.
def AMDGPUreturn_to_epilog : SDNode<
  "AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;

// Return node: one pointer-typed profiled operand, and variadic; chained with
// optional incoming glue.
def AMDGPUret_flag : SDNode<
  "AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;
428