1//===---- AMDCallingConv.td - Calling Conventions for Radeon GPUs ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This describes the calling conventions for the AMD Radeon GPUs.
10//
11//===----------------------------------------------------------------------===//
12
// Inverse of CCIfInReg: action A is applied only when the argument is not
// flagged inreg.
class CCIfNotInReg<CCAction A>
  : CCIf<"!ArgFlags.isInReg()", A>;
// Action A is applied only when the argument carries a sign-extension or
// zero-extension flag.
class CCIfExtend<CCAction A> : CCIf<
  "ArgFlags.isSExt() || ArgFlags.isZExt()", A
>;
17
18// Calling convention for SI
// Argument assignment for CC_SI_Gfx. Rules are tried in order: inreg
// arguments take SGPR4-SGPR29, non-inreg arguments take VGPR0-VGPR31, and
// anything left over is placed on the stack.
def CC_SI_Gfx : CallingConv<[
  // SGPRs that are not available for inreg arguments:
  //   0-3   stack buffer descriptor
  //   30-31 return address
  //   32    stack pointer
  //   33    frame pointer
  //   34    base pointer
  CCIfInReg<CCIfType<
    [f32, i32, f16, i16, v2i16, v2f16],
    CCAssignToReg<[
      SGPR4, SGPR5, SGPR6, SGPR7,
      SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
      SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
      SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29
    ]>
  >>,

  // Arguments without the inreg flag use the first 32 VGPRs.
  CCIfNotInReg<CCIfType<
    [f32, i32, f16, i16, v2i16, v2f16],
    CCAssignToReg<[
      VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
      VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
      VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
      VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
    ]>
  >>,

  // Stack fallback (CCAssignToStack<4, 4>) for values that did not get a
  // register above; i1 is only handled here.
  CCIfType<
    [i32, f32, v2i16, v2f16, i16, f16, i1],
    CCAssignToStack<4, 4>
  >
]>;
41
// Register assignment list RetCC_SI_Gfx. i1 is always promoted to i32; the
// second rule promotes i16 (and lists i1 again) only when an extension flag
// is present. Non-inreg values are then assigned to VGPR0-VGPR135. There is
// no inreg rule and no stack fallback in this list.
def RetCC_SI_Gfx : CallingConv<[
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,

  CCIfNotInReg<CCIfType<
    [f32, i32, f16, i16, v2i16, v2f16],
    CCAssignToReg<[
      VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
      VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
      VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
      VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,
      VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,
      VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,
      VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,
      VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,
      VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,
      VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,
      VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,
      VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,
      VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,
      VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,
      VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
      VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
      VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
    ]>
  >>
]>;
66
// Argument assignment list CC_SI_SHADER: inreg arguments are assigned to
// SGPR0-SGPR43, all other arguments to VGPR0-VGPR135. No stack fallback is
// listed.
def CC_SI_SHADER : CallingConv<[
  CCIfInReg<CCIfType<
    [f32, i32, f16, i16, v2i16, v2f16],
    CCAssignToReg<[
      SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
      SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
      SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
      SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
      SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39,
      SGPR40, SGPR41, SGPR42, SGPR43
    ]>
  >>,

  // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
  CCIfNotInReg<CCIfType<
    [f32, i32, f16, i16, v2i16, v2f16],
    CCAssignToReg<[
      VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
      VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
      VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
      VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,
      VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,
      VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,
      VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,
      VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,
      VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,
      VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,
      VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,
      VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,
      VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,
      VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,
      VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
      VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
      VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
    ]>
  >>
]>;
99
// Register assignment list RetCC_SI_Shader. Selection is by value type, not
// by the inreg flag: integer types go to SGPR0-SGPR43 and floating-point
// types go to VGPR0-VGPR135. i1/i16 values with an extension flag are
// promoted to i32 first.
def RetCC_SI_Shader : CallingConv<[
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,

  CCIfType<
    [i32, i16, v2i16],
    CCAssignToReg<[
      SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
      SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
      SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
      SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
      SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39,
      SGPR40, SGPR41, SGPR42, SGPR43
    ]>
  >,

  // 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
  CCIfType<
    [f32, f16, v2f16],
    CCAssignToReg<[
      VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
      VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
      VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
      VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,
      VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,
      VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,
      VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,
      VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,
      VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,
      VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,
      VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,
      VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,
      VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,
      VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,
      VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
      VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
      VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
    ]>
  >
]>;
132
// Callee-saved VGPRs. The CSRs & scratch-registers are interleaved at a
// split boundary of 8: starting at VGPR40, every other group of eight
// registers is listed here (40-47, 56-63, ..., 248-255).
def CSR_AMDGPU_VGPRs
  : CalleeSavedRegs<(add
      (sequence "VGPR%u", 40, 47),
      (sequence "VGPR%u", 56, 63),
      (sequence "VGPR%u", 72, 79),
      (sequence "VGPR%u", 88, 95),
      (sequence "VGPR%u", 104, 111),
      (sequence "VGPR%u", 120, 127),
      (sequence "VGPR%u", 136, 143),
      (sequence "VGPR%u", 152, 159),
      (sequence "VGPR%u", 168, 175),
      (sequence "VGPR%u", 184, 191),
      (sequence "VGPR%u", 200, 207),
      (sequence "VGPR%u", 216, 223),
      (sequence "VGPR%u", 232, 239),
      (sequence "VGPR%u", 248, 255))>;
150
// Callee-saved AGPRs: AGPR32 through AGPR255.
def CSR_AMDGPU_AGPRs
  : CalleeSavedRegs<(sequence "AGPR%u", 32, 255)>;
154
// Callee-saved SGPRs: SGPR30 through SGPR105.
def CSR_AMDGPU_SGPRs
  : CalleeSavedRegs<(sequence "SGPR%u", 30, 105)>;
158
// Callee-saved SGPRs for the SI_Gfx variant: SGPR4-SGPR31 plus
// SGPR64-SGPR105.
def CSR_AMDGPU_SI_Gfx_SGPRs
  : CalleeSavedRegs<(add (sequence "SGPR%u", 4, 31),
                         (sequence "SGPR%u", 64, 105))>;
162
// Union of the callee-saved VGPR and SGPR sets.
def CSR_AMDGPU
  : CalleeSavedRegs<(add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs)>;
166
// CSR_AMDGPU extended with the callee-saved AGPR set.
def CSR_AMDGPU_GFX90AInsts
  : CalleeSavedRegs<(add CSR_AMDGPU, CSR_AMDGPU_AGPRs)>;
170
// Union of the callee-saved VGPR set and the SI_Gfx callee-saved SGPR set.
def CSR_AMDGPU_SI_Gfx
  : CalleeSavedRegs<(add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs)>;
174
// CSR_AMDGPU_SI_Gfx extended with the callee-saved AGPR set.
def CSR_AMDGPU_SI_Gfx_GFX90AInsts
  : CalleeSavedRegs<(add CSR_AMDGPU_SI_Gfx, CSR_AMDGPU_AGPRs)>;
178
// Empty callee-saved set: the `add` has no operands.
def CSR_AMDGPU_NoRegs
  : CalleeSavedRegs<(add)>;
180
// Calling convention for leaf functions.
// Rules are tried in order: byval arguments use CCPassByVal<4, 4>; i1 is
// always promoted to i32; i8/i16 are promoted to i32 when an extension flag
// is set; the first 32 VGPRs are then used, with the stack as the fallback.
def CC_AMDGPU_Func : CallingConv<[
  CCIfByVal<CCPassByVal<4, 4>>,
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i8, i16], CCIfExtend<CCPromoteToType<i32>>>,

  // i1 still appears in the two lists below although the rule above has
  // already promoted it.
  CCIfType<
    [i32, f32, i16, f16, v2i16, v2f16, i1],
    CCAssignToReg<[
      VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
      VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
      VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
      VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
    ]>
  >,
  CCIfType<
    [i32, f32, v2i16, v2f16, i16, f16, i1],
    CCAssignToStack<4, 4>
  >
]>;
193
// Register assignment list RetCC_AMDGPU_Func (presumably the return-value
// counterpart of CC_AMDGPU_Func, going by its name). i1 is always promoted
// to i32 and i16 is promoted when an extension flag is set; values are then
// assigned to VGPR0-VGPR31. No stack fallback is listed.
def RetCC_AMDGPU_Func : CallingConv<[
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,

  CCIfType<
    [i32, f32, i16, f16, v2i16, v2f16],
    CCAssignToReg<[
      VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
      VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
      VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
      VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
    ]>
  >
]>;
204
// Top-level dispatcher. Both entries require a subtarget generation of at
// least SOUTHERN_ISLANDS; the first delegates to CC_SI_SHADER and the second
// (which also requires CallingConv::C) delegates to CC_AMDGPU_Func.
// NOTE(review): the first entry has no calling-convention check and is listed
// first, so it is also tried for CallingConv::C - confirm this ordering is
// intended.
def CC_AMDGPU : CallingConv<[
  CCIf<
    "static_cast<const GCNSubtarget&>"
    "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
    "AMDGPUSubtarget::SOUTHERN_ISLANDS",
    CCDelegateTo<CC_SI_SHADER>
  >,
  CCIf<
    "static_cast<const GCNSubtarget&>"
    "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
    "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
    "State.getCallingConv() == CallingConv::C",
    CCDelegateTo<CC_AMDGPU_Func>
  >
]>;
215
// Marker class for defs that exist only to produce a RegMask: the SaveList
// is ignored and the def does not take part in any calling convention.
class RegMask<dag mask>
  : CalleeSavedRegs<mask>;
220
// Register mask covering VGPR0 through VGPR255.
def AMDGPU_AllVGPRs
  : RegMask<(sequence "VGPR%u", 0, 255)>;
224
// Register mask covering AGPR0 through AGPR255.
def AMDGPU_AllAGPRs
  : RegMask<(sequence "AGPR%u", 0, 255)>;
228
// Register mask covering every VGPR and every AGPR.
def AMDGPU_AllVectorRegs
  : RegMask<(add AMDGPU_AllVGPRs, AMDGPU_AllAGPRs)>;
232
// Register mask covering SGPR0 through SGPR105 plus VCC_LO and VCC_HI.
def AMDGPU_AllAllocatableSRegs
  : RegMask<(add (sequence "SGPR%u", 0, 105), VCC_LO, VCC_HI)>;
236