1; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
2; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=iceland -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
4
; Make sure to also run on a GPU with the SGPR allocation bug (iceland, checked
; via the VI-BUG prefix).
6
7; GCN-LABEL: {{^}}use_vcc:
8; GCN: ; NumSgprs: 34
9; GCN: ; NumVgprs: 0
; Leaf function whose inline asm clobbers only vcc; the CHECK lines above pin
; the reported SGPR count (34) and VGPR count (0) for this case.
define void @use_vcc() #1 {
  call void asm sideeffect "", "~{vcc}" () #0
  ret void
}
14
15; GCN-LABEL: {{^}}indirect_use_vcc:
16; GCN: v_writelane_b32 v40, s33, 2
17; GCN: v_writelane_b32 v40, s30, 0
18; GCN: v_writelane_b32 v40, s31, 1
19; GCN: s_swappc_b64
20; GCN: v_readlane_b32 s4, v40, 0
21; GCN: v_readlane_b32 s5, v40, 1
22; GCN: v_readlane_b32 s33, v40, 2
23; GCN: ; NumSgprs: 36
24; GCN: ; NumVgprs: 41
; One level of indirection over @use_vcc: the callee's register usage plus the
; call overhead (return-address/FP spill lanes in v40, checked above) must be
; reflected in this function's reported counts (36 SGPRs / 41 VGPRs per CHECKs).
define void @indirect_use_vcc() #1 {
  call void @use_vcc()
  ret void
}
29
30; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
31; GCN: is_dynamic_callstack = 0
32; CI: ; NumSgprs: 38
33; VI-NOBUG: ; NumSgprs: 40
34; VI-BUG: ; NumSgprs: 96
35; GCN: ; NumVgprs: 41
; Kernel at the top of a two-level call chain; register usage must propagate
; through both calls, and the callstack is statically known (is_dynamic_callstack
; = 0). On iceland (VI-BUG) the reported SGPR count is 96 per the CHECK above.
define amdgpu_kernel void @indirect_2level_use_vcc_kernel(i32 addrspace(1)* %out) #0 {
  call void @indirect_use_vcc()
  ret void
}
40
41; GCN-LABEL: {{^}}use_flat_scratch:
42; CI: ; NumSgprs: 36
43; VI: ; NumSgprs: 38
44; GCN: ; NumVgprs: 0
; Leaf function whose inline asm clobbers flat_scratch; expected SGPR counts
; differ by target generation (CI: 36, VI: 38 per the CHECK lines above).
define void @use_flat_scratch() #1 {
  call void asm sideeffect "", "~{flat_scratch}" () #0
  ret void
}
49
50; GCN-LABEL: {{^}}indirect_use_flat_scratch:
51; CI: ; NumSgprs: 38
52; VI: ; NumSgprs: 40
53; GCN: ; NumVgprs: 41
; Caller of @use_flat_scratch: the callee's flat_scratch clobber must propagate
; into this function's counts (CI: 38, VI: 40 SGPRs per the CHECKs above).
define void @indirect_use_flat_scratch() #1 {
  call void @use_flat_scratch()
  ret void
}
58
59; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
60; GCN: is_dynamic_callstack = 0
61; CI: ; NumSgprs: 38
62; VI-NOBUG: ; NumSgprs: 40
63; VI-BUG: ; NumSgprs: 96
64; GCN: ; NumVgprs: 41
; Kernel two calls removed from the flat_scratch clobber; usage still propagates
; and the callstack is static. iceland (VI-BUG) reports 96 SGPRs per the CHECKs.
define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(i32 addrspace(1)* %out) #0 {
  call void @indirect_use_flat_scratch()
  ret void
}
69
70; GCN-LABEL: {{^}}use_10_vgpr:
71; GCN: ; NumVgprs: 10
; Clobbers v0-v9 across two inline asm statements, so NumVgprs is exactly 10
; (per the CHECK above).
define void @use_10_vgpr() #1 {
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4}"() #0
  call void asm sideeffect "", "~{v5},~{v6},~{v7},~{v8},~{v9}"() #0
  ret void
}
77
78; GCN-LABEL: {{^}}indirect_use_10_vgpr:
79; GCN: ; NumVgprs: 41
; Caller of @use_10_vgpr; with the call overhead the reported VGPR count is 41
; (per the CHECK above), not the callee's bare 10.
define void @indirect_use_10_vgpr() #0 {
  call void @use_10_vgpr()
  ret void
}
84
85; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr:
86; GCN: is_dynamic_callstack = 0
87; GCN: ; NumVgprs: 41
; Kernel two levels above the VGPR clobber; usage propagates (41 VGPRs) and the
; callstack is statically known (is_dynamic_callstack = 0), per the CHECKs.
define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 {
  call void @indirect_use_10_vgpr()
  ret void
}
92
93; GCN-LABEL: {{^}}use_50_vgpr:
94; GCN: ; NumVgprs: 50
; Clobbers only v49; the reported count is 50 per the CHECK above (counting
; runs up to the highest-numbered register touched).
define void @use_50_vgpr() #1 {
  call void asm sideeffect "", "~{v49}"() #0
  ret void
}
99
100; GCN-LABEL: {{^}}indirect_use_50_vgpr:
101; GCN: ; NumVgprs: 50
; Caller of @use_50_vgpr; the reported count stays 50 per the CHECK above
; (the callee's usage dominates).
define void @indirect_use_50_vgpr() #0 {
  call void @use_50_vgpr()
  ret void
}
106
107; GCN-LABEL: {{^}}use_80_sgpr:
108; GCN: ; NumSgprs: 80
; Clobbers only s79; reported SGPR count is 80 per the CHECK above.
define void @use_80_sgpr() #1 {
  call void asm sideeffect "", "~{s79}"() #0
  ret void
}
113
114; GCN-LABEL: {{^}}indirect_use_80_sgpr:
115; GCN: ; NumSgprs: 82
; Caller of @use_80_sgpr; the reported count rises to 82 per the CHECK above.
define void @indirect_use_80_sgpr() #1 {
  call void @use_80_sgpr()
  ret void
}
120
121; GCN-LABEL: {{^}}indirect_2_level_use_80_sgpr:
122; GCN: is_dynamic_callstack = 0
123; CI: ; NumSgprs: 84
124; VI-NOBUG: ; NumSgprs: 86
125; VI-BUG: ; NumSgprs: 96
; Kernel two levels above the s79 clobber; per the CHECKs the expected counts
; are CI: 84, VI-NOBUG: 86, and iceland (VI-BUG): 96.
define amdgpu_kernel void @indirect_2_level_use_80_sgpr() #0 {
  call void @indirect_use_80_sgpr()
  ret void
}
130
131
132; GCN-LABEL: {{^}}use_stack0:
133; GCN: ScratchSize: 2052
; Allocates a 512 x i32 (2048-byte) private stack object; reported ScratchSize
; is 2052 per the CHECK above.
define void @use_stack0() #1 {
  %alloca = alloca [512 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %alloca) #0
  ret void
}
139
140; GCN-LABEL: {{^}}use_stack1:
141; GCN: ScratchSize: 404
; Allocates a 100 x i32 (400-byte) private stack object; reported ScratchSize
; is 404 per the CHECK above.
define void @use_stack1() #1 {
  %alloca = alloca [100 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"([100 x i32] addrspace(5)* %alloca) #0
  ret void
}
147
148; GCN-LABEL: {{^}}indirect_use_stack:
149; GCN: ScratchSize: 2132
; Has its own 16 x i32 alloca AND calls @use_stack0, so the reported scratch is
; the combined frame plus the callee's (ScratchSize 2132 per the CHECK above).
define void @indirect_use_stack() #1 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"([16 x i32] addrspace(5)* %alloca) #0
  call void @use_stack0()
  ret void
}
156
157; GCN-LABEL: {{^}}indirect_2_level_use_stack:
158; GCN: is_dynamic_callstack = 0
159; GCN: ScratchSize: 2132
; Kernel above a two-level stack-using chain: callstack is statically known
; (is_dynamic_callstack = 0) and scratch propagates (2132), per the CHECKs.
define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
  call void @indirect_use_stack()
  ret void
}
164
165
166; Should be maximum of callee usage
167; GCN-LABEL: {{^}}multi_call_use_use_stack:
168; GCN: is_dynamic_callstack = 0
169; GCN: ScratchSize: 2052
; Two sequential callees with different frames: per the comment above, the
; kernel's scratch is the MAX of the callees (2052), not their sum.
define amdgpu_kernel void @multi_call_use_use_stack() #0 {
  call void @use_stack0()
  call void @use_stack1()
  ret void
}
175
176
; External callee with no visible body: its register/stack usage is unknown,
; which forces conservative estimates in callers (see @usage_external below).
declare void @external() #0
178
179; GCN-LABEL: {{^}}usage_external:
180; GCN: is_dynamic_callstack = 1
; NOTE(review): the two lines below have no FileCheck prefix, so they are NOT
; checked; they only document the expected conservative counts for an external
; call. Prefix them with "GCN:" if they should be enforced.
; NumSgprs: 48
; NumVgprs: 24
183; GCN: ScratchSize: 16384
; Calling an unknown external function makes the callstack dynamic
; (is_dynamic_callstack = 1) and yields the conservative 16384-byte scratch
; estimate, per the CHECKs above.
define amdgpu_kernel void @usage_external() #0 {
  call void @external()
  ret void
}
188
; External callee declared with #2 (no norecurse attribute), so it may recurse.
declare void @external_recurse() #2
190
191; GCN-LABEL: {{^}}usage_external_recurse:
192; GCN: is_dynamic_callstack = 1
; NOTE(review): the two lines below have no FileCheck prefix, so they are NOT
; checked; they only document the expected conservative counts for an external
; call. Prefix them with "GCN:" if they should be enforced.
; NumSgprs: 48
; NumVgprs: 24
195; GCN: ScratchSize: 16384
; Same as @usage_external but the external callee may recurse; still expects a
; dynamic callstack and the conservative 16384-byte scratch, per the CHECKs.
define amdgpu_kernel void @usage_external_recurse() #0 {
  call void @external_recurse()
  ret void
}
200
201; GCN-LABEL: {{^}}direct_recursion_use_stack:
202; GCN: ScratchSize: 2064
; Directly recursive function (attribute set #2 lacks norecurse) with a
; 512 x i32 (2048-byte) alloca; reported ScratchSize is 2064 per the CHECK
; above. Counts down from %val, recursing until %val == 0.
define void @direct_recursion_use_stack(i32 %val) #2 {
  %alloca = alloca [512 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %alloca) #0
  %cmp = icmp eq i32 %val, 0
  br i1 %cmp, label %ret, label %call

call:
  %val.sub1 = sub i32 %val, 1
  call void @direct_recursion_use_stack(i32 %val.sub1)
  br label %ret

ret:
  ret void
}
217
218; GCN-LABEL: {{^}}usage_direct_recursion:
219; GCN: is_ptr64 = 1
220; GCN: is_dynamic_callstack = 1
221; GCN: workitem_private_segment_byte_size = 2064
; Kernel calling a directly recursive function: the callstack is dynamic
; (is_dynamic_callstack = 1) and the private segment size reflects one frame
; of the recursive callee (2064 bytes), per the CHECKs above.
define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
  call void @direct_recursion_use_stack(i32 %n)
  ret void
}
226
227; Make sure there's no assert when a sgpr96 is used.
228; GCN-LABEL: {{^}}count_use_sgpr96_external_call
229; GCN: ; sgpr96 s[{{[0-9]+}}:{{[0-9]+}}]
230; CI: NumSgprs: 48
231; VI-NOBUG: NumSgprs: 48
232; VI-BUG: NumSgprs: 96
233; GCN: NumVgprs: 24
; Regression test (see comment above): an sgpr96 inline-asm operand (<3 x i32>)
; combined with an external call must not assert during register-usage counting.
define amdgpu_kernel void @count_use_sgpr96_external_call()  {
entry:
  tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
  call void @external()
  ret void
}
240
241; Make sure there's no assert when a sgpr160 is used.
242; GCN-LABEL: {{^}}count_use_sgpr160_external_call
243; GCN: ; sgpr160 s[{{[0-9]+}}:{{[0-9]+}}]
244; CI: NumSgprs: 48
245; VI-NOBUG: NumSgprs: 48
246; VI-BUG: NumSgprs: 96
247; GCN: NumVgprs: 24
; Regression test (see comment above): an sgpr160 inline-asm operand (<5 x i32>)
; combined with an external call must not assert during register-usage counting.
define amdgpu_kernel void @count_use_sgpr160_external_call()  {
entry:
  tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
  call void @external()
  ret void
}
254
255; Make sure there's no assert when a vgpr160 is used.
256; GCN-LABEL: {{^}}count_use_vgpr160_external_call
257; GCN: ; vgpr160 v[{{[0-9]+}}:{{[0-9]+}}]
258; CI: NumSgprs: 48
259; VI-NOBUG: NumSgprs: 48
260; VI-BUG: NumSgprs: 96
261; GCN: NumVgprs: 24
; Regression test (see comment above): a vgpr160 inline-asm operand (<5 x i32>)
; combined with an external call must not assert during register-usage counting.
define amdgpu_kernel void @count_use_vgpr160_external_call()  {
entry:
  tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
  call void @external()
  ret void
}
268
; #0 and #1 are currently identical; they are kept separate so individual tests
; can diverge without touching others. #2 deliberately omits norecurse — it is
; used on @direct_recursion_use_stack and @external_recurse, which may recurse.
attributes #0 = { nounwind noinline norecurse }
attributes #1 = { nounwind noinline norecurse }
attributes #2 = { nounwind noinline }
272