1; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX89 %s
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX89 %s
4
; External callees used by the kernels in this file. Each one is a declaration
; only, takes no arguments, carries attribute group #0, and differs from the
; others solely in its return type (and, for some, a zeroext/signext return
; attribute).
5declare void @external_void_func_void() #0
6
; i1 results: plain, zeroext and signext return attributes.
7declare i1 @external_i1_func_void() #0
8declare zeroext i1 @external_i1_zeroext_func_void() #0
9declare signext i1 @external_i1_signext_func_void() #0
10
; i8 results: plain, zeroext and signext return attributes.
11declare i8 @external_i8_func_void() #0
12declare zeroext i8 @external_i8_zeroext_func_void() #0
13declare signext i8 @external_i8_signext_func_void() #0
14
; i16 results: scalar (plain, zeroext, signext) plus 2- and 4-element vectors.
15declare i16 @external_i16_func_void() #0
16declare <2 x i16> @external_v2i16_func_void() #0
17declare <4 x i16> @external_v4i16_func_void() #0
18declare zeroext i16 @external_i16_zeroext_func_void() #0
19declare signext i16 @external_i16_signext_func_void() #0
20
; 32- and 64-bit integer scalars and the scalar floating-point types.
21declare i32 @external_i32_func_void() #0
22declare i64 @external_i64_func_void() #0
23declare half @external_f16_func_void() #0
24declare float @external_f32_func_void() #0
25declare double @external_f64_func_void() #0
26
; Floating-point vectors, including the odd element counts 3 and 5.
27declare <2 x half> @external_v2f16_func_void() #0
28declare <4 x half> @external_v4f16_func_void() #0
29declare <3 x float> @external_v3f32_func_void() #0
30declare <5 x float> @external_v5f32_func_void() #0
31declare <2 x double> @external_v2f64_func_void() #0
32
; Vector whose element type (i24) is not a power-of-two width.
33declare <2 x i24> @external_v2i24_func_void() #0
34
; i32 vectors from 2 up to 32 elements, plus a struct pairing the 32-element
; vector with one extra i32.
35declare <2 x i32> @external_v2i32_func_void() #0
36declare <3 x i32> @external_v3i32_func_void() #0
37declare <4 x i32> @external_v4i32_func_void() #0
38declare <5 x i32> @external_v5i32_func_void() #0
39declare <8 x i32> @external_v8i32_func_void() #0
40declare <16 x i32> @external_v16i32_func_void() #0
41declare <32 x i32> @external_v32i32_func_void() #0
42declare { <32 x i32>, i32 } @external_v32i32_i32_func_void() #0
43
; Small struct mixing a 32-bit and a 64-bit integer member.
44declare { i32, i64 } @external_i32_i64_func_void() #0
45
; Baseline case: a kernel that makes a single call to a void external function
; and returns. Only the function label is checked, so this passes as long as
; every RUN configuration emits the kernel.
46; GCN-LABEL: {{^}}test_call_external_void_func_void:
47define amdgpu_kernel void @test_call_external_void_func_void() #0 {
48  call void @external_void_func_void()
49  ret void
50}
51
; Same as the single-call void case, but the kernel calls the void external
; function twice in a row. Only the function label is checked.
52; GCN-LABEL: {{^}}test_call_external_void_func_void_x2:
53define amdgpu_kernel void @test_call_external_void_func_void_x2() #0 {
54  call void @external_void_func_void()
55  call void @external_void_func_void()
56  ret void
57}
58
; Calls an external function returning a plain i1 (no extension attribute) and
; consumes the result with a volatile i1 store to an undef addrspace(1)
; pointer. Only the function label is checked.
59; GCN-LABEL: {{^}}test_call_external_i1_func_void:
60define amdgpu_kernel void @test_call_external_i1_func_void() #0 {
61  %val = call i1 @external_i1_func_void()
62  store volatile i1 %val, i1 addrspace(1)* undef
63  ret void
64}
65
; Calls an external function whose i1 return is declared zeroext, widens the
; result to i32 with zext, and stores it with a volatile store to an undef
; addrspace(1) pointer. Only the function label is checked.
66; GCN-LABEL: {{^}}test_call_external_i1_zeroext_func_void:
67define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
68  %val = call i1 @external_i1_zeroext_func_void()
69  %val.ext = zext i1 %val to i32
70  store volatile i32 %val.ext, i32 addrspace(1)* undef
71  ret void
72}
73
; Calls an external function whose i1 return is declared signext, widens the
; result to i32, and stores it with a volatile store to an undef addrspace(1)
; pointer. Only the function label is checked.
74; GCN-LABEL: {{^}}test_call_external_i1_signext_func_void:
75define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
76  %val = call i1 @external_i1_signext_func_void()
  ; NOTE(review): the widening below is a zext although the callee's return is
  ; signext; a sext may have been intended here -- confirm before changing.
77  %val.ext = zext i1 %val to i32
78  store volatile i32 %val.ext, i32 addrspace(1)* undef
79  ret void
80}
81
; Calls an external function returning a plain i8 (no extension attribute) and
; stores the result with a volatile i8 store to an undef addrspace(1) pointer.
; Only the function label is checked.
82; GCN-LABEL: {{^}}test_call_external_i8_func_void:
83define amdgpu_kernel void @test_call_external_i8_func_void() #0 {
84  %val = call i8 @external_i8_func_void()
85  store volatile i8 %val, i8 addrspace(1)* undef
86  ret void
87}
88
; Calls an external function whose i8 return is declared zeroext, widens the
; result to i32 with zext, and stores it with a volatile store to an undef
; addrspace(1) pointer. Only the function label is checked.
89; GCN-LABEL: {{^}}test_call_external_i8_zeroext_func_void:
90define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
91  %val = call i8 @external_i8_zeroext_func_void()
92  %val.ext = zext i8 %val to i32
93  store volatile i32 %val.ext, i32 addrspace(1)* undef
94  ret void
95}
96
; Calls an external function whose i8 return is declared signext, widens the
; result to i32, and stores it with a volatile store to an undef addrspace(1)
; pointer. Only the function label is checked.
97; GCN-LABEL: {{^}}test_call_external_i8_signext_func_void:
98define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
99  %val = call i8 @external_i8_signext_func_void()
  ; NOTE(review): the widening below is a zext although the callee's return is
  ; signext; a sext may have been intended here -- confirm before changing.
100  %val.ext = zext i8 %val to i32
101  store volatile i32 %val.ext, i32 addrspace(1)* undef
102  ret void
103}
104
; Calls an external function returning a plain i16 (no extension attribute) and
; stores the result with a volatile i16 store to an undef addrspace(1) pointer.
; Only the function label is checked.
105; GCN-LABEL: {{^}}test_call_external_i16_func_void:
106define amdgpu_kernel void @test_call_external_i16_func_void() #0 {
107  %val = call i16 @external_i16_func_void()
108  store volatile i16 %val, i16 addrspace(1)* undef
109  ret void
110}
111
; Calls an external function whose i16 return is declared zeroext, widens the
; result to i32 with zext, and stores it with a volatile store to an undef
; addrspace(1) pointer. Only the function label is checked.
112; GCN-LABEL: {{^}}test_call_external_i16_zeroext_func_void:
113define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
114  %val = call i16 @external_i16_zeroext_func_void()
115  %val.ext = zext i16 %val to i32
116  store volatile i32 %val.ext, i32 addrspace(1)* undef
117  ret void
118}
119
; Calls an external function whose i16 return is declared signext, widens the
; result to i32, and stores it with a volatile store to an undef addrspace(1)
; pointer. Only the function label is checked.
120; GCN-LABEL: {{^}}test_call_external_i16_signext_func_void:
121define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
122  %val = call i16 @external_i16_signext_func_void()
  ; NOTE(review): the widening below is a zext although the callee's return is
  ; signext; a sext may have been intended here -- confirm before changing.
123  %val.ext = zext i16 %val to i32
124  store volatile i32 %val.ext, i32 addrspace(1)* undef
125  ret void
126}
127
; Calls an external function returning i32 and stores the result with a
; volatile store to an undef addrspace(1) pointer. Only the function label is
; checked.
128; GCN-LABEL: {{^}}test_call_external_i32_func_void:
129define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
130  %val = call i32 @external_i32_func_void()
131  store volatile i32 %val, i32 addrspace(1)* undef
132  ret void
133}
134
; Calls an external function returning i64 and stores the result with a
; volatile store to an undef addrspace(1) pointer. Only the function label is
; checked.
135; GCN-LABEL: {{^}}test_call_external_i64_func_void:
136define amdgpu_kernel void @test_call_external_i64_func_void() #0 {
137  %val = call i64 @external_i64_func_void()
138  store volatile i64 %val, i64 addrspace(1)* undef
139  ret void
140}
141
; Calls an external function returning half and stores the result with a
; volatile store to an undef addrspace(1) pointer. Only the function label is
; checked.
142; GCN-LABEL: {{^}}test_call_external_f16_func_void:
143define amdgpu_kernel void @test_call_external_f16_func_void() #0 {
144  %val = call half @external_f16_func_void()
145  store volatile half %val, half addrspace(1)* undef
146  ret void
147}
148
; Calls an external function returning float and stores the result with a
; volatile store to an undef addrspace(1) pointer. Only the function label is
; checked.
149; GCN-LABEL: {{^}}test_call_external_f32_func_void:
150define amdgpu_kernel void @test_call_external_f32_func_void() #0 {
151  %val = call float @external_f32_func_void()
152  store volatile float %val, float addrspace(1)* undef
153  ret void
154}
155
; Calls an external function returning double and stores the result with a
; volatile store to an undef addrspace(1) pointer. Only the function label is
; checked.
156; GCN-LABEL: {{^}}test_call_external_f64_func_void:
157define amdgpu_kernel void @test_call_external_f64_func_void() #0 {
158  %val = call double @external_f64_func_void()
159  store volatile double %val, double addrspace(1)* undef
160  ret void
161}
162
; Calls an external function returning <2 x double> and stores the whole vector
; with a volatile store to an undef addrspace(1) pointer. Only the function
; label is checked.
163; GCN-LABEL: {{^}}test_call_external_v2f64_func_void:
164define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
165  %val = call <2 x double> @external_v2f64_func_void()
166  store volatile <2 x double> %val, <2 x double> addrspace(1)* undef
167  ret void
168}
169
; Calls an external function returning <2 x i32> and stores the whole vector
; with a volatile store to an undef addrspace(1) pointer. Only the function
; label is checked.
170; GCN-LABEL: {{^}}test_call_external_v2i32_func_void:
171define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
172  %val = call <2 x i32> @external_v2i32_func_void()
173  store volatile <2 x i32> %val, <2 x i32> addrspace(1)* undef
174  ret void
175}
176
; Calls an external function returning <3 x i32> and stores the vector with a
; volatile, 8-byte-aligned store to an undef addrspace(1) pointer.
; The checks below expect the call instruction (s_swappc) and a single
; three-dword store whose data operand is v[0:2]: a flat store for the hawaii
; run and a buffer store for the fiji and gfx900 runs.
177; GCN-LABEL: {{^}}test_call_external_v3i32_func_void:
178; GCN: s_swappc
179; GFX7-DAG: flat_store_dwordx3 {{.*}}, v[0:2]
180; GFX89-DAG: buffer_store_dwordx3 v[0:2]
181define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 {
182  %val = call <3 x i32> @external_v3i32_func_void()
183  store volatile <3 x i32> %val, <3 x i32> addrspace(1)* undef, align 8
184  ret void
185}
186
; Calls an external function returning <4 x i32> and stores the vector with a
; volatile, 8-byte-aligned store to an undef addrspace(1) pointer. Only the
; function label is checked.
187; GCN-LABEL: {{^}}test_call_external_v4i32_func_void:
188define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
189  %val = call <4 x i32> @external_v4i32_func_void()
190  store volatile <4 x i32> %val, <4 x i32> addrspace(1)* undef, align 8
191  ret void
192}
193
; Calls an external function returning <5 x i32> and stores the vector with a
; volatile, 8-byte-aligned store to an undef addrspace(1) pointer.
; The checks below expect the call instruction (s_swappc) and the store to be
; split into a four-dword store of v[0:3] plus a one-dword store of v4: flat
; stores for the hawaii run, buffer stores for the fiji and gfx900 runs.
194; GCN-LABEL: {{^}}test_call_external_v5i32_func_void:
195; GCN: s_swappc
196; GFX7-DAG: flat_store_dwordx4 {{.*}}, v[0:3]
197; GFX7-DAG: flat_store_dword {{.*}}, v4
198; GFX89-DAG: buffer_store_dwordx4 v[0:3]
199; GFX89-DAG: buffer_store_dword v4
200define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 {
201  %val = call <5 x i32> @external_v5i32_func_void()
202  store volatile <5 x i32> %val, <5 x i32> addrspace(1)* undef, align 8
203  ret void
204}
205
; Calls an external function returning <8 x i32> and stores the vector with a
; volatile, 8-byte-aligned store to an undef addrspace(1) pointer. Only the
; function label is checked.
206; GCN-LABEL: {{^}}test_call_external_v8i32_func_void:
207define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 {
208  %val = call <8 x i32> @external_v8i32_func_void()
209  store volatile <8 x i32> %val, <8 x i32> addrspace(1)* undef, align 8
210  ret void
211}
212
; Calls an external function returning <16 x i32> and stores the vector with a
; volatile, 8-byte-aligned store to an undef addrspace(1) pointer. Only the
; function label is checked.
213; GCN-LABEL: {{^}}test_call_external_v16i32_func_void:
214define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 {
215  %val = call <16 x i32> @external_v16i32_func_void()
216  store volatile <16 x i32> %val, <16 x i32> addrspace(1)* undef, align 8
217  ret void
218}
219
; Calls an external function returning <32 x i32> and stores the vector with a
; volatile, 8-byte-aligned store to an undef addrspace(1) pointer. Only the
; function label is checked.
220; GCN-LABEL: {{^}}test_call_external_v32i32_func_void:
221define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 {
222  %val = call <32 x i32> @external_v32i32_func_void()
223  store volatile <32 x i32> %val, <32 x i32> addrspace(1)* undef, align 8
224  ret void
225}
226
; Calls an external function returning <2 x i16> and stores the vector with a
; volatile store to an undef addrspace(1) pointer. Only the function label is
; checked.
227; GCN-LABEL: {{^}}test_call_external_v2i16_func_void:
228define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 {
229  %val = call <2 x i16> @external_v2i16_func_void()
230  store volatile <2 x i16> %val, <2 x i16> addrspace(1)* undef
231  ret void
232}
233
; Calls an external function returning <4 x i16> and stores the vector with a
; volatile store to an undef addrspace(1) pointer. Only the function label is
; checked.
234; GCN-LABEL: {{^}}test_call_external_v4i16_func_void:
235define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
236  %val = call <4 x i16> @external_v4i16_func_void()
237  store volatile <4 x i16> %val, <4 x i16> addrspace(1)* undef
238  ret void
239}
240
; Calls an external function returning <2 x half> and stores the vector with a
; volatile store to an undef addrspace(1) pointer. Only the function label is
; checked.
241; GCN-LABEL: {{^}}test_call_external_v2f16_func_void:
242define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
243  %val = call <2 x half> @external_v2f16_func_void()
244  store volatile <2 x half> %val, <2 x half> addrspace(1)* undef
245  ret void
246}
247
; Calls an external function returning <4 x half> and stores the vector with a
; volatile store to an undef addrspace(1) pointer. Only the function label is
; checked.
248; GCN-LABEL: {{^}}test_call_external_v4f16_func_void:
249define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
250  %val = call <4 x half> @external_v4f16_func_void()
251  store volatile <4 x half> %val, <4 x half> addrspace(1)* undef
252  ret void
253}
254
; Calls an external function returning <2 x i24>, adds the two elements, and
; stores the i24 sum with a volatile store to an undef addrspace(1) pointer.
; The checks below expect the call (s_swappc_b64) followed by a 32-bit add of
; v0 and v1 into v0; the regex accepts either the i32 or u32 spelling of the
; add and an optional vcc operand.
255; GCN-LABEL: {{^}}test_call_external_v2i24_func_void:
256; GCN: s_swappc_b64
257; GCN: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}v0, v1
258define amdgpu_kernel void @test_call_external_v2i24_func_void() #0 {
259  %val = call <2 x i24> @external_v2i24_func_void()
260  %elt0 = extractelement <2 x i24> %val, i32 0
261  %elt1 = extractelement <2 x i24> %val, i32 1
262  %add = add i24 %elt0, %elt1
263  store volatile i24 %add, i24 addrspace(1)* undef
264  ret void
265}
266
; Calls an external function returning <3 x float> and stores the vector with a
; volatile store (no explicit alignment) to an undef addrspace(1) pointer.
; The checks below expect the call instruction (s_swappc) and a single
; three-dword store whose data operand is v[0:2]: a flat store for the hawaii
; run and a buffer store for the fiji and gfx900 runs.
267; GCN-LABEL: {{^}}test_call_external_v3f32_func_void:
268; GCN: s_swappc
269; GFX7-DAG: flat_store_dwordx3 {{.*}}, v[0:2]
270; GFX89-DAG: buffer_store_dwordx3 v[0:2]
271define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
272  %val = call <3 x float> @external_v3f32_func_void()
273  store volatile <3 x float> %val, <3 x float> addrspace(1)* undef
274  ret void
275}
276
; Calls an external function returning <5 x float> and stores the vector with a
; volatile store (no explicit alignment) to an undef addrspace(1) pointer.
; The checks below expect the call instruction (s_swappc) and the store to be
; split into a four-dword store of v[0:3] plus a one-dword store of v4: flat
; stores for the hawaii run, buffer stores for the fiji and gfx900 runs.
277; GCN-LABEL: {{^}}test_call_external_v5f32_func_void:
278; GCN: s_swappc
279; GFX7-DAG: flat_store_dwordx4 {{.*}}, v[0:3]
280; GFX7-DAG: flat_store_dword {{.*}}, v4
281; GFX89-DAG: buffer_store_dwordx4 v[0:3]
282; GFX89-DAG: buffer_store_dword v4
283define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 {
284  %val = call <5 x float> @external_v5f32_func_void()
285  store volatile <5 x float> %val, <5 x float> addrspace(1)* undef
286  ret void
287}
288
; Calls an external function returning the struct { i32, i64 }, extracts both
; members, and stores each one with its own volatile store to an undef
; addrspace(1) pointer. Only the function label is checked.
289; GCN-LABEL: {{^}}test_call_external_i32_i64_func_void:
290define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 {
291  %val = call { i32, i64 } @external_i32_i64_func_void()
292  %val.0 = extractvalue { i32, i64 } %val, 0
293  %val.1 = extractvalue { i32, i64 } %val, 1
294  store volatile i32 %val.0, i32 addrspace(1)* undef
295  store volatile i64 %val.1, i64 addrspace(1)* undef
296  ret void
297}
298
; Calls an external function returning the struct { <32 x i32>, i32 }, extracts
; both members, and stores each with a volatile store to an undef addrspace(1)
; pointer (the vector store is 8-byte aligned). Only the function label is
; checked, so the stack usage mentioned in the note below is not verified by
; any check line here.
299; Requires writing results to stack
300; GCN-LABEL: {{^}}test_call_external_v32i32_i32_func_void:
301define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
302  %val = call { <32 x i32>, i32 } @external_v32i32_i32_func_void()
303  %val0 = extractvalue { <32 x i32>, i32 } %val, 0
304  %val1 = extractvalue { <32 x i32>, i32 } %val, 1
305  store volatile <32 x i32> %val0, <32 x i32> addrspace(1)* undef, align 8
306  store volatile i32 %val1, i32 addrspace(1)* undef
307  ret void
308}
309
; Attribute groups. #0 (nounwind) is attached to every declaration and kernel
; visible in this file.
; NOTE(review): #1 and #2 are not referenced by anything in the visible text;
; they may be leftovers -- confirm before removing.
310attributes #0 = { nounwind }
311attributes #1 = { nounwind readnone }
312attributes #2 = { nounwind noinline }
313