1; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,MESA %s
2; RUN: llc -march=amdgcn -mcpu=hawaii -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,MESA %s
3; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,VI,MESA %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,HSA %s
5
; External callees exercised by the tests in this file. Every declaration is
; hidden and uses attribute group #0.
;
; i1 arguments: plain, signext and zeroext.
6declare hidden void @external_void_func_i1(i1) #0
7declare hidden void @external_void_func_i1_signext(i1 signext) #0
8declare hidden void @external_void_func_i1_zeroext(i1 zeroext) #0
9
; i8 arguments: plain, signext and zeroext.
10declare hidden void @external_void_func_i8(i8) #0
11declare hidden void @external_void_func_i8_signext(i8 signext) #0
12declare hidden void @external_void_func_i8_zeroext(i8 zeroext) #0
13
; i16 arguments: plain, signext and zeroext.
14declare hidden void @external_void_func_i16(i16) #0
15declare hidden void @external_void_func_i16_signext(i16 signext) #0
16declare hidden void @external_void_func_i16_zeroext(i16 zeroext) #0
17
; 32-bit and 64-bit integer scalars and 64-bit integer vectors.
18declare hidden void @external_void_func_i32(i32) #0
19declare hidden void @external_void_func_i64(i64) #0
20declare hidden void @external_void_func_v2i64(<2 x i64>) #0
21declare hidden void @external_void_func_v3i64(<3 x i64>) #0
22declare hidden void @external_void_func_v4i64(<4 x i64>) #0
23
; Floating-point scalars and float/double vectors.
24declare hidden void @external_void_func_f16(half) #0
25declare hidden void @external_void_func_f32(float) #0
26declare hidden void @external_void_func_f64(double) #0
27declare hidden void @external_void_func_v2f32(<2 x float>) #0
28declare hidden void @external_void_func_v2f64(<2 x double>) #0
29declare hidden void @external_void_func_v3f32(<3 x float>) #0
30declare hidden void @external_void_func_v3f64(<3 x double>) #0
31declare hidden void @external_void_func_v5f32(<5 x float>) #0
32
; Vectors of 16-bit elements (i16 and half).
33declare hidden void @external_void_func_v2i16(<2 x i16>) #0
34declare hidden void @external_void_func_v2f16(<2 x half>) #0
35declare hidden void @external_void_func_v3i16(<3 x i16>) #0
36declare hidden void @external_void_func_v3f16(<3 x half>) #0
37declare hidden void @external_void_func_v4i16(<4 x i16>) #0
38declare hidden void @external_void_func_v4f16(<4 x half>) #0
39
; Vectors of i32, some followed by an extra scalar i32 argument.
40declare hidden void @external_void_func_v2i32(<2 x i32>) #0
41declare hidden void @external_void_func_v3i32(<3 x i32>) #0
42declare hidden void @external_void_func_v3i32_i32(<3 x i32>, i32) #0
43declare hidden void @external_void_func_v4i32(<4 x i32>) #0
44declare hidden void @external_void_func_v5i32(<5 x i32>) #0
45declare hidden void @external_void_func_v8i32(<8 x i32>) #0
46declare hidden void @external_void_func_v16i32(<16 x i32>) #0
47declare hidden void @external_void_func_v32i32(<32 x i32>) #0
48declare hidden void @external_void_func_v32i32_i32(<32 x i32>, i32) #0
49
50; return value and argument
51declare hidden i32 @external_i32_func_i32(i32) #0
52
53; Structs
; The { i8, i32 } aggregate is passed by value, via a byval pointer, and via
; an sret + byval pointer pair; all pointers are in addrspace(5).
54declare hidden void @external_void_func_struct_i8_i32({ i8, i32 }) #0
55declare hidden void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 })) #0
56declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }), { i8, i32 } addrspace(5)* byval({ i8, i32 })) #0
57
; Vector of sixteen i8 elements.
58declare hidden void @external_void_func_v16i8(<16 x i8>) #0
59
60
61; FIXME: Should be passing -1
62; GCN-LABEL: {{^}}test_call_external_void_func_i1_imm:
63; MESA: s_mov_b32 s36, SCRATCH_RSRC_DWORD
64
65; MESA-DAG: s_mov_b64 s[0:1], s[36:37]
66
67; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
68; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1@rel32@lo+4
69; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1@rel32@hi+12
70; GCN-DAG: v_mov_b32_e32 v0, 1{{$}}
71; MESA-DAG: s_mov_b64 s[2:3], s[38:39]
72
73; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
74; GCN-NEXT: s_endpgm
; Kernel that calls @external_void_func_i1 with the constant i1 true.
75define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
76  call void @external_void_func_i1(i1 true)
77  ret void
78}
79
80; GCN-LABEL: {{^}}test_call_external_void_func_i1_signext:
81
82; HSA: buffer_load_ubyte [[VAR:v[0-9]+]]
83; HSA: s_mov_b32 s32, 0
84; MESA-DAG: buffer_load_ubyte [[VAR:v[0-9]+]]
85; MESA-DAG: s_mov_b32 s32, 0{{$}}
86
87; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
88; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_signext@rel32@lo+4
89; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_signext@rel32@hi+12
90; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1
91; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
92; GCN-NEXT: s_endpgm
; Kernel (with an unused, unnamed i32 argument) that performs a volatile i1
; load from an undef addrspace(1) pointer and passes the value as a signext
; argument.
93define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
94  %var = load volatile i1, i1 addrspace(1)* undef
95  call void @external_void_func_i1_signext(i1 signext %var)
96  ret void
97}
98
99; FIXME: load should be scheduled before getpc
100; GCN-LABEL: {{^}}test_call_external_void_func_i1_zeroext:
101
102; HSA: buffer_load_ubyte v0
103; HSA-DAG: s_mov_b32 s32, 0{{$}}
104
105; MESA: buffer_load_ubyte v0
106; MESA-DAG: s_mov_b32 s32, 0{{$}}
107
108; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
109; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_zeroext@rel32@lo+4
110; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_zeroext@rel32@hi+12
111; GCN-NEXT: v_and_b32_e32 v0, 1, v0
112; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
113; GCN-NEXT: s_endpgm
; Kernel (with an unused, unnamed i32 argument) that performs a volatile i1
; load from an undef addrspace(1) pointer and passes the value as a zeroext
; argument.
114define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
115  %var = load volatile i1, i1 addrspace(1)* undef
116  call void @external_void_func_i1_zeroext(i1 zeroext %var)
117  ret void
118}
119
120; GCN-LABEL: {{^}}test_call_external_void_func_i8_imm:
121
122; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
123; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8@rel32@lo+4
124; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8@rel32@hi+12
125; GCN-DAG: v_mov_b32_e32 v0, 0x7b
126
127; GCN-DAG: s_mov_b32 s32, 0{{$}}
128
129; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
130; GCN-NEXT: s_endpgm
; Kernel (with an unused, unnamed i32 argument) that calls
; @external_void_func_i8 with the constant 123 (0x7b).
131define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
132  call void @external_void_func_i8(i8 123)
133  ret void
134}
135
136; FIXME: don't wait before call
137; GCN-LABEL: {{^}}test_call_external_void_func_i8_signext:
138
139; GCN-DAG: buffer_load_sbyte v0
140; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
141; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_signext@rel32@lo+4
142; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_signext@rel32@hi+12
143
144; GCN-DAG: s_mov_b32 s32, 0
145
146; GCN-NOT: s_waitcnt
147; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
148; GCN-NEXT: s_endpgm
; Kernel (with an unused, unnamed i32 argument) that performs a volatile i8
; load from an undef addrspace(1) pointer and passes the value as a signext
; argument.
149define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
150  %var = load volatile i8, i8 addrspace(1)* undef
151  call void @external_void_func_i8_signext(i8 signext %var)
152  ret void
153}
154
155; GCN-LABEL: {{^}}test_call_external_void_func_i8_zeroext:
156
157; GCN-DAG: buffer_load_ubyte v0
158; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
159; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_zeroext@rel32@lo+4
160; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_zeroext@rel32@hi+12
161
162; GCN-DAG: s_mov_b32 s32, 0
163
164; GCN-NOT: s_waitcnt
165; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
166; GCN-NEXT: s_endpgm
; Kernel (with an unused, unnamed i32 argument) that performs a volatile i8
; load from an undef addrspace(1) pointer and passes the value as a zeroext
; argument.
167define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
168  %var = load volatile i8, i8 addrspace(1)* undef
169  call void @external_void_func_i8_zeroext(i8 zeroext %var)
170  ret void
171}
172
173; GCN-LABEL: {{^}}test_call_external_void_func_i16_imm:
174; GCN-DAG: v_mov_b32_e32 v0, 0x7b{{$}}
175
176; GCN-DAG: s_mov_b32 s32, 0
177
178; GCN: s_swappc_b64
; Kernel that calls @external_void_func_i16 with the constant 123 (0x7b).
179define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
180  call void @external_void_func_i16(i16 123)
181  ret void
182}
183
184; GCN-LABEL: {{^}}test_call_external_void_func_i16_signext:
185
186; GCN-DAG: buffer_load_sshort v0
187; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
188; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_signext@rel32@lo+4
189; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_signext@rel32@hi+12
190
191; GCN-DAG: s_mov_b32 s32, 0
192
193; GCN-NOT: s_waitcnt
194; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
195; GCN-NEXT: s_endpgm
; Kernel (with an unused, unnamed i32 argument) that performs a volatile i16
; load from an undef addrspace(1) pointer and passes the value as a signext
; argument.
196define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
197  %var = load volatile i16, i16 addrspace(1)* undef
198  call void @external_void_func_i16_signext(i16 signext %var)
199  ret void
200}
201
202; GCN-LABEL: {{^}}test_call_external_void_func_i16_zeroext:
203
; The zero-extended i16 argument is expected to be loaded straight into v0
; with an unsigned short load, mirroring the sshort check of the signext
; variant and the ubyte check of the i8 zeroext variant.
; GCN-DAG: buffer_load_ushort v0
204; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
205; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_zeroext@rel32@lo+4
206; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_zeroext@rel32@hi+12
207
208; GCN-DAG: s_mov_b32 s32, 0
209
210; GCN-NOT: s_waitcnt
211; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
212; GCN-NEXT: s_endpgm
; Kernel (with an unused, unnamed i32 argument) that performs a volatile i16
; load from an undef addrspace(1) pointer and passes the value as a zeroext
; argument.
213define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
214  %var = load volatile i16, i16 addrspace(1)* undef
215  call void @external_void_func_i16_zeroext(i16 zeroext %var)
216  ret void
217}
218
219; GCN-LABEL: {{^}}test_call_external_void_func_i32_imm:
220
221; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
222; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i32@rel32@lo+4
223; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i32@rel32@hi+12
224; GCN-DAG: v_mov_b32_e32 v0, 42
225; GCN-DAG: s_mov_b32 s32, 0
226
227; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
228; GCN-NEXT: s_endpgm
; Kernel (with an unused, unnamed i32 argument) that calls
; @external_void_func_i32 with the constant 42.
229define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
230  call void @external_void_func_i32(i32 42)
231  ret void
232}
233
234; GCN-LABEL: {{^}}test_call_external_void_func_i64_imm:
235; GCN-DAG: v_mov_b32_e32 v0, 0x7b{{$}}
236; GCN-DAG: v_mov_b32_e32 v1, 0{{$}}
237; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
238; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i64@rel32@lo+4
239; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i64@rel32@hi+12
240; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
241; GCN-NEXT: s_endpgm
; Kernel that calls @external_void_func_i64 with the constant 123
; (low 32 bits 0x7b, high 32 bits 0).
242define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
243  call void @external_void_func_i64(i64 123)
244  ret void
245}
246
247; GCN-LABEL: {{^}}test_call_external_void_func_v2i64:
248; GCN: buffer_load_dwordx4 v[0:3]
249; GCN-NOT: s_waitcnt
250; GCN: s_swappc_b64
; Kernel that loads a <2 x i64> from the null addrspace(1) pointer and passes
; it to @external_void_func_v2i64.
251define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
252  %val = load <2 x i64>, <2 x i64> addrspace(1)* null
253  call void @external_void_func_v2i64(<2 x i64> %val)
254  ret void
255}
256
257; GCN-LABEL: {{^}}test_call_external_void_func_v2i64_imm:
258; GCN-DAG: v_mov_b32_e32 v0, 1
259; GCN-DAG: v_mov_b32_e32 v1, 2
260; GCN-DAG: v_mov_b32_e32 v2, 3
261; GCN-DAG: v_mov_b32_e32 v3, 4
262; GCN: s_swappc_b64
; Kernel that passes a constant <2 x i64>. The elements are
; 8589934593 = 0x0000000200000001 and 17179869187 = 0x0000000400000003, so
; the four 32-bit halves are 1, 2, 3 and 4.
263define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
264  call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
265  ret void
266}
267
268; GCN-LABEL: {{^}}test_call_external_void_func_v3i64:
269; GCN: buffer_load_dwordx4 v[0:3]
270; GCN: v_mov_b32_e32 v4, 1
271; GCN: v_mov_b32_e32 v5, 2
272; GCN-NOT: s_waitcnt
273; GCN: s_swappc_b64
; Kernel that builds a <3 x i64> from a loaded <2 x i64> plus one constant
; element and passes it to @external_void_func_v3i64.
274define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
275  %load = load <2 x i64>, <2 x i64> addrspace(1)* null
  ; Mask <0, 1, 2> keeps both loaded elements and appends element 0 of the
  ; constant operand (8589934593 = 0x0000000200000001).
276  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
277
278  call void @external_void_func_v3i64(<3 x i64> %val)
279  ret void
280}
281
282; GCN-LABEL: {{^}}test_call_external_void_func_v4i64:
283; GCN: buffer_load_dwordx4 v[0:3]
284; GCN-DAG: v_mov_b32_e32 v4, 1
285; GCN-DAG: v_mov_b32_e32 v5, 2
286; GCN-DAG: v_mov_b32_e32 v6, 3
287; GCN-DAG: v_mov_b32_e32 v7, 4
288
289; GCN-NOT: s_waitcnt
290; GCN: s_swappc_b64
; Kernel that builds a <4 x i64> from a loaded <2 x i64> followed by two
; constant elements and passes it to @external_void_func_v4i64.
291define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
292  %load = load <2 x i64>, <2 x i64> addrspace(1)* null
  ; Mask <0, 1, 2, 3> concatenates the loaded vector with the constant vector
  ; (0x0000000200000001, 0x0000000400000003).
293  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
294  call void @external_void_func_v4i64(<4 x i64> %val)
295  ret void
296}
297
298; GCN-LABEL: {{^}}test_call_external_void_func_f16_imm:
299; VI: v_mov_b32_e32 v0, 0x4400
300; CI: v_mov_b32_e32 v0, 4.0
301; GCN-NOT: v0
302; GCN: s_swappc_b64
; Kernel that calls @external_void_func_f16 with the half constant 4.0.
303define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
304  call void @external_void_func_f16(half 4.0)
305  ret void
306}
307
308; GCN-LABEL: {{^}}test_call_external_void_func_f32_imm:
309; GCN: v_mov_b32_e32 v0, 4.0
310; GCN-NOT: v0
311; GCN: s_swappc_b64
; Kernel that calls @external_void_func_f32 with the float constant 4.0.
312define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
313  call void @external_void_func_f32(float 4.0)
314  ret void
315}
316
317; GCN-LABEL: {{^}}test_call_external_void_func_v2f32_imm:
318; GCN-DAG: v_mov_b32_e32 v0, 1.0
319; GCN-DAG: v_mov_b32_e32 v1, 2.0
320; GCN: s_swappc_b64
; Kernel that passes the constant <2 x float> <1.0, 2.0>.
321define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
322  call void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
323  ret void
324}
325
326; GCN-LABEL: {{^}}test_call_external_void_func_v3f32_imm:
327; GCN-DAG: v_mov_b32_e32 v0, 1.0
328; GCN-DAG: v_mov_b32_e32 v1, 2.0
329; GCN-DAG: v_mov_b32_e32 v2, 4.0
330; GCN-NOT: v3,
331; GCN: s_swappc_b64
; Kernel that passes the constant <3 x float> <1.0, 2.0, 4.0>.
332define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
333  call void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>)
334  ret void
335}
336
337; GCN-LABEL: {{^}}test_call_external_void_func_v5f32_imm:
338; GCN-DAG: v_mov_b32_e32 v0, 1.0
339; GCN-DAG: v_mov_b32_e32 v1, 2.0
340; GCN-DAG: v_mov_b32_e32 v2, 4.0
341; GCN-DAG: v_mov_b32_e32 v3, -1.0
342; GCN-DAG: v_mov_b32_e32 v4, 0.5
343; GCN-NOT: v5,
344; GCN: s_swappc_b64
; Kernel that passes the constant <5 x float> <1.0, 2.0, 4.0, -1.0, 0.5>.
345define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
346  call void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
347  ret void
348}
349
350; GCN-LABEL: {{^}}test_call_external_void_func_f64_imm:
351; GCN: v_mov_b32_e32 v0, 0{{$}}
352; GCN: v_mov_b32_e32 v1, 0x40100000
353; GCN: s_swappc_b64
; Kernel that calls @external_void_func_f64 with the double constant 4.0
; (bit pattern 0x4010000000000000).
354define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
355  call void @external_void_func_f64(double 4.0)
356  ret void
357}
358
359; GCN-LABEL: {{^}}test_call_external_void_func_v2f64_imm:
360; GCN: v_mov_b32_e32 v0, 0{{$}}
361; GCN: v_mov_b32_e32 v1, 2.0
362; GCN: v_mov_b32_e32 v2, 0{{$}}
363; GCN: v_mov_b32_e32 v3, 0x40100000
364; GCN: s_swappc_b64
; Kernel that passes the constant <2 x double> <2.0, 4.0>
; (bit patterns 0x4000000000000000 and 0x4010000000000000).
365define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
366  call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
367  ret void
368}
369
370; GCN-LABEL: {{^}}test_call_external_void_func_v3f64_imm:
371; GCN-DAG: v_mov_b32_e32 v0, 0{{$}}
372; GCN-DAG: v_mov_b32_e32 v1, 2.0
373; GCN-DAG: v_mov_b32_e32 v2, 0{{$}}
374; GCN-DAG: v_mov_b32_e32 v3, 0x40100000
375; GCN-DAG: v_mov_b32_e32 v4, 0{{$}}
376; GCN-DAG: v_mov_b32_e32 v5, 0x40200000
; The call is matched with an ordered check so that every argument move above
; has to appear before it, as in the neighbouring immediate-argument tests.
377; GCN: s_swappc_b64
; Kernel that passes the constant <3 x double> <2.0, 4.0, 8.0>
; (bit patterns 0x4000000000000000, 0x4010000000000000, 0x4020000000000000).
378define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
379  call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
380  ret void
381}
382
383; GCN-LABEL: {{^}}test_call_external_void_func_v2i16:
384; GFX9: buffer_load_dword v0
385; GFX9-NOT: v0
386; GFX9: s_swappc_b64
; Kernel that loads a <2 x i16> from an undef addrspace(1) pointer and passes
; it to @external_void_func_v2i16.
387define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
388  %val = load <2 x i16>, <2 x i16> addrspace(1)* undef
389  call void @external_void_func_v2i16(<2 x i16> %val)
390  ret void
391}
392
393; GCN-LABEL: {{^}}test_call_external_void_func_v3i16:
394; GFX9: buffer_load_dwordx2 v[0:1]
395; GFX9-NOT: v0
396; GFX9-NOT: v1
397; GFX9: s_swappc_b64
; Kernel that loads a <3 x i16> from an undef addrspace(1) pointer and passes
; it to @external_void_func_v3i16.
398define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
399  %val = load <3 x i16>, <3 x i16> addrspace(1)* undef
400  call void @external_void_func_v3i16(<3 x i16> %val)
401  ret void
402}
403
404; GCN-LABEL: {{^}}test_call_external_void_func_v3f16:
405; GFX9: buffer_load_dwordx2 v[0:1]
406; GFX9-NOT: v0
407; GFX9-NOT: v1
408; GFX9: s_swappc_b64
; Kernel that loads a <3 x half> from an undef addrspace(1) pointer and passes
; it to @external_void_func_v3f16.
409define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
410  %val = load <3 x half>, <3 x half> addrspace(1)* undef
411  call void @external_void_func_v3f16(<3 x half> %val)
412  ret void
413}
414
415; GCN-LABEL: {{^}}test_call_external_void_func_v3i16_imm:
416; GFX9: v_mov_b32_e32 v0, 0x20001
417; GFX9: v_mov_b32_e32 v1, 3
418; GFX9: s_swappc_b64
; Kernel that passes the constant <3 x i16> <1, 2, 3>.
419define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
420  call void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>)
421  ret void
422}
423
424; GCN-LABEL: {{^}}test_call_external_void_func_v3f16_imm:
425; GFX9: v_mov_b32_e32 v0, 0x40003c00
426; GFX9: v_mov_b32_e32 v1, 0x4400
427; GFX9: s_swappc_b64
; Kernel that passes the constant <3 x half> <1.0, 2.0, 4.0>.
428define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
429  call void @external_void_func_v3f16(<3 x half> <half 1.0, half 2.0, half 4.0>)
430  ret void
431}
432
433; GCN-LABEL: {{^}}test_call_external_void_func_v4i16:
434; GFX9: buffer_load_dwordx2 v[0:1]
435; GFX9-NOT: v0
436; GFX9-NOT: v1
437; GFX9: s_swappc_b64
; Kernel that loads a <4 x i16> from an undef addrspace(1) pointer and passes
; it to @external_void_func_v4i16.
438define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
439  %val = load <4 x i16>, <4 x i16> addrspace(1)* undef
440  call void @external_void_func_v4i16(<4 x i16> %val)
441  ret void
442}
443
444; GCN-LABEL: {{^}}test_call_external_void_func_v4i16_imm:
445; GFX9-DAG: v_mov_b32_e32 v0, 0x20001
446; GFX9-DAG: v_mov_b32_e32 v1, 0x40003
447; GFX9: s_swappc_b64
; Kernel that passes the constant <4 x i16> <1, 2, 3, 4>.
448define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
449  call void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
450  ret void
451}
452
453; GCN-LABEL: {{^}}test_call_external_void_func_v2f16:
454; GFX9: buffer_load_dword v0
455; GFX9-NOT: v0
456; GFX9: s_swappc_b64
; Kernel that loads a <2 x half> from an undef addrspace(1) pointer and passes
; it to @external_void_func_v2f16.
457define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
458  %val = load <2 x half>, <2 x half> addrspace(1)* undef
459  call void @external_void_func_v2f16(<2 x half> %val)
460  ret void
461}
462
463; GCN-LABEL: {{^}}test_call_external_void_func_v2i32:
464; GCN: buffer_load_dwordx2 v[0:1]
465; GCN-NOT: s_waitcnt
466; GCN: s_swappc_b64
; Kernel that loads a <2 x i32> from an undef addrspace(1) pointer and passes
; it to @external_void_func_v2i32.
467define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
468  %val = load <2 x i32>, <2 x i32> addrspace(1)* undef
469  call void @external_void_func_v2i32(<2 x i32> %val)
470  ret void
471}
472
473; GCN-LABEL: {{^}}test_call_external_void_func_v2i32_imm:
474; GCN-DAG: v_mov_b32_e32 v0, 1
475; GCN-DAG: v_mov_b32_e32 v1, 2
476; GCN: s_swappc_b64
; Kernel that passes the constant <2 x i32> <1, 2>.
477define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
478  call void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
479  ret void
480}
481
482; GCN-LABEL: {{^}}test_call_external_void_func_v3i32_imm: {{.*}}
483
484; GCN-NOT: v3
485; GCN-DAG: v_mov_b32_e32 v0, 3
486; GCN-DAG: v_mov_b32_e32 v1, 4
487; GCN-DAG: v_mov_b32_e32 v2, 5
488
489; GCN: s_swappc_b64
; Kernel (with an unused, unnamed i32 argument) that passes the constant
; <3 x i32> <3, 4, 5>.
490define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
491  call void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
492  ret void
493}
494
495; GCN-LABEL: {{^}}test_call_external_void_func_v3i32_i32:
496; GCN-DAG: v_mov_b32_e32 v0, 3
497; GCN-DAG: v_mov_b32_e32 v1, 4
498; GCN-DAG: v_mov_b32_e32 v2, 5
499; GCN-DAG: v_mov_b32_e32 v3, 6
; Anchor the unordered group above on the call itself so the argument moves
; have to be emitted before it, as the v3i32_imm test already does.
; GCN: s_swappc_b64
; Kernel (with an unused, unnamed i32 argument) that passes the constant
; <3 x i32> <3, 4, 5> followed by the scalar i32 6.
500define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
501  call void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
502  ret void
503}
504
505; GCN-LABEL: {{^}}test_call_external_void_func_v4i32:
506; GCN: buffer_load_dwordx4 v[0:3]
507; GCN-NOT: s_waitcnt
508; GCN: s_swappc_b64
; Kernel that loads a <4 x i32> from an undef addrspace(1) pointer and passes
; it to @external_void_func_v4i32.
509define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
510  %val = load <4 x i32>, <4 x i32> addrspace(1)* undef
511  call void @external_void_func_v4i32(<4 x i32> %val)
512  ret void
513}
514
515; GCN-LABEL: {{^}}test_call_external_void_func_v4i32_imm:
516; GCN-DAG: v_mov_b32_e32 v0, 1
517; GCN-DAG: v_mov_b32_e32 v1, 2
518; GCN-DAG: v_mov_b32_e32 v2, 3
519; GCN-DAG: v_mov_b32_e32 v3, 4
520; GCN: s_swappc_b64
; Kernel that passes the constant <4 x i32> <1, 2, 3, 4>.
521define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
522  call void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
523  ret void
524}
525
526; GCN-LABEL: {{^}}test_call_external_void_func_v5i32_imm:
527; GCN-DAG: v_mov_b32_e32 v0, 1
528; GCN-DAG: v_mov_b32_e32 v1, 2
529; GCN-DAG: v_mov_b32_e32 v2, 3
530; GCN-DAG: v_mov_b32_e32 v3, 4
531; GCN-DAG: v_mov_b32_e32 v4, 5
532; GCN-NOT: v5,
533; GCN: s_swappc_b64
; Kernel that passes the constant <5 x i32> <1, 2, 3, 4, 5>.
534define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
535  call void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
536  ret void
537}
538
539; GCN-LABEL: {{^}}test_call_external_void_func_v8i32:
540; GCN-DAG: buffer_load_dwordx4 v[0:3], off
541; GCN-DAG: buffer_load_dwordx4 v[4:7], off
542; GCN-NOT: s_waitcnt
543; GCN: s_swappc_b64
; Kernel that loads an addrspace(1) pointer from an undef addrspace(4)
; pointer, loads a <8 x i32> through it, and passes the vector to
; @external_void_func_v8i32.
544define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
545  %ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
546  %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
547  call void @external_void_func_v8i32(<8 x i32> %val)
548  ret void
549}
550
551; GCN-LABEL: {{^}}test_call_external_void_func_v8i32_imm:
552; GCN-DAG: v_mov_b32_e32 v0, 1
553; GCN-DAG: v_mov_b32_e32 v1, 2
554; GCN-DAG: v_mov_b32_e32 v2, 3
555; GCN-DAG: v_mov_b32_e32 v3, 4
556; GCN-DAG: v_mov_b32_e32 v4, 5
557; GCN-DAG: v_mov_b32_e32 v5, 6
558; GCN-DAG: v_mov_b32_e32 v6, 7
559; GCN-DAG: v_mov_b32_e32 v7, 8
560; GCN: s_swappc_b64
; Kernel that passes the constant <8 x i32> <1, 2, 3, 4, 5, 6, 7, 8>.
561define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
562  call void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
563  ret void
564}
565
566; GCN-LABEL: {{^}}test_call_external_void_func_v16i32:
567; GCN-DAG: buffer_load_dwordx4 v[0:3], off
568; GCN-DAG: buffer_load_dwordx4 v[4:7], off
569; GCN-DAG: buffer_load_dwordx4 v[8:11], off
570; GCN-DAG: buffer_load_dwordx4 v[12:15], off
571; GCN-NOT: s_waitcnt
572; GCN: s_swappc_b64
; Kernel that loads an addrspace(1) pointer from an undef addrspace(4)
; pointer, loads a <16 x i32> through it, and passes the vector to
; @external_void_func_v16i32.
573define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
574  %ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
575  %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
576  call void @external_void_func_v16i32(<16 x i32> %val)
577  ret void
578}
579
580; GCN-LABEL: {{^}}test_call_external_void_func_v32i32:
581; GCN-DAG: buffer_load_dwordx4 v[0:3], off
582; GCN-DAG: buffer_load_dwordx4 v[4:7], off
583; GCN-DAG: buffer_load_dwordx4 v[8:11], off
584; GCN-DAG: buffer_load_dwordx4 v[12:15], off
585; GCN-DAG: buffer_load_dwordx4 v[16:19], off
586; GCN-DAG: buffer_load_dwordx4 v[20:23], off
587; GCN-DAG: buffer_load_dwordx4 v[24:27], off
588; GCN-DAG: buffer_load_dwordx4 v[28:31], off
589; GCN-NOT: s_waitcnt
590; GCN: s_swappc_b64
; Kernel that loads an addrspace(1) pointer from an undef addrspace(4)
; pointer, loads a <32 x i32> through it, and passes the vector to
; @external_void_func_v32i32.
591define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
592  %ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
593  %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
594  call void @external_void_func_v32i32(<32 x i32> %val)
595  ret void
596}
597
598; GCN-LABEL: {{^}}test_call_external_void_func_v32i32_i32:
599; HSA-NOT: s_add_u32 s32
600
601; MESA-NOT: s_add_u32 s32
602
603; GCN-DAG: buffer_load_dword [[VAL1:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
604; GCN-DAG: buffer_load_dwordx4 v[0:3], off
605; GCN-DAG: buffer_load_dwordx4 v[4:7], off
606; GCN-DAG: buffer_load_dwordx4 v[8:11], off
607; GCN-DAG: buffer_load_dwordx4 v[12:15], off
608; GCN-DAG: buffer_load_dwordx4 v[16:19], off
609; GCN-DAG: buffer_load_dwordx4 v[20:23], off
610; GCN-DAG: buffer_load_dwordx4 v[24:27], off
611; GCN-DAG: buffer_load_dwordx4 v[28:31], off
612
613; GCN: s_waitcnt
614; GCN: buffer_store_dword [[VAL1]], off, s[{{[0-9]+}}:{{[0-9]+}}], s32{{$}}
615; GCN: s_swappc_b64
616; GCN-NEXT: s_endpgm
; Kernel (with an unused, unnamed i32 argument) that passes a loaded
; <32 x i32> followed by a separately loaded i32 to
; @external_void_func_v32i32_i32.
617define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
618  %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
619  %val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0
  ; The trailing scalar comes from an undef addrspace(1) pointer.
620  %val1 = load i32, i32 addrspace(1)* undef
621  call void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1)
622  ret void
623}
624
625; GCN-LABEL: {{^}}test_call_external_i32_func_i32_imm:
626; GCN: v_mov_b32_e32 v0, 42
627; GCN: s_swappc_b64 s[30:31],
628; GCN-NOT: s_waitcnt
629; GCN: buffer_store_dword v0, off, s[36:39], 0
; Kernel that calls @external_i32_func_i32 with the constant 42 and stores
; the returned i32 to %out with a volatile store.
630define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 {
631  %val = call i32 @external_i32_func_i32(i32 42)
632  store volatile i32 %val, i32 addrspace(1)* %out
633  ret void
634}
635
636; GCN-LABEL: {{^}}test_call_external_void_func_struct_i8_i32:
637; GCN: buffer_load_ubyte v0, off
638; GCN: buffer_load_dword v1, off
639; GCN-NOT: s_waitcnt
640; GCN: s_swappc_b64
; Kernel that loads an addrspace(1) pointer from an undef addrspace(4)
; pointer, loads a { i8, i32 } aggregate through it, and passes the aggregate
; by value to @external_void_func_struct_i8_i32.
641define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
642  %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
643  %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
644  call void @external_void_func_struct_i8_i32({ i8, i32 } %val)
645  ret void
646}
647
648; GCN-LABEL: {{^}}test_call_external_void_func_byval_struct_i8_i32:
649; GCN-DAG: v_mov_b32_e32 [[VAL0:v[0-9]+]], 3
650; GCN-DAG: v_mov_b32_e32 [[VAL1:v[0-9]+]], 8
651; MESA-DAG: buffer_store_byte [[VAL0]], off, s[36:39], 0 offset:8
652; MESA-DAG: buffer_store_dword [[VAL1]], off, s[36:39], 0 offset:12
653
654; HSA-DAG: buffer_store_byte [[VAL0]], off, s[0:3], 0 offset:8
655; HSA-DAG: buffer_store_dword [[VAL1]], off, s[0:3], 0 offset:12
656
657; HSA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[0:3], 0 offset:8
658; HSA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[0:3], 0 offset:12
659
660; MESA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[36:39], 0 offset:8
661; MESA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[36:39], 0 offset:12
662
663; GCN-DAG: s_movk_i32 [[SP:s[0-9]+]], 0x400{{$}}
664
665; HSA-DAG: buffer_store_dword [[RELOAD_VAL0]], off, s[0:3], [[SP]]{{$}}
666; HSA-DAG: buffer_store_dword [[RELOAD_VAL1]], off, s[0:3], [[SP]] offset:4
667
668; MESA-DAG: buffer_store_dword [[RELOAD_VAL0]], off, s[36:39], [[SP]]{{$}}
669; MESA-DAG: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:4
670
671; GCN-NEXT: s_swappc_b64
672; GCN-NOT: [[SP]]
; Kernel that fills an addrspace(5) alloca of { i8, i32 } with the values
; 3 and 8 and passes a pointer to it as a byval argument.
673define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
674  %val = alloca { i8, i32 }, align 4, addrspace(5)
  ; Field pointers: index 0 is the i8 member, index 1 is the i32 member.
675  %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 0
676  %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 1
677  store i8 3, i8 addrspace(5)* %gep0
678  store i32 8, i32 addrspace(5)* %gep1
679  call void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %val)
680  ret void
681}
682
683; GCN-LABEL: {{^}}test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
684; GCN-DAG: s_movk_i32 [[SP:s[0-9]+]], 0x800{{$}}
685
686; GCN-DAG: v_mov_b32_e32 [[VAL0:v[0-9]+]], 3
687; GCN-DAG: v_mov_b32_e32 [[VAL1:v[0-9]+]], 8
688; GCN-DAG: buffer_store_byte [[VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
689; GCN-DAG: buffer_store_dword [[VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12
690
691; GCN-DAG: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
692; GCN-DAG: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12
693
694; GCN-NOT: s_add_u32 [[SP]]
695; GCN-DAG: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]]{{$}}
696; GCN-DAG: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4
697; GCN: s_swappc_b64
698; GCN-DAG: buffer_load_ubyte [[LOAD_OUT_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
699; GCN-DAG: buffer_load_dword [[LOAD_OUT_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:20
700; GCN-NOT: s_sub_u32 [[SP]]
701
702; GCN: buffer_store_byte [[LOAD_OUT_VAL0]], off
703; GCN: buffer_store_dword [[LOAD_OUT_VAL1]], off
; Kernel (with an unused, unnamed i32 argument) that initialises %in.val to
; the values 3 and 8, calls the callee with %out.val as its first pointer
; argument and %in.val as a byval argument, then copies both fields of
; %out.val out with volatile stores.
704define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
705  %in.val = alloca { i8, i32 }, align 4, addrspace(5)
706  %out.val = alloca { i8, i32 }, align 4, addrspace(5)
707  %in.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 0
708  %in.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 1
709  store i8 3, i8 addrspace(5)* %in.gep0
710  store i32 8, i32 addrspace(5)* %in.gep1
  ; NOTE(review): the callee declaration marks its first parameter sret, but
  ; this call site does not repeat the attribute - confirm this is intended.
711  call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* %out.val, { i8, i32 } addrspace(5)* byval({ i8, i32 }) %in.val)
712  %out.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 0
713  %out.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 1
714  %out.val0 = load i8, i8 addrspace(5)* %out.gep0
715  %out.val1 = load i32, i32 addrspace(5)* %out.gep1
716
  ; Volatile stores to undef addrspace(1) pointers keep the reloaded fields
  ; live.
717  store volatile i8 %out.val0, i8 addrspace(1)* undef
718  store volatile i32 %out.val1, i32 addrspace(1)* undef
719  ret void
720}
721
722; GCN-LABEL: {{^}}test_call_external_void_func_v16i8:
; Kernel that loads an addrspace(1) pointer from an undef addrspace(4)
; pointer, loads a <16 x i8> through it, and passes the vector to
; @external_void_func_v16i8. Only the function label is checked for this
; test.
723define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
724  %ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
725  %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
726  call void @external_void_func_v16i8(<16 x i8> %val)
727  ret void
728}
729
730; GCN-LABEL: {{^}}stack_passed_arg_alignment_v32i32_f64:
731; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32{{$}}
732; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:4
733; GCN: s_swappc_b64
; Kernel that forwards its <32 x i32> and double arguments unchanged to
; @stack_passed_f64_arg (declared outside the part of the file shown here).
734define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
735entry:
736  call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
737  ret void
738}
739
740; GCN-LABEL: {{^}}tail_call_byval_align16:
741; GCN-NOT: s32
742; GCN: buffer_load_dword [[VREG2:v[0-9]+]], off, s[0:3], s32 offset:12
743; GCN: buffer_load_dword [[VREG1:v[0-9]+]], off, s[0:3], s32 offset:8
744
745; GCN: s_getpc_b64
746
747; GCN: buffer_store_dword [[VREG2]], off, s[0:3], s32 offset:4
748; GCN: buffer_store_dword [[VREG1]], off, s[0:3], s32{{$}}
749; GCN-NOT: s32
750; GCN: s_setpc_b64
; Non-kernel function that tail-calls @byval_align16_f64_arg, forwarding %val
; and passing a fresh alloca as a byval(double) align 16 argument. %tmp is
; not used by the body.
751define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
752entry:
  ; The alloca itself is only 8-byte aligned; the call site requests align 16.
753  %alloca = alloca double, align 8, addrspace(5)
754  tail call void @byval_align16_f64_arg(<32 x i32> %val, double addrspace(5)* byval(double) align 16 %alloca)
755  ret void
756}
757
758; GCN-LABEL: {{^}}tail_call_stack_passed_arg_alignment_v32i32_f64:
759; GCN-NOT: s32
760; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:4
761; GCN: buffer_load_dword v33, off, s[0:3], s32{{$}}
762; GCN: s_getpc_b64
763; GCN: buffer_store_dword v33, off, s[0:3], s32{{$}}
764; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4
765; GCN-NOT: s32
766; GCN: s_setpc_b64
; Non-kernel function that tail-calls @stack_passed_f64_arg with its own
; <32 x i32> and double arguments forwarded unchanged.
767define void @tail_call_stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
768entry:
769  tail call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
770  ret void
771}
772
; Calls a function taking twelve <3 x i32> arguments (36 scalar i32 values).
; The last two vectors carry the distinct constants 10..15 so individual
; elements can be identified in the output.
;
; The checks expect the constants 12, 13, 14 and 15 to be written with
; buffer_store_dword at offsets 0, 4, 8 and 12, and the constant 11 to be
; moved into v31 before the callee address is formed (s_getpc). In other
; words, the split between the last register-passed element and the first
; stored element falls inside the eleventh vector.
; GCN-LABEL: {{^}}stack_12xv3i32:
; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
; GCN: buffer_store_dword [[REG12]], {{.*$}}
; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
; GCN: buffer_store_dword [[REG13]], {{.*}} offset:4
; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
; GCN: buffer_store_dword [[REG14]], {{.*}} offset:8
; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
; GCN: buffer_store_dword [[REG15]], {{.*}} offset:12
; GCN: v_mov_b32_e32 v31, 11
; GCN: s_getpc
define void @stack_12xv3i32() #0 {
entry:
  call void @external_void_func_12xv3i32(
      <3 x i32><i32 0, i32 0, i32 0>,
      <3 x i32><i32 1, i32 1, i32 1>,
      <3 x i32><i32 2, i32 2, i32 2>,
      <3 x i32><i32 3, i32 3, i32 3>,
      <3 x i32><i32 4, i32 4, i32 4>,
      <3 x i32><i32 5, i32 5, i32 5>,
      <3 x i32><i32 6, i32 6, i32 6>,
      <3 x i32><i32 7, i32 7, i32 7>,
      <3 x i32><i32 8, i32 8, i32 8>,
      <3 x i32><i32 9, i32 9, i32 9>,
      <3 x i32><i32 10, i32 11, i32 12>,
      <3 x i32><i32 13, i32 14, i32 15>)
  ret void
}
801
; Floating-point counterpart of the twelve-<3 x i32> case: twelve <3 x float>
; arguments (36 scalar floats), with the last two vectors holding the distinct
; values 10.0..15.0.
;
; The hex immediates in the checks are IEEE-754 single-precision encodings:
; 0x41300000 = 11.0, 0x41400000 = 12.0, 0x41500000 = 13.0,
; 0x41600000 = 14.0, 0x41700000 = 15.0.
; The checks expect 12.0..15.0 to be written with buffer_store_dword at
; offsets 0, 4, 8 and 12, and 11.0 to be moved into v31 before s_getpc.
; GCN-LABEL: {{^}}stack_12xv3f32:
; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000
; GCN: buffer_store_dword [[REG12]], {{.*$}}
; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000
; GCN: buffer_store_dword [[REG13]], {{.*}} offset:4
; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000
; GCN: buffer_store_dword [[REG14]], {{.*}} offset:8
; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000
; GCN: buffer_store_dword [[REG15]], {{.*}} offset:12
; GCN: v_mov_b32_e32 v31, 0x41300000
; GCN: s_getpc
define void @stack_12xv3f32() #0 {
entry:
  call void @external_void_func_12xv3f32(
      <3 x float><float 0.0, float 0.0, float 0.0>,
      <3 x float><float 1.0, float 1.0, float 1.0>,
      <3 x float><float 2.0, float 2.0, float 2.0>,
      <3 x float><float 3.0, float 3.0, float 3.0>,
      <3 x float><float 4.0, float 4.0, float 4.0>,
      <3 x float><float 5.0, float 5.0, float 5.0>,
      <3 x float><float 6.0, float 6.0, float 6.0>,
      <3 x float><float 7.0, float 7.0, float 7.0>,
      <3 x float><float 8.0, float 8.0, float 8.0>,
      <3 x float><float 9.0, float 9.0, float 9.0>,
      <3 x float><float 10.0, float 11.0, float 12.0>,
      <3 x float><float 13.0, float 14.0, float 15.0>)
  ret void
}
830
; Calls a function taking eight <5 x i32> arguments (40 scalar i32 values).
; The last two vectors carry the distinct constants 6..15 so individual
; elements can be identified in the output.
;
; The checks expect the constants 8 through 15 to be written with
; buffer_store_dword at consecutive 4-byte offsets 0..28, and the constant 7
; to be moved into v31 before the callee address is formed (s_getpc). The
; split between the last register-passed element and the first stored element
; therefore falls inside the seventh vector.
; GCN-LABEL: {{^}}stack_8xv5i32:

; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 8
; GCN: buffer_store_dword [[REG8]], {{.*$}}
; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 9
; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4
; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 10
; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8
; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 11
; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12
; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
; GCN: buffer_store_dword [[REG12]], {{.*}} offset:16
; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20
; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24
; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
; GCN: buffer_store_dword [[REG15]], {{.*}} offset:28

; GCN: v_mov_b32_e32 v31, 7
; GCN: s_getpc
define void @stack_8xv5i32() #0 {
entry:
  call void @external_void_func_8xv5i32(
      <5 x i32><i32 0, i32 0, i32 0, i32 0, i32 0>,
      <5 x i32><i32 1, i32 1, i32 1, i32 1, i32 1>,
      <5 x i32><i32 2, i32 2, i32 2, i32 2, i32 2>,
      <5 x i32><i32 3, i32 3, i32 3, i32 3, i32 3>,
      <5 x i32><i32 4, i32 4, i32 4, i32 4, i32 4>,
      <5 x i32><i32 5, i32 5, i32 5, i32 5, i32 5>,
      <5 x i32><i32 6, i32 7, i32 8, i32 9, i32 10>,
      <5 x i32><i32 11, i32 12, i32 13, i32 14, i32 15>)
  ret void
}
865
; Floating-point counterpart of the eight-<5 x i32> case: eight <5 x float>
; arguments (40 scalar floats), with the last two vectors holding the distinct
; values 6.0..15.0.
;
; The hex immediates in the checks are IEEE-754 single-precision encodings:
; 0x40e00000 = 7.0, 0x41000000 = 8.0, 0x41100000 = 9.0, 0x41200000 = 10.0,
; 0x41300000 = 11.0, 0x41400000 = 12.0, 0x41500000 = 13.0,
; 0x41600000 = 14.0, 0x41700000 = 15.0.
; The checks expect 8.0..15.0 to be written with buffer_store_dword at
; consecutive 4-byte offsets 0..28, and 7.0 to be moved into v31 before
; s_getpc.
; GCN-LABEL: {{^}}stack_8xv5f32:
; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 0x41000000
; GCN: buffer_store_dword [[REG8]], {{.*$}}
; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 0x41100000
; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4
; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 0x41200000
; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8
; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 0x41300000
; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12
; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000
; GCN: buffer_store_dword [[REG12]], {{.*}} offset:16
; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000
; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20
; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000
; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24
; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000
; GCN: buffer_store_dword [[REG15]], {{.*}} offset:28

; GCN: v_mov_b32_e32 v31, 0x40e00000
; GCN: s_getpc
define void @stack_8xv5f32() #0 {
entry:
  call void @external_void_func_8xv5f32(
      <5 x float><float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>,
      <5 x float><float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>,
      <5 x float><float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>,
      <5 x float><float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>,
      <5 x float><float 4.0, float 4.0, float 4.0, float 4.0, float 4.0>,
      <5 x float><float 5.0, float 5.0, float 5.0, float 5.0, float 5.0>,
      <5 x float><float 6.0, float 7.0, float 8.0, float 9.0, float 10.0>,
      <5 x float><float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>)
  ret void
}
899
; Callees used by the stack-argument and tail-call tests above. All are
; `hidden` declarations with no body in this file.

; <32 x i32> followed by a double passed byval with a requested 16-byte alignment.
declare hidden void @byval_align16_f64_arg(<32 x i32>, double addrspace(5)* byval(double) align 16) #0
; <32 x i32> followed by a plain double.
declare hidden void @stack_passed_f64_arg(<32 x i32>, double) #0
; Twelve 3-element / eight 5-element vector parameters, integer and float variants.
declare hidden void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>,
    <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0
declare hidden void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>,
    <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) #0
declare hidden void @external_void_func_12xv3f32(<3 x float>, <3 x float>, <3 x float>, <3 x float>,
    <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) #0
declare hidden void @external_void_func_8xv5f32(<5 x float>, <5 x float>, <5 x float>, <5 x float>,
    <5 x float>, <5 x float>, <5 x float>, <5 x float>) #0

; Attribute groups referenced as #0/#1/#2 by definitions and declarations in this file.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind noinline }
913