; RUN: llc < %s -march=ppc64le -mcpu=pwr8 -mattr=+altivec -mattr=-vsx | FileCheck %s

; Checks how array-typed aggregates are returned and passed on little-endian
; 64-bit PowerPC (see the target triple below).
;
; Currently VSX support is disabled for this test because we generate lxsdx
; instead of lfd, and stxsdx instead of stfd.  That is a poor choice when we
; have reg+imm addressing, and is on the list of things to be fixed.

target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"

;
; Verify use of registers for float/vector aggregate return.
;

; Returns its [8 x float] argument unchanged.  The expected output is a blr on
; the line directly after the %entry block marker, i.e. no instruction is
; needed to move the aggregate from its incoming to its outgoing location.
define [8 x float] @return_float([8 x float] %x) {
entry:
  ret [8 x float] %x
}
; CHECK-LABEL: @return_float
; CHECK: %entry
; CHECK-NEXT: blr

; Returns its [8 x double] argument unchanged.  As with the float variant, the
; function body is expected to be a lone blr right after the %entry marker.
define [8 x double] @return_double([8 x double] %x) {
entry:
  ret [8 x double] %x
}
; CHECK-LABEL: @return_double
; CHECK: %entry
; CHECK-NEXT: blr

; Returns its [4 x ppc_fp128] argument unchanged.  The function body is
; expected to be a lone blr right after the %entry marker.
define [4 x ppc_fp128] @return_ppcf128([4 x ppc_fp128] %x) {
entry:
  ret [4 x ppc_fp128] %x
}
; CHECK-LABEL: @return_ppcf128
; CHECK: %entry
; CHECK-NEXT: blr

; Returns its [8 x <4 x i32>] argument unchanged.  The function body is
; expected to be a lone blr right after the %entry marker.
define [8 x <4 x i32>] @return_v4i32([8 x <4 x i32>] %x) {
entry:
  ret [8 x <4 x i32>] %x
}
; CHECK-LABEL: @return_v4i32
; CHECK: %entry
; CHECK-NEXT: blr


;
; Verify amount of space taken up by aggregates in the parameter save area.
;

; Returns %c, which follows two [7 x float] aggregates in the argument list.
; The callee is expected to load it into register 3 from offset 96 off
; register 1; that offset pins down how much parameter save area the two
; aggregates occupy.
; NOTE(review): 96 is presumably a 32-byte header plus 32 bytes per aggregate;
; confirm against the ELFv2 ABI.
define i64 @callee_float([7 x float] %a, [7 x float] %b, i64 %c) {
entry:
  ret i64 %c
}
; CHECK-LABEL: @callee_float
; CHECK: ld 3, 96(1)
; CHECK: blr

; Caller-side counterpart of callee_float: passes %y twice followed by %x to
; test_float.  Register 3 is expected to be stored to 96(1) before the call,
; matching the offset the callee loads from.  The IR marks the call as a tail
; call, but the expected output is a plain bl.
define void @caller_float(i64 %x, [7 x float] %y) {
entry:
  tail call void @test_float([7 x float] %y, [7 x float] %y, i64 %x)
  ret void
}
; CHECK-LABEL: @caller_float
; CHECK: std 3, 96(1)
; CHECK: bl test_float

declare void @test_float([7 x float], [7 x float], i64)

; Returns %c, which follows an i64 and a [7 x double] aggregate.  The callee
; is expected to load it into register 3 from offset 96 off register 1.
define i64 @callee_double(i64 %a, [7 x double] %b, i64 %c) {
entry:
  ret i64 %c
}
; CHECK-LABEL: @callee_double
; CHECK: ld 3, 96(1)
; CHECK: blr

; Caller-side counterpart of callee_double: passes %x, %y and %x again to
; test_double.  Register 3 is expected to be stored to 96(1) before the bl.
define void @caller_double(i64 %x, [7 x double] %y) {
entry:
  tail call void @test_double(i64 %x, [7 x double] %y, i64 %x)
  ret void
}
; CHECK-LABEL: @caller_double
; CHECK: std 3, 96(1)
; CHECK: bl test_double

declare void @test_double(i64, [7 x double], i64)

; Returns %c, which follows an i64 and a [4 x ppc_fp128] aggregate.  The
; callee is expected to load it into register 3 from offset 104 off
; register 1.
define i64 @callee_ppcf128(i64 %a, [4 x ppc_fp128] %b, i64 %c) {
entry:
  ret i64 %c
}
; CHECK-LABEL: @callee_ppcf128
; CHECK: ld 3, 104(1)
; CHECK: blr

; Caller-side counterpart of callee_ppcf128: passes %x, %y and %x again to
; test_ppcf128.  Register 3 is expected to be stored to 104(1) before the bl.
define void @caller_ppcf128(i64 %x, [4 x ppc_fp128] %y) {
entry:
  tail call void @test_ppcf128(i64 %x, [4 x ppc_fp128] %y, i64 %x)
  ret void
}
; CHECK-LABEL: @caller_ppcf128
; CHECK: std 3, 104(1)
; CHECK: bl test_ppcf128

declare void @test_ppcf128(i64, [4 x ppc_fp128], i64)

; Returns %c, which follows an i64 and a [7 x i64] aggregate.  The callee is
; expected to load it into register 3 from offset 96 off register 1.
define i64 @callee_i64(i64 %a, [7 x i64] %b, i64 %c) {
entry:
  ret i64 %c
}
; CHECK-LABEL: @callee_i64
; CHECK: ld 3, 96(1)
; CHECK: blr

; Caller-side counterpart of callee_i64: passes %x, %y and %x again to
; test_i64.  Register 3 is expected to be stored to 96(1) before the bl.
define void @caller_i64(i64 %x, [7 x i64] %y) {
entry:
  tail call void @test_i64(i64 %x, [7 x i64] %y, i64 %x)
  ret void
}
; CHECK-LABEL: @caller_i64
; CHECK: std 3, 96(1)
; CHECK: bl test_i64

declare void @test_i64(i64, [7 x i64], i64)

; Returns %c, which follows an i64 and a [4 x i128] aggregate.  The callee is
; expected to load it into register 3 from offset 112 off register 1.
; NOTE(review): this is 8 bytes past the offset expected in the
; [4 x ppc_fp128] test even though both element types are 128 bits wide;
; presumably alignment padding ahead of the aggregate -- confirm against the
; ABI.
define i64 @callee_i128(i64 %a, [4 x i128] %b, i64 %c) {
entry:
  ret i64 %c
}
; CHECK-LABEL: @callee_i128
; CHECK: ld 3, 112(1)
; CHECK: blr

; Caller-side counterpart of callee_i128: passes %x, %y and %x again to
; test_i128.  Register 3 is expected to be stored to 112(1) before the bl.
define void @caller_i128(i64 %x, [4 x i128] %y) {
entry:
  tail call void @test_i128(i64 %x, [4 x i128] %y, i64 %x)
  ret void
}
; CHECK-LABEL: @caller_i128
; CHECK: std 3, 112(1)
; CHECK: bl test_i128

declare void @test_i128(i64, [4 x i128], i64)

; Returns %c, which follows an i64 and a [4 x <4 x i32>] aggregate.  The
; callee is expected to load it into register 3 from offset 112 off
; register 1.
define i64 @callee_v4i32(i64 %a, [4 x <4 x i32>] %b, i64 %c) {
entry:
  ret i64 %c
}
; CHECK-LABEL: @callee_v4i32
; CHECK: ld 3, 112(1)
; CHECK: blr

; Caller-side counterpart of callee_v4i32: passes %x, %y and %x again to
; test_v4i32.  Register 3 is expected to be stored to 112(1) before the bl.
define void @caller_v4i32(i64 %x, [4 x <4 x i32>] %y) {
entry:
  tail call void @test_v4i32(i64 %x, [4 x <4 x i32>] %y, i64 %x)
  ret void
}
; CHECK-LABEL: @caller_v4i32
; CHECK: std 3, 112(1)
; CHECK: bl test_v4i32

declare void @test_v4i32(i64, [4 x <4 x i32>], i64)


;
; Verify handling of floating point arguments in GPRs
;

; Single-member struct wrappers around float arrays of 8, 5 and 2 elements,
; and one zero-initialised common global of each type.  caller2, caller3 and
; caller4 load their aggregate arguments from these globals.
%struct.float8 = type { [8 x float] }
%struct.float5 = type { [5 x float] }
%struct.float2 = type { [2 x float] }

@g8 = common global %struct.float8 zeroinitializer, align 4
@g5 = common global %struct.float5 zeroinitializer, align 4
@g2 = common global %struct.float2 zeroinitializer, align 4

; Returns the last element (index 6) of the second [7 x float] aggregate.
; The expected code stores register 10 as a word to a stack slot and reloads
; that same slot with lfs into FPR 1, i.e. the element arrives in a GPR and is
; moved to the floating-point return register through memory.
define float @callee0([7 x float] %a, [7 x float] %b) {
entry:
  %b.extract = extractvalue [7 x float] %b, 6
  ret float %b.extract
}
; CHECK-LABEL: @callee0
; CHECK: stw 10, [[OFF:.*]](1)
; CHECK: lfs 1, [[OFF]](1)
; CHECK: blr

; Passes the same [7 x float] aggregate twice to test0.  Expected code, in any
; order before the bl: FPRs 1-6 are copied to FPRs 8-13 for the second copy,
; and FPR 7 is stored with stfs and reloaded from the same slot with lwz into
; register 10, so the seventh element of the second copy travels in a GPR.
define void @caller0([7 x float] %a) {
entry:
  tail call void @test0([7 x float] %a, [7 x float] %a)
  ret void
}
; CHECK-LABEL: @caller0
; CHECK-DAG: fmr 8, 1
; CHECK-DAG: fmr 9, 2
; CHECK-DAG: fmr 10, 3
; CHECK-DAG: fmr 11, 4
; CHECK-DAG: fmr 12, 5
; CHECK-DAG: fmr 13, 6
; CHECK-DAG: stfs 7, [[OFF:[0-9]+]](1)
; CHECK-DAG: lwz 10, [[OFF]](1)
; CHECK: bl test0

declare void @test0([7 x float], [7 x float])

; Returns the last element (index 7) of the second [8 x float] aggregate.
; The expected code extracts the upper 32 bits of register 10 with rldicl,
; stores that word to a stack slot and reloads the same slot with lfs into
; FPR 1.
define float @callee1([8 x float] %a, [8 x float] %b) {
entry:
  %b.extract = extractvalue [8 x float] %b, 7
  ret float %b.extract
}
; CHECK-LABEL: @callee1
; CHECK: rldicl [[REG:[0-9]+]], 10, 32, 32
; CHECK: stw [[REG]], [[OFF:.*]](1)
; CHECK: lfs 1, [[OFF]](1)
; CHECK: blr

; Passes the same [8 x float] aggregate twice to test1.  Expected code, in any
; order before the bl: FPRs 1-5 are copied to FPRs 9-13 for the second copy;
; FPRs 5-8 are each stored with stfs and reloaded with lwz from the same
; slots; two of the reloaded words are shifted left by 32 and or-ed with the
; other two to form registers 9 and 10 (two floats packed per GPR).
define void @caller1([8 x float] %a) {
entry:
  tail call void @test1([8 x float] %a, [8 x float] %a)
  ret void
}
; CHECK-LABEL: @caller1
; CHECK-DAG: fmr 9, 1
; CHECK-DAG: fmr 10, 2
; CHECK-DAG: fmr 11, 3
; CHECK-DAG: fmr 12, 4
; CHECK-DAG: fmr 13, 5
; CHECK-DAG: stfs 5, [[OFF0:[0-9]+]](1)
; CHECK-DAG: stfs 6, [[OFF1:[0-9]+]](1)
; CHECK-DAG: stfs 7, [[OFF2:[0-9]+]](1)
; CHECK-DAG: stfs 8, [[OFF3:[0-9]+]](1)
; CHECK-DAG: lwz [[REG0:[0-9]+]], [[OFF0]](1)
; CHECK-DAG: lwz [[REG1:[0-9]+]], [[OFF1]](1)
; CHECK-DAG: lwz [[REG2:[0-9]+]], [[OFF2]](1)
; CHECK-DAG: lwz [[REG3:[0-9]+]], [[OFF3]](1)
; CHECK-DAG: sldi [[REG1]], [[REG1]], 32
; CHECK-DAG: sldi [[REG3]], [[REG3]], 32
; CHECK-DAG: or 9, [[REG0]], [[REG1]]
; CHECK-DAG: or 10, [[REG2]], [[REG3]]
; CHECK: bl test1

declare void @test1([8 x float], [8 x float])

; Returns element 1 of the trailing [2 x float] aggregate, which follows an
; [8 x float] and a [5 x float].  The expected code extracts the upper 32 bits
; of register 10 with rldicl, stores that word to a stack slot and reloads the
; same slot with lfs into FPR 1.
define float @callee2([8 x float] %a, [5 x float] %b, [2 x float] %c) {
entry:
  %c.extract = extractvalue [2 x float] %c, 1
  ret float %c.extract
}
; CHECK-LABEL: @callee2
; CHECK: rldicl [[REG:[0-9]+]], 10, 32, 32
; CHECK: stw [[REG]], [[OFF:.*]](1)
; CHECK: lfs 1, [[OFF]](1)
; CHECK: blr

; Loads the arrays held in @g8, @g5 and @g2 and passes them to test2.
; Expected code, in three groups that each start by loading a base address
; through a .LC symbol:
;   1. eight lfs loads at offsets 0..28 into FPRs 1-8;
;   2. five lfs loads at offsets 0..16 into FPRs 9-13;
;   3. two lwz loads at offsets 0 and 4, with the second word shifted left by
;      32 and or-ed with the first to form register 10.
; The pattern variable REG is deliberately redefined at the start of each
; group.
define void @caller2() {
entry:
  %0 = load [8 x float]* getelementptr inbounds (%struct.float8* @g8, i64 0, i32 0), align 4
  %1 = load [5 x float]* getelementptr inbounds (%struct.float5* @g5, i64 0, i32 0), align 4
  %2 = load [2 x float]* getelementptr inbounds (%struct.float2* @g2, i64 0, i32 0), align 4
  tail call void @test2([8 x float] %0, [5 x float] %1, [2 x float] %2)
  ret void
}
; CHECK-LABEL: @caller2
; CHECK: ld [[REG:[0-9]+]], .LC
; CHECK-DAG: lfs 1, 0([[REG]])
; CHECK-DAG: lfs 2, 4([[REG]])
; CHECK-DAG: lfs 3, 8([[REG]])
; CHECK-DAG: lfs 4, 12([[REG]])
; CHECK-DAG: lfs 5, 16([[REG]])
; CHECK-DAG: lfs 6, 20([[REG]])
; CHECK-DAG: lfs 7, 24([[REG]])
; CHECK-DAG: lfs 8, 28([[REG]])
; CHECK: ld [[REG:[0-9]+]], .LC
; CHECK-DAG: lfs 9, 0([[REG]])
; CHECK-DAG: lfs 10, 4([[REG]])
; CHECK-DAG: lfs 11, 8([[REG]])
; CHECK-DAG: lfs 12, 12([[REG]])
; CHECK-DAG: lfs 13, 16([[REG]])
; CHECK: ld [[REG:[0-9]+]], .LC
; CHECK-DAG: lwz [[REG0:[0-9]+]], 0([[REG]])
; CHECK-DAG: lwz [[REG1:[0-9]+]], 4([[REG]])
; CHECK-DAG: sldi [[REG1]], [[REG1]], 32
; CHECK-DAG: or 10, [[REG0]], [[REG1]]
; CHECK: bl test2

declare void @test2([8 x float], [5 x float], [2 x float])

; Returns the scalar double %c, which follows an [8 x float] and a
; [5 x float] aggregate.  The expected code stores register 10 as a doubleword
; to a stack slot and reloads the same slot with lfd into FPR 1.
define double @callee3([8 x float] %a, [5 x float] %b, double %c) {
entry:
  ret double %c
}
; CHECK-LABEL: @callee3
; CHECK: std 10, [[OFF:.*]](1)
; CHECK: lfd 1, [[OFF]](1)
; CHECK: blr

; Loads the arrays held in @g8 and @g5 and passes them, followed by the double
; %d, to test3.  The expected code stores FPR 1 with stfd to a stack slot and
; reloads the same slot with ld into register 10 before the bl, so the
; trailing double is handed over in a GPR.
define void @caller3(double %d) {
entry:
  %0 = load [8 x float]* getelementptr inbounds (%struct.float8* @g8, i64 0, i32 0), align 4
  %1 = load [5 x float]* getelementptr inbounds (%struct.float5* @g5, i64 0, i32 0), align 4
  tail call void @test3([8 x float] %0, [5 x float] %1, double %d)
  ret void
}
; CHECK-LABEL: @caller3
; CHECK: stfd 1, [[OFF:.*]](1)
; CHECK: ld 10, [[OFF]](1)
; CHECK: bl test3

declare void @test3([8 x float], [5 x float], double)

; Returns the scalar float %c, which follows an [8 x float] and a [5 x float]
; aggregate.  The expected code stores register 10 as a word to a stack slot
; and reloads the same slot with lfs into FPR 1.
define float @callee4([8 x float] %a, [5 x float] %b, float %c) {
entry:
  ret float %c
}
; CHECK-LABEL: @callee4
; CHECK: stw 10, [[OFF:.*]](1)
; CHECK: lfs 1, [[OFF]](1)
; CHECK: blr

; Loads the arrays held in @g8 and @g5 and passes them, followed by the float
; %f, to test4.  The expected code stores FPR 1 with stfs to a stack slot and
; reloads the same slot with lwz into register 10 before the bl, so the
; trailing float is handed over in a GPR.
define void @caller4(float %f) {
entry:
  %0 = load [8 x float]* getelementptr inbounds (%struct.float8* @g8, i64 0, i32 0), align 4
  %1 = load [5 x float]* getelementptr inbounds (%struct.float5* @g5, i64 0, i32 0), align 4
  tail call void @test4([8 x float] %0, [5 x float] %1, float %f)
  ret void
}
; CHECK-LABEL: @caller4
; CHECK: stfs 1, [[OFF:.*]](1)
; CHECK: lwz 10, [[OFF]](1)
; CHECK: bl test4

declare void @test4([8 x float], [5 x float], float)
