1; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
2
3
; External callees for the scalar argument-passing tests.
; @test is declared here but is not referenced in the visible part of this file.
4declare i32 @sample_add(i32, i32)
5declare i32 @stack_callee_int(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
6declare i32 @stack_callee_int_szext(i1 signext, i8 zeroext, i32, i32, i32, i32, i32, i32, i16 zeroext, i8 signext)
7declare float @stack_callee_float(float, float, float, float, float, float, float, float, float, float)
8declare void @test(i64)
9
10; Scalar argument passing must not change with +vpu (the tests below are the same as in VE/Scalar/call.ll)
11
; sample_call: calls @sample_add with the constants 1 and 2.
; The checks expect the constants in %s0 and %s1, and the callee address
; (assembled from its @lo and @hi parts) in %s12 for the bsic.
12define fastcc i32 @sample_call() {
13; CHECK-LABEL: sample_call:
14; CHECK:       .LBB{{[0-9]+}}_2:
15; CHECK-NEXT:    lea %s0, sample_add@lo
16; CHECK-NEXT:    and %s0, %s0, (32)0
17; CHECK-NEXT:    lea.sl %s12, sample_add@hi(, %s0)
18; CHECK-NEXT:    or %s0, 1, (0)1
19; CHECK-NEXT:    or %s1, 2, (0)1
20; CHECK-NEXT:    bsic %s10, (, %s12)
21; CHECK-NEXT:    or %s11, 0, %s9
22  %r = tail call fastcc i32 @sample_add(i32 1, i32 2)
23  ret i32 %r
24}
25
; stack_call_int: passes the ten i32 constants 1..10 to @stack_callee_int.
; The checks expect arguments 1-8 in %s0-%s7, argument 9 stored at
; 240(, %s11) and argument 10 stored at 248(, %s11).
26define fastcc i32 @stack_call_int() {
27; CHECK-LABEL: stack_call_int:
28; CHECK:       .LBB{{[0-9]+}}_2:
29; CHECK-NEXT:    or %s0, 10, (0)1
30; CHECK-NEXT:    st %s0, 248(, %s11)
31; CHECK-NEXT:    or %s34, 9, (0)1
32; CHECK-NEXT:    lea %s0, stack_callee_int@lo
33; CHECK-NEXT:    and %s0, %s0, (32)0
34; CHECK-NEXT:    lea.sl %s12, stack_callee_int@hi(, %s0)
35; CHECK-NEXT:    or %s0, 1, (0)1
36; CHECK-NEXT:    or %s1, 2, (0)1
37; CHECK-NEXT:    or %s2, 3, (0)1
38; CHECK-NEXT:    or %s3, 4, (0)1
39; CHECK-NEXT:    or %s4, 5, (0)1
40; CHECK-NEXT:    or %s5, 6, (0)1
41; CHECK-NEXT:    or %s6, 7, (0)1
42; CHECK-NEXT:    or %s7, 8, (0)1
43; CHECK-NEXT:    st %s34, 240(, %s11)
44; CHECK-NEXT:    bsic %s10, (, %s12)
45; CHECK-NEXT:    or %s11, 0, %s9
46  %r = tail call fastcc i32 @stack_callee_int(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
47  ret i32 %r
48}
49
; stack_call_int_szext: same layout as stack_call_int, but the callee declares
; signext/zeroext attributes on its narrow parameters and every narrow
; argument is the constant -1.
; The checks expect the extended values: -1 for the signext i1 (%s0) and the
; signext i8 (stored at 248(, %s11)), 255 for the zeroext i8 (%s1), and 65535
; for the zeroext i16 (stored at 240(, %s11)).
50define fastcc i32 @stack_call_int_szext() {
51; CHECK-LABEL: stack_call_int_szext:
52; CHECK:       .LBB{{[0-9]+}}_2:
53; CHECK-NEXT:    or %s0, -1, (0)1
54; CHECK-NEXT:    st %s0, 248(, %s11)
55; CHECK-NEXT:    lea %s34, 65535
56; CHECK-NEXT:    lea %s0, stack_callee_int_szext@lo
57; CHECK-NEXT:    and %s0, %s0, (32)0
58; CHECK-NEXT:    lea.sl %s12, stack_callee_int_szext@hi(, %s0)
59; CHECK-NEXT:    or %s0, -1, (0)1
60; CHECK-NEXT:    lea %s1, 255
61; CHECK-NEXT:    or %s2, 3, (0)1
62; CHECK-NEXT:    or %s3, 4, (0)1
63; CHECK-NEXT:    or %s4, 5, (0)1
64; CHECK-NEXT:    or %s5, 6, (0)1
65; CHECK-NEXT:    or %s6, 7, (0)1
66; CHECK-NEXT:    or %s7, 8, (0)1
67; CHECK-NEXT:    st %s34, 240(, %s11)
68; CHECK-NEXT:    bsic %s10, (, %s12)
69; CHECK-NEXT:    or %s11, 0, %s9
70  %r = tail call fastcc i32 @stack_callee_int_szext(i1 -1, i8 -1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i16 -1, i8 -1)
71  ret i32 %r
72}
73
; stack_call_float: passes the float constants 1.0 through 10.0 to
; @stack_callee_float.
; The checks expect every constant to be materialised with lea.sl; the
; immediates are the IEEE-754 single-precision bit patterns of the values
; (e.g. 1065353216 = 0x3F800000 = 1.0, 1092616192 = 0x41200000 = 10.0).
; Arguments 1-8 go to %s0-%s7; arguments 9 and 10 are stored at
; 240(, %s11) and 248(, %s11).
74define fastcc float @stack_call_float() {
75; CHECK-LABEL: stack_call_float:
76; CHECK:       .LBB{{[0-9]+}}_2:
77; CHECK-NEXT:    lea.sl %s0, 1092616192
78; CHECK-NEXT:    st %s0, 248(, %s11)
79; CHECK-NEXT:    lea.sl %s34, 1091567616
80; CHECK-NEXT:    lea %s0, stack_callee_float@lo
81; CHECK-NEXT:    and %s0, %s0, (32)0
82; CHECK-NEXT:    lea.sl %s12, stack_callee_float@hi(, %s0)
83; CHECK-NEXT:    lea.sl %s0, 1065353216
84; CHECK-NEXT:    lea.sl %s1, 1073741824
85; CHECK-NEXT:    lea.sl %s2, 1077936128
86; CHECK-NEXT:    lea.sl %s3, 1082130432
87; CHECK-NEXT:    lea.sl %s4, 1084227584
88; CHECK-NEXT:    lea.sl %s5, 1086324736
89; CHECK-NEXT:    lea.sl %s6, 1088421888
90; CHECK-NEXT:    lea.sl %s7, 1090519040
91; CHECK-NEXT:    st %s34, 240(, %s11)
92; CHECK-NEXT:    bsic %s10, (, %s12)
93; CHECK-NEXT:    or %s11, 0, %s9
94  %r = tail call fastcc float @stack_callee_float(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0)
95  ret float %r
96}
97
; stack_call_float2: forwards the incoming float %p0 in all ten argument
; positions of @stack_callee_float.
; The checks expect %s0 to be stored to both 248(, %s11) and 240(, %s11) and
; copied into %s1-%s7; %s1 is first used as scratch for the callee address.
98define fastcc float @stack_call_float2(float %p0) {
99; CHECK-LABEL: stack_call_float2:
100; CHECK:       .LBB{{[0-9]+}}_2:
101; CHECK-NEXT:    st %s0, 248(, %s11)
102; CHECK-NEXT:    lea %s1, stack_callee_float@lo
103; CHECK-NEXT:    and %s1, %s1, (32)0
104; CHECK-NEXT:    lea.sl %s12, stack_callee_float@hi(, %s1)
105; CHECK-NEXT:    st %s0, 240(, %s11)
106; CHECK-NEXT:    or %s1, 0, %s0
107; CHECK-NEXT:    or %s2, 0, %s0
108; CHECK-NEXT:    or %s3, 0, %s0
109; CHECK-NEXT:    or %s4, 0, %s0
110; CHECK-NEXT:    or %s5, 0, %s0
111; CHECK-NEXT:    or %s6, 0, %s0
112; CHECK-NEXT:    or %s7, 0, %s0
113; CHECK-NEXT:    bsic %s10, (, %s12)
114; CHECK-NEXT:    or %s11, 0, %s9
115  %r = tail call fastcc float @stack_callee_float(float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0)
116  ret float %r
117}
118
119; Vector argument passing (fastcc feature)
120;
; @get_v256i32 returns a <256 x i32>; the @vsample_* callees take one,
; optionally preceded by an i32.
121declare fastcc <256 x i32> @get_v256i32()
122declare fastcc void @vsample_v(<256 x i32>)
123declare fastcc void @vsample_iv(i32, <256 x i32>)
124
; caller_vret: calls fastcc @get_v256i32, which returns a <256 x i32>, and
; discards the result.
; The checks expect only the address setup and the bsic in the call block.
; The function name is matched with CHECK-LABEL, like every other test in this
; file, so that these checks cannot match across a function boundary.
125define void @caller_vret() {
126; CHECK-LABEL: caller_vret:
127; CHECK:       .LBB{{[0-9]+}}_2:
128; CHECK-NEXT:    lea %s0, get_v256i32@lo
129; CHECK-NEXT:    and %s0, %s0, (32)0
130; CHECK-NEXT:    lea.sl %s12, get_v256i32@hi(, %s0)
131; CHECK-NEXT:    bsic %s10, (, %s12)
132; CHECK-NEXT:    or %s11, 0, %s9
133  %r = tail call fastcc <256 x i32> @get_v256i32()
134  ret void
135}
136
; caller_vret_pass_p0: calls @get_v256i32 and forwards the returned vector as
; the only argument of @vsample_v.
; The checks expect the two calls back to back, with no vector move between
; the first bsic and the address setup of the second call.
137define void @caller_vret_pass_p0() {
138; CHECK-LABEL: caller_vret_pass_p0:
139; CHECK:       .LBB{{[0-9]+}}_2:
140; CHECK:         lea %s0, get_v256i32@lo
141; CHECK-NEXT:    and %s0, %s0, (32)0
142; CHECK-NEXT:    lea.sl %s12, get_v256i32@hi(, %s0)
143; CHECK-NEXT:    bsic %s10, (, %s12)
144; CHECK-NEXT:    lea %s0, vsample_v@lo
145; CHECK-NEXT:    and %s0, %s0, (32)0
146; CHECK-NEXT:    lea.sl %s12, vsample_v@hi(, %s0)
147; CHECK-NEXT:    bsic %s10, (, %s12)
148; CHECK-NEXT:    or %s11, 0, %s9
149  %p = tail call fastcc <256 x i32> @get_v256i32()
150  call fastcc void @vsample_v(<256 x i32> %p)
151  ret void
152}
153
; caller_vret_pass_p1: calls @get_v256i32, then passes the incoming i32 %s
; together with the returned vector to @vsample_iv.
; The checks expect %s0 to be parked in %s18 before the first call and moved
; back into %s0 just before the second bsic. The checks end at that bsic.
154define void @caller_vret_pass_p1(i32 %s) {
155; CHECK-LABEL: caller_vret_pass_p1:
156; CHECK:       .LBB{{[0-9]+}}_2:
157; CHECK:         or %s18, 0, %s0
158; CHECK-NEXT:    lea %s0, get_v256i32@lo
159; CHECK-NEXT:    and %s0, %s0, (32)0
160; CHECK-NEXT:    lea.sl %s12, get_v256i32@hi(, %s0)
161; CHECK-NEXT:    bsic %s10, (, %s12)
162; CHECK-NEXT:    lea %s0, vsample_iv@lo
163; CHECK-NEXT:    and %s0, %s0, (32)0
164; CHECK-NEXT:    lea.sl %s12, vsample_iv@hi(, %s0)
165; CHECK-NEXT:    or %s0, 0, %s18
166; CHECK-NEXT:    bsic %s10, (, %s12)
167  %p = tail call fastcc <256 x i32> @get_v256i32()
168  call fastcc void @vsample_iv(i32 %s, <256 x i32> %p)
169  ret void
170}
171
; fastcc callees taking two and three <256 x i32> arguments.
172declare fastcc void @vsample_vv(<256 x i32>, <256 x i32>)
173declare fastcc void @vsample_vvv(<256 x i32>, <256 x i32>, <256 x i32>)
174
; caller_vret_pass_p01: calls @get_v256i32 and passes the returned vector as
; both arguments of @vsample_vv.
; The checks expect a single vector copy from %v0 to %v1 before the second
; call, preceded by a lea/lvl pair that loads 256 via %s16.
175define void @caller_vret_pass_p01() {
176; CHECK-LABEL: caller_vret_pass_p01:
177; CHECK:       .LBB{{[0-9]+}}_2:
178; CHECK-NEXT:    lea %s0, get_v256i32@lo
179; CHECK-NEXT:    and %s0, %s0, (32)0
180; CHECK-NEXT:    lea.sl %s12, get_v256i32@hi(, %s0)
181; CHECK-NEXT:    bsic %s10, (, %s12)
182; CHECK-NEXT:    lea %s0, vsample_vv@lo
183; CHECK-NEXT:    and %s0, %s0, (32)0
184; CHECK-NEXT:    lea.sl %s12, vsample_vv@hi(, %s0)
185; CHECK-NEXT:    lea %s16, 256
186; CHECK-NEXT:    lvl %s16
187; CHECK-NEXT:    vor %v1, (0)1, %v0
188; CHECK-NEXT:    bsic %s10, (, %s12)
189; CHECK-NEXT:    or %s11, 0, %s9
190  %p = tail call fastcc <256 x i32> @get_v256i32()
191  call fastcc void @vsample_vv(<256 x i32> %p, <256 x i32> %p)
192  ret void
193}
194
; caller_vret_pass_p012: calls @get_v256i32 and passes the returned vector as
; all three arguments of @vsample_vvv.
; The checks expect two vector copies, %v0 to %v1 and %v0 to %v2, each
; preceded by its own lea/lvl pair that loads 256 via %s16.
195define void @caller_vret_pass_p012() {
196; CHECK-LABEL: caller_vret_pass_p012:
197; CHECK:       .LBB{{[0-9]+}}_2:
198; CHECK-NEXT:    lea %s0, get_v256i32@lo
199; CHECK-NEXT:    and %s0, %s0, (32)0
200; CHECK-NEXT:    lea.sl %s12, get_v256i32@hi(, %s0)
201; CHECK-NEXT:    bsic %s10, (, %s12)
202; CHECK-NEXT:    lea %s0, vsample_vvv@lo
203; CHECK-NEXT:    and %s0, %s0, (32)0
204; CHECK-NEXT:    lea.sl %s12, vsample_vvv@hi(, %s0)
205; CHECK-NEXT:    lea %s16, 256
206; CHECK-NEXT:    lvl %s16
207; CHECK-NEXT:    vor %v1, (0)1, %v0
208; CHECK-NEXT:    lea %s16, 256
209; CHECK-NEXT:    lvl %s16
210; CHECK-NEXT:    vor %v2, (0)1, %v0
211; CHECK-NEXT:    bsic %s10, (, %s12)
212; CHECK-NEXT:    or %s11, 0, %s9
213  %p = tail call fastcc <256 x i32> @get_v256i32()
214  call fastcc void @vsample_vvv(<256 x i32> %p, <256 x i32> %p, <256 x i32> %p)
215  ret void
216}
217
218; Expose register parameter mapping by forcing an explicit vreg move for all parameter positions
; fastcc callee taking seven <256 x i32> arguments.
219declare fastcc void @vsample_vvvvvvv(<256 x i32>, <256 x i32>, <256 x i32>, <256 x i32>, <256 x i32>, <256 x i32>, <256 x i32>)
220
221; TODO improve vreg copy (redundant lea+lvl emitted)
; roundtrip_caller_callee: receives seven <256 x i32> parameters and passes
; them to @vsample_vvvvvvv rotated by one position (%p1..%p6, then %p0).
; The checks expect %v0 to be copied to %v7 first, then %v1..%v7 to be moved
; down into %v0..%v6 in order. Every vor is preceded by its own lea/lvl pair
; that loads 256 via %s16.
222define fastcc void @roundtrip_caller_callee(<256 x i32> %p0, <256 x i32> %p1, <256 x i32> %p2, <256 x i32> %p3, <256 x i32> %p4, <256 x i32> %p5, <256 x i32> %p6) {
223; CHECK-LABEL: roundtrip_caller_callee:
224; CHECK:       .LBB{{[0-9]+}}_2:
225; CHECK-NEXT:    lea %s16, 256
226; CHECK-NEXT:    lvl %s16
227; CHECK-NEXT:    vor %v7, (0)1, %v0
228; CHECK-NEXT:    lea %s0, vsample_vvvvvvv@lo
229; CHECK-NEXT:    and %s0, %s0, (32)0
230; CHECK-NEXT:    lea.sl %s12, vsample_vvvvvvv@hi(, %s0)
231; CHECK-NEXT:    lea %s16, 256
232; CHECK-NEXT:    lvl %s16
233; CHECK-NEXT:    vor %v0, (0)1, %v1
234; CHECK-NEXT:    lea %s16, 256
235; CHECK-NEXT:    lvl %s16
236; CHECK-NEXT:    vor %v1, (0)1, %v2
237; CHECK-NEXT:    lea %s16, 256
238; CHECK-NEXT:    lvl %s16
239; CHECK-NEXT:    vor %v2, (0)1, %v3
240; CHECK-NEXT:    lea %s16, 256
241; CHECK-NEXT:    lvl %s16
242; CHECK-NEXT:    vor %v3, (0)1, %v4
243; CHECK-NEXT:    lea %s16, 256
244; CHECK-NEXT:    lvl %s16
245; CHECK-NEXT:    vor %v4, (0)1, %v5
246; CHECK-NEXT:    lea %s16, 256
247; CHECK-NEXT:    lvl %s16
248; CHECK-NEXT:    vor %v5, (0)1, %v6
249; CHECK-NEXT:    lea %s16, 256
250; CHECK-NEXT:    lvl %s16
251; CHECK-NEXT:    vor %v6, (0)1, %v7
252; CHECK-NEXT:    bsic %s10, (, %s12)
253; CHECK-NEXT:    or %s11, 0, %s9
254  call fastcc void @vsample_vvvvvvv(<256 x i32> %p1, <256 x i32> %p2, <256 x i32> %p3, <256 x i32> %p4, <256 x i32> %p5, <256 x i32> %p6, <256 x i32> %p0)
255  ret void
256}
257