1; RUN: llc < %s -march=xcore | FileCheck %s
2; RUN: llc < %s -march=xcore -disable-fp-elim | FileCheck %s -check-prefix=CHECKFP
3
4; When using SP for small frames, we don't need any scratch registers (SR).
5; When using SP for large frames, we may need two scratch registers.
6; When using FP, for large or small frames, we may need one scratch register.
7
8; FP + small frame: spill FP+SR = entsp 2
9; CHECKFP-LABEL: f1
10; CHECKFP: entsp 2
11; CHECKFP-NEXT: stw r10, sp[1]
12; CHECKFP-NEXT: ldaw r10, sp[0]
13; CHECKFP: set sp, r10
14; CHECKFP-NEXT: ldw r10, sp[1]
15; CHECKFP-NEXT: retsp 2
16;
17; !FP + small frame: no spills = no stack adjustment needed
18; CHECK-LABEL: f1
19; CHECK: stw lr, sp[0]
20; CHECK: ldw lr, sp[0]
21; CHECK-NEXT: retsp 0
22define void @f1() nounwind {
23entry:
  ; The inline asm is empty; its only stated effect is the "~{lr}" clobber.
  ; The non-FP checks for f1 expect lr to be stored to and reloaded from sp[0]
  ; around it.
24  tail call void asm sideeffect "", "~{lr}"() nounwind
25  ret void
26}
27
28
29; FP + small frame: spill FP+SR+R0+LR = entsp 3 + extsp 1
30; CHECKFP-LABEL:f3
31; CHECKFP: entsp 3
32; CHECKFP-NEXT: stw r10, sp[1]
33; CHECKFP-NEXT: ldaw r10, sp[0]
34; CHECKFP-NEXT: stw [[REG:r[4-9]+]], r10[2]
35; CHECKFP-NEXT: mov [[REG]], r0
36; CHECKFP-NEXT: extsp 1
37; CHECKFP-NEXT: bl f2
38; CHECKFP-NEXT: ldaw sp, sp[1]
39; CHECKFP-NEXT: mov r0, [[REG]]
40; CHECKFP-NEXT: ldw [[REG]], r10[2]
41; CHECKFP-NEXT: set sp, r10
42; CHECKFP-NEXT: ldw r10, sp[1]
43; CHECKFP-NEXT: retsp 3
44;
45; !FP + small frame: spill R0+LR = entsp 2
46; CHECK-LABEL: f3
47; CHECK: entsp 2
48; CHECK-NEXT: stw [[REG:r[4-9]+]], sp[1]
49; CHECK-NEXT: mov [[REG]], r0
50; CHECK-NEXT: bl f2
51; CHECK-NEXT: mov r0, [[REG]]
52; CHECK-NEXT: ldw [[REG]], sp[1]
53; CHECK-NEXT: retsp 2
; External callee used by f3; it takes no arguments and returns nothing.
54declare void @f2()
55define i32 @f3(i32 %i) nounwind {
56entry:
  ; %i is returned after the call, so it has to stay live across @f2.
  ; The checks for f3 expect it to be moved from r0 into a register matching
  ; r[4-9]+ before the call and moved back into r0 afterwards.
57  call void @f2()
58  ret i32 %i
59}
60
61
62; FP + large frame: spill FP+SR = entsp 2 + 100000
63; CHECKFP-LABEL: f4
64; CHECKFP: entsp 65535
65; CHECKFP-NEXT: .Ltmp{{[0-9]+}}
66; CHECKFP-NEXT: .cfi_def_cfa_offset 262140
67; CHECKFP-NEXT: .Ltmp{{[0-9]+}}
68; CHECKFP-NEXT: .cfi_offset 15, 0
69; CHECKFP-NEXT: extsp 34467
70; CHECKFP-NEXT: .Ltmp{{[0-9]+}}
71; CHECKFP-NEXT: .cfi_def_cfa_offset 400008
72; CHECKFP-NEXT: stw r10, sp[1]
73; CHECKFP-NEXT: .Ltmp{{[0-9]+}}
74; CHECKFP-NEXT: .cfi_offset 10, -400004
75; CHECKFP-NEXT: ldaw r10, sp[0]
76; CHECKFP-NEXT: .Ltmp{{[0-9]+}}
77; CHECKFP-NEXT: .cfi_def_cfa_register 10
78; CHECKFP-NEXT: set sp, r10
79; CHECKFP-NEXT: ldw r10, sp[1]
80; CHECKFP-NEXT: ldaw sp, sp[65535]
81; CHECKFP-NEXT: retsp 34467
82;
83; !FP + large frame: spill SR+SR = entsp 2 + 100000
84; CHECK-LABEL: f4
85; CHECK: entsp 65535
86; CHECK-NEXT: .Ltmp{{[0-9]+}}
87; CHECK-NEXT: .cfi_def_cfa_offset 262140
88; CHECK-NEXT: .Ltmp{{[0-9]+}}
89; CHECK-NEXT: .cfi_offset 15, 0
90; CHECK-NEXT: extsp 34467
91; CHECK-NEXT: .Ltmp{{[0-9]+}}
92; CHECK-NEXT: .cfi_def_cfa_offset 400008
93; CHECK-NEXT: ldaw sp, sp[65535]
94; CHECK-NEXT: retsp 34467
; NOTE(review): unlike f1 and f3, f4 is not marked nounwind; presumably that is
; why the checks for f4 also match .cfi directives -- confirm.
95define void @f4() {
96entry:
  ; %0 is never used. The 100000 x i32 array (400000 bytes) exists only to
  ; give the function a large frame.
97  %0 = alloca [100000 x i32]
98  ret void
99}
100
101
102; FP + large frame: spill FP+SR+R4+LR = entsp 3 + 200000  + extsp 1
103; CHECKFP: .section .cp.rodata.cst4,"aMc",@progbits,4
104; CHECKFP-NEXT: .align 4
105; CHECKFP-NEXT: .LCPI[[CNST0:[0-9_]+]]:
106; CHECKFP-NEXT: .long 200002
107; CHECKFP-NEXT: .LCPI[[CNST1:[0-9_]+]]:
108; CHECKFP-NEXT: .long 200001
109; CHECKFP-NEXT: .text
110; CHECKFP-LABEL: f6
111; CHECKFP: entsp 65535
112; CHECKFP-NEXT: .Ltmp{{[0-9]+}}
113; CHECKFP-NEXT: .cfi_def_cfa_offset 262140
114; CHECKFP-NEXT: .Ltmp{{[0-9]+}}
115; CHECKFP-NEXT: .cfi_offset 15, 0
116; CHECKFP-NEXT: extsp 65535
117; CHECKFP-NEXT: .Ltmp{{[0-9]+}}
118; CHECKFP-NEXT: .cfi_def_cfa_offset 524280
119; CHECKFP-NEXT: extsp 65535
120; CHECKFP-NEXT: .Ltmp{{[0-9]+}}
121; CHECKFP-NEXT: .cfi_def_cfa_offset 786420
122; CHECKFP-NEXT: extsp 3398
123; CHECKFP-NEXT: .Ltmp{{[0-9]+}}
124; CHECKFP-NEXT: .cfi_def_cfa_offset 800012
125; CHECKFP-NEXT: stw r10, sp[1]
126; CHECKFP-NEXT: .Ltmp{{[0-9]+}}
127; CHECKFP-NEXT: .cfi_offset 10, -800008
128; CHECKFP-NEXT: ldaw r10, sp[0]
129; CHECKFP-NEXT: .Ltmp{{[0-9]+}}
130; CHECKFP-NEXT: .cfi_def_cfa_register 10
131; CHECKFP-NEXT: ldw r1, cp[.LCPI[[CNST0]]]
132; CHECKFP-NEXT: stw [[REG:r[4-9]+]], r10[r1]
133; CHECKFP-NEXT: .Ltmp{{[0-9]+}}
134; CHECKFP-NEXT: .cfi_offset 4, -4
135; CHECKFP-NEXT: mov [[REG]], r0
136; CHECKFP-NEXT: extsp 1
137; CHECKFP-NEXT: ldaw r0, r10[2]
138; CHECKFP-NEXT: bl f5
139; CHECKFP-NEXT: ldaw sp, sp[1]
; The second constant-pool entry (value 200001) is matched through the CNST1
; capture instead of a hard-coded label name, so this line does not depend on
; the position of f6 in the file.
140; CHECKFP-NEXT: ldw r1, cp[.LCPI[[CNST1]]]
141; CHECKFP-NEXT: ldaw r0, r10[r1]
142; CHECKFP-NEXT: extsp 1
143; CHECKFP-NEXT: bl f5
144; CHECKFP-NEXT: ldaw sp, sp[1]
145; CHECKFP-NEXT: mov r0, [[REG]]
146; CHECKFP-NEXT: ldw r1, cp[.LCPI[[CNST0]]]
147; CHECKFP-NEXT: ldw [[REG]], r10[r1]
148; CHECKFP-NEXT: set sp, r10
149; CHECKFP-NEXT: ldw r10, sp[1]
150; CHECKFP-NEXT: ldaw sp, sp[65535]
151; CHECKFP-NEXT: ldaw sp, sp[65535]
152; CHECKFP-NEXT: ldaw sp, sp[65535]
153; CHECKFP-NEXT: retsp 3398
154;
155; !FP + large frame: spill SR+SR+R4+LR = entsp 4 + 200000
156; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4
157; CHECK-NEXT: .align 4
158; CHECK-NEXT: .LCPI[[CNST0:[0-9_]+]]:
159; CHECK-NEXT: .long 200003
160; CHECK-NEXT: .LCPI[[CNST1:[0-9_]+]]:
161; CHECK-NEXT: .long 200002
162; CHECK-NEXT: .text
163; CHECK-LABEL: f6
164; CHECK: entsp 65535
165; CHECK-NEXT: .Ltmp{{[0-9]+}}
166; CHECK-NEXT: .cfi_def_cfa_offset 262140
167; CHECK-NEXT: .Ltmp{{[0-9]+}}
168; CHECK-NEXT: .cfi_offset 15, 0
169; CHECK-NEXT: extsp 65535
170; CHECK-NEXT: .Ltmp{{[0-9]+}}
171; CHECK-NEXT: .cfi_def_cfa_offset 524280
172; CHECK-NEXT: extsp 65535
173; CHECK-NEXT: .Ltmp{{[0-9]+}}
174; CHECK-NEXT: .cfi_def_cfa_offset 786420
175; CHECK-NEXT: extsp 3399
176; CHECK-NEXT: .Ltmp{{[0-9]+}}
177; CHECK-NEXT: .cfi_def_cfa_offset 800016
178; CHECK-NEXT: ldaw r1, sp[0]
179; CHECK-NEXT: ldw r2, cp[.LCPI[[CNST0]]]
180; CHECK-NEXT: stw [[REG:r[4-9]+]], r1[r2]
181; CHECK-NEXT: .Ltmp{{[0-9]+}}
182; CHECK-NEXT: .cfi_offset 4, -4
183; CHECK-NEXT: mov [[REG]], r0
184; CHECK-NEXT: ldaw r0, sp[3]
185; CHECK-NEXT: bl f5
186; CHECK-NEXT: ldaw r0, sp[0]
187; CHECK-NEXT: ldw r1, cp[.LCPI[[CNST1]]]
188; CHECK-NEXT: ldaw r0, r0[r1]
189; CHECK-NEXT: bl f5
190; CHECK-NEXT: mov r0, [[REG]]
191; CHECK-NEXT: ldaw [[REG]], sp[0]
192; CHECK-NEXT: ldw r1, cp[.LCPI[[CNST0]]]
193; CHECK-NEXT: ldw [[REG]], [[REG]][r1]
194; CHECK-NEXT: ldaw sp, sp[65535]
195; CHECK-NEXT: ldaw sp, sp[65535]
196; CHECK-NEXT: ldaw sp, sp[65535]
197; CHECK-NEXT: retsp 3399
; External callee that receives a pointer into the caller's stack array.
198declare void @f5(i32*)
; f6 passes the addresses of the first (index 0) and last (index 199999)
; elements of a 200000 x i32 stack array to f5, then returns its argument %i,
; which therefore has to stay live across both calls.
199define i32 @f6(i32 %i) {
200entry:
201  %0 = alloca [200000 x i32]
202  %1 = getelementptr inbounds [200000 x i32]* %0, i32 0, i32 0
203  call void @f5(i32* %1)
204  %2 = getelementptr inbounds [200000 x i32]* %0, i32 0, i32 199999
205  call void @f5(i32* %2)
206  ret i32 %i
207}
208
209; FP + large frame: spill FP+SR+LR = entsp 2 + 256  + extsp 1
210; CHECKFP-LABEL:f8
211; CHECKFP: entsp 258
212; CHECKFP-NEXT: stw r10, sp[1]
213; CHECKFP-NEXT: ldaw r10, sp[0]
214; CHECKFP-NEXT: mkmsk [[REG:r[0-9]+]], 8
215; CHECKFP-NEXT: ldaw r0, r10{{\[}}[[REG]]{{\]}}
216; CHECKFP-NEXT: extsp 1
217; CHECKFP-NEXT: bl f5
218; CHECKFP-NEXT: ldaw sp, sp[1]
219; CHECKFP-NEXT: set sp, r10
220; CHECKFP-NEXT: ldw r10, sp[1]
221; CHECKFP-NEXT: retsp 258
222;
223; !FP + large frame: no scratch registers needed, spill LR = entsp 1 + 256
224; CHECK-LABEL:f8
225; CHECK: entsp 257
226; CHECK-NEXT: ldaw r0, sp[254]
227; CHECK-NEXT: bl f5
228; CHECK-NEXT: retsp 257
229define void @f8() nounwind {
230entry:
  ; 256 x i32 stack array; the address of element 253 is the argument to f5.
231  %0 = alloca [256 x i32]
232  %1 = getelementptr inbounds [256 x i32]* %0, i32 0, i32 253
233  call void @f5(i32* %1)
234  ret void
235}
236
237; FP + large frame: spill FP+SR+LR = entsp 2 + 32768  + extsp 1
238; CHECKFP-LABEL:f9
239; CHECKFP: entsp 32770
240; CHECKFP-NEXT: stw r10, sp[1]
241; CHECKFP-NEXT: ldaw r10, sp[0]
242; CHECKFP-NEXT: ldc [[REG:r[0-9]+]], 32767
243; CHECKFP-NEXT: ldaw r0, r10{{\[}}[[REG]]{{\]}}
244; CHECKFP-NEXT: extsp 1
245; CHECKFP-NEXT: bl f5
246; CHECKFP-NEXT: ldaw sp, sp[1]
247; CHECKFP-NEXT: set sp, r10
248; CHECKFP-NEXT: ldw r10, sp[1]
249; CHECKFP-NEXT: retsp 32770
250;
251; !FP + large frame: spill SR+SR+LR = entsp 3 + 32768
252; CHECK-LABEL:f9
253; CHECK: entsp 32771
254; CHECK-NEXT: ldaw r0, sp[32768]
255; CHECK-NEXT: bl f5
256; CHECK-NEXT: retsp 32771
257define void @f9() nounwind {
258entry:
  ; 32768 x i32 stack array; the address of element 32765 is the argument to
  ; f5.
259  %0 = alloca [32768 x i32]
260  %1 = getelementptr inbounds [32768 x i32]* %0, i32 0, i32 32765
261  call void @f5(i32* %1)
262  ret void
263}
264