1; RUN: llc < %s -mcpu=cortex-a8 -align-neon-spills=0 | FileCheck %s
2; RUN: llc < %s -mcpu=cortex-a8 -align-neon-spills=1 | FileCheck %s --check-prefix=NEON
; Target description shared by every function in this file.
; In the layout string, "S32" gives the natural stack alignment in bits and
; "v128:32:128" gives 128-bit vectors a 32-bit ABI / 128-bit preferred
; alignment, so any more strongly aligned spill slot needs the stack pointer to
; be realigned explicitly (presumably the reason for the bfc sequences below).
3target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
4target triple = "thumbv7-apple-ios"
5
6; CHECK: f
7; This function is forced to spill a double.
8; Verify that the spill slot is properly aligned.
9;
10; The callee-saved r4 is used as a scratch register for stack realignment.
11; CHECK: push {r4, r7, lr}
12; CHECK: bfc r4, #0, #3
13; CHECK: mov sp, r4
14define void @f(double* nocapture %p) nounwind ssp {
15entry:
  ; Read the double behind %p; the access is only declared 4-byte aligned.
16  %0 = load double* %p, align 4
  ; Empty inline asm whose clobber list names d8-d15. It emits no instructions
  ; of its own; its only effect is to mark those eight registers as overwritten.
17  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
  ; %0 is still needed after this call, so it must stay live across both the
  ; clobbering asm and the call. This is what forces the spill of a double.
18  tail call void @g() nounwind
  ; Write the loaded value back unchanged, keeping %0 live until here.
19  store double %0, double* %p, align 4
20  ret void
21}
22
23; NEON: f
24; NEON: push {r4, r7, lr}
25; NEON: sub.w r4, sp, #64
26; NEON: bfc r4, #0, #4
27; Stack pointer must be updated before the spills.
28; NEON: mov sp, r4
29; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
30; NEON: vst1.64 {d12, d13, d14, d15}, [r4:128]
31; Stack pointer adjustment for the stack frame contents.
32; This could legally happen before the spills.
33; Since the spill slot is only 8 bytes, technically it would be fine to only
34; subtract #8 here. That would leave sp less aligned than some stack slots,
35; and would probably blow MFI's mind.
36; NEON: sub sp, #16
37; The epilog is free to use a scratch register other than r4.
38; NEON: add r[[R4:[0-9]+]], sp, #16
39; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]]:128]!
40; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]]:128]
41; The stack pointer restore must happen after the reloads.
42; NEON: mov sp,
43; NEON: pop
44
; External function with no body in this file; @f calls it between its load
; and its store.
45declare void @g()
46
47; Spill 7 d-registers.
48define void @f7(double* nocapture %p) nounwind ssp {
49entry:
  ; %p is never used. The whole body is an empty inline asm that clobbers
  ; d8-d14: seven consecutive d-registers starting at d8, i.e. an odd count.
50  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14}"() nounwind
51  ret void
52}
53
54; NEON: f7
55; NEON: push {r4, r7, lr}
56; NEON: sub.w r4, sp, #56
57; NEON: bfc r4, #0, #4
58; Stack pointer must be updated before the spills.
59; NEON: mov sp, r4
60; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
61; NEON: vst1.64 {d12, d13}, [r4:128]
62; NEON: vstr d14, [r4, #16]
63; Epilog
64; NEON: vld1.64 {d8, d9, d10, d11},
65; NEON: vld1.64 {d12, d13},
66; NEON: vldr d14,
67; The stack pointer restore must happen after the reloads.
68; NEON: mov sp,
69; NEON: pop
70
71; Spill 7 d-registers, leave a hole.
72define void @f3plus4(double* nocapture %p) nounwind ssp {
73entry:
  ; %p is never used. The clobber list deliberately omits d11, so the registers
  ; fall into two groups: d8-d10 (contiguous from d8) and d12-d15 (after the gap).
74  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d12},~{d13},~{d14},~{d15}"() nounwind
75  ret void
76}
77
78; Aligned spilling only works for contiguous ranges starting from d8.
79; The rest goes to the standard vpush instructions.
80; NEON: f3plus4
81; NEON: push {r4, r7, lr}
82; NEON: vpush {d12, d13, d14, d15}
83; NEON: sub.w r4, sp, #24
84; NEON: bfc r4, #0, #4
85; Stack pointer must be updated before the spills.
86; NEON: mov sp, r4
87; NEON: vst1.64 {d8, d9}, [r4:128]
88; NEON: vstr d10, [r4, #16]
89; Epilog
90; NEON: vld1.64 {d8, d9},
91; NEON: vldr d10, [{{.*}}, #16]
92; The stack pointer restore must happen after the reloads.
93; NEON: mov sp,
94; NEON: vpop {d12, d13, d14, d15}
95; NEON: pop
96