; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-load-store-renaming=true < %s -mtriple=arm64-apple-ios7.0.0 -mcpu=cyclone -enable-misched=false | FileCheck %s

; rdar://13625505
; Here we have 9 fixed integer arguments; the 9th argument is passed on the
; stack, and the varargs start right after it at 8-byte alignment.
define void @fn9(i32* %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, ...) nounwind noinline ssp {
; CHECK-LABEL: fn9:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #64 ; =64
; CHECK-NEXT:    ldr w8, [sp, #64]
; CHECK-NEXT:    stp w2, w1, [sp, #52]
; CHECK-NEXT:    stp w4, w3, [sp, #44]
; CHECK-NEXT:    stp w6, w5, [sp, #36]
; CHECK-NEXT:    str w7, [sp, #32]
; CHECK-NEXT:    str w8, [x0]
; CHECK-NEXT:    ldr w9, [sp, #72]
; CHECK-NEXT:    ldr w8, [sp, #80]
; CHECK-NEXT:    stp w8, w9, [sp, #16]
; CHECK-NEXT:    add x8, sp, #72 ; =72
; CHECK-NEXT:    add x8, x8, #24 ; =24
; CHECK-NEXT:    str x8, [sp, #24]
; CHECK-NEXT:    ldr w8, [sp, #88]
; CHECK-NEXT:    str w8, [sp, #12]
; CHECK-NEXT:    add sp, sp, #64 ; =64
; CHECK-NEXT:    ret
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  %3 = alloca i32, align 4
  %4 = alloca i32, align 4
  %5 = alloca i32, align 4
  %6 = alloca i32, align 4
  %7 = alloca i32, align 4
  %8 = alloca i32, align 4
  %9 = alloca i32, align 4
  %args = alloca i8*, align 8
  %a10 = alloca i32, align 4
  %a11 = alloca i32, align 4
  %a12 = alloca i32, align 4
  store i32 %a2, i32* %2, align 4
  store i32 %a3, i32* %3, align 4
  store i32 %a4, i32* %4, align 4
  store i32 %a5, i32* %5, align 4
  store i32 %a6, i32* %6, align 4
  store i32 %a7, i32* %7, align 4
  store i32 %a8, i32* %8, align 4
  store i32 %a9, i32* %9, align 4
  store i32 %a9, i32* %a1
; Read the three trailing varargs (a10-a12) as i32s from the va_list.
  %10 = bitcast i8** %args to i8*
  call void @llvm.va_start(i8* %10)
  %11 = va_arg i8** %args, i32
  store i32 %11, i32* %a10, align 4
  %12 = va_arg i8** %args, i32
  store i32 %12, i32* %a11, align 4
  %13 = va_arg i8** %args, i32
  store i32 %13, i32* %a12, align 4
  ret void
}

declare void @llvm.va_start(i8*) nounwind

define i32 @main() nounwind ssp {
; CHECK-LABEL: main:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #96 ; =96
; CHECK-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
; CHECK-NEXT:    mov w9, #1
; CHECK-NEXT:    mov w8, #2
; CHECK-NEXT:    stp w8, w9, [sp, #72]
; CHECK-NEXT:    mov w9, #3
; CHECK-NEXT:    mov w8, #4
; CHECK-NEXT:    stp w8, w9, [sp, #64]
; CHECK-NEXT:    mov w9, #5
; CHECK-NEXT:    mov w8, #6
; CHECK-NEXT:    stp w8, w9, [sp, #56]
; CHECK-NEXT:    mov w9, #7
; CHECK-NEXT:    mov w8, #8
; CHECK-NEXT:    stp w8, w9, [sp, #48]
; CHECK-NEXT:    mov w8, #9
; CHECK-NEXT:    mov w9, #10
; CHECK-NEXT:    stp w9, w8, [sp, #40]
; CHECK-NEXT:    mov w10, #11
; CHECK-NEXT:    mov w11, #12
; CHECK-NEXT:    stp w11, w10, [sp, #32]
; CHECK-NEXT:    stp x10, x11, [sp, #16]
; CHECK-NEXT:    str x9, [sp, #8]
; CHECK-NEXT:    str w8, [sp]
; CHECK-NEXT:    add x0, sp, #76 ; =76
; CHECK-NEXT:    mov w1, #2
; CHECK-NEXT:    mov w2, #3
; CHECK-NEXT:    mov w3, #4
; CHECK-NEXT:    mov w4, #5
; CHECK-NEXT:    mov w5, #6
; CHECK-NEXT:    mov w6, #7
; CHECK-NEXT:    mov w7, #8
; CHECK-NEXT:    bl _fn9
; CHECK-NEXT:    mov w0, #0
; CHECK-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #96 ; =96
; CHECK-NEXT:    ret
  %a1 = alloca i32, align 4
  %a2 = alloca i32, align 4
  %a3 = alloca i32, align 4
  %a4 = alloca i32, align 4
  %a5 = alloca i32, align 4
  %a6 = alloca i32, align 4
  %a7 = alloca i32, align 4
  %a8 = alloca i32, align 4
  %a9 = alloca i32, align 4
  %a10 = alloca i32, align 4
  %a11 = alloca i32, align 4
  %a12 = alloca i32, align 4
  store i32 1, i32* %a1, align 4
  store i32 2, i32* %a2, align 4
  store i32 3, i32* %a3, align 4
  store i32 4, i32* %a4, align 4
  store i32 5, i32* %a5, align 4
  store i32 6, i32* %a6, align 4
  store i32 7, i32* %a7, align 4
  store i32 8, i32* %a8, align 4
  store i32 9, i32* %a9, align 4
  store i32 10, i32* %a10, align 4
  store i32 11, i32* %a11, align 4
  store i32 12, i32* %a12, align 4
  %1 = load i32, i32* %a1, align 4
  %2 = load i32, i32* %a2, align 4
  %3 = load i32, i32* %a3, align 4
  %4 = load i32, i32* %a4, align 4
  %5 = load i32, i32* %a5, align 4
  %6 = load i32, i32* %a6, align 4
  %7 = load i32, i32* %a7, align 4
  %8 = load i32, i32* %a8, align 4
  %9 = load i32, i32* %a9, align 4
  %10 = load i32, i32* %a10, align 4
  %11 = load i32, i32* %a11, align 4
  %12 = load i32, i32* %a12, align 4
; Values 10-12 are passed as varargs past the 9 fixed arguments.
  call void (i32*, i32, i32, i32, i32, i32, i32, i32, i32, ...) @fn9(i32* %a1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12)
  ret i32 0
}

; rdar://13668483
@.str = private unnamed_addr constant [4 x i8] c"fmt\00", align 1
define void @foo(i8* %fmt, ...) nounwind {
; CHECK-LABEL: foo:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    sub sp, sp, #48 ; =48
; CHECK-NEXT:    ldr w8, [sp, #48]
; CHECK-NEXT:    str w8, [sp, #28]
; CHECK-NEXT:    add x8, sp, #48 ; =48
; CHECK-NEXT:    add x8, x8, #23 ; =23
; CHECK-NEXT:    and x8, x8, #0xfffffffffffffff0
; CHECK-NEXT:    add x9, x8, #16 ; =16
; CHECK-NEXT:    stp x9, x0, [sp, #32]
; CHECK-NEXT:    ldr q0, [x8]
; CHECK-NEXT:    str q0, [sp], #48
; CHECK-NEXT:    ret
entry:
  %fmt.addr = alloca i8*, align 8
  %args = alloca i8*, align 8
  %vc = alloca i32, align 4
  %vv = alloca <4 x i32>, align 16
  store i8* %fmt, i8** %fmt.addr, align 8
  %args1 = bitcast i8** %args to i8*
  call void @llvm.va_start(i8* %args1)
; va_arg an i32 followed by a 16-byte-aligned <4 x i32> vector.
  %0 = va_arg i8** %args, i32
  store i32 %0, i32* %vc, align 4
  %1 = va_arg i8** %args, <4 x i32>
  store <4 x i32> %1, <4 x i32>* %vv, align 16
  ret void
}

define void @bar(i32 %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: bar:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    sub sp, sp, #80 ; =80
; CHECK-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    str w0, [sp, #60]
; CHECK-NEXT:    stp q0, q0, [sp, #16]
; CHECK-NEXT:    str x0, [sp]
; CHECK-NEXT:  Lloh0:
; CHECK-NEXT:    adrp x0, l_.str@PAGE
; CHECK-NEXT:  Lloh1:
; CHECK-NEXT:    add x0, x0, l_.str@PAGEOFF
; CHECK-NEXT:    bl _foo
; CHECK-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #80 ; =80
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpAdd Lloh0, Lloh1
entry:
  %x.addr = alloca i32, align 4
  %y.addr = alloca <4 x i32>, align 16
  store i32 %x, i32* %x.addr, align 4
  store <4 x i32> %y, <4 x i32>* %y.addr, align 16
  %0 = load i32, i32* %x.addr, align 4
  %1 = load <4 x i32>, <4 x i32>* %y.addr, align 16
; Caller side of @foo: an i32 and a <4 x i32> passed as varargs.
  call void (i8*, ...) @foo(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %0, <4 x i32> %1)
  ret void
}

; rdar://13668927
; When passing 16-byte aligned small structs as varargs, make sure the caller
; side is 16-byte aligned on stack.
%struct.s41 = type { i32, i16, i32, i16 }
define void @foo2(i8* %fmt, ...) nounwind {
; CHECK-LABEL: foo2:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    sub sp, sp, #48 ; =48
; CHECK-NEXT:    ldr w8, [sp, #48]
; CHECK-NEXT:    str w8, [sp, #28]
; CHECK-NEXT:    add x8, sp, #48 ; =48
; CHECK-NEXT:    add x8, x8, #23 ; =23
; CHECK-NEXT:    and x8, x8, #0xfffffffffffffff0
; CHECK-NEXT:    add x9, x8, #16 ; =16
; CHECK-NEXT:    stp x9, x0, [sp, #32]
; CHECK-NEXT:    ldr q0, [x8]
; CHECK-NEXT:    str q0, [sp], #48
; CHECK-NEXT:    ret
entry:
  %fmt.addr = alloca i8*, align 8
  %args = alloca i8*, align 8
  %vc = alloca i32, align 4
  %vs = alloca %struct.s41, align 16
  store i8* %fmt, i8** %fmt.addr, align 8
  %args1 = bitcast i8** %args to i8*
  call void @llvm.va_start(i8* %args1)
  %0 = va_arg i8** %args, i32
  store i32 %0, i32* %vc, align 4
; Manually expanded va_arg of a 16-byte-aligned struct: round the va_list
; pointer up to a 16-byte boundary, bump it past the struct, then copy out.
  %ap.cur = load i8*, i8** %args
  %1 = getelementptr i8, i8* %ap.cur, i32 15
  %2 = ptrtoint i8* %1 to i64
  %3 = and i64 %2, -16
  %ap.align = inttoptr i64 %3 to i8*
  %ap.next = getelementptr i8, i8* %ap.align, i32 16
  store i8* %ap.next, i8** %args
  %4 = bitcast i8* %ap.align to %struct.s41*
  %5 = bitcast %struct.s41* %vs to i8*
  %6 = bitcast %struct.s41* %4 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %5, i8* align 16 %6, i64 16, i1 false)
  ret void
}
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind

define void @bar2(i32 %x, i128 %s41.coerce) nounwind {
; CHECK-LABEL: bar2:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    sub sp, sp, #80 ; =80
; CHECK-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    str w0, [sp, #60]
; CHECK-NEXT:    stp x1, x2, [sp, #32]
; CHECK-NEXT:    stp x1, x2, [sp, #16]
; CHECK-NEXT:    str x0, [sp]
; CHECK-NEXT:  Lloh2:
; CHECK-NEXT:    adrp x0, l_.str@PAGE
; CHECK-NEXT:  Lloh3:
; CHECK-NEXT:    add x0, x0, l_.str@PAGEOFF
; CHECK-NEXT:    bl _foo2
; CHECK-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #80 ; =80
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpAdd Lloh2, Lloh3
entry:
  %x.addr = alloca i32, align 4
  %s41 = alloca %struct.s41, align 16
  store i32 %x, i32* %x.addr, align 4
  %0 = bitcast %struct.s41* %s41 to i128*
  store i128 %s41.coerce, i128* %0, align 1
  %1 = load i32, i32* %x.addr, align 4
  %2 = bitcast %struct.s41* %s41 to i128*
  %3 = load i128, i128* %2, align 1
; Caller side of @foo2: the struct is coerced to i128 and passed as a vararg.
  call void (i8*, ...) @foo2(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %1, i128 %3)
  ret void
}
