1; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=NORMAL -check-prefix=NORMALFP
2; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=NOPUSH
3; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=NOPUSH -check-prefix=NORMALFP
4; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -no-x86-call-frame-opt | FileCheck %s -check-prefix=NOPUSH
5
; External callees used by the tests below. Only the signatures matter for
; call lowering, so the parameters are left unnamed.

; Seven integer arguments.
declare void @seven_params(i32, i64, i32, i64, i32, i64, i32)

; Eight integer arguments of one width each: i32, i16 and i64.
declare void @eightparams(i32, i32, i32, i32, i32, i32, i32, i32)
declare void @eightparams16(i16, i16, i16, i16, i16, i16, i16, i16)
declare void @eightparams64(i64, i64, i64, i64, i64, i64, i64, i64)

; Ten integer arguments; the _ptr variant takes an i8* as its eighth argument.
declare void @ten_params(i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)
declare void @ten_params_ptr(i32, i64, i32, i64, i32, i64, i32, i8*, i32, i64)

; Nine float arguments.
declare void @cannot_push(float, float, float, float, float, float, float, float, float)
13
; We should get pushes for the last 4 parameters. Test that the
; in-register parameters are all in the right places, and check
; that the stack manipulations are correct and correctly
; described by the DWARF directives. Test that the switch
; to disable the optimization works and that the optimization
; doesn't kick in on Windows64 where it is not allowed. The
; x86_64-apple-darwin run checks this function for the absence of
; pushes as well.
; The label patterns end in a colon so that they match only the
; function's own label line and can never match inside "test10".
; NORMAL-LABEL: test1:
; NORMAL: pushq
; NORMAL-DAG: movl $1, %edi
; NORMAL-DAG: movl $2, %esi
; NORMAL-DAG: movl $3, %edx
; NORMAL-DAG: movl $4, %ecx
; NORMAL-DAG: movl $5, %r8d
; NORMAL-DAG: movl $6, %r9d
; NORMAL: pushq $10
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: pushq $9
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: pushq $8
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: pushq $7
; NORMAL: .cfi_adjust_cfa_offset 8
; NORMAL: callq ten_params
; NORMAL: addq $32, %rsp
; NORMAL: .cfi_adjust_cfa_offset -32
; NORMAL: popq
; NORMAL: retq
; NOPUSH-LABEL: test1:
; NOPUSH-NOT: pushq
; NOPUSH: retq
define void @test1() {
entry:
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
  ret void
}
49
; Having a frame pointer must not stop the optimization from using pushes;
; the CFI directives are not needed in that case, so the pushes are expected
; on consecutive lines. The immediates are chosen so that they do not fit in
; 8 bits, to check that the right push form is generated for them.
; NORMALFP-LABEL: test2
; NORMALFP: pushq $10000
; NORMALFP-NEXT: pushq $9000
; NORMALFP-NEXT: pushq $8000
; NORMALFP-NEXT: pushq $7000
; NORMALFP-NEXT: callq {{_?}}ten_params
define void @test2(i32 %k) {
  ; Variable-sized alloca; presumably this is what makes the function use a
  ; frame pointer. The allocated memory itself is never used.
  %dyn_buf = alloca i32, i32 %k
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7000, i64 8000, i32 9000, i64 10000)
  ret void
}
65
; Arguments 7 and 8 are the incoming %a and %b, so each of them should be
; pushed from a 64-bit register.
; TODO: The patterns below only accept two-character names after "%r", so
;       they reject r8 and r9. That is fine for now, because the pushes
;       always follow the moves into r8 and r9. Eventually, though, the
;       pushes should be scheduled better. In this example that would save
;       two copies, because the incoming parameters have to be moved out of
;       %rdi and %rsi to make room for the outgoing parameters.
; NORMAL-LABEL: test3
; NORMAL: pushq $10000
; NORMAL: pushq $9000
; NORMAL: pushq %r{{..}}
; NORMAL: pushq %r{{..}}
; NORMAL: callq ten_params
define void @test3(i32 %a, i64 %b) {
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 %a, i64 %b, i32 9000, i64 10000)
  ret void
}
84
; With only one argument on the stack the optimization should be avoided:
; expect a plain store to the stack slot rather than a push.
; NORMAL-LABEL: test4
; NORMAL: movl $7, (%rsp)
; NORMAL: callq seven_params
define void @test4() {
  call void @seven_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7)
  ret void
}
94
; A link-time constant address (here the address of @ext) should be pushed
; correctly, as an immediate operand of the push.
; NORMAL-LABEL: test5
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq $ext
; NORMAL: pushq $7
; NORMAL: callq ten_params_ptr
@ext = external dso_local constant i8
define void @test5() {
  ; The eighth argument is the address of the external constant.
  call void @ten_params_ptr(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i8* @ext, i32 9, i64 10)
  ret void
}
108
; A 64-bit load should be fused into the push as a memory operand, whereas a
; 32-bit load should not be: its value is expected to be pushed from a
; register.
; NORMAL-LABEL: test6
; NORMAL: movq %rsi, [[REG64:%.+]]
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq ([[REG64]])
; NORMAL: pushq {{%r..}}
; NORMAL: callq ten_params
define void @test6(i32* %p32, i64* %p64) {
  ; Both loaded values are passed on the stack (arguments 7 and 8).
  %narrow = load i32, i32* %p32
  %wide = load i64, i64* %p64
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 %narrow, i64 %wide, i32 9, i64 10)
  ret void
}
124
; Stack-relative loads should be folded into the pushes with correct offsets,
; and the same goes for an indirect call whose address is loaded from the
; stack.
; On entry, %p7 is at 8(%rsp) and %p8 is at 16(%rsp). Before the call
; sequence, 72 bytes are allocated on the stack (48 for register saves and 24
; for local storage and alignment), which puts %p7 at 80(%rsp) and %p8 at
; 88(%rsp). The call address could be stored anywhere in the local space but
; happens to be stored at 8(%rsp). Each push raises these offsets by 8 bytes:
; %p8 is pushed first from 88(%rsp), %p7 is pushed after two pushes from
; 96(%rsp), and the call goes through 40(%rsp) after all four pushes.
; NORMAL-LABEL: test7
; NORMAL: movq %r{{.*}}, 8(%rsp) {{.*Spill$}}
; NORMAL: pushq 88(%rsp)
; NORMAL: pushq $9
; NORMAL: pushq 96(%rsp)
; NORMAL: pushq $7
; NORMAL: callq *40(%rsp)
define void @test7(i64 %p1, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6, i64 %p7, i64 %p8) {
  ; Store the callee's address in a stack slot and read it back with a
  ; volatile load.
  %fptr_slot = alloca void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)*
  store void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)* @ten_params, void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)** %fptr_slot
  %callee = load volatile void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)*, void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64)** %fptr_slot
  ; The inline asm clobbers every register in its list; presumably this is
  ; what forces the call address to be spilled to the stack.
  call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  call void (i32, i64, i32, i64, i32, i64, i32, i64, i32, i64) %callee(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 %p7, i32 9, i64 %p8)
  ret void
}
149
; The load from the global cannot be folded into the push, because the store
; to the same global sits between the load and the call. The loaded value
; must therefore be pushed from a register.
; NORMAL-LABEL: test8
; NORMAL: movq the_global(%rip), [[REG:%r.+]]
; NORMAL: movq $42, the_global
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq [[REG]]
; NORMAL: pushq $7
; NORMAL: callq ten_params
@the_global = external dso_local global i64
define void @test8() {
  ; Read the old value, then overwrite the global before the call.
  %old_value = load i64, i64* @the_global
  store i64 42, i64* @the_global
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 %old_value, i32 9, i64 10)
  ret void
}
167
168
; Switching one call over to pushes has a negative effect on the other calls
; in the function that pass stack arguments without pushes. When that cost is
; larger than the benefit, pushes should not be used. Here one pushable call
; sits between two calls that cannot use pushes.
; NORMAL-LABEL: test9
; NORMAL: callq cannot_push
; NORMAL-NOT: push
; NORMAL: callq ten_params
define void @test9(float %p1) {
  call void @cannot_push(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float %p1)
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
  call void @cannot_push(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float %p1)
  ret void
}
182
; When the benefit is larger than the cost, pushes should be used. Here two
; pushable calls surround a single call that cannot use pushes; the checks
; look at the pushable call that follows it.
; NORMAL-LABEL: test10
; NORMAL: callq cannot_push
; NORMAL: pushq $10
; NORMAL: pushq $9
; NORMAL: pushq $8
; NORMAL: pushq $7
; NORMAL: callq ten_params
define void @test10(float %p1) {
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
  call void @cannot_push(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float %p1)
  call void @ten_params(i32 1, i64 2, i32 3, i64 4, i32 5, i64 6, i32 7, i64 8, i32 9, i64 10)
  ret void
}
197
; Regression test named after PR34863, i16 variant. In this minsize function
; the two stack arguments (i16 0 and i16 -1) are expected to be pushed as
; immediates directly before the call. The -1 is accepted either as -1 or as
; its 16-bit unsigned spelling 65535.
; NORMAL-LABEL: pr34863_16
; NORMAL:  pushq  ${{-1|65535}}
; NORMAL-NEXT:  pushq  $0
; NORMAL-NEXT:  call
define void @pr34863_16(i16 %x) minsize nounwind {
  tail call void @eightparams16(i16 %x, i16 %x, i16 %x, i16 %x, i16 %x, i16 %x, i16 0, i16 -1)
  ret void
}
207
; Regression test named after PR34863, i32 variant. In this minsize function
; the two stack arguments (i32 0 and i32 -1) are expected to be pushed as
; immediates directly before the call. For an i32 the only correct spelling
; of the immediate is -1; 65535 would be a truncated value, so the pattern
; does not accept it.
; NORMAL-LABEL: pr34863_32
; NORMAL:  pushq  $-1
; NORMAL-NEXT:  pushq  $0
; NORMAL-NEXT:  call
define void @pr34863_32(i32 %x) minsize nounwind {
entry:
  tail call void @eightparams(i32 %x, i32 %x, i32 %x, i32 %x, i32 %x, i32 %x, i32 0, i32 -1)
  ret void
}
217
; Regression test named after PR34863, i64 variant. In this minsize function
; the two stack arguments (i64 0 and i64 -1) are expected to be pushed as
; immediates directly before the call. For an i64 the only correct spelling
; of the immediate is -1; 65535 would be a truncated value, so the pattern
; does not accept it.
; NORMAL-LABEL: pr34863_64
; NORMAL:  pushq  $-1
; NORMAL-NEXT:  pushq  $0
; NORMAL-NEXT:  call
define void @pr34863_64(i64 %x) minsize nounwind {
entry:
  tail call void @eightparams64(i64 %x, i64 %x, i64 %x, i64 %x, i64 %x, i64 %x, i64 0, i64 -1)
  ret void
}
227