; RUN: llc < %s -enable-tail-merge=0 -mtriple=x86_64-linux | FileCheck %s --check-prefix=LINUX
; RUN: llc < %s -enable-tail-merge=0 -mtriple=x86_64-windows | FileCheck %s --check-prefix=WINDOWS
; RUN: llc < %s -enable-tail-merge=0 -mtriple=i686-windows | FileCheck %s --check-prefix=X86

; Test that we actually spill and reload all arguments in the variadic argument
; pack. Doing a normal call will clobber all argument registers, and we will
; spill around it. A simple adjustment should not require any XMM spills.
; va_start intrinsic: captures the current variadic register/stack state.
declare void @llvm.va_start(i8*) nounwind

; Returns the variadic target function that @f_thunk forwards to.
declare void(i8*, ...)* @get_f(i8* %this)

; Forwarding thunk: @get_f is a normal call that clobbers all argument
; registers, so the variadic argument pack (GPRs, XMMs, and AL on SysV)
; must be spilled before the call and reloaded before the musttail jump.
define void @f_thunk(i8* %this, ...) {
  ; Use va_start so that we exercise the combination.
  %ap = alloca [4 x i8*], align 16
  %ap_i8 = bitcast [4 x i8*]* %ap to i8*
  call void @llvm.va_start(i8* %ap_i8)

  ; Normal call: clobbers every argument register.
  %fptr = call void(i8*, ...)*(i8*)* @get_f(i8* %this)
  ; Forward all fixed and variadic arguments to the returned function.
  musttail call void (i8*, ...)* %fptr(i8* %this, ...)
  ret void
}

; Save and restore 6 GPRs, 8 XMMs, and AL around the call.

; LINUX-LABEL: f_thunk:
; LINUX-DAG: movq %rdi, {{.*}}
; LINUX-DAG: movq %rsi, {{.*}}
; LINUX-DAG: movq %rdx, {{.*}}
; LINUX-DAG: movq %rcx, {{.*}}
; LINUX-DAG: movq %r8, {{.*}}
; LINUX-DAG: movq %r9, {{.*}}
; LINUX-DAG: movb %al, {{.*}}
; LINUX-DAG: movaps %xmm0, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm1, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm2, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm3, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm4, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm5, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm6, {{[0-9]*}}(%rsp)
; LINUX-DAG: movaps %xmm7, {{[0-9]*}}(%rsp)
; LINUX: callq get_f
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm0
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm1
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm2
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm3
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm4
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm5
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm6
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm7
; LINUX-DAG: movq {{.*}}, %rdi
; LINUX-DAG: movq {{.*}}, %rsi
; LINUX-DAG: movq {{.*}}, %rdx
; LINUX-DAG: movq {{.*}}, %rcx
; LINUX-DAG: movq {{.*}}, %r8
; LINUX-DAG: movq {{.*}}, %r9
; LINUX-DAG: movb {{.*}}, %al
; LINUX: jmpq *{{.*}}  # TAILCALL

; WINDOWS-LABEL: f_thunk:
; WINDOWS-NOT: mov{{.}}ps
; WINDOWS-DAG: movq %rdx, {{.*}}
; WINDOWS-DAG: movq %rcx, {{.*}}
; WINDOWS-DAG: movq %r8, {{.*}}
; WINDOWS-DAG: movq %r9, {{.*}}
; WINDOWS-NOT: mov{{.}}ps
; WINDOWS: callq get_f
; WINDOWS-NOT: mov{{.}}ps
; WINDOWS-DAG: movq {{.*}}, %rdx
; WINDOWS-DAG: movq {{.*}}, %rcx
; WINDOWS-DAG: movq {{.*}}, %r8
; WINDOWS-DAG: movq {{.*}}, %r9
; WINDOWS-NOT: mov{{.}}ps
; WINDOWS: jmpq *{{.*}} # TAILCALL

; No regparms on normal x86 conventions.

; X86-LABEL: _f_thunk:
; X86: calll _get_f
; X86: jmpl *{{.*}} # TAILCALL

; This thunk shouldn't require any spills and reloads, assuming the register
; allocator knows what it's doing.

; Trivial thunk: the first argument is itself the target function pointer,
; so no intervening call exists and no spills/reloads should be emitted.
define void @g_thunk(i8* %fptr_i8, ...) {
  %fptr = bitcast i8* %fptr_i8 to void (i8*, ...)*
  musttail call void (i8*, ...)* %fptr(i8* %fptr_i8, ...)
  ret void
}

; LINUX-LABEL: g_thunk:
; LINUX-NOT: movq
; LINUX: jmpq *%rdi  # TAILCALL

; WINDOWS-LABEL: g_thunk:
; WINDOWS-NOT: movq
; WINDOWS: jmpq *%rcx # TAILCALL

; X86-LABEL: _g_thunk:
; X86: jmpl *%eax # TAILCALL

; Do a simple multi-exit multi-bb test.

; Foo layout: { branch selector, then-target fn ptr, else-target fn ptr }.
%struct.Foo = type { i1, i8*, i8* }

@g = external global i32

; Multi-exit, multi-basic-block thunk: each exit performs its own musttail
; dispatch through a function pointer loaded from the Foo object, so the
; tail-call lowering must work from more than one block.
define void @h_thunk(%struct.Foo* %this, ...) {
  %cond_p = getelementptr %struct.Foo* %this, i32 0, i32 0
  %cond = load i1* %cond_p
  br i1 %cond, label %then, label %else

then:
  %a_p = getelementptr %struct.Foo* %this, i32 0, i32 1
  %a_i8 = load i8** %a_p
  %a = bitcast i8* %a_i8 to void (%struct.Foo*, ...)*
  musttail call void (%struct.Foo*, ...)* %a(%struct.Foo* %this, ...)
  ret void

else:
  %b_p = getelementptr %struct.Foo* %this, i32 0, i32 2
  %b_i8 = load i8** %b_p
  %b = bitcast i8* %b_i8 to void (%struct.Foo*, ...)*
  ; A side effect before the tail call, so this block isn't merged away.
  store i32 42, i32* @g
  musttail call void (%struct.Foo*, ...)* %b(%struct.Foo* %this, ...)
  ret void
}

; LINUX-LABEL: h_thunk:
; LINUX: jne
; LINUX: jmpq *{{.*}} # TAILCALL
; LINUX: jmpq *{{.*}} # TAILCALL
; WINDOWS-LABEL: h_thunk:
; WINDOWS: jne
; WINDOWS: jmpq *{{.*}} # TAILCALL
; WINDOWS: jmpq *{{.*}} # TAILCALL
; X86-LABEL: _h_thunk:
; X86: jne
; X86: jmpl *{{.*}} # TAILCALL
; X86: jmpl *{{.*}} # TAILCALL
