1; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
2; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
3; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
4
5; While we don't support varargs with fastcall, we do support forwarding.
6
; Small string constant; the thunks below pass it to puts so that the call
; clobbers the argument registers and forces a spill/reload around it.
@asdf = internal constant [4 x i8] c"asdf"

; NOTE(review): declared without a nul terminator on the string above —
; presumably harmless for this codegen test, since only the emitted
; spill/fill/jmp sequence is checked, not the program's runtime output.
declare void @puts(i8*)
10
; Driver: invokes the fastcall varargs thunk with two register (inreg)
; arguments and one stack argument.
define i32 @call_fast_thunk() {
  %result = call x86_fastcallcc i32 (...)* @fast_thunk(i32 inreg 1, i32 inreg 2, i32 3)
  ret i32 %result
}
15
; Varargs fastcall thunk: incoming register arguments (ecx/edx) must survive
; the intervening call and then be forwarded unmodified to the real target.
define x86_fastcallcc i32 @fast_thunk(...) {
  ; puts clobbers the argument registers, so codegen must spill them first.
  call void @puts(i8* getelementptr ([4 x i8]* @asdf, i32 0, i32 0))
  ; musttail with an empty "(...)" argument list forwards all incoming
  ; varargs (including the register args) to @fast_target; the musttail
  ; call must immediately precede the ret, yielding a tail jmp.
  %r = musttail call x86_fastcallcc i32 (...)* bitcast (i32 (i32, i32, i32)* @fast_target to i32 (...)*) (...)
  ret i32 %r
}
21
22; Check that we spill and fill around the call to puts.
23
24; CHECK-LABEL: @fast_thunk@0:
25; CHECK-DAG: movl %ecx, {{.*}}
26; CHECK-DAG: movl %edx, {{.*}}
27; CHECK: calll _puts
28; CHECK-DAG: movl {{.*}}, %ecx
29; CHECK-DAG: movl {{.*}}, %edx
30; CHECK: jmp @fast_target@12
31
; Real target of the fastcall thunk: returns a + b + c.
define x86_fastcallcc i32 @fast_target(i32 inreg %a, i32 inreg %b, i32 %c) {
  %sum_ab = add i32 %a, %b
  %sum_abc = add i32 %sum_ab, %c
  ret i32 %sum_abc
}
37
38; Repeat the test for vectorcall, which has XMM registers.
39
; Driver: invokes the vectorcall varargs thunk with two register (inreg)
; arguments and one stack argument.
define i32 @call_vector_thunk() {
  %result = call x86_vectorcallcc i32 (...)* @vector_thunk(i32 inreg 1, i32 inreg 2, i32 3)
  ret i32 %result
}
44
; Varargs vectorcall thunk: like @fast_thunk, but vectorcall also passes
; arguments in XMM/YMM/ZMM registers, so those must be spilled and refilled
; around the call as well.
define x86_vectorcallcc i32 @vector_thunk(...) {
  ; puts clobbers the argument registers (GPRs and vector regs alike),
  ; forcing a spill of every potential argument register.
  call void @puts(i8* getelementptr ([4 x i8]* @asdf, i32 0, i32 0))
  ; Forward all incoming varargs to @vector_target via musttail; must
  ; immediately precede the ret so it lowers to a tail jmp.
  %r = musttail call x86_vectorcallcc i32 (...)* bitcast (i32 (i32, i32, i32)* @vector_target to i32 (...)*) (...)
  ret i32 %r
}
50
51; Check that we spill and fill SSE registers around the call to puts.
52
53; CHECK-LABEL: vector_thunk@@0:
54; CHECK-DAG: movl %ecx, {{.*}}
55; CHECK-DAG: movl %edx, {{.*}}
56
57; SSE2-DAG: movups %xmm0, {{.*}}
58; SSE2-DAG: movups %xmm1, {{.*}}
59; SSE2-DAG: movups %xmm2, {{.*}}
60; SSE2-DAG: movups %xmm3, {{.*}}
61; SSE2-DAG: movups %xmm4, {{.*}}
62; SSE2-DAG: movups %xmm5, {{.*}}
63
64; AVX-DAG: vmovups %ymm0, {{.*}}
65; AVX-DAG: vmovups %ymm1, {{.*}}
66; AVX-DAG: vmovups %ymm2, {{.*}}
67; AVX-DAG: vmovups %ymm3, {{.*}}
68; AVX-DAG: vmovups %ymm4, {{.*}}
69; AVX-DAG: vmovups %ymm5, {{.*}}
70
71; AVX512-DAG: vmovups %zmm0, {{.*}}
72; AVX512-DAG: vmovups %zmm1, {{.*}}
73; AVX512-DAG: vmovups %zmm2, {{.*}}
74; AVX512-DAG: vmovups %zmm3, {{.*}}
75; AVX512-DAG: vmovups %zmm4, {{.*}}
76; AVX512-DAG: vmovups %zmm5, {{.*}}
77
78; CHECK: calll _puts
79
80; SSE2-DAG: movups {{.*}}, %xmm0
81; SSE2-DAG: movups {{.*}}, %xmm1
82; SSE2-DAG: movups {{.*}}, %xmm2
83; SSE2-DAG: movups {{.*}}, %xmm3
84; SSE2-DAG: movups {{.*}}, %xmm4
85; SSE2-DAG: movups {{.*}}, %xmm5
86
87; AVX-DAG: vmovups {{.*}}, %ymm0
88; AVX-DAG: vmovups {{.*}}, %ymm1
89; AVX-DAG: vmovups {{.*}}, %ymm2
90; AVX-DAG: vmovups {{.*}}, %ymm3
91; AVX-DAG: vmovups {{.*}}, %ymm4
92; AVX-DAG: vmovups {{.*}}, %ymm5
93
94; AVX512-DAG: vmovups {{.*}}, %zmm0
95; AVX512-DAG: vmovups {{.*}}, %zmm1
96; AVX512-DAG: vmovups {{.*}}, %zmm2
97; AVX512-DAG: vmovups {{.*}}, %zmm3
98; AVX512-DAG: vmovups {{.*}}, %zmm4
99; AVX512-DAG: vmovups {{.*}}, %zmm5
100
101; CHECK-DAG: movl {{.*}}, %ecx
102; CHECK-DAG: movl {{.*}}, %edx
103; CHECK: jmp vector_target@@12
104
; Real target of the vectorcall thunk: returns a + b + c.
define x86_vectorcallcc i32 @vector_target(i32 inreg %a, i32 inreg %b, i32 %c) {
  %sum_ab = add i32 %a, %b
  %sum_abc = add i32 %sum_ab, %c
  ret i32 %sum_abc
}
110