; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86-X87
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64

; powi(x, 15) is currently expanded into 6 multiplies. Ideally it would use
; the 5-multiply chain x^2, x^3, x^6, x^12, x^15.

; Default lowering (no size attributes): llvm.powi(%a, 15) is expanded inline
; into a chain of multiplies on all three configurations rather than calling
; the __powidf2 libcall.
; On i686 with SSE2 the product is computed in xmm registers and then moved
; through a realigned stack slot into st(0) (movsd + fldl) before returning.
define double @pow_wrapper(double %a) nounwind readonly ssp noredzone {
; X86-X87-LABEL: pow_wrapper:
; X86-X87:       # %bb.0:
; X86-X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fld %st(0)
; X86-X87-NEXT:    fmul %st(1), %st
; X86-X87-NEXT:    fmul %st, %st(1)
; X86-X87-NEXT:    fmul %st, %st(0)
; X86-X87-NEXT:    fmul %st, %st(1)
; X86-X87-NEXT:    fmul %st, %st(0)
; X86-X87-NEXT:    fmulp %st, %st(1)
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: pow_wrapper:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %ebp
; X86-SSE-NEXT:    movl %esp, %ebp
; X86-SSE-NEXT:    andl $-8, %esp
; X86-SSE-NEXT:    subl $8, %esp
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movapd %xmm0, %xmm1
; X86-SSE-NEXT:    mulsd %xmm0, %xmm1
; X86-SSE-NEXT:    mulsd %xmm1, %xmm0
; X86-SSE-NEXT:    mulsd %xmm1, %xmm1
; X86-SSE-NEXT:    mulsd %xmm1, %xmm0
; X86-SSE-NEXT:    mulsd %xmm1, %xmm1
; X86-SSE-NEXT:    mulsd %xmm0, %xmm1
; X86-SSE-NEXT:    movsd %xmm1, (%esp)
; X86-SSE-NEXT:    fldl (%esp)
; X86-SSE-NEXT:    movl %ebp, %esp
; X86-SSE-NEXT:    popl %ebp
; X86-SSE-NEXT:    retl
;
; X64-LABEL: pow_wrapper:
; X64:       # %bb.0:
; X64-NEXT:    movapd %xmm0, %xmm1
; X64-NEXT:    mulsd %xmm0, %xmm1
; X64-NEXT:    mulsd %xmm1, %xmm0
; X64-NEXT:    mulsd %xmm1, %xmm1
; X64-NEXT:    mulsd %xmm1, %xmm0
; X64-NEXT:    mulsd %xmm1, %xmm1
; X64-NEXT:    mulsd %xmm0, %xmm1
; X64-NEXT:    movapd %xmm1, %xmm0
; X64-NEXT:    retq
  %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind
  ret double %ret
}

; With the optsize attribute, llvm.powi(%a, 15) is not expanded into
; multiplies: every configuration emits a call to __powidf2 with the exponent
; 15 passed as the integer argument (a tail call on x86-64).
define double @pow_wrapper_optsize(double %a) optsize {
; X86-X87-LABEL: pow_wrapper_optsize:
; X86-X87:       # %bb.0:
; X86-X87-NEXT:    subl $12, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 16
; X86-X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fstpl (%esp)
; X86-X87-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-X87-NEXT:    calll __powidf2
; X86-X87-NEXT:    addl $12, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 4
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: pow_wrapper_optsize:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    subl $12, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movsd %xmm0, (%esp)
; X86-SSE-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    calll __powidf2
; X86-SSE-NEXT:    addl $12, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE-NEXT:    retl
;
; X64-LABEL: pow_wrapper_optsize:
; X64:       # %bb.0:
; X64-NEXT:    movl $15, %edi
; X64-NEXT:    jmp __powidf2@PLT # TAILCALL
  %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind
  ret double %ret
}

; Profile-guided variant: the function carries no size attribute, only
; !prof !14 (function_entry_count 0, defined with the ProfileSummary metadata
; at the end of this file). The expected code is the same __powidf2 libcall
; sequence as for the optsize function, not the inline multiply chain.
define double @pow_wrapper_pgso(double %a) !prof !14 {
; X86-X87-LABEL: pow_wrapper_pgso:
; X86-X87:       # %bb.0:
; X86-X87-NEXT:    subl $12, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 16
; X86-X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fstpl (%esp)
; X86-X87-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-X87-NEXT:    calll __powidf2
; X86-X87-NEXT:    addl $12, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 4
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: pow_wrapper_pgso:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    subl $12, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movsd %xmm0, (%esp)
; X86-SSE-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    calll __powidf2
; X86-SSE-NEXT:    addl $12, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE-NEXT:    retl
;
; X64-LABEL: pow_wrapper_pgso:
; X64:       # %bb.0:
; X64-NEXT:    movl $15, %edi
; X64-NEXT:    jmp __powidf2@PLT # TAILCALL
  %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind
  ret double %ret
}

; With the minsize attribute, llvm.powi(%a, 15) is again lowered to a
; __powidf2 call. The i686 sequences match the optsize function; on x86-64
; the exponent is materialized with a pushq $15 / popq %rdi pair instead of
; movl $15, %edi (presumably for the shorter encoding).
define double @pow_wrapper_minsize(double %a) minsize {
; X86-X87-LABEL: pow_wrapper_minsize:
; X86-X87:       # %bb.0:
; X86-X87-NEXT:    subl $12, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 16
; X86-X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fstpl (%esp)
; X86-X87-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-X87-NEXT:    calll __powidf2
; X86-X87-NEXT:    addl $12, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 4
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: pow_wrapper_minsize:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    subl $12, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movsd %xmm0, (%esp)
; X86-SSE-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    calll __powidf2
; X86-SSE-NEXT:    addl $12, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE-NEXT:    retl
;
; X64-LABEL: pow_wrapper_minsize:
; X64:       # %bb.0:
; X64-NEXT:    pushq $15
; X64-NEXT:    .cfi_adjust_cfa_offset 8
; X64-NEXT:    popq %rdi
; X64-NEXT:    .cfi_adjust_cfa_offset -8
; X64-NEXT:    jmp __powidf2@PLT # TAILCALL
  %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind
  ret double %ret
}

; Intrinsic under test: raises a double to an i32 power.
declare double @llvm.powi.f64.i32(double, i32) nounwind readonly

; Module-level ProfileSummary flag. It supplies the profile data that the
; !prof annotation on @pow_wrapper_pgso is interpreted against.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
; Entry count of 0, attached to @pow_wrapper_pgso through !prof !14.
!14 = !{!"function_entry_count", i64 0}