; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+use-recip-est,+avx -x86-recip-refinement-steps=2 | FileCheck %s --check-prefix=REFINE
;
; Prefix map for the three llc invocations above:
;   default prefix -> -mcpu=core2
;   BTVER2 prefix  -> -mcpu=btver2
;   REFINE prefix  -> -mattr=+use-recip-est,+avx with -x86-recip-refinement-steps=2

; If the target's divss/divps instructions are substantially
; slower than rcpss/rcpps with a Newton-Raphson refinement,
; we should generate the estimate sequence.

; See PR21385 ( http://llvm.org/bugs/show_bug.cgi?id=21385 )
; for details about the accuracy, speed, and implementation
; differences of x86 reciprocal estimates.

; Scalar case: computes 1.0 / %x on a single float, with the 'fast' flag on
; the fdiv and attribute group #0 on the function.
;
; Expected lowering, by FileCheck prefix:
;   - default prefix (core2 run): a real divss is kept; no estimate sequence.
;   - BTVER2 prefix (btver2 run): vrcpss followed by one mul/sub/mul/add group.
;   - REFINE prefix (two refinement steps requested on the command line):
;     vrcpss followed by two mul/sub/mul/add groups.
; The BTVER2 and REFINE checks are deliberately not all -NEXT, so unrelated
; instructions may appear between the matched ones.
define float @reciprocal_estimate(float %x) #0 {
  %div = fdiv fast float 1.0, %x
  ret float %div

; CHECK-LABEL: reciprocal_estimate:
; CHECK: movss
; CHECK-NEXT: divss
; CHECK-NEXT: movaps
; CHECK-NEXT: retq

; BTVER2-LABEL: reciprocal_estimate:
; BTVER2: vrcpss
; BTVER2: vmulss
; BTVER2: vsubss
; BTVER2: vmulss
; BTVER2: vaddss
; BTVER2-NEXT: retq

; REFINE-LABEL: reciprocal_estimate:
; REFINE: vrcpss
; REFINE: vmulss
; REFINE: vsubss
; REFINE: vmulss
; REFINE: vaddss
; REFINE: vmulss
; REFINE: vsubss
; REFINE: vmulss
; REFINE: vaddss
; REFINE-NEXT: retq
}

; 4 x float case: computes a splat of 1.0 divided element-wise by %x, with the
; 'fast' flag on the fdiv and attribute group #0 on the function.
;
; Expected lowering, by FileCheck prefix:
;   - default prefix (core2 run): a real divps is kept; no estimate sequence.
;   - BTVER2 prefix (btver2 run): vrcpps followed by one mul/sub/mul/add group.
;   - REFINE prefix (two refinement steps requested on the command line):
;     vrcpps followed by two mul/sub/mul/add groups.
define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
  %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
  ret <4 x float> %div

; CHECK-LABEL: reciprocal_estimate_v4f32:
; CHECK: movaps
; CHECK-NEXT: divps
; CHECK-NEXT: movaps
; CHECK-NEXT: retq

; BTVER2-LABEL: reciprocal_estimate_v4f32:
; BTVER2: vrcpps
; BTVER2: vmulps
; BTVER2: vsubps
; BTVER2: vmulps
; BTVER2: vaddps
; BTVER2-NEXT: retq

; REFINE-LABEL: reciprocal_estimate_v4f32:
; REFINE: vrcpps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE-NEXT: retq
}

; 8 x float case: computes a splat of 1.0 divided element-wise by %x, with the
; 'fast' flag on the fdiv and attribute group #0 on the function.
;
; Expected lowering, by FileCheck prefix:
;   - default prefix (core2 run): two divps instructions, each group bracketed
;     by two movaps. NOTE(review): presumably the 256-bit value is handled as
;     two 128-bit halves because this run has no AVX -- confirm.
;   - BTVER2 prefix (btver2 run): a single vrcpps followed by one
;     mul/sub/mul/add group.
;   - REFINE prefix (two refinement steps requested on the command line):
;     a single vrcpps followed by two mul/sub/mul/add groups.
define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
  %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
  ret <8 x float> %div

; CHECK-LABEL: reciprocal_estimate_v8f32:
; CHECK: movaps
; CHECK: movaps
; CHECK-NEXT: divps
; CHECK-NEXT: divps
; CHECK-NEXT: movaps
; CHECK-NEXT: movaps
; CHECK-NEXT: retq

; BTVER2-LABEL: reciprocal_estimate_v8f32:
; BTVER2: vrcpps
; BTVER2: vmulps
; BTVER2: vsubps
; BTVER2: vmulps
; BTVER2: vaddps
; BTVER2-NEXT: retq

; REFINE-LABEL: reciprocal_estimate_v8f32:
; REFINE: vrcpps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE-NEXT: retq
}

; Attribute group #0 is referenced by the function definitions in this file;
; it sets the "unsafe-fp-math" string attribute to "true".
attributes #0 = { "unsafe-fp-math"="true" }
