; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -fp-contract=fast | FileCheck %s
; Check generated fused MAC and MLS.

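; An fmul followed by an fadd of the product is expected to fuse into VFMA
; under -fp-contract=fast.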
define double @fusedMACTest1(double %d1, double %d2, double %d3) {
;CHECK-LABEL: fusedMACTest1:
;CHECK: vfma.f64
  %1 = fmul double %d1, %d2
  %2 = fadd double %1, %d3
  ret double %2
}

define float @fusedMACTest2(float %f1, float %f2, float %f3) {
;CHECK-LABEL: fusedMACTest2:
;CHECK: vfma.f32
  %1 = fmul float %f1, %f2
  %2 = fadd float %1, %f3
  ret float %2
}

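; Subtracting the product from the accumulator (a - b*c) is expected to form VFMS.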
define double @fusedMACTest3(double %d1, double %d2, double %d3) {
;CHECK-LABEL: fusedMACTest3:
;CHECK: vfms.f64
  %1 = fmul double %d2, %d3
  %2 = fsub double %d1, %1
  ret double %2
}

define float @fusedMACTest4(float %f1, float %f2, float %f3) {
;CHECK-LABEL: fusedMACTest4:
;CHECK: vfms.f32
  %1 = fmul float %f2, %f3
  %2 = fsub float %f1, %1
  ret float %2
}

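; Negating the product and then subtracting the accumulator (-(a*b) - c) is expected to form VFNMA.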
define double @fusedMACTest5(double %d1, double %d2, double %d3) {
;CHECK-LABEL: fusedMACTest5:
;CHECK: vfnma.f64
  %1 = fmul double %d1, %d2
  %2 = fsub double -0.0, %1
  %3 = fsub double %2, %d3
  ret double %3
}

define float @fusedMACTest6(float %f1, float %f2, float %f3) {
;CHECK-LABEL: fusedMACTest6:
;CHECK: vfnma.f32
  %1 = fmul float %f1, %f2
  %2 = fsub float -0.0, %1
  %3 = fsub float %2, %f3
  ret float %3
}

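; Subtracting the accumulator from the product (a*b - c) is expected to form VFNMS.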
define double @fusedMACTest7(double %d1, double %d2, double %d3) {
;CHECK-LABEL: fusedMACTest7:
;CHECK: vfnms.f64
  %1 = fmul double %d1, %d2
  %2 = fsub double %1, %d3
  ret double %2
}

define float @fusedMACTest8(float %f1, float %f2, float %f3) {
;CHECK-LABEL: fusedMACTest8:
;CHECK: vfnms.f32
  %1 = fmul float %f1, %f2
  %2 = fsub float %1, %f3
  ret float %2
}

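; The same mul/add and mul/sub fusion is expected for <2 x float> and <4 x float> vectors.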
define <2 x float> @fusedMACTest9(<2 x float> %a, <2 x float> %b) {
;CHECK-LABEL: fusedMACTest9:
;CHECK: vfma.f32
  %mul = fmul <2 x float> %a, %b
  %add = fadd <2 x float> %mul, %a
  ret <2 x float> %add
}

define <2 x float> @fusedMACTest10(<2 x float> %a, <2 x float> %b) {
;CHECK-LABEL: fusedMACTest10:
;CHECK: vfms.f32
  %mul = fmul <2 x float> %a, %b
  %sub = fsub <2 x float> %a, %mul
  ret <2 x float> %sub
}

define <4 x float> @fusedMACTest11(<4 x float> %a, <4 x float> %b) {
;CHECK-LABEL: fusedMACTest11:
;CHECK: vfma.f32
  %mul = fmul <4 x float> %a, %b
  %add = fadd <4 x float> %mul, %a
  ret <4 x float> %add
}

define <4 x float> @fusedMACTest12(<4 x float> %a, <4 x float> %b) {
;CHECK-LABEL: fusedMACTest12:
;CHECK: vfms.f32
  %mul = fmul <4 x float> %a, %b
  %sub = fsub <4 x float> %a, %mul
  ret <4 x float> %sub
}

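; Direct calls to the llvm.fma.* intrinsics are expected to select VFMA.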
define float @test_fma_f32(float %a, float %b, float %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: test_fma_f32:
; CHECK: vfma.f32
  %tmp1 = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
  ret float %tmp1
}

define double @test_fma_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: test_fma_f64:
; CHECK: vfma.f64
  %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
  ret double %tmp1
}

define <2 x float> @test_fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: test_fma_v2f32:
; CHECK: vfma.f32
  %tmp1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
  ret <2 x float> %tmp1
}

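; An llvm.fma call with one multiplicand negated is expected to select VFMS.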
define double @test_fms_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: test_fms_f64:
; CHECK: vfms.f64
  %tmp1 = fsub double -0.0, %a
  %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
  ret double %tmp2
}

define double @test_fms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: test_fms_f64_2:
; CHECK: vfms.f64
  %tmp1 = fsub double -0.0, %b
  %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
  ret double %tmp2
}

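; Negating the addend, or negating both one multiplicand and the result, is expected to fold to VFNMS.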
define float @test_fnms_f32(float %a, float %b, float* %c) nounwind readnone ssp {
; CHECK-LABEL: test_fnms_f32:
; CHECK: vfnms.f32
  %tmp1 = load float, float* %c, align 4
  %tmp2 = fsub float -0.0, %tmp1
  %tmp3 = tail call float @llvm.fma.f32(float %a, float %b, float %tmp2) nounwind readnone
  ret float %tmp3
}

define double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: test_fnms_f64:
; CHECK: vfnms.f64
  %tmp1 = fsub double -0.0, %a
  %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
  %tmp3 = fsub double -0.0, %tmp2
  ret double %tmp3
}

define double @test_fnms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: test_fnms_f64_2:
; CHECK: vfnms.f64
  %tmp1 = fsub double -0.0, %b
  %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
  %tmp3 = fsub double -0.0, %tmp2
  ret double %tmp3
}

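; Negating the result of an fma, or negating both the first multiplicand and the addend, is expected to fold to VFNMA.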
define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: test_fnma_f64:
; CHECK: vfnma.f64
  %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
  %tmp2 = fsub double -0.0, %tmp1
  ret double %tmp2
}

define double @test_fnma_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: test_fnma_f64_2:
; CHECK: vfnma.f64
  %tmp1 = fsub double -0.0, %a
  %tmp2 = fsub double -0.0, %c
  %tmp3 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %tmp2) nounwind readnone
  ret double %tmp3
}

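; fma(a, 1.0, b) should be folded to a plain add; no multiply or fused multiply should remain.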
define float @test_fma_const_fold(float %a, float %b) nounwind {
; CHECK-LABEL: test_fma_const_fold:
; CHECK-NOT: vfma
; CHECK-NOT: vmul
; CHECK: vadd
  %ret = call float @llvm.fma.f32(float %a, float 1.0, float %b)
  ret float %ret
}

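; A constant multiplicand is expected to be canonicalized to the second operand, materialized with vmov.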
define float @test_fma_canonicalize(float %a, float %b) nounwind {
; CHECK-LABEL: test_fma_canonicalize:
; CHECK: vmov.f32 [[R1:s[0-9]+]], #2.000000e+00
; CHECK: vfma.f32 {{s[0-9]+}}, {{s[0-9]+}}, [[R1]]
  %ret = call float @llvm.fma.f32(float 2.0, float %a, float %b)
  ret float %ret
}

; Check that a very wide vector fma can be split into legal-width fmas;
; the <8 x float> operation should legalize to two vfma.f32 instructions.
define void @test_fma_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>* %p) nounwind readnone ssp {
; CHECK-LABEL: test_fma_v8f32:
; CHECK: vfma.f32
; CHECK: vfma.f32
entry:
  %call = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind readnone
  store <8 x float> %call, <8 x float>* %p, align 16
  ret void
}

declare float @llvm.fma.f32(float, float, float) nounwind readnone
declare double @llvm.fma.f64(double, double, double) nounwind readnone
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) nounwind readnone