/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 *   2019      Evan Nemerson <evan@nemerson.com>
 */

#if !defined(SIMDE_X86_FMA_H)
#define SIMDE_X86_FMA_H

#include "avx.h"

#if !defined(SIMDE_X86_FMA_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
#  define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES
#endif
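
/* Illustrative usage (not part of this header's API surface): each
 * simde_mm*_f* function below mirrors the corresponding Intel FMA intrinsic,
 * so a fused multiply-add over four floats might look like this, assuming the
 * SSE helpers pulled in via "avx.h":
 *
 *   simde__m128 a = simde_mm_set1_ps(1.5f);
 *   simde__m128 b = simde_mm_set1_ps(2.0f);
 *   simde__m128 c = simde_mm_set1_ps(0.5f);
 *   simde__m128 r = simde_mm_fmadd_ps(a, b, c);   // each lane: (1.5 * 2.0) + 0.5
 */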

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

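/* Fused multiply-add: r = (a * b) + c for each lane.
 *
 * When native FMA (or, for some element types, AltiVec's vec_madd) is
 * available the hardware instruction is used; otherwise the result is
 * computed as a separate multiply followed by an add.  The portable fallback
 * rounds twice instead of once, so results may differ from a true fused
 * operation in the least-significant bits. */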
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_fmadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) {
  #if defined(SIMDE_X86_FMA_NATIVE)
    return _mm_fmadd_pd(a, b, c);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    simde__m128d_private
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b),
      c_ = simde__m128d_to_private(c),
      r_;

    r_.altivec_f64 = vec_madd(a_.altivec_f64, b_.altivec_f64, c_.altivec_f64);

    return simde__m128d_from_private(r_);
  #else
    return simde_mm_add_pd(simde_mm_mul_pd(a, b), c);
  #endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fmadd_pd
  #define _mm_fmadd_pd(a, b, c) simde_mm_fmadd_pd(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_fmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm256_fmadd_pd(a, b, c);
#else
  return simde_mm256_add_pd(simde_mm256_mul_pd(a, b), c);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm256_fmadd_pd
  #define _mm256_fmadd_pd(a, b, c) simde_mm256_fmadd_pd(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_fmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
  #if defined(SIMDE_X86_FMA_NATIVE)
    return _mm_fmadd_ps(a, b, c);
  #elif defined(SIMDE_POWER_ALTIVEC_P5_NATIVE)
    simde__m128_private
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b),
      c_ = simde__m128_to_private(c),
      r_;

    r_.altivec_f32 = vec_madd(a_.altivec_f32, b_.altivec_f32, c_.altivec_f32);

    return simde__m128_from_private(r_);
  #else
    return simde_mm_add_ps(simde_mm_mul_ps(a, b), c);
  #endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fmadd_ps
  #define _mm_fmadd_ps(a, b, c) simde_mm_fmadd_ps(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_fmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm256_fmadd_ps(a, b, c);
#else
  return simde_mm256_add_ps(simde_mm256_mul_ps(a, b), c);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm256_fmadd_ps
  #define _mm256_fmadd_ps(a, b, c) simde_mm256_fmadd_ps(a, b, c)
#endif

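/* Scalar (_sd/_ss) variants: only the lowest element is computed; the
 * remaining elements of the result are copied from a. */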
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_fmadd_sd (simde__m128d a, simde__m128d b, simde__m128d c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm_fmadd_sd(a, b, c);
#else
  return simde_mm_add_sd(simde_mm_mul_sd(a, b), c);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fmadd_sd
  #define _mm_fmadd_sd(a, b, c) simde_mm_fmadd_sd(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_fmadd_ss (simde__m128 a, simde__m128 b, simde__m128 c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm_fmadd_ss(a, b, c);
#else
  return simde_mm_add_ss(simde_mm_mul_ss(a, b), c);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fmadd_ss
  #define _mm_fmadd_ss(a, b, c) simde_mm_fmadd_ss(a, b, c)
#endif

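/* Fused multiply-add/subtract: even-indexed lanes compute (a * b) - c and
 * odd-indexed lanes compute (a * b) + c.  This matches the alternating
 * behaviour of the addsub intrinsics, which the portable fallback reuses. */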
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_fmaddsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm_fmaddsub_pd(a, b, c);
#else
  return simde_mm_addsub_pd(simde_mm_mul_pd(a, b), c);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fmaddsub_pd
  #define _mm_fmaddsub_pd(a, b, c) simde_mm_fmaddsub_pd(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_fmaddsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm256_fmaddsub_pd(a, b, c);
#else
  return simde_mm256_addsub_pd(simde_mm256_mul_pd(a, b), c);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm256_fmaddsub_pd
  #define _mm256_fmaddsub_pd(a, b, c) simde_mm256_fmaddsub_pd(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_fmaddsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm_fmaddsub_ps(a, b, c);
#else
  return simde_mm_addsub_ps(simde_mm_mul_ps(a, b), c);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fmaddsub_ps
  #define _mm_fmaddsub_ps(a, b, c) simde_mm_fmaddsub_ps(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_fmaddsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm256_fmaddsub_ps(a, b, c);
#else
  return simde_mm256_addsub_ps(simde_mm256_mul_ps(a, b), c);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm256_fmaddsub_ps
  #define _mm256_fmaddsub_ps(a, b, c) simde_mm256_fmaddsub_ps(a, b, c)
#endif

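/* Fused multiply-subtract: r = (a * b) - c for each lane (lowest lane only
 * for the _sd/_ss forms). */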
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_fmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm_fmsub_pd(a, b, c);
#else
  return simde_mm_sub_pd(simde_mm_mul_pd(a, b), c);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fmsub_pd
  #define _mm_fmsub_pd(a, b, c) simde_mm_fmsub_pd(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_fmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm256_fmsub_pd(a, b, c);
#else
  return simde_mm256_sub_pd(simde_mm256_mul_pd(a, b), c);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm256_fmsub_pd
  #define _mm256_fmsub_pd(a, b, c) simde_mm256_fmsub_pd(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_fmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm_fmsub_ps(a, b, c);
#else
  return simde_mm_sub_ps(simde_mm_mul_ps(a, b), c);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fmsub_ps
  #define _mm_fmsub_ps(a, b, c) simde_mm_fmsub_ps(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_fmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm256_fmsub_ps(a, b, c);
#else
  return simde_mm256_sub_ps(simde_mm256_mul_ps(a, b), c);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm256_fmsub_ps
  #define _mm256_fmsub_ps(a, b, c) simde_mm256_fmsub_ps(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_fmsub_sd (simde__m128d a, simde__m128d b, simde__m128d c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm_fmsub_sd(a, b, c);
#else
  return simde_mm_sub_sd(simde_mm_mul_sd(a, b), c);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fmsub_sd
  #define _mm_fmsub_sd(a, b, c) simde_mm_fmsub_sd(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_fmsub_ss (simde__m128 a, simde__m128 b, simde__m128 c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm_fmsub_ss(a, b, c);
#else
  return simde_mm_sub_ss(simde_mm_mul_ss(a, b), c);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fmsub_ss
  #define _mm_fmsub_ss(a, b, c) simde_mm_fmsub_ss(a, b, c)
#endif

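/* Fused multiply-subtract/add: even-indexed lanes compute (a * b) + c and
 * odd-indexed lanes compute (a * b) - c.  There is no addsub-style helper
 * with this ordering, so the portable fallback loops over lane pairs. */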
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_fmsubadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm_fmsubadd_pd(a, b, c);
#else
  simde__m128d_private
    r_,
    a_ = simde__m128d_to_private(a),
    b_ = simde__m128d_to_private(b),
    c_ = simde__m128d_to_private(c);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) {
    r_.f64[  i  ] = (a_.f64[  i  ] * b_.f64[  i  ]) + c_.f64[  i  ];
    r_.f64[i + 1] = (a_.f64[i + 1] * b_.f64[i + 1]) - c_.f64[i + 1];
  }

  return simde__m128d_from_private(r_);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fmsubadd_pd
  #define _mm_fmsubadd_pd(a, b, c) simde_mm_fmsubadd_pd(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_fmsubadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm256_fmsubadd_pd(a, b, c);
#else
  simde__m256d_private
    r_,
    a_ = simde__m256d_to_private(a),
    b_ = simde__m256d_to_private(b),
    c_ = simde__m256d_to_private(c);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) {
    r_.f64[  i  ] = (a_.f64[  i  ] * b_.f64[  i  ]) + c_.f64[  i  ];
    r_.f64[i + 1] = (a_.f64[i + 1] * b_.f64[i + 1]) - c_.f64[i + 1];
  }

  return simde__m256d_from_private(r_);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm256_fmsubadd_pd
  #define _mm256_fmsubadd_pd(a, b, c) simde_mm256_fmsubadd_pd(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_fmsubadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm_fmsubadd_ps(a, b, c);
#else
  simde__m128_private
    r_,
    a_ = simde__m128_to_private(a),
    b_ = simde__m128_to_private(b),
    c_ = simde__m128_to_private(c);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {
    r_.f32[  i  ] = (a_.f32[  i  ] * b_.f32[  i  ]) + c_.f32[  i  ];
    r_.f32[i + 1] = (a_.f32[i + 1] * b_.f32[i + 1]) - c_.f32[i + 1];
  }

  return simde__m128_from_private(r_);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fmsubadd_ps
  #define _mm_fmsubadd_ps(a, b, c) simde_mm_fmsubadd_ps(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_fmsubadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm256_fmsubadd_ps(a, b, c);
#else
  simde__m256_private
    r_,
    a_ = simde__m256_to_private(a),
    b_ = simde__m256_to_private(b),
    c_ = simde__m256_to_private(c);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {
    r_.f32[  i  ] = (a_.f32[  i  ] * b_.f32[  i  ]) + c_.f32[  i  ];
    r_.f32[i + 1] = (a_.f32[i + 1] * b_.f32[i + 1]) - c_.f32[i + 1];
  }

  return simde__m256_from_private(r_);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm256_fmsubadd_ps
  #define _mm256_fmsubadd_ps(a, b, c) simde_mm256_fmsubadd_ps(a, b, c)
#endif

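/* Negated fused multiply-add: r = -(a * b) + c for each lane (lowest lane
 * only for the _sd/_ss forms). */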
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_fnmadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm_fnmadd_pd(a, b, c);
#else
  simde__m128d_private
    r_,
    a_ = simde__m128d_to_private(a),
    b_ = simde__m128d_to_private(b),
    c_ = simde__m128d_to_private(c);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
    r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i];
  }

  return simde__m128d_from_private(r_);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fnmadd_pd
  #define _mm_fnmadd_pd(a, b, c) simde_mm_fnmadd_pd(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_fnmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm256_fnmadd_pd(a, b, c);
#else
  simde__m256d_private
    r_,
    a_ = simde__m256d_to_private(a),
    b_ = simde__m256d_to_private(b),
    c_ = simde__m256d_to_private(c);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
    r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i];
  }

  return simde__m256d_from_private(r_);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm256_fnmadd_pd
  #define _mm256_fnmadd_pd(a, b, c) simde_mm256_fnmadd_pd(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_fnmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm_fnmadd_ps(a, b, c);
#else
  simde__m128_private
    r_,
    a_ = simde__m128_to_private(a),
    b_ = simde__m128_to_private(b),
    c_ = simde__m128_to_private(c);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
    r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i];
  }

  return simde__m128_from_private(r_);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fnmadd_ps
  #define _mm_fnmadd_ps(a, b, c) simde_mm_fnmadd_ps(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_fnmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm256_fnmadd_ps(a, b, c);
#else
  simde__m256_private
    r_,
    a_ = simde__m256_to_private(a),
    b_ = simde__m256_to_private(b),
    c_ = simde__m256_to_private(c);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
    r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i];
  }

  return simde__m256_from_private(r_);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm256_fnmadd_ps
  #define _mm256_fnmadd_ps(a, b, c) simde_mm256_fnmadd_ps(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_fnmadd_sd (simde__m128d a, simde__m128d b, simde__m128d c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm_fnmadd_sd(a, b, c);
#else
  simde__m128d_private
    r_,
    a_ = simde__m128d_to_private(a),
    b_ = simde__m128d_to_private(b),
    c_ = simde__m128d_to_private(c);

  r_ = a_;
  r_.f64[0] = -(a_.f64[0] * b_.f64[0]) + c_.f64[0];

  return simde__m128d_from_private(r_);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fnmadd_sd
  #define _mm_fnmadd_sd(a, b, c) simde_mm_fnmadd_sd(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_fnmadd_ss (simde__m128 a, simde__m128 b, simde__m128 c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm_fnmadd_ss(a, b, c);
#else
  simde__m128_private
    r_,
    a_ = simde__m128_to_private(a),
    b_ = simde__m128_to_private(b),
    c_ = simde__m128_to_private(c);

  r_ = a_;
  r_.f32[0] = -(a_.f32[0] * b_.f32[0]) + c_.f32[0];

  return simde__m128_from_private(r_);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fnmadd_ss
  #define _mm_fnmadd_ss(a, b, c) simde_mm_fnmadd_ss(a, b, c)
#endif

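/* Negated fused multiply-subtract: r = -(a * b) - c for each lane (lowest
 * lane only for the _sd/_ss forms). */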
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_fnmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm_fnmsub_pd(a, b, c);
#else
  simde__m128d_private
    r_,
    a_ = simde__m128d_to_private(a),
    b_ = simde__m128d_to_private(b),
    c_ = simde__m128d_to_private(c);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
    r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i];
  }

  return simde__m128d_from_private(r_);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fnmsub_pd
  #define _mm_fnmsub_pd(a, b, c) simde_mm_fnmsub_pd(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_fnmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm256_fnmsub_pd(a, b, c);
#else
  simde__m256d_private
    r_,
    a_ = simde__m256d_to_private(a),
    b_ = simde__m256d_to_private(b),
    c_ = simde__m256d_to_private(c);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
    r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i];
  }

  return simde__m256d_from_private(r_);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm256_fnmsub_pd
  #define _mm256_fnmsub_pd(a, b, c) simde_mm256_fnmsub_pd(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_fnmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm_fnmsub_ps(a, b, c);
#else
  simde__m128_private
    r_,
    a_ = simde__m128_to_private(a),
    b_ = simde__m128_to_private(b),
    c_ = simde__m128_to_private(c);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
    r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i];
  }

  return simde__m128_from_private(r_);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fnmsub_ps
  #define _mm_fnmsub_ps(a, b, c) simde_mm_fnmsub_ps(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_fnmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm256_fnmsub_ps(a, b, c);
#else
  simde__m256_private
    r_,
    a_ = simde__m256_to_private(a),
    b_ = simde__m256_to_private(b),
    c_ = simde__m256_to_private(c);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
    r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i];
  }

  return simde__m256_from_private(r_);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm256_fnmsub_ps
  #define _mm256_fnmsub_ps(a, b, c) simde_mm256_fnmsub_ps(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_fnmsub_sd (simde__m128d a, simde__m128d b, simde__m128d c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm_fnmsub_sd(a, b, c);
#else
  simde__m128d_private
    r_,
    a_ = simde__m128d_to_private(a),
    b_ = simde__m128d_to_private(b),
    c_ = simde__m128d_to_private(c);

  r_ = a_;
  r_.f64[0] = -(a_.f64[0] * b_.f64[0]) - c_.f64[0];

  return simde__m128d_from_private(r_);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fnmsub_sd
  #define _mm_fnmsub_sd(a, b, c) simde_mm_fnmsub_sd(a, b, c)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_fnmsub_ss (simde__m128 a, simde__m128 b, simde__m128 c) {
#if defined(SIMDE_X86_FMA_NATIVE)
  return _mm_fnmsub_ss(a, b, c);
#else
  simde__m128_private
    r_,
    a_ = simde__m128_to_private(a),
    b_ = simde__m128_to_private(b),
    c_ = simde__m128_to_private(c);

  r_ = a_;
  r_.f32[0] = -(a_.f32[0] * b_.f32[0]) - c_.f32[0];

  return simde__m128_from_private(r_);
#endif
}
#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
  #undef _mm_fnmsub_ss
  #define _mm_fnmsub_ss(a, b, c) simde_mm_fnmsub_ss(a, b, c)
#endif

SIMDE_END_DECLS_

HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_X86_FMA_H) */