1 /* SPDX-License-Identifier: MIT
2 *
3 * Permission is hereby granted, free of charge, to any person
4 * obtaining a copy of this software and associated documentation
5 * files (the "Software"), to deal in the Software without
6 * restriction, including without limitation the rights to use, copy,
7 * modify, merge, publish, distribute, sublicense, and/or sell copies
8 * of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be
12 * included in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Copyright:
24 * 2019 Evan Nemerson <evan@nemerson.com>
25 */
26
27 #if !defined(SIMDE_X86_FMA_H)
28 #define SIMDE_X86_FMA_H
29
30 #include "avx.h"
31
32 #if !defined(SIMDE_X86_FMA_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
33 # define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES
34 #endif
35
36 HEDLEY_DIAGNOSTIC_PUSH
37 SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
38 SIMDE_BEGIN_DECLS_
39
40 SIMDE_FUNCTION_ATTRIBUTES
41 simde__m128d
simde_mm_fmadd_pd(simde__m128d a,simde__m128d b,simde__m128d c)42 simde_mm_fmadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) {
43 #if defined(SIMDE_X86_FMA_NATIVE)
44 return _mm_fmadd_pd(a, b, c);
45 #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
46 simde__m128d_private
47 a_ = simde__m128d_to_private(a),
48 b_ = simde__m128d_to_private(b),
49 c_ = simde__m128d_to_private(c),
50 r_;
51
52 r_.altivec_f64 = vec_madd(a_.altivec_f64, b_.altivec_f64, c_.altivec_f64);
53
54 return simde__m128d_from_private(r_);
55 #else
56 return simde_mm_add_pd(simde_mm_mul_pd(a, b), c);
57 #endif
58 }
59 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
60 #undef _mm_fmadd_pd
61 #define _mm_fmadd_pd(a, b, c) simde_mm_fmadd_pd(a, b, c)
62 #endif
63
64 SIMDE_FUNCTION_ATTRIBUTES
65 simde__m256d
simde_mm256_fmadd_pd(simde__m256d a,simde__m256d b,simde__m256d c)66 simde_mm256_fmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
67 #if defined(SIMDE_X86_FMA_NATIVE)
68 return _mm256_fmadd_pd(a, b, c);
69 #else
70 return simde_mm256_add_pd(simde_mm256_mul_pd(a, b), c);
71 #endif
72 }
73 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
74 #undef _mm256_fmadd_pd
75 #define _mm256_fmadd_pd(a, b, c) simde_mm256_fmadd_pd(a, b, c)
76 #endif
77
78 SIMDE_FUNCTION_ATTRIBUTES
79 simde__m128
simde_mm_fmadd_ps(simde__m128 a,simde__m128 b,simde__m128 c)80 simde_mm_fmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
81 #if defined(SIMDE_X86_FMA_NATIVE)
82 return _mm_fmadd_ps(a, b, c);
83 #elif defined(SIMDE_POWER_ALTIVEC_P5_NATIVE)
84 simde__m128_private
85 a_ = simde__m128_to_private(a),
86 b_ = simde__m128_to_private(b),
87 c_ = simde__m128_to_private(c),
88 r_;
89
90 r_.altivec_f32 = vec_madd(a_.altivec_f32, b_.altivec_f32, c_.altivec_f32);
91
92 return simde__m128_from_private(r_);
93 #else
94 return simde_mm_add_ps(simde_mm_mul_ps(a, b), c);
95 #endif
96 }
97 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
98 #undef _mm_fmadd_ps
99 #define _mm_fmadd_ps(a, b, c) simde_mm_fmadd_ps(a, b, c)
100 #endif
101
102 SIMDE_FUNCTION_ATTRIBUTES
103 simde__m256
simde_mm256_fmadd_ps(simde__m256 a,simde__m256 b,simde__m256 c)104 simde_mm256_fmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
105 #if defined(SIMDE_X86_FMA_NATIVE)
106 return _mm256_fmadd_ps(a, b, c);
107 #else
108 return simde_mm256_add_ps(simde_mm256_mul_ps(a, b), c);
109 #endif
110 }
111 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
112 #undef _mm256_fmadd_ps
113 #define _mm256_fmadd_ps(a, b, c) simde_mm256_fmadd_ps(a, b, c)
114 #endif
115
116 SIMDE_FUNCTION_ATTRIBUTES
117 simde__m128d
simde_mm_fmadd_sd(simde__m128d a,simde__m128d b,simde__m128d c)118 simde_mm_fmadd_sd (simde__m128d a, simde__m128d b, simde__m128d c) {
119 #if defined(SIMDE_X86_FMA_NATIVE)
120 return _mm_fmadd_sd(a, b, c);
121 #else
122 return simde_mm_add_sd(simde_mm_mul_sd(a, b), c);
123 #endif
124 }
125 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
126 #undef _mm_fmadd_sd
127 #define _mm_fmadd_sd(a, b, c) simde_mm_fmadd_sd(a, b, c)
128 #endif
129
130 SIMDE_FUNCTION_ATTRIBUTES
131 simde__m128
simde_mm_fmadd_ss(simde__m128 a,simde__m128 b,simde__m128 c)132 simde_mm_fmadd_ss (simde__m128 a, simde__m128 b, simde__m128 c) {
133 #if defined(SIMDE_X86_FMA_NATIVE)
134 return _mm_fmadd_ss(a, b, c);
135 #else
136 return simde_mm_add_ss(simde_mm_mul_ss(a, b), c);
137 #endif
138 }
139 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
140 #undef _mm_fmadd_ss
141 #define _mm_fmadd_ss(a, b, c) simde_mm_fmadd_ss(a, b, c)
142 #endif
143
144 SIMDE_FUNCTION_ATTRIBUTES
145 simde__m128d
simde_mm_fmaddsub_pd(simde__m128d a,simde__m128d b,simde__m128d c)146 simde_mm_fmaddsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) {
147 #if defined(SIMDE_X86_FMA_NATIVE)
148 return _mm_fmaddsub_pd(a, b, c);
149 #else
150 return simde_mm_addsub_pd(simde_mm_mul_pd(a, b), c);
151 #endif
152 }
153 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
154 #undef _mm_fmaddsub_pd
155 #define _mm_fmaddsub_pd(a, b, c) simde_mm_fmaddsub_pd(a, b, c)
156 #endif
157
158 SIMDE_FUNCTION_ATTRIBUTES
159 simde__m256d
simde_mm256_fmaddsub_pd(simde__m256d a,simde__m256d b,simde__m256d c)160 simde_mm256_fmaddsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
161 #if defined(SIMDE_X86_FMA_NATIVE)
162 return _mm256_fmaddsub_pd(a, b, c);
163 #else
164 return simde_mm256_addsub_pd(simde_mm256_mul_pd(a, b), c);
165 #endif
166 }
167 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
168 #undef _mm256_fmaddsub_pd
169 #define _mm256_fmaddsub_pd(a, b, c) simde_mm256_fmaddsub_pd(a, b, c)
170 #endif
171
172 SIMDE_FUNCTION_ATTRIBUTES
173 simde__m128
simde_mm_fmaddsub_ps(simde__m128 a,simde__m128 b,simde__m128 c)174 simde_mm_fmaddsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
175 #if defined(SIMDE_X86_FMA_NATIVE)
176 return _mm_fmaddsub_ps(a, b, c);
177 #else
178 return simde_mm_addsub_ps(simde_mm_mul_ps(a, b), c);
179 #endif
180 }
181 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
182 #undef _mm_fmaddsub_ps
183 #define _mm_fmaddsub_ps(a, b, c) simde_mm_fmaddsub_ps(a, b, c)
184 #endif
185
186 SIMDE_FUNCTION_ATTRIBUTES
187 simde__m256
simde_mm256_fmaddsub_ps(simde__m256 a,simde__m256 b,simde__m256 c)188 simde_mm256_fmaddsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
189 #if defined(SIMDE_X86_FMA_NATIVE)
190 return _mm256_fmaddsub_ps(a, b, c);
191 #else
192 return simde_mm256_addsub_ps(simde_mm256_mul_ps(a, b), c);
193 #endif
194 }
195 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
196 #undef _mm256_fmaddsub_ps
197 #define _mm256_fmaddsub_ps(a, b, c) simde_mm256_fmaddsub_ps(a, b, c)
198 #endif
199
200 SIMDE_FUNCTION_ATTRIBUTES
201 simde__m128d
simde_mm_fmsub_pd(simde__m128d a,simde__m128d b,simde__m128d c)202 simde_mm_fmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) {
203 #if defined(SIMDE_X86_FMA_NATIVE)
204 return _mm_fmsub_pd(a, b, c);
205 #else
206 return simde_mm_sub_pd(simde_mm_mul_pd(a, b), c);
207 #endif
208 }
209 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
210 #undef _mm_fmsub_pd
211 #define _mm_fmsub_pd(a, b, c) simde_mm_fmsub_pd(a, b, c)
212 #endif
213
214 SIMDE_FUNCTION_ATTRIBUTES
215 simde__m256d
simde_mm256_fmsub_pd(simde__m256d a,simde__m256d b,simde__m256d c)216 simde_mm256_fmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
217 #if defined(SIMDE_X86_FMA_NATIVE)
218 return _mm256_fmsub_pd(a, b, c);
219 #else
220 return simde_mm256_sub_pd(simde_mm256_mul_pd(a, b), c);
221 #endif
222 }
223 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
224 #undef _mm256_fmsub_pd
225 #define _mm256_fmsub_pd(a, b, c) simde_mm256_fmsub_pd(a, b, c)
226 #endif
227
228 SIMDE_FUNCTION_ATTRIBUTES
229 simde__m128
simde_mm_fmsub_ps(simde__m128 a,simde__m128 b,simde__m128 c)230 simde_mm_fmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
231 #if defined(SIMDE_X86_FMA_NATIVE)
232 return _mm_fmsub_ps(a, b, c);
233 #else
234 return simde_mm_sub_ps(simde_mm_mul_ps(a, b), c);
235 #endif
236 }
237 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
238 #undef _mm_fmsub_ps
239 #define _mm_fmsub_ps(a, b, c) simde_mm_fmsub_ps(a, b, c)
240 #endif
241
242 SIMDE_FUNCTION_ATTRIBUTES
243 simde__m256
simde_mm256_fmsub_ps(simde__m256 a,simde__m256 b,simde__m256 c)244 simde_mm256_fmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
245 #if defined(SIMDE_X86_FMA_NATIVE)
246 return _mm256_fmsub_ps(a, b, c);
247 #else
248 return simde_mm256_sub_ps(simde_mm256_mul_ps(a, b), c);
249 #endif
250 }
251 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
252 #undef _mm256_fmsub_ps
253 #define _mm256_fmsub_ps(a, b, c) simde_mm256_fmsub_ps(a, b, c)
254 #endif
255
256 SIMDE_FUNCTION_ATTRIBUTES
257 simde__m128d
simde_mm_fmsub_sd(simde__m128d a,simde__m128d b,simde__m128d c)258 simde_mm_fmsub_sd (simde__m128d a, simde__m128d b, simde__m128d c) {
259 #if defined(SIMDE_X86_FMA_NATIVE)
260 return _mm_fmsub_sd(a, b, c);
261 #else
262 return simde_mm_sub_sd(simde_mm_mul_sd(a, b), c);
263 #endif
264 }
265 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
266 #undef _mm_fmsub_sd
267 #define _mm_fmsub_sd(a, b, c) simde_mm_fmsub_sd(a, b, c)
268 #endif
269
270 SIMDE_FUNCTION_ATTRIBUTES
271 simde__m128
simde_mm_fmsub_ss(simde__m128 a,simde__m128 b,simde__m128 c)272 simde_mm_fmsub_ss (simde__m128 a, simde__m128 b, simde__m128 c) {
273 #if defined(SIMDE_X86_FMA_NATIVE)
274 return _mm_fmsub_ss(a, b, c);
275 #else
276 return simde_mm_sub_ss(simde_mm_mul_ss(a, b), c);
277 #endif
278 }
279 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
280 #undef _mm_fmsub_ss
281 #define _mm_fmsub_ss(a, b, c) simde_mm_fmsub_ss(a, b, c)
282 #endif
283
284 SIMDE_FUNCTION_ATTRIBUTES
285 simde__m128d
simde_mm_fmsubadd_pd(simde__m128d a,simde__m128d b,simde__m128d c)286 simde_mm_fmsubadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) {
287 #if defined(SIMDE_X86_FMA_NATIVE)
288 return _mm_fmsubadd_pd(a, b, c);
289 #else
290 simde__m128d_private
291 r_,
292 a_ = simde__m128d_to_private(a),
293 b_ = simde__m128d_to_private(b),
294 c_ = simde__m128d_to_private(c);
295
296 SIMDE_VECTORIZE
297 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) {
298 r_.f64[ i ] = (a_.f64[ i ] * b_.f64[ i ]) + c_.f64[ i ];
299 r_.f64[i + 1] = (a_.f64[i + 1] * b_.f64[i + 1]) - c_.f64[i + 1];
300 }
301
302 return simde__m128d_from_private(r_);
303 #endif
304 }
305 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
306 #undef _mm_fmsubadd_pd
307 #define _mm_fmsubadd_pd(a, b, c) simde_mm_fmsubadd_pd(a, b, c)
308 #endif
309
310 SIMDE_FUNCTION_ATTRIBUTES
311 simde__m256d
simde_mm256_fmsubadd_pd(simde__m256d a,simde__m256d b,simde__m256d c)312 simde_mm256_fmsubadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
313 #if defined(SIMDE_X86_FMA_NATIVE)
314 return _mm256_fmsubadd_pd(a, b, c);
315 #else
316 simde__m256d_private
317 r_,
318 a_ = simde__m256d_to_private(a),
319 b_ = simde__m256d_to_private(b),
320 c_ = simde__m256d_to_private(c);
321
322 SIMDE_VECTORIZE
323 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) {
324 r_.f64[ i ] = (a_.f64[ i ] * b_.f64[ i ]) + c_.f64[ i ];
325 r_.f64[i + 1] = (a_.f64[i + 1] * b_.f64[i + 1]) - c_.f64[i + 1];
326 }
327
328 return simde__m256d_from_private(r_);
329 #endif
330 }
331 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
332 #undef _mm256_fmsubadd_pd
333 #define _mm256_fmsubadd_pd(a, b, c) simde_mm256_fmsubadd_pd(a, b, c)
334 #endif
335
336 SIMDE_FUNCTION_ATTRIBUTES
337 simde__m128
simde_mm_fmsubadd_ps(simde__m128 a,simde__m128 b,simde__m128 c)338 simde_mm_fmsubadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
339 #if defined(SIMDE_X86_FMA_NATIVE)
340 return _mm_fmsubadd_ps(a, b, c);
341 #else
342 simde__m128_private
343 r_,
344 a_ = simde__m128_to_private(a),
345 b_ = simde__m128_to_private(b),
346 c_ = simde__m128_to_private(c);
347
348 SIMDE_VECTORIZE
349 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {
350 r_.f32[ i ] = (a_.f32[ i ] * b_.f32[ i ]) + c_.f32[ i ];
351 r_.f32[i + 1] = (a_.f32[i + 1] * b_.f32[i + 1]) - c_.f32[i + 1];
352 }
353
354 return simde__m128_from_private(r_);
355 #endif
356 }
357 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
358 #undef _mm_fmsubadd_ps
359 #define _mm_fmsubadd_ps(a, b, c) simde_mm_fmsubadd_ps(a, b, c)
360 #endif
361
362 SIMDE_FUNCTION_ATTRIBUTES
363 simde__m256
simde_mm256_fmsubadd_ps(simde__m256 a,simde__m256 b,simde__m256 c)364 simde_mm256_fmsubadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
365 #if defined(SIMDE_X86_FMA_NATIVE)
366 return _mm256_fmsubadd_ps(a, b, c);
367 #else
368 simde__m256_private
369 r_,
370 a_ = simde__m256_to_private(a),
371 b_ = simde__m256_to_private(b),
372 c_ = simde__m256_to_private(c);
373
374 SIMDE_VECTORIZE
375 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {
376 r_.f32[ i ] = (a_.f32[ i ] * b_.f32[ i ]) + c_.f32[ i ];
377 r_.f32[i + 1] = (a_.f32[i + 1] * b_.f32[i + 1]) - c_.f32[i + 1];
378 }
379
380 return simde__m256_from_private(r_);
381 #endif
382 }
383 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
384 #undef _mm256_fmsubadd_ps
385 #define _mm256_fmsubadd_ps(a, b, c) simde_mm256_fmsubadd_ps(a, b, c)
386 #endif
387
388 SIMDE_FUNCTION_ATTRIBUTES
389 simde__m128d
simde_mm_fnmadd_pd(simde__m128d a,simde__m128d b,simde__m128d c)390 simde_mm_fnmadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) {
391 #if defined(SIMDE_X86_FMA_NATIVE)
392 return _mm_fnmadd_pd(a, b, c);
393 #else
394 simde__m128d_private
395 r_,
396 a_ = simde__m128d_to_private(a),
397 b_ = simde__m128d_to_private(b),
398 c_ = simde__m128d_to_private(c);
399
400 SIMDE_VECTORIZE
401 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
402 r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i];
403 }
404
405 return simde__m128d_from_private(r_);
406 #endif
407 }
408 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
409 #undef _mm_fnmadd_pd
410 #define _mm_fnmadd_pd(a, b, c) simde_mm_fnmadd_pd(a, b, c)
411 #endif
412
413 SIMDE_FUNCTION_ATTRIBUTES
414 simde__m256d
simde_mm256_fnmadd_pd(simde__m256d a,simde__m256d b,simde__m256d c)415 simde_mm256_fnmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
416 #if defined(SIMDE_X86_FMA_NATIVE)
417 return _mm256_fnmadd_pd(a, b, c);
418 #else
419 simde__m256d_private
420 r_,
421 a_ = simde__m256d_to_private(a),
422 b_ = simde__m256d_to_private(b),
423 c_ = simde__m256d_to_private(c);
424
425 SIMDE_VECTORIZE
426 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
427 r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i];
428 }
429
430 return simde__m256d_from_private(r_);
431 #endif
432 }
433 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
434 #undef _mm256_fnmadd_pd
435 #define _mm256_fnmadd_pd(a, b, c) simde_mm256_fnmadd_pd(a, b, c)
436 #endif
437
438 SIMDE_FUNCTION_ATTRIBUTES
439 simde__m128
simde_mm_fnmadd_ps(simde__m128 a,simde__m128 b,simde__m128 c)440 simde_mm_fnmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
441 #if defined(SIMDE_X86_FMA_NATIVE)
442 return _mm_fnmadd_ps(a, b, c);
443 #else
444 simde__m128_private
445 r_,
446 a_ = simde__m128_to_private(a),
447 b_ = simde__m128_to_private(b),
448 c_ = simde__m128_to_private(c);
449
450 SIMDE_VECTORIZE
451 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
452 r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i];
453 }
454
455 return simde__m128_from_private(r_);
456 #endif
457 }
458 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
459 #undef _mm_fnmadd_ps
460 #define _mm_fnmadd_ps(a, b, c) simde_mm_fnmadd_ps(a, b, c)
461 #endif
462
463 SIMDE_FUNCTION_ATTRIBUTES
464 simde__m256
simde_mm256_fnmadd_ps(simde__m256 a,simde__m256 b,simde__m256 c)465 simde_mm256_fnmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
466 #if defined(SIMDE_X86_FMA_NATIVE)
467 return _mm256_fnmadd_ps(a, b, c);
468 #else
469 simde__m256_private
470 r_,
471 a_ = simde__m256_to_private(a),
472 b_ = simde__m256_to_private(b),
473 c_ = simde__m256_to_private(c);
474
475 SIMDE_VECTORIZE
476 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
477 r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i];
478 }
479
480 return simde__m256_from_private(r_);
481 #endif
482 }
483 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
484 #undef _mm256_fnmadd_ps
485 #define _mm256_fnmadd_ps(a, b, c) simde_mm256_fnmadd_ps(a, b, c)
486 #endif
487
488 SIMDE_FUNCTION_ATTRIBUTES
489 simde__m128d
simde_mm_fnmadd_sd(simde__m128d a,simde__m128d b,simde__m128d c)490 simde_mm_fnmadd_sd (simde__m128d a, simde__m128d b, simde__m128d c) {
491 #if defined(SIMDE_X86_FMA_NATIVE)
492 return _mm_fnmadd_sd(a, b, c);
493 #else
494 simde__m128d_private
495 r_,
496 a_ = simde__m128d_to_private(a),
497 b_ = simde__m128d_to_private(b),
498 c_ = simde__m128d_to_private(c);
499
500 r_ = a_;
501 r_.f64[0] = -(a_.f64[0] * b_.f64[0]) + c_.f64[0];
502
503 return simde__m128d_from_private(r_);
504 #endif
505 }
506 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
507 #undef _mm_fnmadd_sd
508 #define _mm_fnmadd_sd(a, b, c) simde_mm_fnmadd_sd(a, b, c)
509 #endif
510
511 SIMDE_FUNCTION_ATTRIBUTES
512 simde__m128
simde_mm_fnmadd_ss(simde__m128 a,simde__m128 b,simde__m128 c)513 simde_mm_fnmadd_ss (simde__m128 a, simde__m128 b, simde__m128 c) {
514 #if defined(SIMDE_X86_FMA_NATIVE)
515 return _mm_fnmadd_ss(a, b, c);
516 #else
517 simde__m128_private
518 r_,
519 a_ = simde__m128_to_private(a),
520 b_ = simde__m128_to_private(b),
521 c_ = simde__m128_to_private(c);
522
523 r_ = a_;
524 r_.f32[0] = -(a_.f32[0] * b_.f32[0]) + c_.f32[0];
525
526 return simde__m128_from_private(r_);
527 #endif
528 }
529 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
530 #undef _mm_fnmadd_ss
531 #define _mm_fnmadd_ss(a, b, c) simde_mm_fnmadd_ss(a, b, c)
532 #endif
533
534 SIMDE_FUNCTION_ATTRIBUTES
535 simde__m128d
simde_mm_fnmsub_pd(simde__m128d a,simde__m128d b,simde__m128d c)536 simde_mm_fnmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) {
537 #if defined(SIMDE_X86_FMA_NATIVE)
538 return _mm_fnmsub_pd(a, b, c);
539 #else
540 simde__m128d_private
541 r_,
542 a_ = simde__m128d_to_private(a),
543 b_ = simde__m128d_to_private(b),
544 c_ = simde__m128d_to_private(c);
545
546 SIMDE_VECTORIZE
547 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
548 r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i];
549 }
550
551 return simde__m128d_from_private(r_);
552 #endif
553 }
554 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
555 #undef _mm_fnmsub_pd
556 #define _mm_fnmsub_pd(a, b, c) simde_mm_fnmsub_pd(a, b, c)
557 #endif
558
559 SIMDE_FUNCTION_ATTRIBUTES
560 simde__m256d
simde_mm256_fnmsub_pd(simde__m256d a,simde__m256d b,simde__m256d c)561 simde_mm256_fnmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) {
562 #if defined(SIMDE_X86_FMA_NATIVE)
563 return _mm256_fnmsub_pd(a, b, c);
564 #else
565 simde__m256d_private
566 r_,
567 a_ = simde__m256d_to_private(a),
568 b_ = simde__m256d_to_private(b),
569 c_ = simde__m256d_to_private(c);
570
571 SIMDE_VECTORIZE
572 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
573 r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i];
574 }
575
576 return simde__m256d_from_private(r_);
577 #endif
578 }
579 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
580 #undef _mm256_fnmsub_pd
581 #define _mm256_fnmsub_pd(a, b, c) simde_mm256_fnmsub_pd(a, b, c)
582 #endif
583
584 SIMDE_FUNCTION_ATTRIBUTES
585 simde__m128
simde_mm_fnmsub_ps(simde__m128 a,simde__m128 b,simde__m128 c)586 simde_mm_fnmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
587 #if defined(SIMDE_X86_FMA_NATIVE)
588 return _mm_fnmsub_ps(a, b, c);
589 #else
590 simde__m128_private
591 r_,
592 a_ = simde__m128_to_private(a),
593 b_ = simde__m128_to_private(b),
594 c_ = simde__m128_to_private(c);
595
596 SIMDE_VECTORIZE
597 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
598 r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i];
599 }
600
601 return simde__m128_from_private(r_);
602 #endif
603 }
604 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
605 #undef _mm_fnmsub_ps
606 #define _mm_fnmsub_ps(a, b, c) simde_mm_fnmsub_ps(a, b, c)
607 #endif
608
609 SIMDE_FUNCTION_ATTRIBUTES
610 simde__m256
simde_mm256_fnmsub_ps(simde__m256 a,simde__m256 b,simde__m256 c)611 simde_mm256_fnmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) {
612 #if defined(SIMDE_X86_FMA_NATIVE)
613 return _mm256_fnmsub_ps(a, b, c);
614 #else
615 simde__m256_private
616 r_,
617 a_ = simde__m256_to_private(a),
618 b_ = simde__m256_to_private(b),
619 c_ = simde__m256_to_private(c);
620
621 SIMDE_VECTORIZE
622 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
623 r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i];
624 }
625
626 return simde__m256_from_private(r_);
627 #endif
628 }
629 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
630 #undef _mm256_fnmsub_ps
631 #define _mm256_fnmsub_ps(a, b, c) simde_mm256_fnmsub_ps(a, b, c)
632 #endif
633
634 SIMDE_FUNCTION_ATTRIBUTES
635 simde__m128d
simde_mm_fnmsub_sd(simde__m128d a,simde__m128d b,simde__m128d c)636 simde_mm_fnmsub_sd (simde__m128d a, simde__m128d b, simde__m128d c) {
637 #if defined(SIMDE_X86_FMA_NATIVE)
638 return _mm_fnmsub_sd(a, b, c);
639 #else
640 simde__m128d_private
641 r_,
642 a_ = simde__m128d_to_private(a),
643 b_ = simde__m128d_to_private(b),
644 c_ = simde__m128d_to_private(c);
645
646 r_ = a_;
647 r_.f64[0] = -(a_.f64[0] * b_.f64[0]) - c_.f64[0];
648
649 return simde__m128d_from_private(r_);
650 #endif
651 }
652 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
653 #undef _mm_fnmsub_sd
654 #define _mm_fnmsub_sd(a, b, c) simde_mm_fnmsub_sd(a, b, c)
655 #endif
656
657 SIMDE_FUNCTION_ATTRIBUTES
658 simde__m128
simde_mm_fnmsub_ss(simde__m128 a,simde__m128 b,simde__m128 c)659 simde_mm_fnmsub_ss (simde__m128 a, simde__m128 b, simde__m128 c) {
660 #if defined(SIMDE_X86_FMA_NATIVE)
661 return _mm_fnmsub_ss(a, b, c);
662 #else
663 simde__m128_private
664 r_,
665 a_ = simde__m128_to_private(a),
666 b_ = simde__m128_to_private(b),
667 c_ = simde__m128_to_private(c);
668
669 r_ = simde__m128_to_private(a);
670 r_.f32[0] = -(a_.f32[0] * b_.f32[0]) - c_.f32[0];
671
672 return simde__m128_from_private(r_);
673 #endif
674 }
675 #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES)
676 #undef _mm_fnmsub_ss
677 #define _mm_fnmsub_ss(a, b, c) simde_mm_fnmsub_ss(a, b, c)
678 #endif
679
680 SIMDE_END_DECLS_
681
682 HEDLEY_DIAGNOSTIC_POP
683
684 #endif /* !defined(SIMDE_X86_FMA_H) */
685