1 /* SPDX-License-Identifier: MIT
2 *
3 * Permission is hereby granted, free of charge, to any person
4 * obtaining a copy of this software and associated documentation
5 * files (the "Software"), to deal in the Software without
6 * restriction, including without limitation the rights to use, copy,
7 * modify, merge, publish, distribute, sublicense, and/or sell copies
8 * of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be
12 * included in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Copyright:
24 * 2020 Evan Nemerson <evan@nemerson.com>
25 * 2020 Himanshi Mathur <himanshi18037@iiitd.ac.in>
26 */
27
28 #if !defined(SIMDE_X86_SVML_H)
29 #define SIMDE_X86_SVML_H
30
31 #include "fma.h"
32 #include "avx2.h"
33 #include "avx512/abs.h"
34 #include "avx512/add.h"
35 #include "avx512/cmp.h"
36 #include "avx512/copysign.h"
37 #include "avx512/xorsign.h"
38 #include "avx512/div.h"
39 #include "avx512/fmadd.h"
40 #include "avx512/mov.h"
41 #include "avx512/mul.h"
42 #include "avx512/negate.h"
43 #include "avx512/or.h"
44 #include "avx512/set1.h"
45 #include "avx512/setone.h"
46 #include "avx512/setzero.h"
47 #include "avx512/sqrt.h"
48 #include "avx512/sub.h"
49
50 #include "../simde-complex.h"
51
52 #if !defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
53 # define SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES
54 #endif
55
56 HEDLEY_DIAGNOSTIC_PUSH
57 SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
58 SIMDE_BEGIN_DECLS_
59
/* NOTE(review): a second, identical SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES
 * guard stood here; it duplicated the one above (before the HEDLEY
 * prologue) token-for-token, so it has been removed. */
63
64 SIMDE_FUNCTION_ATTRIBUTES
65 simde__m128
simde_mm_acos_ps(simde__m128 a)66 simde_mm_acos_ps (simde__m128 a) {
67 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
68 return _mm_acos_ps(a);
69 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
70 #if SIMDE_ACCURACY_PREFERENCE > 1
71 return Sleef_acosf4_u10(a);
72 #else
73 return Sleef_acosf4_u35(a);
74 #endif
75 #else
76 simde__m128_private
77 r_,
78 a_ = simde__m128_to_private(a);
79
80 SIMDE_VECTORIZE
81 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
82 r_.f32[i] = simde_math_acosf(a_.f32[i]);
83 }
84
85 return simde__m128_from_private(r_);
86 #endif
87 }
88 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
89 #undef _mm_acos_ps
90 #define _mm_acos_ps(a) simde_mm_acos_ps(a)
91 #endif
92
93 SIMDE_FUNCTION_ATTRIBUTES
94 simde__m128d
simde_mm_acos_pd(simde__m128d a)95 simde_mm_acos_pd (simde__m128d a) {
96 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
97 return _mm_acos_pd(a);
98 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
99 #if SIMDE_ACCURACY_PREFERENCE > 1
100 return Sleef_acosd2_u10(a);
101 #else
102 return Sleef_acosd2_u35(a);
103 #endif
104 #else
105 simde__m128d_private
106 r_,
107 a_ = simde__m128d_to_private(a);
108
109 SIMDE_VECTORIZE
110 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
111 r_.f64[i] = simde_math_acos(a_.f64[i]);
112 }
113
114 return simde__m128d_from_private(r_);
115 #endif
116 }
117 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
118 #undef _mm_acos_pd
119 #define _mm_acos_pd(a) simde_mm_acos_pd(a)
120 #endif
121
122 SIMDE_FUNCTION_ATTRIBUTES
123 simde__m256
simde_mm256_acos_ps(simde__m256 a)124 simde_mm256_acos_ps (simde__m256 a) {
125 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
126 return _mm256_acos_ps(a);
127 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
128 #if SIMDE_ACCURACY_PREFERENCE > 1
129 return Sleef_acosf8_u10(a);
130 #else
131 return Sleef_acosf8_u35(a);
132 #endif
133 #else
134 simde__m256_private
135 r_,
136 a_ = simde__m256_to_private(a);
137
138 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
139 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
140 r_.m128[i] = simde_mm_acos_ps(a_.m128[i]);
141 }
142 #else
143 SIMDE_VECTORIZE
144 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
145 r_.f32[i] = simde_math_acosf(a_.f32[i]);
146 }
147 #endif
148
149 return simde__m256_from_private(r_);
150 #endif
151 }
152 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
153 #undef _mm256_acos_ps
154 #define _mm256_acos_ps(a) simde_mm256_acos_ps(a)
155 #endif
156
157
158 SIMDE_FUNCTION_ATTRIBUTES
159 simde__m256d
simde_mm256_acos_pd(simde__m256d a)160 simde_mm256_acos_pd (simde__m256d a) {
161 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
162 return _mm256_acos_pd(a);
163 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
164 #if SIMDE_ACCURACY_PREFERENCE > 1
165 return Sleef_acosd4_u10(a);
166 #else
167 return Sleef_acosd4_u35(a);
168 #endif
169 #else
170 simde__m256d_private
171 r_,
172 a_ = simde__m256d_to_private(a);
173
174 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
175 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
176 r_.m128d[i] = simde_mm_acos_pd(a_.m128d[i]);
177 }
178 #else
179 SIMDE_VECTORIZE
180 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
181 r_.f64[i] = simde_math_acos(a_.f64[i]);
182 }
183 #endif
184
185 return simde__m256d_from_private(r_);
186 #endif
187 }
188 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
189 #undef _mm256_acos_pd
190 #define _mm256_acos_pd(a) simde_mm256_acos_pd(a)
191 #endif
192
193 SIMDE_FUNCTION_ATTRIBUTES
194 simde__m512
simde_mm512_acos_ps(simde__m512 a)195 simde_mm512_acos_ps (simde__m512 a) {
196 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
197 return _mm512_acos_ps(a);
198 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
199 #if SIMDE_ACCURACY_PREFERENCE > 1
200 return Sleef_acosf16_u10(a);
201 #else
202 return Sleef_acosf16_u35(a);
203 #endif
204 #else
205 simde__m512_private
206 r_,
207 a_ = simde__m512_to_private(a);
208
209 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
210 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
211 r_.m256[i] = simde_mm256_acos_ps(a_.m256[i]);
212 }
213 #else
214 SIMDE_VECTORIZE
215 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
216 r_.f32[i] = simde_math_acosf(a_.f32[i]);
217 }
218 #endif
219
220 return simde__m512_from_private(r_);
221 #endif
222 }
223 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
224 #undef _mm512_acos_ps
225 #define _mm512_acos_ps(a) simde_mm512_acos_ps(a)
226 #endif
227
228 SIMDE_FUNCTION_ATTRIBUTES
229 simde__m512d
simde_mm512_acos_pd(simde__m512d a)230 simde_mm512_acos_pd (simde__m512d a) {
231 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
232 return _mm512_acos_pd(a);
233 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
234 #if SIMDE_ACCURACY_PREFERENCE > 1
235 return Sleef_acosd8_u10(a);
236 #else
237 return Sleef_acosd8_u35(a);
238 #endif
239 #else
240 simde__m512d_private
241 r_,
242 a_ = simde__m512d_to_private(a);
243
244 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
245 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
246 r_.m256d[i] = simde_mm256_acos_pd(a_.m256d[i]);
247 }
248 #else
249 SIMDE_VECTORIZE
250 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
251 r_.f64[i] = simde_math_acos(a_.f64[i]);
252 }
253 #endif
254
255 return simde__m512d_from_private(r_);
256 #endif
257 }
258 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
259 #undef _mm512_acos_pd
260 #define _mm512_acos_pd(a) simde_mm512_acos_pd(a)
261 #endif
262
263 SIMDE_FUNCTION_ATTRIBUTES
264 simde__m512
simde_mm512_mask_acos_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)265 simde_mm512_mask_acos_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
266 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
267 return _mm512_mask_acos_ps(src, k, a);
268 #else
269 return simde_mm512_mask_mov_ps(src, k, simde_mm512_acos_ps(a));
270 #endif
271 }
272 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
273 #undef _mm512_mask_acos_ps
274 #define _mm512_mask_acos_ps(src, k, a) simde_mm512_mask_acos_ps(src, k, a)
275 #endif
276
277 SIMDE_FUNCTION_ATTRIBUTES
278 simde__m512d
simde_mm512_mask_acos_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)279 simde_mm512_mask_acos_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
280 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
281 return _mm512_mask_acos_pd(src, k, a);
282 #else
283 return simde_mm512_mask_mov_pd(src, k, simde_mm512_acos_pd(a));
284 #endif
285 }
286 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
287 #undef _mm512_mask_acos_pd
288 #define _mm512_mask_acos_pd(src, k, a) simde_mm512_mask_acos_pd(src, k, a)
289 #endif
290
291 SIMDE_FUNCTION_ATTRIBUTES
292 simde__m128
simde_mm_acosh_ps(simde__m128 a)293 simde_mm_acosh_ps (simde__m128 a) {
294 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
295 return _mm_acosh_ps(a);
296 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
297 return Sleef_acoshf4_u10(a);
298 #else
299 simde__m128_private
300 r_,
301 a_ = simde__m128_to_private(a);
302
303 SIMDE_VECTORIZE
304 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
305 r_.f32[i] = simde_math_acoshf(a_.f32[i]);
306 }
307
308 return simde__m128_from_private(r_);
309 #endif
310 }
311 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
312 #undef _mm_acosh_ps
313 #define _mm_acosh_ps(a) simde_mm_acosh_ps(a)
314 #endif
315
316 SIMDE_FUNCTION_ATTRIBUTES
317 simde__m128d
simde_mm_acosh_pd(simde__m128d a)318 simde_mm_acosh_pd (simde__m128d a) {
319 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
320 return _mm_acosh_pd(a);
321 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
322 return Sleef_acoshd2_u10(a);
323 #else
324 simde__m128d_private
325 r_,
326 a_ = simde__m128d_to_private(a);
327
328 SIMDE_VECTORIZE
329 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
330 r_.f64[i] = simde_math_acosh(a_.f64[i]);
331 }
332
333 return simde__m128d_from_private(r_);
334 #endif
335 }
336 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
337 #undef _mm_acosh_pd
338 #define _mm_acosh_pd(a) simde_mm_acosh_pd(a)
339 #endif
340
341 SIMDE_FUNCTION_ATTRIBUTES
342 simde__m256
simde_mm256_acosh_ps(simde__m256 a)343 simde_mm256_acosh_ps (simde__m256 a) {
344 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
345 return _mm256_acosh_ps(a);
346 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
347 return Sleef_acoshf8_u10(a);
348 #else
349 simde__m256_private
350 r_,
351 a_ = simde__m256_to_private(a);
352
353 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
354 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
355 r_.m128[i] = simde_mm_acosh_ps(a_.m128[i]);
356 }
357 #else
358 SIMDE_VECTORIZE
359 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
360 r_.f32[i] = simde_math_acoshf(a_.f32[i]);
361 }
362 #endif
363
364 return simde__m256_from_private(r_);
365 #endif
366 }
367 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
368 #undef _mm256_acosh_ps
369 #define _mm256_acosh_ps(a) simde_mm256_acosh_ps(a)
370 #endif
371
372
373 SIMDE_FUNCTION_ATTRIBUTES
374 simde__m256d
simde_mm256_acosh_pd(simde__m256d a)375 simde_mm256_acosh_pd (simde__m256d a) {
376 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
377 return _mm256_acosh_pd(a);
378 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
379 return Sleef_acoshd4_u10(a);
380 #else
381 simde__m256d_private
382 r_,
383 a_ = simde__m256d_to_private(a);
384
385 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
386 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
387 r_.m128d[i] = simde_mm_acosh_pd(a_.m128d[i]);
388 }
389 #else
390 SIMDE_VECTORIZE
391 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
392 r_.f64[i] = simde_math_acosh(a_.f64[i]);
393 }
394 #endif
395
396 return simde__m256d_from_private(r_);
397 #endif
398 }
399 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
400 #undef _mm256_acosh_pd
401 #define _mm256_acosh_pd(a) simde_mm256_acosh_pd(a)
402 #endif
403
404 SIMDE_FUNCTION_ATTRIBUTES
405 simde__m512
simde_mm512_acosh_ps(simde__m512 a)406 simde_mm512_acosh_ps (simde__m512 a) {
407 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
408 return _mm512_acosh_ps(a);
409 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
410 return Sleef_acoshf16_u10(a);
411 #else
412 simde__m512_private
413 r_,
414 a_ = simde__m512_to_private(a);
415
416 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
417 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
418 r_.m256[i] = simde_mm256_acosh_ps(a_.m256[i]);
419 }
420 #else
421 SIMDE_VECTORIZE
422 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
423 r_.f32[i] = simde_math_acoshf(a_.f32[i]);
424 }
425 #endif
426
427 return simde__m512_from_private(r_);
428 #endif
429 }
430 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
431 #undef _mm512_acosh_ps
432 #define _mm512_acosh_ps(a) simde_mm512_acosh_ps(a)
433 #endif
434
435 SIMDE_FUNCTION_ATTRIBUTES
436 simde__m512d
simde_mm512_acosh_pd(simde__m512d a)437 simde_mm512_acosh_pd (simde__m512d a) {
438 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
439 return _mm512_acosh_pd(a);
440 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
441 return Sleef_acoshd8_u10(a);
442 #else
443 simde__m512d_private
444 r_,
445 a_ = simde__m512d_to_private(a);
446
447 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
448 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
449 r_.m256d[i] = simde_mm256_acosh_pd(a_.m256d[i]);
450 }
451 #else
452 SIMDE_VECTORIZE
453 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
454 r_.f64[i] = simde_math_acosh(a_.f64[i]);
455 }
456 #endif
457
458 return simde__m512d_from_private(r_);
459 #endif
460 }
461 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
462 #undef _mm512_acosh_pd
463 #define _mm512_acosh_pd(a) simde_mm512_acosh_pd(a)
464 #endif
465
466 SIMDE_FUNCTION_ATTRIBUTES
467 simde__m512
simde_mm512_mask_acosh_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)468 simde_mm512_mask_acosh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
469 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
470 return _mm512_mask_acosh_ps(src, k, a);
471 #else
472 return simde_mm512_mask_mov_ps(src, k, simde_mm512_acosh_ps(a));
473 #endif
474 }
475 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
476 #undef _mm512_mask_acosh_ps
477 #define _mm512_mask_acosh_ps(src, k, a) simde_mm512_mask_acosh_ps(src, k, a)
478 #endif
479
480 SIMDE_FUNCTION_ATTRIBUTES
481 simde__m512d
simde_mm512_mask_acosh_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)482 simde_mm512_mask_acosh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
483 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
484 return _mm512_mask_acosh_pd(src, k, a);
485 #else
486 return simde_mm512_mask_mov_pd(src, k, simde_mm512_acosh_pd(a));
487 #endif
488 }
489 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
490 #undef _mm512_mask_acosh_pd
491 #define _mm512_mask_acosh_pd(src, k, a) simde_mm512_mask_acosh_pd(src, k, a)
492 #endif
493
494 SIMDE_FUNCTION_ATTRIBUTES
495 simde__m128
simde_mm_asin_ps(simde__m128 a)496 simde_mm_asin_ps (simde__m128 a) {
497 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
498 return _mm_asin_ps(a);
499 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
500 #if SIMDE_ACCURACY_PREFERENCE > 1
501 return Sleef_asinf4_u10(a);
502 #else
503 return Sleef_asinf4_u35(a);
504 #endif
505 #else
506 simde__m128_private
507 r_,
508 a_ = simde__m128_to_private(a);
509
510 SIMDE_VECTORIZE
511 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
512 r_.f32[i] = simde_math_asinf(a_.f32[i]);
513 }
514
515 return simde__m128_from_private(r_);
516 #endif
517 }
518 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
519 #undef _mm_asin_ps
520 #define _mm_asin_ps(a) simde_mm_asin_ps(a)
521 #endif
522
523 SIMDE_FUNCTION_ATTRIBUTES
524 simde__m128d
simde_mm_asin_pd(simde__m128d a)525 simde_mm_asin_pd (simde__m128d a) {
526 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
527 return _mm_asin_pd(a);
528 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
529 #if SIMDE_ACCURACY_PREFERENCE > 1
530 return Sleef_asind2_u10(a);
531 #else
532 return Sleef_asind2_u35(a);
533 #endif
534 #else
535 simde__m128d_private
536 r_,
537 a_ = simde__m128d_to_private(a);
538
539 SIMDE_VECTORIZE
540 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
541 r_.f64[i] = simde_math_asin(a_.f64[i]);
542 }
543
544 return simde__m128d_from_private(r_);
545 #endif
546 }
547 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
548 #undef _mm_asin_pd
549 #define _mm_asin_pd(a) simde_mm_asin_pd(a)
550 #endif
551
552 SIMDE_FUNCTION_ATTRIBUTES
553 simde__m256
simde_mm256_asin_ps(simde__m256 a)554 simde_mm256_asin_ps (simde__m256 a) {
555 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
556 return _mm256_asin_ps(a);
557 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
558 #if SIMDE_ACCURACY_PREFERENCE > 1
559 return Sleef_asinf8_u10(a);
560 #else
561 return Sleef_asinf8_u35(a);
562 #endif
563 #else
564 simde__m256_private
565 r_,
566 a_ = simde__m256_to_private(a);
567
568 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
569 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
570 r_.m128[i] = simde_mm_asin_ps(a_.m128[i]);
571 }
572 #else
573 SIMDE_VECTORIZE
574 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
575 r_.f32[i] = simde_math_asinf(a_.f32[i]);
576 }
577 #endif
578
579 return simde__m256_from_private(r_);
580 #endif
581 }
582 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
583 #undef _mm256_asin_ps
584 #define _mm256_asin_ps(a) simde_mm256_asin_ps(a)
585 #endif
586
587
588 SIMDE_FUNCTION_ATTRIBUTES
589 simde__m256d
simde_mm256_asin_pd(simde__m256d a)590 simde_mm256_asin_pd (simde__m256d a) {
591 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
592 return _mm256_asin_pd(a);
593 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
594 #if SIMDE_ACCURACY_PREFERENCE > 1
595 return Sleef_asind4_u10(a);
596 #else
597 return Sleef_asind4_u35(a);
598 #endif
599 #else
600 simde__m256d_private
601 r_,
602 a_ = simde__m256d_to_private(a);
603
604 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
605 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
606 r_.m128d[i] = simde_mm_asin_pd(a_.m128d[i]);
607 }
608 #else
609 SIMDE_VECTORIZE
610 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
611 r_.f64[i] = simde_math_asin(a_.f64[i]);
612 }
613 #endif
614
615 return simde__m256d_from_private(r_);
616 #endif
617 }
618 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
619 #undef _mm256_asin_pd
620 #define _mm256_asin_pd(a) simde_mm256_asin_pd(a)
621 #endif
622
623 SIMDE_FUNCTION_ATTRIBUTES
624 simde__m512
simde_mm512_asin_ps(simde__m512 a)625 simde_mm512_asin_ps (simde__m512 a) {
626 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
627 return _mm512_asin_ps(a);
628 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
629 #if SIMDE_ACCURACY_PREFERENCE > 1
630 return Sleef_asinf16_u10(a);
631 #else
632 return Sleef_asinf16_u35(a);
633 #endif
634 #else
635 simde__m512_private
636 r_,
637 a_ = simde__m512_to_private(a);
638
639 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
640 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
641 r_.m256[i] = simde_mm256_asin_ps(a_.m256[i]);
642 }
643 #else
644 SIMDE_VECTORIZE
645 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
646 r_.f32[i] = simde_math_asinf(a_.f32[i]);
647 }
648 #endif
649
650 return simde__m512_from_private(r_);
651 #endif
652 }
653 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
654 #undef _mm512_asin_ps
655 #define _mm512_asin_ps(a) simde_mm512_asin_ps(a)
656 #endif
657
658 SIMDE_FUNCTION_ATTRIBUTES
659 simde__m512d
simde_mm512_asin_pd(simde__m512d a)660 simde_mm512_asin_pd (simde__m512d a) {
661 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
662 return _mm512_asin_pd(a);
663 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
664 #if SIMDE_ACCURACY_PREFERENCE > 1
665 return Sleef_asind8_u10(a);
666 #else
667 return Sleef_asind8_u35(a);
668 #endif
669 #else
670 simde__m512d_private
671 r_,
672 a_ = simde__m512d_to_private(a);
673
674 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
675 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
676 r_.m256d[i] = simde_mm256_asin_pd(a_.m256d[i]);
677 }
678 #else
679 SIMDE_VECTORIZE
680 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
681 r_.f64[i] = simde_math_asin(a_.f64[i]);
682 }
683 #endif
684
685 return simde__m512d_from_private(r_);
686 #endif
687 }
688 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
689 #undef _mm512_asin_pd
690 #define _mm512_asin_pd(a) simde_mm512_asin_pd(a)
691 #endif
692
693 SIMDE_FUNCTION_ATTRIBUTES
694 simde__m512
simde_mm512_mask_asin_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)695 simde_mm512_mask_asin_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
696 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
697 return _mm512_mask_asin_ps(src, k, a);
698 #else
699 return simde_mm512_mask_mov_ps(src, k, simde_mm512_asin_ps(a));
700 #endif
701 }
702 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
703 #undef _mm512_mask_asin_ps
704 #define _mm512_mask_asin_ps(src, k, a) simde_mm512_mask_asin_ps(src, k, a)
705 #endif
706
707 SIMDE_FUNCTION_ATTRIBUTES
708 simde__m512d
simde_mm512_mask_asin_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)709 simde_mm512_mask_asin_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
710 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
711 return _mm512_mask_asin_pd(src, k, a);
712 #else
713 return simde_mm512_mask_mov_pd(src, k, simde_mm512_asin_pd(a));
714 #endif
715 }
716 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
717 #undef _mm512_mask_asin_pd
718 #define _mm512_mask_asin_pd(src, k, a) simde_mm512_mask_asin_pd(src, k, a)
719 #endif
720
721 SIMDE_FUNCTION_ATTRIBUTES
722 simde__m128
simde_mm_asinh_ps(simde__m128 a)723 simde_mm_asinh_ps (simde__m128 a) {
724 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
725 return _mm_asinh_ps(a);
726 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
727 return Sleef_asinhf4_u10(a);
728 #else
729 simde__m128_private
730 r_,
731 a_ = simde__m128_to_private(a);
732
733 SIMDE_VECTORIZE
734 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
735 r_.f32[i] = simde_math_asinhf(a_.f32[i]);
736 }
737
738 return simde__m128_from_private(r_);
739 #endif
740 }
741 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
742 #undef _mm_asinh_ps
743 #define _mm_asinh_ps(a) simde_mm_asinh_ps(a)
744 #endif
745
746 SIMDE_FUNCTION_ATTRIBUTES
747 simde__m128d
simde_mm_asinh_pd(simde__m128d a)748 simde_mm_asinh_pd (simde__m128d a) {
749 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
750 return _mm_asinh_pd(a);
751 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
752 return Sleef_asinhd2_u10(a);
753 #else
754 simde__m128d_private
755 r_,
756 a_ = simde__m128d_to_private(a);
757
758 SIMDE_VECTORIZE
759 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
760 r_.f64[i] = simde_math_asinh(a_.f64[i]);
761 }
762
763 return simde__m128d_from_private(r_);
764 #endif
765 }
766 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
767 #undef _mm_asinh_pd
768 #define _mm_asinh_pd(a) simde_mm_asinh_pd(a)
769 #endif
770
771 SIMDE_FUNCTION_ATTRIBUTES
772 simde__m256
simde_mm256_asinh_ps(simde__m256 a)773 simde_mm256_asinh_ps (simde__m256 a) {
774 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
775 return _mm256_asinh_ps(a);
776 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
777 return Sleef_asinhf8_u10(a);
778 #else
779 simde__m256_private
780 r_,
781 a_ = simde__m256_to_private(a);
782
783 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
784 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
785 r_.m128[i] = simde_mm_asinh_ps(a_.m128[i]);
786 }
787 #else
788 SIMDE_VECTORIZE
789 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
790 r_.f32[i] = simde_math_asinhf(a_.f32[i]);
791 }
792 #endif
793
794 return simde__m256_from_private(r_);
795 #endif
796 }
797 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
798 #undef _mm256_asinh_ps
799 #define _mm256_asinh_ps(a) simde_mm256_asinh_ps(a)
800 #endif
801
802
803 SIMDE_FUNCTION_ATTRIBUTES
804 simde__m256d
simde_mm256_asinh_pd(simde__m256d a)805 simde_mm256_asinh_pd (simde__m256d a) {
806 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
807 return _mm256_asinh_pd(a);
808 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
809 return Sleef_asinhd4_u10(a);
810 #else
811 simde__m256d_private
812 r_,
813 a_ = simde__m256d_to_private(a);
814
815 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
816 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
817 r_.m128d[i] = simde_mm_asinh_pd(a_.m128d[i]);
818 }
819 #else
820 SIMDE_VECTORIZE
821 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
822 r_.f64[i] = simde_math_asinh(a_.f64[i]);
823 }
824 #endif
825
826 return simde__m256d_from_private(r_);
827 #endif
828 }
829 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
830 #undef _mm256_asinh_pd
831 #define _mm256_asinh_pd(a) simde_mm256_asinh_pd(a)
832 #endif
833
834 SIMDE_FUNCTION_ATTRIBUTES
835 simde__m512
simde_mm512_asinh_ps(simde__m512 a)836 simde_mm512_asinh_ps (simde__m512 a) {
837 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
838 return _mm512_asinh_ps(a);
839 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
840 return Sleef_asinhf16_u10(a);
841 #else
842 simde__m512_private
843 r_,
844 a_ = simde__m512_to_private(a);
845
846 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
847 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
848 r_.m256[i] = simde_mm256_asinh_ps(a_.m256[i]);
849 }
850 #else
851 SIMDE_VECTORIZE
852 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
853 r_.f32[i] = simde_math_asinhf(a_.f32[i]);
854 }
855 #endif
856
857 return simde__m512_from_private(r_);
858 #endif
859 }
860 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
861 #undef _mm512_asinh_ps
862 #define _mm512_asinh_ps(a) simde_mm512_asinh_ps(a)
863 #endif
864
865 SIMDE_FUNCTION_ATTRIBUTES
866 simde__m512d
simde_mm512_asinh_pd(simde__m512d a)867 simde_mm512_asinh_pd (simde__m512d a) {
868 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
869 return _mm512_asinh_pd(a);
870 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
871 return Sleef_asinhd8_u10(a);
872 #else
873 simde__m512d_private
874 r_,
875 a_ = simde__m512d_to_private(a);
876
877 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
878 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
879 r_.m256d[i] = simde_mm256_asinh_pd(a_.m256d[i]);
880 }
881 #else
882 SIMDE_VECTORIZE
883 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
884 r_.f64[i] = simde_math_asinh(a_.f64[i]);
885 }
886 #endif
887
888 return simde__m512d_from_private(r_);
889 #endif
890 }
891 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
892 #undef _mm512_asinh_pd
893 #define _mm512_asinh_pd(a) simde_mm512_asinh_pd(a)
894 #endif
895
896 SIMDE_FUNCTION_ATTRIBUTES
897 simde__m512
simde_mm512_mask_asinh_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)898 simde_mm512_mask_asinh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
899 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
900 return _mm512_mask_asinh_ps(src, k, a);
901 #else
902 return simde_mm512_mask_mov_ps(src, k, simde_mm512_asinh_ps(a));
903 #endif
904 }
905 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
906 #undef _mm512_mask_asinh_ps
907 #define _mm512_mask_asinh_ps(src, k, a) simde_mm512_mask_asinh_ps(src, k, a)
908 #endif
909
910 SIMDE_FUNCTION_ATTRIBUTES
911 simde__m512d
simde_mm512_mask_asinh_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)912 simde_mm512_mask_asinh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
913 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
914 return _mm512_mask_asinh_pd(src, k, a);
915 #else
916 return simde_mm512_mask_mov_pd(src, k, simde_mm512_asinh_pd(a));
917 #endif
918 }
919 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
920 #undef _mm512_mask_asinh_pd
921 #define _mm512_mask_asinh_pd(src, k, a) simde_mm512_mask_asinh_pd(src, k, a)
922 #endif
923
924 SIMDE_FUNCTION_ATTRIBUTES
925 simde__m128
simde_mm_atan_ps(simde__m128 a)926 simde_mm_atan_ps (simde__m128 a) {
927 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
928 return _mm_atan_ps(a);
929 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
930 #if SIMDE_ACCURACY_PREFERENCE > 1
931 return Sleef_atanf4_u10(a);
932 #else
933 return Sleef_atanf4_u35(a);
934 #endif
935 #else
936 simde__m128_private
937 r_,
938 a_ = simde__m128_to_private(a);
939
940 SIMDE_VECTORIZE
941 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
942 r_.f32[i] = simde_math_atanf(a_.f32[i]);
943 }
944
945 return simde__m128_from_private(r_);
946 #endif
947 }
948 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
949 #undef _mm_atan_ps
950 #define _mm_atan_ps(a) simde_mm_atan_ps(a)
951 #endif
952
953 SIMDE_FUNCTION_ATTRIBUTES
954 simde__m128d
simde_mm_atan_pd(simde__m128d a)955 simde_mm_atan_pd (simde__m128d a) {
956 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
957 return _mm_atan_pd(a);
958 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
959 #if SIMDE_ACCURACY_PREFERENCE > 1
960 return Sleef_atand2_u10(a);
961 #else
962 return Sleef_atand2_u35(a);
963 #endif
964 #else
965 simde__m128d_private
966 r_,
967 a_ = simde__m128d_to_private(a);
968
969 SIMDE_VECTORIZE
970 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
971 r_.f64[i] = simde_math_atan(a_.f64[i]);
972 }
973
974 return simde__m128d_from_private(r_);
975 #endif
976 }
977 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
978 #undef _mm_atan_pd
979 #define _mm_atan_pd(a) simde_mm_atan_pd(a)
980 #endif
981
982 SIMDE_FUNCTION_ATTRIBUTES
983 simde__m256
simde_mm256_atan_ps(simde__m256 a)984 simde_mm256_atan_ps (simde__m256 a) {
985 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
986 return _mm256_atan_ps(a);
987 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
988 #if SIMDE_ACCURACY_PREFERENCE > 1
989 return Sleef_atanf8_u10(a);
990 #else
991 return Sleef_atanf8_u35(a);
992 #endif
993 #else
994 simde__m256_private
995 r_,
996 a_ = simde__m256_to_private(a);
997
998 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
999 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
1000 r_.m128[i] = simde_mm_atan_ps(a_.m128[i]);
1001 }
1002 #else
1003 SIMDE_VECTORIZE
1004 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1005 r_.f32[i] = simde_math_atanf(a_.f32[i]);
1006 }
1007 #endif
1008
1009 return simde__m256_from_private(r_);
1010 #endif
1011 }
1012 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1013 #undef _mm256_atan_ps
1014 #define _mm256_atan_ps(a) simde_mm256_atan_ps(a)
1015 #endif
1016
1017
1018 SIMDE_FUNCTION_ATTRIBUTES
1019 simde__m256d
simde_mm256_atan_pd(simde__m256d a)1020 simde_mm256_atan_pd (simde__m256d a) {
1021 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
1022 return _mm256_atan_pd(a);
1023 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
1024 #if SIMDE_ACCURACY_PREFERENCE > 1
1025 return Sleef_atand4_u10(a);
1026 #else
1027 return Sleef_atand4_u35(a);
1028 #endif
1029 #else
1030 simde__m256d_private
1031 r_,
1032 a_ = simde__m256d_to_private(a);
1033
1034 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
1035 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
1036 r_.m128d[i] = simde_mm_atan_pd(a_.m128d[i]);
1037 }
1038 #else
1039 SIMDE_VECTORIZE
1040 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1041 r_.f64[i] = simde_math_atan(a_.f64[i]);
1042 }
1043 #endif
1044
1045 return simde__m256d_from_private(r_);
1046 #endif
1047 }
1048 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1049 #undef _mm256_atan_pd
1050 #define _mm256_atan_pd(a) simde_mm256_atan_pd(a)
1051 #endif
1052
1053 SIMDE_FUNCTION_ATTRIBUTES
1054 simde__m512
simde_mm512_atan_ps(simde__m512 a)1055 simde_mm512_atan_ps (simde__m512 a) {
1056 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1057 return _mm512_atan_ps(a);
1058 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
1059 #if SIMDE_ACCURACY_PREFERENCE > 1
1060 return Sleef_atanf16_u10(a);
1061 #else
1062 return Sleef_atanf16_u35(a);
1063 #endif
1064 #else
1065 simde__m512_private
1066 r_,
1067 a_ = simde__m512_to_private(a);
1068
1069 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
1070 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
1071 r_.m256[i] = simde_mm256_atan_ps(a_.m256[i]);
1072 }
1073 #else
1074 SIMDE_VECTORIZE
1075 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1076 r_.f32[i] = simde_math_atanf(a_.f32[i]);
1077 }
1078 #endif
1079
1080 return simde__m512_from_private(r_);
1081 #endif
1082 }
1083 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1084 #undef _mm512_atan_ps
1085 #define _mm512_atan_ps(a) simde_mm512_atan_ps(a)
1086 #endif
1087
1088 SIMDE_FUNCTION_ATTRIBUTES
1089 simde__m512d
simde_mm512_atan_pd(simde__m512d a)1090 simde_mm512_atan_pd (simde__m512d a) {
1091 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1092 return _mm512_atan_pd(a);
1093 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
1094 #if SIMDE_ACCURACY_PREFERENCE > 1
1095 return Sleef_atand8_u10(a);
1096 #else
1097 return Sleef_atand8_u35(a);
1098 #endif
1099 #else
1100 simde__m512d_private
1101 r_,
1102 a_ = simde__m512d_to_private(a);
1103
1104 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
1105 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
1106 r_.m256d[i] = simde_mm256_atan_pd(a_.m256d[i]);
1107 }
1108 #else
1109 SIMDE_VECTORIZE
1110 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1111 r_.f64[i] = simde_math_atan(a_.f64[i]);
1112 }
1113 #endif
1114
1115 return simde__m512d_from_private(r_);
1116 #endif
1117 }
1118 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1119 #undef _mm512_atan_pd
1120 #define _mm512_atan_pd(a) simde_mm512_atan_pd(a)
1121 #endif
1122
1123 SIMDE_FUNCTION_ATTRIBUTES
1124 simde__m512
simde_mm512_mask_atan_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)1125 simde_mm512_mask_atan_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
1126 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1127 return _mm512_mask_atan_ps(src, k, a);
1128 #else
1129 return simde_mm512_mask_mov_ps(src, k, simde_mm512_atan_ps(a));
1130 #endif
1131 }
1132 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1133 #undef _mm512_mask_atan_ps
1134 #define _mm512_mask_atan_ps(src, k, a) simde_mm512_mask_atan_ps(src, k, a)
1135 #endif
1136
1137 SIMDE_FUNCTION_ATTRIBUTES
1138 simde__m512d
simde_mm512_mask_atan_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)1139 simde_mm512_mask_atan_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
1140 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1141 return _mm512_mask_atan_pd(src, k, a);
1142 #else
1143 return simde_mm512_mask_mov_pd(src, k, simde_mm512_atan_pd(a));
1144 #endif
1145 }
1146 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1147 #undef _mm512_mask_atan_pd
1148 #define _mm512_mask_atan_pd(src, k, a) simde_mm512_mask_atan_pd(src, k, a)
1149 #endif
1150
1151 SIMDE_FUNCTION_ATTRIBUTES
1152 simde__m128
simde_mm_atan2_ps(simde__m128 a,simde__m128 b)1153 simde_mm_atan2_ps (simde__m128 a, simde__m128 b) {
1154 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
1155 return _mm_atan2_ps(a, b);
1156 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
1157 #if SIMDE_ACCURACY_PREFERENCE > 1
1158 return Sleef_atan2f4_u10(a, b);
1159 #else
1160 return Sleef_atan2f4_u35(a, b);
1161 #endif
1162 #else
1163 simde__m128_private
1164 r_,
1165 a_ = simde__m128_to_private(a),
1166 b_ = simde__m128_to_private(b);
1167
1168 SIMDE_VECTORIZE
1169 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1170 r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]);
1171 }
1172
1173 return simde__m128_from_private(r_);
1174 #endif
1175 }
1176 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1177 #undef _mm_atan2_ps
1178 #define _mm_atan2_ps(a, b) simde_mm_atan2_ps(a, b)
1179 #endif
1180
1181 SIMDE_FUNCTION_ATTRIBUTES
1182 simde__m128d
simde_mm_atan2_pd(simde__m128d a,simde__m128d b)1183 simde_mm_atan2_pd (simde__m128d a, simde__m128d b) {
1184 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
1185 return _mm_atan2_pd(a, b);
1186 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
1187 #if SIMDE_ACCURACY_PREFERENCE > 1
1188 return Sleef_atan2d2_u10(a, b);
1189 #else
1190 return Sleef_atan2d2_u35(a, b);
1191 #endif
1192 #else
1193 simde__m128d_private
1194 r_,
1195 a_ = simde__m128d_to_private(a),
1196 b_ = simde__m128d_to_private(b);
1197
1198 SIMDE_VECTORIZE
1199 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1200 r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]);
1201 }
1202
1203 return simde__m128d_from_private(r_);
1204 #endif
1205 }
1206 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1207 #undef _mm_atan2_pd
1208 #define _mm_atan2_pd(a, b) simde_mm_atan2_pd(a, b)
1209 #endif
1210
1211 SIMDE_FUNCTION_ATTRIBUTES
1212 simde__m256
simde_mm256_atan2_ps(simde__m256 a,simde__m256 b)1213 simde_mm256_atan2_ps (simde__m256 a, simde__m256 b) {
1214 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
1215 return _mm256_atan2_ps(a, b);
1216 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
1217 #if SIMDE_ACCURACY_PREFERENCE > 1
1218 return Sleef_atan2f8_u10(a, b);
1219 #else
1220 return Sleef_atan2f8_u35(a, b);
1221 #endif
1222 #else
1223 simde__m256_private
1224 r_,
1225 a_ = simde__m256_to_private(a),
1226 b_ = simde__m256_to_private(b);
1227
1228 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
1229 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
1230 r_.m128[i] = simde_mm_atan2_ps(a_.m128[i], b_.m128[i]);
1231 }
1232 #else
1233 SIMDE_VECTORIZE
1234 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1235 r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]);
1236 }
1237 #endif
1238
1239 return simde__m256_from_private(r_);
1240 #endif
1241 }
1242 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1243 #undef _mm256_atan2_ps
1244 #define _mm256_atan2_ps(a, b) simde_mm256_atan2_ps(a, b)
1245 #endif
1246
1247
1248 SIMDE_FUNCTION_ATTRIBUTES
1249 simde__m256d
simde_mm256_atan2_pd(simde__m256d a,simde__m256d b)1250 simde_mm256_atan2_pd (simde__m256d a, simde__m256d b) {
1251 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
1252 return _mm256_atan2_pd(a, b);
1253 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
1254 #if SIMDE_ACCURACY_PREFERENCE > 1
1255 return Sleef_atan2d4_u10(a, b);
1256 #else
1257 return Sleef_atan2d4_u35(a, b);
1258 #endif
1259 #else
1260 simde__m256d_private
1261 r_,
1262 a_ = simde__m256d_to_private(a),
1263 b_ = simde__m256d_to_private(b);
1264
1265 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
1266 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
1267 r_.m128d[i] = simde_mm_atan2_pd(a_.m128d[i], b_.m128d[i]);
1268 }
1269 #else
1270 SIMDE_VECTORIZE
1271 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1272 r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]);
1273 }
1274 #endif
1275
1276 return simde__m256d_from_private(r_);
1277 #endif
1278 }
1279 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1280 #undef _mm256_atan2_pd
1281 #define _mm256_atan2_pd(a, b) simde_mm256_atan2_pd(a, b)
1282 #endif
1283
1284 SIMDE_FUNCTION_ATTRIBUTES
1285 simde__m512
simde_mm512_atan2_ps(simde__m512 a,simde__m512 b)1286 simde_mm512_atan2_ps (simde__m512 a, simde__m512 b) {
1287 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1288 return _mm512_atan2_ps(a, b);
1289 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
1290 #if SIMDE_ACCURACY_PREFERENCE > 1
1291 return Sleef_atan2f16_u10(a, b);
1292 #else
1293 return Sleef_atan2f16_u35(a, b);
1294 #endif
1295 #else
1296 simde__m512_private
1297 r_,
1298 a_ = simde__m512_to_private(a),
1299 b_ = simde__m512_to_private(b);
1300
1301 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
1302 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
1303 r_.m256[i] = simde_mm256_atan2_ps(a_.m256[i], b_.m256[i]);
1304 }
1305 #else
1306 SIMDE_VECTORIZE
1307 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1308 r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]);
1309 }
1310 #endif
1311
1312 return simde__m512_from_private(r_);
1313 #endif
1314 }
1315 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1316 #undef _mm512_atan2_ps
1317 #define _mm512_atan2_ps(a, b) simde_mm512_atan2_ps(a, b)
1318 #endif
1319
1320 SIMDE_FUNCTION_ATTRIBUTES
1321 simde__m512d
simde_mm512_atan2_pd(simde__m512d a,simde__m512d b)1322 simde_mm512_atan2_pd (simde__m512d a, simde__m512d b) {
1323 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1324 return _mm512_atan2_pd(a, b);
1325 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
1326 #if SIMDE_ACCURACY_PREFERENCE > 1
1327 return Sleef_atan2d8_u10(a, b);
1328 #else
1329 return Sleef_atan2d8_u35(a, b);
1330 #endif
1331 #else
1332 simde__m512d_private
1333 r_,
1334 a_ = simde__m512d_to_private(a),
1335 b_ = simde__m512d_to_private(b);
1336
1337 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
1338 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
1339 r_.m256d[i] = simde_mm256_atan2_pd(a_.m256d[i], b_.m256d[i]);
1340 }
1341 #else
1342 SIMDE_VECTORIZE
1343 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1344 r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]);
1345 }
1346 #endif
1347
1348 return simde__m512d_from_private(r_);
1349 #endif
1350 }
1351 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1352 #undef _mm512_atan2_pd
1353 #define _mm512_atan2_pd(a, b) simde_mm512_atan2_pd(a, b)
1354 #endif
1355
1356 SIMDE_FUNCTION_ATTRIBUTES
1357 simde__m512
simde_mm512_mask_atan2_ps(simde__m512 src,simde__mmask16 k,simde__m512 a,simde__m512 b)1358 simde_mm512_mask_atan2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
1359 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1360 return _mm512_mask_atan2_ps(src, k, a, b);
1361 #else
1362 return simde_mm512_mask_mov_ps(src, k, simde_mm512_atan2_ps(a, b));
1363 #endif
1364 }
1365 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1366 #undef _mm512_mask_atan2_ps
1367 #define _mm512_mask_atan2_ps(src, k, a, b) simde_mm512_mask_atan2_ps(src, k, a, b)
1368 #endif
1369
1370 SIMDE_FUNCTION_ATTRIBUTES
1371 simde__m512d
simde_mm512_mask_atan2_pd(simde__m512d src,simde__mmask8 k,simde__m512d a,simde__m512d b)1372 simde_mm512_mask_atan2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
1373 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1374 return _mm512_mask_atan2_pd(src, k, a, b);
1375 #else
1376 return simde_mm512_mask_mov_pd(src, k, simde_mm512_atan2_pd(a, b));
1377 #endif
1378 }
1379 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1380 #undef _mm512_mask_atan2_pd
1381 #define _mm512_mask_atan2_pd(src, k, a, b) simde_mm512_mask_atan2_pd(src, k, a, b)
1382 #endif
1383
1384 SIMDE_FUNCTION_ATTRIBUTES
1385 simde__m128
simde_mm_atanh_ps(simde__m128 a)1386 simde_mm_atanh_ps (simde__m128 a) {
1387 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
1388 return _mm_atanh_ps(a);
1389 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
1390 return Sleef_atanhf4_u10(a);
1391 #else
1392 simde__m128_private
1393 r_,
1394 a_ = simde__m128_to_private(a);
1395
1396 SIMDE_VECTORIZE
1397 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1398 r_.f32[i] = simde_math_atanhf(a_.f32[i]);
1399 }
1400
1401 return simde__m128_from_private(r_);
1402 #endif
1403 }
1404 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1405 #undef _mm_atanh_ps
1406 #define _mm_atanh_ps(a) simde_mm_atanh_ps(a)
1407 #endif
1408
1409 SIMDE_FUNCTION_ATTRIBUTES
1410 simde__m128d
simde_mm_atanh_pd(simde__m128d a)1411 simde_mm_atanh_pd (simde__m128d a) {
1412 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
1413 return _mm_atanh_pd(a);
1414 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
1415 return Sleef_atanhd2_u10(a);
1416 #else
1417 simde__m128d_private
1418 r_,
1419 a_ = simde__m128d_to_private(a);
1420
1421 SIMDE_VECTORIZE
1422 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1423 r_.f64[i] = simde_math_atanh(a_.f64[i]);
1424 }
1425
1426 return simde__m128d_from_private(r_);
1427 #endif
1428 }
1429 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1430 #undef _mm_atanh_pd
1431 #define _mm_atanh_pd(a) simde_mm_atanh_pd(a)
1432 #endif
1433
1434 SIMDE_FUNCTION_ATTRIBUTES
1435 simde__m256
simde_mm256_atanh_ps(simde__m256 a)1436 simde_mm256_atanh_ps (simde__m256 a) {
1437 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
1438 return _mm256_atanh_ps(a);
1439 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
1440 return Sleef_atanhf8_u10(a);
1441 #else
1442 simde__m256_private
1443 r_,
1444 a_ = simde__m256_to_private(a);
1445
1446 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
1447 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
1448 r_.m128[i] = simde_mm_atanh_ps(a_.m128[i]);
1449 }
1450 #else
1451 SIMDE_VECTORIZE
1452 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1453 r_.f32[i] = simde_math_atanhf(a_.f32[i]);
1454 }
1455 #endif
1456
1457 return simde__m256_from_private(r_);
1458 #endif
1459 }
1460 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1461 #undef _mm256_atanh_ps
1462 #define _mm256_atanh_ps(a) simde_mm256_atanh_ps(a)
1463 #endif
1464
1465
1466 SIMDE_FUNCTION_ATTRIBUTES
1467 simde__m256d
simde_mm256_atanh_pd(simde__m256d a)1468 simde_mm256_atanh_pd (simde__m256d a) {
1469 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
1470 return _mm256_atanh_pd(a);
1471 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
1472 return Sleef_atanhd4_u10(a);
1473 #else
1474 simde__m256d_private
1475 r_,
1476 a_ = simde__m256d_to_private(a);
1477
1478 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
1479 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
1480 r_.m128d[i] = simde_mm_atanh_pd(a_.m128d[i]);
1481 }
1482 #else
1483 SIMDE_VECTORIZE
1484 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1485 r_.f64[i] = simde_math_atanh(a_.f64[i]);
1486 }
1487 #endif
1488
1489 return simde__m256d_from_private(r_);
1490 #endif
1491 }
1492 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1493 #undef _mm256_atanh_pd
1494 #define _mm256_atanh_pd(a) simde_mm256_atanh_pd(a)
1495 #endif
1496
1497 SIMDE_FUNCTION_ATTRIBUTES
1498 simde__m512
simde_mm512_atanh_ps(simde__m512 a)1499 simde_mm512_atanh_ps (simde__m512 a) {
1500 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1501 return _mm512_atanh_ps(a);
1502 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
1503 return Sleef_atanhf16_u10(a);
1504 #else
1505 simde__m512_private
1506 r_,
1507 a_ = simde__m512_to_private(a);
1508
1509 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
1510 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
1511 r_.m256[i] = simde_mm256_atanh_ps(a_.m256[i]);
1512 }
1513 #else
1514 SIMDE_VECTORIZE
1515 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1516 r_.f32[i] = simde_math_atanhf(a_.f32[i]);
1517 }
1518 #endif
1519
1520 return simde__m512_from_private(r_);
1521 #endif
1522 }
1523 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1524 #undef _mm512_atanh_ps
1525 #define _mm512_atanh_ps(a) simde_mm512_atanh_ps(a)
1526 #endif
1527
1528 SIMDE_FUNCTION_ATTRIBUTES
1529 simde__m512d
simde_mm512_atanh_pd(simde__m512d a)1530 simde_mm512_atanh_pd (simde__m512d a) {
1531 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1532 return _mm512_atanh_pd(a);
1533 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
1534 return Sleef_atanhd8_u10(a);
1535 #else
1536 simde__m512d_private
1537 r_,
1538 a_ = simde__m512d_to_private(a);
1539
1540 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
1541 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
1542 r_.m256d[i] = simde_mm256_atanh_pd(a_.m256d[i]);
1543 }
1544 #else
1545 SIMDE_VECTORIZE
1546 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1547 r_.f64[i] = simde_math_atanh(a_.f64[i]);
1548 }
1549 #endif
1550
1551 return simde__m512d_from_private(r_);
1552 #endif
1553 }
1554 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1555 #undef _mm512_atanh_pd
1556 #define _mm512_atanh_pd(a) simde_mm512_atanh_pd(a)
1557 #endif
1558
1559 SIMDE_FUNCTION_ATTRIBUTES
1560 simde__m512
simde_mm512_mask_atanh_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)1561 simde_mm512_mask_atanh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
1562 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1563 return _mm512_mask_atanh_ps(src, k, a);
1564 #else
1565 return simde_mm512_mask_mov_ps(src, k, simde_mm512_atanh_ps(a));
1566 #endif
1567 }
1568 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1569 #undef _mm512_mask_atanh_ps
1570 #define _mm512_mask_atanh_ps(src, k, a) simde_mm512_mask_atanh_ps(src, k, a)
1571 #endif
1572
1573 SIMDE_FUNCTION_ATTRIBUTES
1574 simde__m512d
simde_mm512_mask_atanh_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)1575 simde_mm512_mask_atanh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
1576 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1577 return _mm512_mask_atanh_pd(src, k, a);
1578 #else
1579 return simde_mm512_mask_mov_pd(src, k, simde_mm512_atanh_pd(a));
1580 #endif
1581 }
1582 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1583 #undef _mm512_mask_atanh_pd
1584 #define _mm512_mask_atanh_pd(src, k, a) simde_mm512_mask_atanh_pd(src, k, a)
1585 #endif
1586
1587 SIMDE_FUNCTION_ATTRIBUTES
1588 simde__m128
simde_mm_cbrt_ps(simde__m128 a)1589 simde_mm_cbrt_ps (simde__m128 a) {
1590 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
1591 return _mm_cbrt_ps(a);
1592 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
1593 return Sleef_cbrtf4_u10(a);
1594 #else
1595 simde__m128_private
1596 r_,
1597 a_ = simde__m128_to_private(a);
1598
1599 SIMDE_VECTORIZE
1600 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1601 r_.f32[i] = simde_math_cbrtf(a_.f32[i]);
1602 }
1603
1604 return simde__m128_from_private(r_);
1605 #endif
1606 }
1607 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1608 #undef _mm_cbrt_ps
1609 #define _mm_cbrt_ps(a) simde_mm_cbrt_ps(a)
1610 #endif
1611
1612 SIMDE_FUNCTION_ATTRIBUTES
1613 simde__m128d
simde_mm_cbrt_pd(simde__m128d a)1614 simde_mm_cbrt_pd (simde__m128d a) {
1615 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
1616 return _mm_cbrt_pd(a);
1617 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
1618 return Sleef_cbrtd2_u10(a);
1619 #else
1620 simde__m128d_private
1621 r_,
1622 a_ = simde__m128d_to_private(a);
1623
1624 SIMDE_VECTORIZE
1625 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1626 r_.f64[i] = simde_math_cbrt(a_.f64[i]);
1627 }
1628
1629 return simde__m128d_from_private(r_);
1630 #endif
1631 }
1632 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1633 #undef _mm_cbrt_pd
1634 #define _mm_cbrt_pd(a) simde_mm_cbrt_pd(a)
1635 #endif
1636
1637 SIMDE_FUNCTION_ATTRIBUTES
1638 simde__m256
simde_mm256_cbrt_ps(simde__m256 a)1639 simde_mm256_cbrt_ps (simde__m256 a) {
1640 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
1641 return _mm256_cbrt_ps(a);
1642 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
1643 return Sleef_cbrtf8_u10(a);
1644 #else
1645 simde__m256_private
1646 r_,
1647 a_ = simde__m256_to_private(a);
1648
1649 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
1650 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
1651 r_.m128[i] = simde_mm_cbrt_ps(a_.m128[i]);
1652 }
1653 #else
1654 SIMDE_VECTORIZE
1655 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1656 r_.f32[i] = simde_math_cbrtf(a_.f32[i]);
1657 }
1658 #endif
1659
1660 return simde__m256_from_private(r_);
1661 #endif
1662 }
1663 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1664 #undef _mm256_cbrt_ps
1665 #define _mm256_cbrt_ps(a) simde_mm256_cbrt_ps(a)
1666 #endif
1667
1668
1669 SIMDE_FUNCTION_ATTRIBUTES
1670 simde__m256d
simde_mm256_cbrt_pd(simde__m256d a)1671 simde_mm256_cbrt_pd (simde__m256d a) {
1672 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
1673 return _mm256_cbrt_pd(a);
1674 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
1675 return Sleef_cbrtd4_u10(a);
1676 #else
1677 simde__m256d_private
1678 r_,
1679 a_ = simde__m256d_to_private(a);
1680
1681 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
1682 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
1683 r_.m128d[i] = simde_mm_cbrt_pd(a_.m128d[i]);
1684 }
1685 #else
1686 SIMDE_VECTORIZE
1687 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1688 r_.f64[i] = simde_math_cbrt(a_.f64[i]);
1689 }
1690 #endif
1691
1692 return simde__m256d_from_private(r_);
1693 #endif
1694 }
1695 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1696 #undef _mm256_cbrt_pd
1697 #define _mm256_cbrt_pd(a) simde_mm256_cbrt_pd(a)
1698 #endif
1699
1700 SIMDE_FUNCTION_ATTRIBUTES
1701 simde__m512
simde_mm512_cbrt_ps(simde__m512 a)1702 simde_mm512_cbrt_ps (simde__m512 a) {
1703 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1704 return _mm512_cbrt_ps(a);
1705 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
1706 return Sleef_cbrtf16_u10(a);
1707 #else
1708 simde__m512_private
1709 r_,
1710 a_ = simde__m512_to_private(a);
1711
1712 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
1713 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
1714 r_.m256[i] = simde_mm256_cbrt_ps(a_.m256[i]);
1715 }
1716 #else
1717 SIMDE_VECTORIZE
1718 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1719 r_.f32[i] = simde_math_cbrtf(a_.f32[i]);
1720 }
1721 #endif
1722
1723 return simde__m512_from_private(r_);
1724 #endif
1725 }
1726 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1727 #undef _mm512_cbrt_ps
1728 #define _mm512_cbrt_ps(a) simde_mm512_cbrt_ps(a)
1729 #endif
1730
1731 SIMDE_FUNCTION_ATTRIBUTES
1732 simde__m512d
simde_mm512_cbrt_pd(simde__m512d a)1733 simde_mm512_cbrt_pd (simde__m512d a) {
1734 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1735 return _mm512_cbrt_pd(a);
1736 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
1737 return Sleef_cbrtd8_u10(a);
1738 #else
1739 simde__m512d_private
1740 r_,
1741 a_ = simde__m512d_to_private(a);
1742
1743 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
1744 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
1745 r_.m256d[i] = simde_mm256_cbrt_pd(a_.m256d[i]);
1746 }
1747 #else
1748 SIMDE_VECTORIZE
1749 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1750 r_.f64[i] = simde_math_cbrt(a_.f64[i]);
1751 }
1752 #endif
1753
1754 return simde__m512d_from_private(r_);
1755 #endif
1756 }
1757 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1758 #undef _mm512_cbrt_pd
1759 #define _mm512_cbrt_pd(a) simde_mm512_cbrt_pd(a)
1760 #endif
1761
1762 SIMDE_FUNCTION_ATTRIBUTES
1763 simde__m512
simde_mm512_mask_cbrt_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)1764 simde_mm512_mask_cbrt_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
1765 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1766 return _mm512_mask_cbrt_ps(src, k, a);
1767 #else
1768 return simde_mm512_mask_mov_ps(src, k, simde_mm512_cbrt_ps(a));
1769 #endif
1770 }
1771 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1772 #undef _mm512_mask_cbrt_ps
1773 #define _mm512_mask_cbrt_ps(src, k, a) simde_mm512_mask_cbrt_ps(src, k, a)
1774 #endif
1775
1776 SIMDE_FUNCTION_ATTRIBUTES
1777 simde__m512d
simde_mm512_mask_cbrt_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)1778 simde_mm512_mask_cbrt_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
1779 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1780 return _mm512_mask_cbrt_pd(src, k, a);
1781 #else
1782 return simde_mm512_mask_mov_pd(src, k, simde_mm512_cbrt_pd(a));
1783 #endif
1784 }
1785 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1786 #undef _mm512_mask_cbrt_pd
1787 #define _mm512_mask_cbrt_pd(src, k, a) simde_mm512_mask_cbrt_pd(src, k, a)
1788 #endif
1789
1790 SIMDE_FUNCTION_ATTRIBUTES
1791 simde__m128
simde_mm_cexp_ps(simde__m128 a)1792 simde_mm_cexp_ps (simde__m128 a) {
1793 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
1794 return _mm_cexp_ps(a);
1795 #else
1796 simde__m128_private
1797 r_,
1798 a_ = simde__m128_to_private(a);
1799
1800 SIMDE_VECTORIZE
1801 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) {
1802 simde_cfloat32 val = simde_math_cexpf(SIMDE_MATH_CMPLXF(a_.f32[i], a_.f32[i+1]));
1803 r_.f32[ i ] = simde_math_crealf(val);
1804 r_.f32[i + 1] = simde_math_cimagf(val);
1805 }
1806
1807 return simde__m128_from_private(r_);
1808 #endif
1809 }
1810 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1811 #undef _mm_cexp_ps
1812 #define _mm_cexp_ps(a) simde_mm_cexp_ps(a)
1813 #endif
1814
1815 SIMDE_FUNCTION_ATTRIBUTES
1816 simde__m256
simde_mm256_cexp_ps(simde__m256 a)1817 simde_mm256_cexp_ps (simde__m256 a) {
1818 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
1819 return _mm256_cexp_ps(a);
1820 #else
1821 simde__m256_private
1822 r_,
1823 a_ = simde__m256_to_private(a);
1824
1825 SIMDE_VECTORIZE
1826 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) {
1827 simde_cfloat32 val = simde_math_cexpf(SIMDE_MATH_CMPLXF(a_.f32[i], a_.f32[i+1]));
1828 r_.f32[ i ] = simde_math_crealf(val);
1829 r_.f32[i + 1] = simde_math_cimagf(val);
1830 }
1831
1832 return simde__m256_from_private(r_);
1833 #endif
1834 }
1835 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1836 #undef _mm256_cexp_ps
1837 #define _mm256_cexp_ps(a) simde_mm256_cexp_ps(a)
1838 #endif
1839
1840 SIMDE_FUNCTION_ATTRIBUTES
1841 simde__m128
simde_mm_cos_ps(simde__m128 a)1842 simde_mm_cos_ps (simde__m128 a) {
1843 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
1844 return _mm_cos_ps(a);
1845 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
1846 #if SIMDE_ACCURACY_PREFERENCE > 1
1847 return Sleef_cosf4_u10(a);
1848 #else
1849 return Sleef_cosf4_u35(a);
1850 #endif
1851 #else
1852 simde__m128_private
1853 r_,
1854 a_ = simde__m128_to_private(a);
1855
1856 SIMDE_VECTORIZE
1857 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1858 r_.f32[i] = simde_math_cosf(a_.f32[i]);
1859 }
1860
1861 return simde__m128_from_private(r_);
1862 #endif
1863 }
1864 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1865 #undef _mm_cos_ps
1866 #define _mm_cos_ps(a) simde_mm_cos_ps(a)
1867 #endif
1868
1869 SIMDE_FUNCTION_ATTRIBUTES
1870 simde__m128d
simde_mm_cos_pd(simde__m128d a)1871 simde_mm_cos_pd (simde__m128d a) {
1872 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
1873 return _mm_cos_pd(a);
1874 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
1875 #if SIMDE_ACCURACY_PREFERENCE > 1
1876 return Sleef_cosd2_u10(a);
1877 #else
1878 return Sleef_cosd2_u35(a);
1879 #endif
1880 #else
1881 simde__m128d_private
1882 r_,
1883 a_ = simde__m128d_to_private(a);
1884
1885 SIMDE_VECTORIZE
1886 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1887 r_.f64[i] = simde_math_cos(a_.f64[i]);
1888 }
1889
1890 return simde__m128d_from_private(r_);
1891 #endif
1892 }
1893 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1894 #undef _mm_cos_pd
1895 #define _mm_cos_pd(a) simde_mm_cos_pd(a)
1896 #endif
1897
1898 SIMDE_FUNCTION_ATTRIBUTES
1899 simde__m256
simde_mm256_cos_ps(simde__m256 a)1900 simde_mm256_cos_ps (simde__m256 a) {
1901 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
1902 return _mm256_cos_ps(a);
1903 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
1904 #if SIMDE_ACCURACY_PREFERENCE > 1
1905 return Sleef_cosf8_u10(a);
1906 #else
1907 return Sleef_cosf8_u35(a);
1908 #endif
1909 #else
1910 simde__m256_private
1911 r_,
1912 a_ = simde__m256_to_private(a);
1913
1914 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
1915 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
1916 r_.m128[i] = simde_mm_cos_ps(a_.m128[i]);
1917 }
1918 #else
1919 SIMDE_VECTORIZE
1920 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1921 r_.f32[i] = simde_math_cosf(a_.f32[i]);
1922 }
1923 #endif
1924
1925 return simde__m256_from_private(r_);
1926 #endif
1927 }
1928 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1929 #undef _mm256_cos_ps
1930 #define _mm256_cos_ps(a) simde_mm256_cos_ps(a)
1931 #endif
1932
1933
/* Cosine of each float64 lane of `a` (radians); same dispatch scheme as
 * simde_mm256_cos_ps. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_cos_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_cos_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_cosd4_u10(a);
    #else
      return Sleef_cosd4_u35(a);
    #endif
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      /* Delegate to the 128-bit implementation. */
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_cos_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_cos(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cos_pd
  #define _mm256_cos_pd(a) simde_mm256_cos_pd(a)
#endif
1968
/* Cosine of each float32 lane of the 512-bit vector `a` (radians).
 * Falls back to per-256-bit halves, then to a scalar loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_cos_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_cos_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_cosf16_u10(a);
    #else
      return Sleef_cosf16_u35(a);
    #endif
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_cos_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_cosf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_cos_ps
  #define _mm512_cos_ps(a) simde_mm512_cos_ps(a)
#endif
2003
/* Cosine of each float64 lane of the 512-bit vector `a` (radians). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_cos_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_cos_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_cosd8_u10(a);
    #else
      return Sleef_cosd8_u35(a);
    #endif
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_cos_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_cos(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_cos_pd
  #define _mm512_cos_pd(a) simde_mm512_cos_pd(a)
#endif
2038
/* Masked cosine: lanes whose bit in `k` is set receive cos(a); the rest
 * are copied from `src` (via simde_mm512_mask_mov_ps). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_cos_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_cos_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_cos_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_cos_ps
  #define _mm512_mask_cos_ps(src, k, a) simde_mm512_mask_cos_ps(src, k, a)
#endif
2052
/* Masked double-precision cosine; see simde_mm512_mask_cos_ps. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_cos_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_cos_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_cos_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_cos_pd
  #define _mm512_mask_cos_pd(src, k, a) simde_mm512_mask_cos_pd(src, k, a)
#endif
2066
2067 SIMDE_FUNCTION_ATTRIBUTES
2068 simde__m128
simde_x_mm_deg2rad_ps(simde__m128 a)2069 simde_x_mm_deg2rad_ps(simde__m128 a) {
2070 #if SIMDE_NATURAL_VECTOR_SIZE_GE(128)
2071 return simde_mm_mul_ps(a, simde_mm_set1_ps(SIMDE_MATH_PI_OVER_180F));
2072 #else
2073 simde__m128_private
2074 r_,
2075 a_ = simde__m128_to_private(a);
2076
2077 #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2078 r_.neon_f32 = vmulq_n_f32(a_.neon_i32, SIMDE_MATH_PI_OVER_180F);
2079 #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
2080 r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F;
2081 #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2082 const __typeof__(r_.f32) tmp = { SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F };
2083 r_.f32 = a_.f32 * tmp;
2084 #else
2085 SIMDE_VECTORIZE
2086 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
2087 r_.f32[i] = simde_math_deg2radf(a_.f32[i]);
2088 }
2089
2090 #endif
2091 return simde__m128_from_private(r_);
2092 #endif
2093 }
2094
2095 SIMDE_FUNCTION_ATTRIBUTES
2096 simde__m128d
simde_x_mm_deg2rad_pd(simde__m128d a)2097 simde_x_mm_deg2rad_pd(simde__m128d a) {
2098 #if SIMDE_NATURAL_VECTOR_SIZE_GE(128)
2099 return simde_mm_mul_pd(a, simde_mm_set1_pd(SIMDE_MATH_PI_OVER_180));
2100 #else
2101 simde__m128d_private
2102 r_,
2103 a_ = simde__m128d_to_private(a);
2104
2105 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
2106 r_.neon_f64 = vmulq_n_f64(a_.neon_i64, SIMDE_MATH_PI_OVER_180);
2107 #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
2108 r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180;
2109 #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2110 const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 };
2111 r_.f64 = a_.f64 * tmp;
2112 #else
2113 SIMDE_VECTORIZE
2114 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
2115 r_.f64[i] = simde_math_deg2rad(a_.f64[i]);
2116 }
2117
2118 #endif
2119 return simde__m128d_from_private(r_);
2120 #endif
2121 }
2122
/* Internal helper: 256-bit degrees-to-radians conversion (float32 lanes). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_x_mm256_deg2rad_ps(simde__m256 a) {
  #if SIMDE_NATURAL_VECTOR_SIZE_GE(256)
    return simde_mm256_mul_ps(a, simde_mm256_set1_ps(SIMDE_MATH_PI_OVER_180F));
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      /* Delegate to the 128-bit helper per half. */
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_x_mm_deg2rad_ps(a_.m128[i]);
      }
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
      r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F;
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      const __typeof__(r_.f32) tmp = {
        SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F,
        SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F
      };
      r_.f32 = a_.f32 * tmp;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_deg2radf(a_.f32[i]);
      }

    #endif
    return simde__m256_from_private(r_);
  #endif
}
2155
/* Internal helper: 256-bit degrees-to-radians conversion (float64 lanes). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_x_mm256_deg2rad_pd(simde__m256d a) {
  #if SIMDE_NATURAL_VECTOR_SIZE_GE(256)
    return simde_mm256_mul_pd(a, simde_mm256_set1_pd(SIMDE_MATH_PI_OVER_180));
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      /* Delegate to the 128-bit helper per half. */
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_x_mm_deg2rad_pd(a_.m128d[i]);
      }
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
      r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180;
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 };
      r_.f64 = a_.f64 * tmp;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_deg2rad(a_.f64[i]);
      }

    #endif
    return simde__m256d_from_private(r_);
  #endif
}
2185
/* Internal helper: 512-bit degrees-to-radians conversion (float32 lanes). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_x_mm512_deg2rad_ps(simde__m512 a) {
  #if SIMDE_NATURAL_VECTOR_SIZE_GE(512)
    return simde_mm512_mul_ps(a, simde_mm512_set1_ps(SIMDE_MATH_PI_OVER_180F));
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      /* Delegate to the 256-bit helper per half. */
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_x_mm256_deg2rad_ps(a_.m256[i]);
      }
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
      r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F;
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      const __typeof__(r_.f32) tmp = {
        SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F,
        SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F,
        SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F,
        SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F
      };
      r_.f32 = a_.f32 * tmp;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_deg2radf(a_.f32[i]);
      }

    #endif
    return simde__m512_from_private(r_);
  #endif
}
2220
/* Internal helper: 512-bit degrees-to-radians conversion (float64 lanes). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_x_mm512_deg2rad_pd(simde__m512d a) {
  #if SIMDE_NATURAL_VECTOR_SIZE_GE(512)
    return simde_mm512_mul_pd(a, simde_mm512_set1_pd(SIMDE_MATH_PI_OVER_180));
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      /* Delegate to the 256-bit helper per half. */
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_x_mm256_deg2rad_pd(a_.m256d[i]);
      }
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
      r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180;
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      const __typeof__(r_.f64) tmp = {
        SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180,
        SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180
      };
      r_.f64 = a_.f64 * tmp;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_deg2rad(a_.f64[i]);
      }

    #endif
    return simde__m512d_from_private(r_);
  #endif
}
2253
/* Cosine of each float32 lane with the input expressed in DEGREES.
 * Non-native paths convert to radians first (deg2rad helper or
 * simde_math_deg2radf). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cosd_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cosd_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_cosf4_u10(simde_x_mm_deg2rad_ps(a));
    #else
      return Sleef_cosf4_u35(simde_x_mm_deg2rad_ps(a));
    #endif
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i]));
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_cosd_ps
  #define _mm_cosd_ps(a) simde_mm_cosd_ps(a)
#endif
2282
/* Cosine of each float64 lane with the input expressed in DEGREES. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_cosd_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cosd_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_cosd2_u10(simde_x_mm_deg2rad_pd(a));
    #else
      return Sleef_cosd2_u35(simde_x_mm_deg2rad_pd(a));
    #endif
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i]));
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_cosd_pd
  #define _mm_cosd_pd(a) simde_mm_cosd_pd(a)
#endif
2311
/* 256-bit cosine with degree input (float32 lanes). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_cosd_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_cosd_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_cosf8_u10(simde_x_mm256_deg2rad_ps(a));
    #else
      return Sleef_cosf8_u35(simde_x_mm256_deg2rad_ps(a));
    #endif
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_cosd_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i]));
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cosd_ps
  #define _mm256_cosd_ps(a) simde_mm256_cosd_ps(a)
#endif
2346
/* 256-bit cosine with degree input (float64 lanes). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_cosd_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_cosd_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_cosd4_u10(simde_x_mm256_deg2rad_pd(a));
    #else
      return Sleef_cosd4_u35(simde_x_mm256_deg2rad_pd(a));
    #endif
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_cosd_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i]));
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cosd_pd
  #define _mm256_cosd_pd(a) simde_mm256_cosd_pd(a)
#endif
2381
/* 512-bit cosine with degree input (float32 lanes). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_cosd_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_cosd_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_cosf16_u10(simde_x_mm512_deg2rad_ps(a));
    #else
      return Sleef_cosf16_u35(simde_x_mm512_deg2rad_ps(a));
    #endif
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_cosd_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i]));
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_cosd_ps
  #define _mm512_cosd_ps(a) simde_mm512_cosd_ps(a)
#endif
2416
/* 512-bit cosine with degree input (float64 lanes). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_cosd_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_cosd_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_cosd8_u10(simde_x_mm512_deg2rad_pd(a));
    #else
      return Sleef_cosd8_u35(simde_x_mm512_deg2rad_pd(a));
    #endif
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_cosd_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i]));
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_cosd_pd
  #define _mm512_cosd_pd(a) simde_mm512_cosd_pd(a)
#endif
2451
/* Masked degree-input cosine: masked-out lanes are copied from `src`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_cosd_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_cosd_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_cosd_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_cosd_ps
  #define _mm512_mask_cosd_ps(src, k, a) simde_mm512_mask_cosd_ps(src, k, a)
#endif
2465
/* Masked degree-input double-precision cosine. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_cosd_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_cosd_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_cosd_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_cosd_pd
  #define _mm512_mask_cosd_pd(src, k, a) simde_mm512_mask_cosd_pd(src, k, a)
#endif
2479
/* Hyperbolic cosine of each float32 lane.  Sleef only ships a u10
 * variant for cosh, so there is no accuracy-preference switch here. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cosh_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cosh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_coshf4_u10(a);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_coshf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_cosh_ps
  #define _mm_cosh_ps(a) simde_mm_cosh_ps(a)
#endif
2504
/* Hyperbolic cosine of each float64 lane. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_cosh_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cosh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_coshd2_u10(a);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_cosh(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_cosh_pd
  #define _mm_cosh_pd(a) simde_mm_cosh_pd(a)
#endif
2529
/* 256-bit hyperbolic cosine (float32 lanes). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_cosh_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_cosh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_coshf8_u10(a);
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_cosh_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_coshf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cosh_ps
  #define _mm256_cosh_ps(a) simde_mm256_cosh_ps(a)
#endif
2560
2561
/* 256-bit hyperbolic cosine (float64 lanes). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_cosh_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_cosh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_coshd4_u10(a);
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_cosh_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_cosh(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cosh_pd
  #define _mm256_cosh_pd(a) simde_mm256_cosh_pd(a)
#endif
2592
/* 512-bit hyperbolic cosine (float32 lanes). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_cosh_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_cosh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_coshf16_u10(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_cosh_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_coshf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_cosh_ps
  #define _mm512_cosh_ps(a) simde_mm512_cosh_ps(a)
#endif
2623
/* 512-bit hyperbolic cosine (float64 lanes). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_cosh_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_cosh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_coshd8_u10(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_cosh_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_cosh(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_cosh_pd
  #define _mm512_cosh_pd(a) simde_mm512_cosh_pd(a)
#endif
2654
/* Masked hyperbolic cosine: masked-out lanes are copied from `src`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_cosh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_cosh_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_cosh_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_cosh_ps
  #define _mm512_mask_cosh_ps(src, k, a) simde_mm512_mask_cosh_ps(src, k, a)
#endif
2668
/* Masked double-precision hyperbolic cosine. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_cosh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_cosh_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_cosh_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_cosh_pd
  #define _mm512_mask_cosh_pd(src, k, a) simde_mm512_mask_cosh_pd(src, k, a)
#endif
2682
2683 SIMDE_FUNCTION_ATTRIBUTES
2684 simde__m128i
simde_mm_div_epi8(simde__m128i a,simde__m128i b)2685 simde_mm_div_epi8 (simde__m128i a, simde__m128i b) {
2686 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
2687 return _mm_div_epi8(a, b);
2688 #else
2689 simde__m128i_private
2690 r_,
2691 a_ = simde__m128i_to_private(a),
2692 b_ = simde__m128i_to_private(b);
2693
2694 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2695 r_.i8 = a_.i8 / b_.i8;
2696 #elif defined(SIMDE_WASM_SIMD128_NATIVE)
2697 r_.wasm_v128 = wasm_i8x4_div(a_.wasm_v128, b_.wasm_v128);
2698 #else
2699 SIMDE_VECTORIZE
2700 for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
2701 r_.i8[i] = a_.i8[i] / b_.i8[i];
2702 }
2703 #endif
2704
2705 return simde__m128i_from_private(r_);
2706 #endif
2707 }
2708 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2709 #undef _mm_div_epi8
2710 #define _mm_div_epi8(a, b) simde_mm_div_epi8((a), (b))
2711 #endif
2712
2713 SIMDE_FUNCTION_ATTRIBUTES
2714 simde__m128i
simde_mm_div_epi16(simde__m128i a,simde__m128i b)2715 simde_mm_div_epi16 (simde__m128i a, simde__m128i b) {
2716 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
2717 return _mm_div_epi16(a, b);
2718 #else
2719 simde__m128i_private
2720 r_,
2721 a_ = simde__m128i_to_private(a),
2722 b_ = simde__m128i_to_private(b);
2723
2724 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2725 r_.i16 = a_.i16 / b_.i16;
2726 #elif defined(SIMDE_WASM_SIMD128_NATIVE)
2727 r_.wasm_v128 = wasm_i16x4_div(a_.wasm_v128, b_.wasm_v128);
2728 #else
2729 SIMDE_VECTORIZE
2730 for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
2731 r_.i16[i] = a_.i16[i] / b_.i16[i];
2732 }
2733 #endif
2734
2735 return simde__m128i_from_private(r_);
2736 #endif
2737 }
2738 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2739 #undef _mm_div_epi16
2740 #define _mm_div_epi16(a, b) simde_mm_div_epi16((a), (b))
2741 #endif
2742
2743 SIMDE_FUNCTION_ATTRIBUTES
2744 simde__m128i
simde_mm_div_epi32(simde__m128i a,simde__m128i b)2745 simde_mm_div_epi32 (simde__m128i a, simde__m128i b) {
2746 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
2747 return _mm_div_epi32(a, b);
2748 #else
2749 simde__m128i_private
2750 r_,
2751 a_ = simde__m128i_to_private(a),
2752 b_ = simde__m128i_to_private(b);
2753
2754 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2755 r_.i32 = a_.i32 / b_.i32;
2756 #elif defined(SIMDE_WASM_SIMD128_NATIVE)
2757 r_.wasm_v128 = wasm_i32x4_div(a_.wasm_v128, b_.wasm_v128);
2758 #else
2759 SIMDE_VECTORIZE
2760 for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
2761 r_.i32[i] = a_.i32[i] / b_.i32[i];
2762 }
2763 #endif
2764
2765 return simde__m128i_from_private(r_);
2766 #endif
2767 }
2768 #define simde_mm_idiv_epi32(a, b) simde_mm_div_epi32(a, b)
2769 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2770 #undef _mm_div_epi32
2771 #define _mm_div_epi32(a, b) simde_mm_div_epi32(a, b)
2772 #undef _mm_idiv_epi32
2773 #define _mm_idiv_epi32(a, b) simde_mm_div_epi32(a, b)
2774 #endif
2775
2776 SIMDE_FUNCTION_ATTRIBUTES
2777 simde__m128i
simde_mm_div_epi64(simde__m128i a,simde__m128i b)2778 simde_mm_div_epi64 (simde__m128i a, simde__m128i b) {
2779 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
2780 return _mm_div_epi64(a, b);
2781 #else
2782 simde__m128i_private
2783 r_,
2784 a_ = simde__m128i_to_private(a),
2785 b_ = simde__m128i_to_private(b);
2786
2787 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2788 r_.i64 = a_.i64 / b_.i64;
2789 #elif defined(SIMDE_WASM_SIMD128_NATIVE)
2790 r_.wasm_v128 = wasm_i64x4_div(a_.wasm_v128, b_.wasm_v128);
2791 #else
2792 SIMDE_VECTORIZE
2793 for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
2794 r_.i64[i] = a_.i64[i] / b_.i64[i];
2795 }
2796 #endif
2797
2798 return simde__m128i_from_private(r_);
2799 #endif
2800 }
2801 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2802 #undef _mm_div_epi64
2803 #define _mm_div_epi64(a, b) simde_mm_div_epi64((a), (b))
2804 #endif
2805
2806 SIMDE_FUNCTION_ATTRIBUTES
2807 simde__m128i
simde_mm_div_epu8(simde__m128i a,simde__m128i b)2808 simde_mm_div_epu8 (simde__m128i a, simde__m128i b) {
2809 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
2810 return _mm_div_epu8(a, b);
2811 #else
2812 simde__m128i_private
2813 r_,
2814 a_ = simde__m128i_to_private(a),
2815 b_ = simde__m128i_to_private(b);
2816
2817 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2818 r_.u8 = a_.u8 / b_.u8;
2819 #elif defined(SIMDE_WASM_SIMD128_NATIVE)
2820 r_.wasm_v128 = wasm_u8x16_div(a_.wasm_v128, b_.wasm_v128);
2821 #else
2822 SIMDE_VECTORIZE
2823 for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
2824 r_.u8[i] = a_.u8[i] / b_.u8[i];
2825 }
2826 #endif
2827
2828 return simde__m128i_from_private(r_);
2829 #endif
2830 }
2831 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2832 #undef _mm_div_epu8
2833 #define _mm_div_epu8(a, b) simde_mm_div_epu8((a), (b))
2834 #endif
2835
2836 SIMDE_FUNCTION_ATTRIBUTES
2837 simde__m128i
simde_mm_div_epu16(simde__m128i a,simde__m128i b)2838 simde_mm_div_epu16 (simde__m128i a, simde__m128i b) {
2839 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
2840 return _mm_div_epu16(a, b);
2841 #else
2842 simde__m128i_private
2843 r_,
2844 a_ = simde__m128i_to_private(a),
2845 b_ = simde__m128i_to_private(b);
2846
2847 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2848 r_.u16 = a_.u16 / b_.u16;
2849 #elif defined(SIMDE_WASM_SIMD128_NATIVE)
2850 r_.wasm_v128 = wasm_u16x16_div(a_.wasm_v128, b_.wasm_v128);
2851 #else
2852 SIMDE_VECTORIZE
2853 for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
2854 r_.u16[i] = a_.u16[i] / b_.u16[i];
2855 }
2856 #endif
2857
2858 return simde__m128i_from_private(r_);
2859 #endif
2860 }
2861 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2862 #undef _mm_div_epu16
2863 #define _mm_div_epu16(a, b) simde_mm_div_epu16((a), (b))
2864 #endif
2865
2866 SIMDE_FUNCTION_ATTRIBUTES
2867 simde__m128i
simde_mm_div_epu32(simde__m128i a,simde__m128i b)2868 simde_mm_div_epu32 (simde__m128i a, simde__m128i b) {
2869 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
2870 return _mm_div_epu32(a, b);
2871 #else
2872 simde__m128i_private
2873 r_,
2874 a_ = simde__m128i_to_private(a),
2875 b_ = simde__m128i_to_private(b);
2876
2877 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2878 r_.u32 = a_.u32 / b_.u32;
2879 #elif defined(SIMDE_WASM_SIMD128_NATIVE)
2880 r_.wasm_v128 = wasm_u32x16_div(a_.wasm_v128, b_.wasm_v128);
2881 #else
2882 SIMDE_VECTORIZE
2883 for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
2884 r_.u32[i] = a_.u32[i] / b_.u32[i];
2885 }
2886 #endif
2887
2888 return simde__m128i_from_private(r_);
2889 #endif
2890 }
2891 #define simde_mm_udiv_epi32(a, b) simde_mm_div_epu32(a, b)
2892 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2893 #undef _mm_div_epu32
2894 #define _mm_div_epu32(a, b) simde_mm_div_epu32(a, b)
2895 #undef _mm_udiv_epi32
2896 #define _mm_udiv_epi32(a, b) simde_mm_div_epu32(a, b)
2897 #endif
2898
2899 SIMDE_FUNCTION_ATTRIBUTES
2900 simde__m128i
simde_mm_div_epu64(simde__m128i a,simde__m128i b)2901 simde_mm_div_epu64 (simde__m128i a, simde__m128i b) {
2902 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
2903 return _mm_div_epu64(a, b);
2904 #else
2905 simde__m128i_private
2906 r_,
2907 a_ = simde__m128i_to_private(a),
2908 b_ = simde__m128i_to_private(b);
2909
2910 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2911 r_.u64 = a_.u64 / b_.u64;
2912 #elif defined(SIMDE_WASM_SIMD128_NATIVE)
2913 r_.wasm_v128 = wasm_u64x16_div(a_.wasm_v128, b_.wasm_v128);
2914 #else
2915 SIMDE_VECTORIZE
2916 for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
2917 r_.u64[i] = a_.u64[i] / b_.u64[i];
2918 }
2919 #endif
2920
2921 return simde__m128i_from_private(r_);
2922 #endif
2923 }
2924 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2925 #undef _mm_div_epu64
2926 #define _mm_div_epu64(a, b) simde_mm_div_epu64((a), (b))
2927 #endif
2928
2929 SIMDE_FUNCTION_ATTRIBUTES
2930 simde__m256i
simde_mm256_div_epi8(simde__m256i a,simde__m256i b)2931 simde_mm256_div_epi8 (simde__m256i a, simde__m256i b) {
2932 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
2933 return _mm256_div_epi8(a, b);
2934 #else
2935 simde__m256i_private
2936 r_,
2937 a_ = simde__m256i_to_private(a),
2938 b_ = simde__m256i_to_private(b);
2939
2940 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2941 r_.i8 = a_.i8 / b_.i8;
2942 #else
2943 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
2944 for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
2945 r_.m128i[i] = simde_mm_div_epi8(a_.m128i[i], b_.m128i[i]);
2946 }
2947 #else
2948 SIMDE_VECTORIZE
2949 for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
2950 r_.i8[i] = a_.i8[i] / b_.i8[i];
2951 }
2952 #endif
2953 #endif
2954
2955 return simde__m256i_from_private(r_);
2956 #endif
2957 }
2958 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2959 #undef _mm256_div_epi8
2960 #define _mm256_div_epi8(a, b) simde_mm256_div_epi8((a), (b))
2961 #endif
2962
2963 SIMDE_FUNCTION_ATTRIBUTES
2964 simde__m256i
simde_mm256_div_epi16(simde__m256i a,simde__m256i b)2965 simde_mm256_div_epi16 (simde__m256i a, simde__m256i b) {
2966 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
2967 return _mm256_div_epi16(a, b);
2968 #else
2969 simde__m256i_private
2970 r_,
2971 a_ = simde__m256i_to_private(a),
2972 b_ = simde__m256i_to_private(b);
2973
2974 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2975 r_.i16 = a_.i16 / b_.i16;
2976 #else
2977 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
2978 for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
2979 r_.m128i[i] = simde_mm_div_epi16(a_.m128i[i], b_.m128i[i]);
2980 }
2981 #else
2982 SIMDE_VECTORIZE
2983 for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
2984 r_.i16[i] = a_.i16[i] / b_.i16[i];
2985 }
2986 #endif
2987 #endif
2988
2989 return simde__m256i_from_private(r_);
2990 #endif
2991 }
2992 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2993 #undef _mm256_div_epi16
2994 #define _mm256_div_epi16(a, b) simde_mm256_div_epi16((a), (b))
2995 #endif
2996
2997 SIMDE_FUNCTION_ATTRIBUTES
2998 simde__m256i
simde_mm256_div_epi32(simde__m256i a,simde__m256i b)2999 simde_mm256_div_epi32 (simde__m256i a, simde__m256i b) {
3000 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3001 return _mm256_div_epi32(a, b);
3002 #else
3003 simde__m256i_private
3004 r_,
3005 a_ = simde__m256i_to_private(a),
3006 b_ = simde__m256i_to_private(b);
3007
3008 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3009 r_.i32 = a_.i32 / b_.i32;
3010 #else
3011 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3012 for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
3013 r_.m128i[i] = simde_mm_div_epi32(a_.m128i[i], b_.m128i[i]);
3014 }
3015 #else
3016 SIMDE_VECTORIZE
3017 for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
3018 r_.i32[i] = a_.i32[i] / b_.i32[i];
3019 }
3020 #endif
3021 #endif
3022
3023 return simde__m256i_from_private(r_);
3024 #endif
3025 }
3026 #define simde_mm256_idiv_epi32(a, b) simde_mm256_div_epi32(a, b)
3027 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3028 #undef _mm256_div_epi32
3029 #define _mm256_div_epi32(a, b) simde_mm256_div_epi32(a, b)
3030 #undef _mm256_idiv_epi32
3031 #define _mm256_idiv_epi32(a, b) simde_mm256_div_epi32(a, b)
3032 #endif
3033
3034 SIMDE_FUNCTION_ATTRIBUTES
3035 simde__m256i
simde_mm256_div_epi64(simde__m256i a,simde__m256i b)3036 simde_mm256_div_epi64 (simde__m256i a, simde__m256i b) {
3037 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3038 return _mm256_div_epi64(a, b);
3039 #else
3040 simde__m256i_private
3041 r_,
3042 a_ = simde__m256i_to_private(a),
3043 b_ = simde__m256i_to_private(b);
3044
3045 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3046 r_.i64 = a_.i64 / b_.i64;
3047 #else
3048 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3049 for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
3050 r_.m128i[i] = simde_mm_div_epi64(a_.m128i[i], b_.m128i[i]);
3051 }
3052 #else
3053 SIMDE_VECTORIZE
3054 for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
3055 r_.i64[i] = a_.i64[i] / b_.i64[i];
3056 }
3057 #endif
3058 #endif
3059
3060 return simde__m256i_from_private(r_);
3061 #endif
3062 }
3063 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3064 #undef _mm256_div_epi64
3065 #define _mm256_div_epi64(a, b) simde_mm256_div_epi64((a), (b))
3066 #endif
3067
3068 SIMDE_FUNCTION_ATTRIBUTES
3069 simde__m256i
simde_mm256_div_epu8(simde__m256i a,simde__m256i b)3070 simde_mm256_div_epu8 (simde__m256i a, simde__m256i b) {
3071 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3072 return _mm256_div_epu8(a, b);
3073 #else
3074 simde__m256i_private
3075 r_,
3076 a_ = simde__m256i_to_private(a),
3077 b_ = simde__m256i_to_private(b);
3078
3079 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3080 r_.u8 = a_.u8 / b_.u8;
3081 #else
3082 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3083 for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
3084 r_.m128i[i] = simde_mm_div_epu8(a_.m128i[i], b_.m128i[i]);
3085 }
3086 #else
3087 SIMDE_VECTORIZE
3088 for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
3089 r_.u8[i] = a_.u8[i] / b_.u8[i];
3090 }
3091 #endif
3092 #endif
3093
3094 return simde__m256i_from_private(r_);
3095 #endif
3096 }
3097 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3098 #undef _mm256_div_epu8
3099 #define _mm256_div_epu8(a, b) simde_mm256_div_epu8((a), (b))
3100 #endif
3101
3102 SIMDE_FUNCTION_ATTRIBUTES
3103 simde__m256i
simde_mm256_div_epu16(simde__m256i a,simde__m256i b)3104 simde_mm256_div_epu16 (simde__m256i a, simde__m256i b) {
3105 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3106 return _mm256_div_epu16(a, b);
3107 #else
3108 simde__m256i_private
3109 r_,
3110 a_ = simde__m256i_to_private(a),
3111 b_ = simde__m256i_to_private(b);
3112
3113 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3114 r_.u16 = a_.u16 / b_.u16;
3115 #else
3116 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3117 for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
3118 r_.m128i[i] = simde_mm_div_epu16(a_.m128i[i], b_.m128i[i]);
3119 }
3120 #else
3121 SIMDE_VECTORIZE
3122 for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
3123 r_.u16[i] = a_.u16[i] / b_.u16[i];
3124 }
3125 #endif
3126 #endif
3127
3128 return simde__m256i_from_private(r_);
3129 #endif
3130 }
3131 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3132 #undef _mm256_div_epu16
3133 #define _mm256_div_epu16(a, b) simde_mm256_div_epu16((a), (b))
3134 #endif
3135
3136 SIMDE_FUNCTION_ATTRIBUTES
3137 simde__m256i
simde_mm256_div_epu32(simde__m256i a,simde__m256i b)3138 simde_mm256_div_epu32 (simde__m256i a, simde__m256i b) {
3139 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3140 return _mm256_div_epu32(a, b);
3141 #else
3142 simde__m256i_private
3143 r_,
3144 a_ = simde__m256i_to_private(a),
3145 b_ = simde__m256i_to_private(b);
3146
3147 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3148 r_.u32 = a_.u32 / b_.u32;
3149 #else
3150 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3151 for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
3152 r_.m128i[i] = simde_mm_div_epu32(a_.m128i[i], b_.m128i[i]);
3153 }
3154 #else
3155 SIMDE_VECTORIZE
3156 for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
3157 r_.u32[i] = a_.u32[i] / b_.u32[i];
3158 }
3159 #endif
3160 #endif
3161
3162 return simde__m256i_from_private(r_);
3163 #endif
3164 }
3165 #define simde_mm256_udiv_epi32(a, b) simde_mm256_div_epu32(a, b)
3166 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3167 #undef _mm256_div_epu32
3168 #define _mm256_div_epu32(a, b) simde_mm256_div_epu32(a, b)
3169 #undef _mm256_udiv_epi32
3170 #define _mm256_udiv_epi32(a, b) simde_mm256_div_epu32(a, b)
3171 #endif
3172
3173 SIMDE_FUNCTION_ATTRIBUTES
3174 simde__m256i
simde_mm256_div_epu64(simde__m256i a,simde__m256i b)3175 simde_mm256_div_epu64 (simde__m256i a, simde__m256i b) {
3176 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3177 return _mm256_div_epu64(a, b);
3178 #else
3179 simde__m256i_private
3180 r_,
3181 a_ = simde__m256i_to_private(a),
3182 b_ = simde__m256i_to_private(b);
3183
3184 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3185 r_.u64 = a_.u64 / b_.u64;
3186 #else
3187 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3188 for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
3189 r_.m128i[i] = simde_mm_div_epu64(a_.m128i[i], b_.m128i[i]);
3190 }
3191 #else
3192 SIMDE_VECTORIZE
3193 for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
3194 r_.u64[i] = a_.u64[i] / b_.u64[i];
3195 }
3196 #endif
3197 #endif
3198
3199 return simde__m256i_from_private(r_);
3200 #endif
3201 }
3202 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3203 #undef _mm256_div_epu64
3204 #define _mm256_div_epu64(a, b) simde_mm256_div_epu64((a), (b))
3205 #endif
3206
3207 SIMDE_FUNCTION_ATTRIBUTES
3208 simde__m512i
simde_mm512_div_epi8(simde__m512i a,simde__m512i b)3209 simde_mm512_div_epi8 (simde__m512i a, simde__m512i b) {
3210 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3211 return _mm512_div_epi8(a, b);
3212 #else
3213 simde__m512i_private
3214 r_,
3215 a_ = simde__m512i_to_private(a),
3216 b_ = simde__m512i_to_private(b);
3217
3218 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3219 r_.i8 = a_.i8 / b_.i8;
3220 #else
3221 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3222 for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3223 r_.m256i[i] = simde_mm256_div_epi8(a_.m256i[i], b_.m256i[i]);
3224 }
3225 #else
3226 SIMDE_VECTORIZE
3227 for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
3228 r_.i8[i] = a_.i8[i] / b_.i8[i];
3229 }
3230 #endif
3231 #endif
3232
3233 return simde__m512i_from_private(r_);
3234 #endif
3235 }
3236 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3237 #undef _mm512_div_epi8
3238 #define _mm512_div_epi8(a, b) simde_mm512_div_epi8((a), (b))
3239 #endif
3240
3241 SIMDE_FUNCTION_ATTRIBUTES
3242 simde__m512i
simde_mm512_div_epi16(simde__m512i a,simde__m512i b)3243 simde_mm512_div_epi16 (simde__m512i a, simde__m512i b) {
3244 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3245 return _mm512_div_epi16(a, b);
3246 #else
3247 simde__m512i_private
3248 r_,
3249 a_ = simde__m512i_to_private(a),
3250 b_ = simde__m512i_to_private(b);
3251
3252 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3253 r_.i16 = a_.i16 / b_.i16;
3254 #else
3255 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3256 for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3257 r_.m256i[i] = simde_mm256_div_epi16(a_.m256i[i], b_.m256i[i]);
3258 }
3259 #else
3260 SIMDE_VECTORIZE
3261 for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
3262 r_.i16[i] = a_.i16[i] / b_.i16[i];
3263 }
3264 #endif
3265 #endif
3266
3267 return simde__m512i_from_private(r_);
3268 #endif
3269 }
3270 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3271 #undef _mm512_div_epi16
3272 #define _mm512_div_epi16(a, b) simde_mm512_div_epi16((a), (b))
3273 #endif
3274
3275 SIMDE_FUNCTION_ATTRIBUTES
3276 simde__m512i
simde_mm512_div_epi32(simde__m512i a,simde__m512i b)3277 simde_mm512_div_epi32 (simde__m512i a, simde__m512i b) {
3278 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3279 return _mm512_div_epi32(a, b);
3280 #else
3281 simde__m512i_private
3282 r_,
3283 a_ = simde__m512i_to_private(a),
3284 b_ = simde__m512i_to_private(b);
3285
3286 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3287 r_.i32 = a_.i32 / b_.i32;
3288 #else
3289 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3290 for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3291 r_.m256i[i] = simde_mm256_div_epi32(a_.m256i[i], b_.m256i[i]);
3292 }
3293 #else
3294 SIMDE_VECTORIZE
3295 for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
3296 r_.i32[i] = a_.i32[i] / b_.i32[i];
3297 }
3298 #endif
3299 #endif
3300
3301 return simde__m512i_from_private(r_);
3302 #endif
3303 }
3304 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3305 #undef _mm512_div_epi32
3306 #define _mm512_div_epi32(a, b) simde_mm512_div_epi32((a), (b))
3307 #endif
3308
3309 SIMDE_FUNCTION_ATTRIBUTES
3310 simde__m512i
simde_mm512_mask_div_epi32(simde__m512i src,simde__mmask16 k,simde__m512i a,simde__m512i b)3311 simde_mm512_mask_div_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
3312 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3313 return _mm512_mask_div_epi32(src, k, a, b);
3314 #else
3315 return simde_mm512_mask_mov_epi32(src, k, simde_mm512_div_epi32(a, b));
3316 #endif
3317 }
3318 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3319 #undef _mm512_mask_div_epi32
3320 #define _mm512_mask_div_epi32(src, k, a, b) simde_mm512_mask_div_epi32(src, k, a, b)
3321 #endif
3322
3323 SIMDE_FUNCTION_ATTRIBUTES
3324 simde__m512i
simde_mm512_div_epi64(simde__m512i a,simde__m512i b)3325 simde_mm512_div_epi64 (simde__m512i a, simde__m512i b) {
3326 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3327 return _mm512_div_epi64(a, b);
3328 #else
3329 simde__m512i_private
3330 r_,
3331 a_ = simde__m512i_to_private(a),
3332 b_ = simde__m512i_to_private(b);
3333
3334 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3335 r_.i64 = a_.i64 / b_.i64;
3336 #else
3337 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3338 for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3339 r_.m256i[i] = simde_mm256_div_epi64(a_.m256i[i], b_.m256i[i]);
3340 }
3341 #else
3342 SIMDE_VECTORIZE
3343 for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
3344 r_.i64[i] = a_.i64[i] / b_.i64[i];
3345 }
3346 #endif
3347 #endif
3348
3349 return simde__m512i_from_private(r_);
3350 #endif
3351 }
3352 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3353 #undef _mm512_div_epi64
3354 #define _mm512_div_epi64(a, b) simde_mm512_div_epi64((a), (b))
3355 #endif
3356
3357 SIMDE_FUNCTION_ATTRIBUTES
3358 simde__m512i
simde_mm512_div_epu8(simde__m512i a,simde__m512i b)3359 simde_mm512_div_epu8 (simde__m512i a, simde__m512i b) {
3360 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3361 return _mm512_div_epu8(a, b);
3362 #else
3363 simde__m512i_private
3364 r_,
3365 a_ = simde__m512i_to_private(a),
3366 b_ = simde__m512i_to_private(b);
3367
3368 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3369 r_.u8 = a_.u8 / b_.u8;
3370 #else
3371 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3372 for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3373 r_.m256i[i] = simde_mm256_div_epu8(a_.m256i[i], b_.m256i[i]);
3374 }
3375 #else
3376 SIMDE_VECTORIZE
3377 for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
3378 r_.u8[i] = a_.u8[i] / b_.u8[i];
3379 }
3380 #endif
3381 #endif
3382
3383 return simde__m512i_from_private(r_);
3384 #endif
3385 }
3386 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3387 #undef _mm512_div_epu8
3388 #define _mm512_div_epu8(a, b) simde_mm512_div_epu8((a), (b))
3389 #endif
3390
3391 SIMDE_FUNCTION_ATTRIBUTES
3392 simde__m512i
simde_mm512_div_epu16(simde__m512i a,simde__m512i b)3393 simde_mm512_div_epu16 (simde__m512i a, simde__m512i b) {
3394 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3395 return _mm512_div_epu16(a, b);
3396 #else
3397 simde__m512i_private
3398 r_,
3399 a_ = simde__m512i_to_private(a),
3400 b_ = simde__m512i_to_private(b);
3401
3402 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3403 r_.u16 = a_.u16 / b_.u16;
3404 #else
3405 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3406 for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3407 r_.m256i[i] = simde_mm256_div_epu16(a_.m256i[i], b_.m256i[i]);
3408 }
3409 #else
3410 SIMDE_VECTORIZE
3411 for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
3412 r_.u16[i] = a_.u16[i] / b_.u16[i];
3413 }
3414 #endif
3415 #endif
3416
3417 return simde__m512i_from_private(r_);
3418 #endif
3419 }
3420 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3421 #undef _mm512_div_epu16
3422 #define _mm512_div_epu16(a, b) simde_mm512_div_epu16((a), (b))
3423 #endif
3424
3425 SIMDE_FUNCTION_ATTRIBUTES
3426 simde__m512i
simde_mm512_div_epu32(simde__m512i a,simde__m512i b)3427 simde_mm512_div_epu32 (simde__m512i a, simde__m512i b) {
3428 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3429 return _mm512_div_epu32(a, b);
3430 #else
3431 simde__m512i_private
3432 r_,
3433 a_ = simde__m512i_to_private(a),
3434 b_ = simde__m512i_to_private(b);
3435
3436 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3437 r_.u32 = a_.u32 / b_.u32;
3438 #else
3439 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3440 for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3441 r_.m256i[i] = simde_mm256_div_epu32(a_.m256i[i], b_.m256i[i]);
3442 }
3443 #else
3444 SIMDE_VECTORIZE
3445 for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
3446 r_.u32[i] = a_.u32[i] / b_.u32[i];
3447 }
3448 #endif
3449 #endif
3450
3451 return simde__m512i_from_private(r_);
3452 #endif
3453 }
3454 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3455 #undef _mm512_div_epu32
3456 #define _mm512_div_epu32(a, b) simde_mm512_div_epu32((a), (b))
3457 #endif
3458
3459 SIMDE_FUNCTION_ATTRIBUTES
3460 simde__m512i
simde_mm512_mask_div_epu32(simde__m512i src,simde__mmask16 k,simde__m512i a,simde__m512i b)3461 simde_mm512_mask_div_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
3462 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3463 return _mm512_mask_div_epu32(src, k, a, b);
3464 #else
3465 return simde_mm512_mask_mov_epi32(src, k, simde_mm512_div_epu32(a, b));
3466 #endif
3467 }
3468 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3469 #undef _mm512_mask_div_epu32
3470 #define _mm512_mask_div_epu32(src, k, a, b) simde_mm512_mask_div_epu32(src, k, a, b)
3471 #endif
3472
3473 SIMDE_FUNCTION_ATTRIBUTES
3474 simde__m512i
simde_mm512_div_epu64(simde__m512i a,simde__m512i b)3475 simde_mm512_div_epu64 (simde__m512i a, simde__m512i b) {
3476 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3477 return _mm512_div_epu64(a, b);
3478 #else
3479 simde__m512i_private
3480 r_,
3481 a_ = simde__m512i_to_private(a),
3482 b_ = simde__m512i_to_private(b);
3483
3484 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3485 r_.u64 = a_.u64 / b_.u64;
3486 #else
3487 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3488 for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3489 r_.m256i[i] = simde_mm256_div_epu64(a_.m256i[i], b_.m256i[i]);
3490 }
3491 #else
3492 SIMDE_VECTORIZE
3493 for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
3494 r_.u64[i] = a_.u64[i] / b_.u64[i];
3495 }
3496 #endif
3497 #endif
3498
3499 return simde__m512i_from_private(r_);
3500 #endif
3501 }
3502 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3503 #undef _mm512_div_epu64
3504 #define _mm512_div_epu64(a, b) simde_mm512_div_epu64((a), (b))
3505 #endif
3506
3507 SIMDE_FUNCTION_ATTRIBUTES
3508 simde__m128
simde_mm_erf_ps(simde__m128 a)3509 simde_mm_erf_ps (simde__m128 a) {
3510 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
3511 return _mm_erf_ps(a);
3512 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
3513 return Sleef_erff4_u10(a);
3514 #else
3515 simde__m128_private
3516 r_,
3517 a_ = simde__m128_to_private(a);
3518
3519 SIMDE_VECTORIZE
3520 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3521 r_.f32[i] = simde_math_erff(a_.f32[i]);
3522 }
3523
3524 return simde__m128_from_private(r_);
3525 #endif
3526 }
3527 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3528 #undef _mm_erf_ps
3529 #define _mm_erf_ps(a) simde_mm_erf_ps(a)
3530 #endif
3531
3532 SIMDE_FUNCTION_ATTRIBUTES
3533 simde__m128d
simde_mm_erf_pd(simde__m128d a)3534 simde_mm_erf_pd (simde__m128d a) {
3535 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
3536 return _mm_erf_pd(a);
3537 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
3538 return Sleef_erfd2_u10(a);
3539 #else
3540 simde__m128d_private
3541 r_,
3542 a_ = simde__m128d_to_private(a);
3543
3544 SIMDE_VECTORIZE
3545 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
3546 r_.f64[i] = simde_math_erf(a_.f64[i]);
3547 }
3548
3549 return simde__m128d_from_private(r_);
3550 #endif
3551 }
3552 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3553 #undef _mm_erf_pd
3554 #define _mm_erf_pd(a) simde_mm_erf_pd(a)
3555 #endif
3556
3557 SIMDE_FUNCTION_ATTRIBUTES
3558 simde__m256
simde_mm256_erf_ps(simde__m256 a)3559 simde_mm256_erf_ps (simde__m256 a) {
3560 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3561 return _mm256_erf_ps(a);
3562 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
3563 return Sleef_erff8_u10(a);
3564 #else
3565 simde__m256_private
3566 r_,
3567 a_ = simde__m256_to_private(a);
3568
3569 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3570 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
3571 r_.m128[i] = simde_mm_erf_ps(a_.m128[i]);
3572 }
3573 #else
3574 SIMDE_VECTORIZE
3575 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3576 r_.f32[i] = simde_math_erff(a_.f32[i]);
3577 }
3578 #endif
3579
3580 return simde__m256_from_private(r_);
3581 #endif
3582 }
3583 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3584 #undef _mm256_erf_ps
3585 #define _mm256_erf_ps(a) simde_mm256_erf_ps(a)
3586 #endif
3587
3588
3589 SIMDE_FUNCTION_ATTRIBUTES
3590 simde__m256d
simde_mm256_erf_pd(simde__m256d a)3591 simde_mm256_erf_pd (simde__m256d a) {
3592 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3593 return _mm256_erf_pd(a);
3594 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
3595 return Sleef_erfd4_u10(a);
3596 #else
3597 simde__m256d_private
3598 r_,
3599 a_ = simde__m256d_to_private(a);
3600
3601 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3602 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
3603 r_.m128d[i] = simde_mm_erf_pd(a_.m128d[i]);
3604 }
3605 #else
3606 SIMDE_VECTORIZE
3607 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
3608 r_.f64[i] = simde_math_erf(a_.f64[i]);
3609 }
3610 #endif
3611
3612 return simde__m256d_from_private(r_);
3613 #endif
3614 }
3615 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3616 #undef _mm256_erf_pd
3617 #define _mm256_erf_pd(a) simde_mm256_erf_pd(a)
3618 #endif
3619
3620 SIMDE_FUNCTION_ATTRIBUTES
3621 simde__m512
simde_mm512_erf_ps(simde__m512 a)3622 simde_mm512_erf_ps (simde__m512 a) {
3623 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3624 return _mm512_erf_ps(a);
3625 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
3626 return Sleef_erff16_u10(a);
3627 #else
3628 simde__m512_private
3629 r_,
3630 a_ = simde__m512_to_private(a);
3631
3632 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3633 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
3634 r_.m256[i] = simde_mm256_erf_ps(a_.m256[i]);
3635 }
3636 #else
3637 SIMDE_VECTORIZE
3638 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3639 r_.f32[i] = simde_math_erff(a_.f32[i]);
3640 }
3641 #endif
3642
3643 return simde__m512_from_private(r_);
3644 #endif
3645 }
3646 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3647 #undef _mm512_erf_ps
3648 #define _mm512_erf_ps(a) simde_mm512_erf_ps(a)
3649 #endif
3650
3651 SIMDE_FUNCTION_ATTRIBUTES
3652 simde__m512d
simde_mm512_erf_pd(simde__m512d a)3653 simde_mm512_erf_pd (simde__m512d a) {
3654 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3655 return _mm512_erf_pd(a);
3656 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
3657 return Sleef_erfd8_u10(a);
3658 #else
3659 simde__m512d_private
3660 r_,
3661 a_ = simde__m512d_to_private(a);
3662
3663 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3664 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
3665 r_.m256d[i] = simde_mm256_erf_pd(a_.m256d[i]);
3666 }
3667 #else
3668 SIMDE_VECTORIZE
3669 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
3670 r_.f64[i] = simde_math_erf(a_.f64[i]);
3671 }
3672 #endif
3673
3674 return simde__m512d_from_private(r_);
3675 #endif
3676 }
3677 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3678 #undef _mm512_erf_pd
3679 #define _mm512_erf_pd(a) simde_mm512_erf_pd(a)
3680 #endif
3681
3682 SIMDE_FUNCTION_ATTRIBUTES
3683 simde__m512
simde_mm512_mask_erf_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)3684 simde_mm512_mask_erf_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
3685 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3686 return _mm512_mask_erf_ps(src, k, a);
3687 #else
3688 return simde_mm512_mask_mov_ps(src, k, simde_mm512_erf_ps(a));
3689 #endif
3690 }
3691 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3692 #undef _mm512_mask_erf_ps
3693 #define _mm512_mask_erf_ps(src, k, a) simde_mm512_mask_erf_ps(src, k, a)
3694 #endif
3695
3696 SIMDE_FUNCTION_ATTRIBUTES
3697 simde__m512d
simde_mm512_mask_erf_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)3698 simde_mm512_mask_erf_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
3699 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3700 return _mm512_mask_erf_pd(src, k, a);
3701 #else
3702 return simde_mm512_mask_mov_pd(src, k, simde_mm512_erf_pd(a));
3703 #endif
3704 }
3705 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3706 #undef _mm512_mask_erf_pd
3707 #define _mm512_mask_erf_pd(src, k, a) simde_mm512_mask_erf_pd(src, k, a)
3708 #endif
3709
3710 SIMDE_FUNCTION_ATTRIBUTES
3711 simde__m128
simde_mm_erfc_ps(simde__m128 a)3712 simde_mm_erfc_ps (simde__m128 a) {
3713 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
3714 return _mm_erfc_ps(a);
3715 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
3716 return Sleef_erfcf4_u15(a);
3717 #else
3718 simde__m128_private
3719 r_,
3720 a_ = simde__m128_to_private(a);
3721
3722 SIMDE_VECTORIZE
3723 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3724 r_.f32[i] = simde_math_erfcf(a_.f32[i]);
3725 }
3726
3727 return simde__m128_from_private(r_);
3728 #endif
3729 }
3730 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3731 #undef _mm_erfc_ps
3732 #define _mm_erfc_ps(a) simde_mm_erfc_ps(a)
3733 #endif
3734
3735 SIMDE_FUNCTION_ATTRIBUTES
3736 simde__m128d
simde_mm_erfc_pd(simde__m128d a)3737 simde_mm_erfc_pd (simde__m128d a) {
3738 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
3739 return _mm_erfc_pd(a);
3740 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
3741 return Sleef_erfcd2_u15(a);
3742 #else
3743 simde__m128d_private
3744 r_,
3745 a_ = simde__m128d_to_private(a);
3746
3747 SIMDE_VECTORIZE
3748 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
3749 r_.f64[i] = simde_math_erfc(a_.f64[i]);
3750 }
3751
3752 return simde__m128d_from_private(r_);
3753 #endif
3754 }
3755 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3756 #undef _mm_erfc_pd
3757 #define _mm_erfc_pd(a) simde_mm_erfc_pd(a)
3758 #endif
3759
3760 SIMDE_FUNCTION_ATTRIBUTES
3761 simde__m256
simde_mm256_erfc_ps(simde__m256 a)3762 simde_mm256_erfc_ps (simde__m256 a) {
3763 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3764 return _mm256_erfc_ps(a);
3765 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
3766 return Sleef_erfcf8_u15(a);
3767 #else
3768 simde__m256_private
3769 r_,
3770 a_ = simde__m256_to_private(a);
3771
3772 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3773 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
3774 r_.m128[i] = simde_mm_erfc_ps(a_.m128[i]);
3775 }
3776 #else
3777 SIMDE_VECTORIZE
3778 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3779 r_.f32[i] = simde_math_erfcf(a_.f32[i]);
3780 }
3781 #endif
3782
3783 return simde__m256_from_private(r_);
3784 #endif
3785 }
3786 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3787 #undef _mm256_erfc_ps
3788 #define _mm256_erfc_ps(a) simde_mm256_erfc_ps(a)
3789 #endif
3790
3791
3792 SIMDE_FUNCTION_ATTRIBUTES
3793 simde__m256d
simde_mm256_erfc_pd(simde__m256d a)3794 simde_mm256_erfc_pd (simde__m256d a) {
3795 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3796 return _mm256_erfc_pd(a);
3797 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
3798 return Sleef_erfcd4_u15(a);
3799 #else
3800 simde__m256d_private
3801 r_,
3802 a_ = simde__m256d_to_private(a);
3803
3804 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3805 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
3806 r_.m128d[i] = simde_mm_erfc_pd(a_.m128d[i]);
3807 }
3808 #else
3809 SIMDE_VECTORIZE
3810 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
3811 r_.f64[i] = simde_math_erfc(a_.f64[i]);
3812 }
3813 #endif
3814
3815 return simde__m256d_from_private(r_);
3816 #endif
3817 }
3818 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3819 #undef _mm256_erfc_pd
3820 #define _mm256_erfc_pd(a) simde_mm256_erfc_pd(a)
3821 #endif
3822
3823 SIMDE_FUNCTION_ATTRIBUTES
3824 simde__m512
simde_mm512_erfc_ps(simde__m512 a)3825 simde_mm512_erfc_ps (simde__m512 a) {
3826 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3827 return _mm512_erfc_ps(a);
3828 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
3829 return Sleef_erfcf16_u15(a);
3830 #else
3831 simde__m512_private
3832 r_,
3833 a_ = simde__m512_to_private(a);
3834
3835 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3836 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
3837 r_.m256[i] = simde_mm256_erfc_ps(a_.m256[i]);
3838 }
3839 #else
3840 SIMDE_VECTORIZE
3841 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3842 r_.f32[i] = simde_math_erfcf(a_.f32[i]);
3843 }
3844 #endif
3845
3846 return simde__m512_from_private(r_);
3847 #endif
3848 }
3849 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3850 #undef _mm512_erfc_ps
3851 #define _mm512_erfc_ps(a) simde_mm512_erfc_ps(a)
3852 #endif
3853
3854 SIMDE_FUNCTION_ATTRIBUTES
3855 simde__m512d
simde_mm512_erfc_pd(simde__m512d a)3856 simde_mm512_erfc_pd (simde__m512d a) {
3857 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3858 return _mm512_erfc_pd(a);
3859 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
3860 return Sleef_erfcd8_u15(a);
3861 #else
3862 simde__m512d_private
3863 r_,
3864 a_ = simde__m512d_to_private(a);
3865
3866 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3867 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
3868 r_.m256d[i] = simde_mm256_erfc_pd(a_.m256d[i]);
3869 }
3870 #else
3871 SIMDE_VECTORIZE
3872 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
3873 r_.f64[i] = simde_math_erfc(a_.f64[i]);
3874 }
3875 #endif
3876
3877 return simde__m512d_from_private(r_);
3878 #endif
3879 }
3880 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3881 #undef _mm512_erfc_pd
3882 #define _mm512_erfc_pd(a) simde_mm512_erfc_pd(a)
3883 #endif
3884
3885 SIMDE_FUNCTION_ATTRIBUTES
3886 simde__m512
simde_mm512_mask_erfc_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)3887 simde_mm512_mask_erfc_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
3888 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3889 return _mm512_mask_erfc_ps(src, k, a);
3890 #else
3891 return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfc_ps(a));
3892 #endif
3893 }
3894 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3895 #undef _mm512_mask_erfc_ps
3896 #define _mm512_mask_erfc_ps(src, k, a) simde_mm512_mask_erfc_ps(src, k, a)
3897 #endif
3898
3899 SIMDE_FUNCTION_ATTRIBUTES
3900 simde__m512d
simde_mm512_mask_erfc_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)3901 simde_mm512_mask_erfc_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
3902 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3903 return _mm512_mask_erfc_pd(src, k, a);
3904 #else
3905 return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfc_pd(a));
3906 #endif
3907 }
3908 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3909 #undef _mm512_mask_erfc_pd
3910 #define _mm512_mask_erfc_pd(src, k, a) simde_mm512_mask_erfc_pd(src, k, a)
3911 #endif
3912
3913 SIMDE_FUNCTION_ATTRIBUTES
3914 simde__m128
simde_mm_exp_ps(simde__m128 a)3915 simde_mm_exp_ps (simde__m128 a) {
3916 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
3917 return _mm_exp_ps(a);
3918 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
3919 return Sleef_expf4_u10(a);
3920 #else
3921 simde__m128_private
3922 r_,
3923 a_ = simde__m128_to_private(a);
3924
3925 SIMDE_VECTORIZE
3926 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3927 r_.f32[i] = simde_math_expf(a_.f32[i]);
3928 }
3929
3930 return simde__m128_from_private(r_);
3931 #endif
3932 }
3933 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3934 #undef _mm_exp_ps
3935 #define _mm_exp_ps(a) simde_mm_exp_ps(a)
3936 #endif
3937
3938 SIMDE_FUNCTION_ATTRIBUTES
3939 simde__m128d
simde_mm_exp_pd(simde__m128d a)3940 simde_mm_exp_pd (simde__m128d a) {
3941 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
3942 return _mm_exp_pd(a);
3943 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
3944 return Sleef_expd2_u10(a);
3945 #else
3946 simde__m128d_private
3947 r_,
3948 a_ = simde__m128d_to_private(a);
3949
3950 SIMDE_VECTORIZE
3951 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
3952 r_.f64[i] = simde_math_exp(a_.f64[i]);
3953 }
3954
3955 return simde__m128d_from_private(r_);
3956 #endif
3957 }
3958 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3959 #undef _mm_exp_pd
3960 #define _mm_exp_pd(a) simde_mm_exp_pd(a)
3961 #endif
3962
3963 SIMDE_FUNCTION_ATTRIBUTES
3964 simde__m256
simde_mm256_exp_ps(simde__m256 a)3965 simde_mm256_exp_ps (simde__m256 a) {
3966 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3967 return _mm256_exp_ps(a);
3968 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
3969 return Sleef_expf8_u10(a);
3970 #else
3971 simde__m256_private
3972 r_,
3973 a_ = simde__m256_to_private(a);
3974
3975 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3976 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
3977 r_.m128[i] = simde_mm_exp_ps(a_.m128[i]);
3978 }
3979 #else
3980 SIMDE_VECTORIZE
3981 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3982 r_.f32[i] = simde_math_expf(a_.f32[i]);
3983 }
3984 #endif
3985
3986 return simde__m256_from_private(r_);
3987 #endif
3988 }
3989 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3990 #undef _mm256_exp_ps
3991 #define _mm256_exp_ps(a) simde_mm256_exp_ps(a)
3992 #endif
3993
3994
3995 SIMDE_FUNCTION_ATTRIBUTES
3996 simde__m256d
simde_mm256_exp_pd(simde__m256d a)3997 simde_mm256_exp_pd (simde__m256d a) {
3998 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3999 return _mm256_exp_pd(a);
4000 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
4001 return Sleef_expd4_u10(a);
4002 #else
4003 simde__m256d_private
4004 r_,
4005 a_ = simde__m256d_to_private(a);
4006
4007 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
4008 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
4009 r_.m128d[i] = simde_mm_exp_pd(a_.m128d[i]);
4010 }
4011 #else
4012 SIMDE_VECTORIZE
4013 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4014 r_.f64[i] = simde_math_exp(a_.f64[i]);
4015 }
4016 #endif
4017
4018 return simde__m256d_from_private(r_);
4019 #endif
4020 }
4021 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4022 #undef _mm256_exp_pd
4023 #define _mm256_exp_pd(a) simde_mm256_exp_pd(a)
4024 #endif
4025
4026 SIMDE_FUNCTION_ATTRIBUTES
4027 simde__m512
simde_mm512_exp_ps(simde__m512 a)4028 simde_mm512_exp_ps (simde__m512 a) {
4029 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4030 return _mm512_exp_ps(a);
4031 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
4032 return Sleef_expf16_u10(a);
4033 #else
4034 simde__m512_private
4035 r_,
4036 a_ = simde__m512_to_private(a);
4037
4038 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
4039 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
4040 r_.m256[i] = simde_mm256_exp_ps(a_.m256[i]);
4041 }
4042 #else
4043 SIMDE_VECTORIZE
4044 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4045 r_.f32[i] = simde_math_expf(a_.f32[i]);
4046 }
4047 #endif
4048
4049 return simde__m512_from_private(r_);
4050 #endif
4051 }
4052 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4053 #undef _mm512_exp_ps
4054 #define _mm512_exp_ps(a) simde_mm512_exp_ps(a)
4055 #endif
4056
4057 SIMDE_FUNCTION_ATTRIBUTES
4058 simde__m512d
simde_mm512_exp_pd(simde__m512d a)4059 simde_mm512_exp_pd (simde__m512d a) {
4060 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4061 return _mm512_exp_pd(a);
4062 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
4063 return Sleef_expd8_u10(a);
4064 #else
4065 simde__m512d_private
4066 r_,
4067 a_ = simde__m512d_to_private(a);
4068
4069 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
4070 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
4071 r_.m256d[i] = simde_mm256_exp_pd(a_.m256d[i]);
4072 }
4073 #else
4074 SIMDE_VECTORIZE
4075 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4076 r_.f64[i] = simde_math_exp(a_.f64[i]);
4077 }
4078 #endif
4079
4080 return simde__m512d_from_private(r_);
4081 #endif
4082 }
4083 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4084 #undef _mm512_exp_pd
4085 #define _mm512_exp_pd(a) simde_mm512_exp_pd(a)
4086 #endif
4087
4088 SIMDE_FUNCTION_ATTRIBUTES
4089 simde__m512
simde_mm512_mask_exp_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)4090 simde_mm512_mask_exp_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
4091 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4092 return _mm512_mask_exp_ps(src, k, a);
4093 #else
4094 return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp_ps(a));
4095 #endif
4096 }
4097 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4098 #undef _mm512_mask_exp_ps
4099 #define _mm512_mask_exp_ps(src, k, a) simde_mm512_mask_exp_ps(src, k, a)
4100 #endif
4101
4102 SIMDE_FUNCTION_ATTRIBUTES
4103 simde__m512d
simde_mm512_mask_exp_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)4104 simde_mm512_mask_exp_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
4105 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4106 return _mm512_mask_exp_pd(src, k, a);
4107 #else
4108 return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp_pd(a));
4109 #endif
4110 }
4111 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4112 #undef _mm512_mask_exp_pd
4113 #define _mm512_mask_exp_pd(src, k, a) simde_mm512_mask_exp_pd(src, k, a)
4114 #endif
4115
4116 SIMDE_FUNCTION_ATTRIBUTES
4117 simde__m128
simde_mm_expm1_ps(simde__m128 a)4118 simde_mm_expm1_ps (simde__m128 a) {
4119 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
4120 return _mm_expm1_ps(a);
4121 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
4122 return Sleef_expm1f4_u10(a);
4123 #else
4124 simde__m128_private
4125 r_,
4126 a_ = simde__m128_to_private(a);
4127
4128 SIMDE_VECTORIZE
4129 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4130 r_.f32[i] = simde_math_expm1f(a_.f32[i]);
4131 }
4132
4133 return simde__m128_from_private(r_);
4134 #endif
4135 }
4136 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4137 #undef _mm_expm1_ps
4138 #define _mm_expm1_ps(a) simde_mm_expm1_ps(a)
4139 #endif
4140
4141 SIMDE_FUNCTION_ATTRIBUTES
4142 simde__m128d
simde_mm_expm1_pd(simde__m128d a)4143 simde_mm_expm1_pd (simde__m128d a) {
4144 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
4145 return _mm_expm1_pd(a);
4146 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
4147 return Sleef_expm1d2_u10(a);
4148 #else
4149 simde__m128d_private
4150 r_,
4151 a_ = simde__m128d_to_private(a);
4152
4153 SIMDE_VECTORIZE
4154 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4155 r_.f64[i] = simde_math_expm1(a_.f64[i]);
4156 }
4157
4158 return simde__m128d_from_private(r_);
4159 #endif
4160 }
4161 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4162 #undef _mm_expm1_pd
4163 #define _mm_expm1_pd(a) simde_mm_expm1_pd(a)
4164 #endif
4165
4166 SIMDE_FUNCTION_ATTRIBUTES
4167 simde__m256
simde_mm256_expm1_ps(simde__m256 a)4168 simde_mm256_expm1_ps (simde__m256 a) {
4169 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
4170 return _mm256_expm1_ps(a);
4171 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
4172 return Sleef_expm1f8_u10(a);
4173 #else
4174 simde__m256_private
4175 r_,
4176 a_ = simde__m256_to_private(a);
4177
4178 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
4179 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
4180 r_.m128[i] = simde_mm_expm1_ps(a_.m128[i]);
4181 }
4182 #else
4183 SIMDE_VECTORIZE
4184 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4185 r_.f32[i] = simde_math_expm1f(a_.f32[i]);
4186 }
4187 #endif
4188
4189 return simde__m256_from_private(r_);
4190 #endif
4191 }
4192 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4193 #undef _mm256_expm1_ps
4194 #define _mm256_expm1_ps(a) simde_mm256_expm1_ps(a)
4195 #endif
4196
4197
4198 SIMDE_FUNCTION_ATTRIBUTES
4199 simde__m256d
simde_mm256_expm1_pd(simde__m256d a)4200 simde_mm256_expm1_pd (simde__m256d a) {
4201 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
4202 return _mm256_expm1_pd(a);
4203 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
4204 return Sleef_expm1d4_u10(a);
4205 #else
4206 simde__m256d_private
4207 r_,
4208 a_ = simde__m256d_to_private(a);
4209
4210 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
4211 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
4212 r_.m128d[i] = simde_mm_expm1_pd(a_.m128d[i]);
4213 }
4214 #else
4215 SIMDE_VECTORIZE
4216 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4217 r_.f64[i] = simde_math_expm1(a_.f64[i]);
4218 }
4219 #endif
4220
4221 return simde__m256d_from_private(r_);
4222 #endif
4223 }
4224 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4225 #undef _mm256_expm1_pd
4226 #define _mm256_expm1_pd(a) simde_mm256_expm1_pd(a)
4227 #endif
4228
4229 SIMDE_FUNCTION_ATTRIBUTES
4230 simde__m512
simde_mm512_expm1_ps(simde__m512 a)4231 simde_mm512_expm1_ps (simde__m512 a) {
4232 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4233 return _mm512_expm1_ps(a);
4234 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
4235 return Sleef_expm1f16_u10(a);
4236 #else
4237 simde__m512_private
4238 r_,
4239 a_ = simde__m512_to_private(a);
4240
4241 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
4242 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
4243 r_.m256[i] = simde_mm256_expm1_ps(a_.m256[i]);
4244 }
4245 #else
4246 SIMDE_VECTORIZE
4247 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4248 r_.f32[i] = simde_math_expm1f(a_.f32[i]);
4249 }
4250 #endif
4251
4252 return simde__m512_from_private(r_);
4253 #endif
4254 }
4255 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4256 #undef _mm512_expm1_ps
4257 #define _mm512_expm1_ps(a) simde_mm512_expm1_ps(a)
4258 #endif
4259
4260 SIMDE_FUNCTION_ATTRIBUTES
4261 simde__m512d
simde_mm512_expm1_pd(simde__m512d a)4262 simde_mm512_expm1_pd (simde__m512d a) {
4263 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4264 return _mm512_expm1_pd(a);
4265 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
4266 return Sleef_expm1d8_u10(a);
4267 #else
4268 simde__m512d_private
4269 r_,
4270 a_ = simde__m512d_to_private(a);
4271
4272 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
4273 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
4274 r_.m256d[i] = simde_mm256_expm1_pd(a_.m256d[i]);
4275 }
4276 #else
4277 SIMDE_VECTORIZE
4278 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4279 r_.f64[i] = simde_math_expm1(a_.f64[i]);
4280 }
4281 #endif
4282
4283 return simde__m512d_from_private(r_);
4284 #endif
4285 }
4286 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4287 #undef _mm512_expm1_pd
4288 #define _mm512_expm1_pd(a) simde_mm512_expm1_pd(a)
4289 #endif
4290
4291 SIMDE_FUNCTION_ATTRIBUTES
4292 simde__m512
simde_mm512_mask_expm1_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)4293 simde_mm512_mask_expm1_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
4294 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4295 return _mm512_mask_expm1_ps(src, k, a);
4296 #else
4297 return simde_mm512_mask_mov_ps(src, k, simde_mm512_expm1_ps(a));
4298 #endif
4299 }
4300 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4301 #undef _mm512_mask_expm1_ps
4302 #define _mm512_mask_expm1_ps(src, k, a) simde_mm512_mask_expm1_ps(src, k, a)
4303 #endif
4304
4305 SIMDE_FUNCTION_ATTRIBUTES
4306 simde__m512d
simde_mm512_mask_expm1_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)4307 simde_mm512_mask_expm1_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
4308 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4309 return _mm512_mask_expm1_pd(src, k, a);
4310 #else
4311 return simde_mm512_mask_mov_pd(src, k, simde_mm512_expm1_pd(a));
4312 #endif
4313 }
4314 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4315 #undef _mm512_mask_expm1_pd
4316 #define _mm512_mask_expm1_pd(src, k, a) simde_mm512_mask_expm1_pd(src, k, a)
4317 #endif
4318
4319 SIMDE_FUNCTION_ATTRIBUTES
4320 simde__m128
simde_mm_exp2_ps(simde__m128 a)4321 simde_mm_exp2_ps (simde__m128 a) {
4322 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
4323 return _mm_exp2_ps(a);
4324 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
4325 return Sleef_exp2f4_u10(a);
4326 #else
4327 simde__m128_private
4328 r_,
4329 a_ = simde__m128_to_private(a);
4330
4331 SIMDE_VECTORIZE
4332 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4333 r_.f32[i] = simde_math_exp2f(a_.f32[i]);
4334 }
4335
4336 return simde__m128_from_private(r_);
4337 #endif
4338 }
4339 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4340 #undef _mm_exp2_ps
4341 #define _mm_exp2_ps(a) simde_mm_exp2_ps(a)
4342 #endif
4343
4344 SIMDE_FUNCTION_ATTRIBUTES
4345 simde__m128d
simde_mm_exp2_pd(simde__m128d a)4346 simde_mm_exp2_pd (simde__m128d a) {
4347 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
4348 return _mm_exp2_pd(a);
4349 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
4350 return Sleef_exp2d2_u10(a);
4351 #else
4352 simde__m128d_private
4353 r_,
4354 a_ = simde__m128d_to_private(a);
4355
4356 SIMDE_VECTORIZE
4357 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4358 r_.f64[i] = simde_math_exp2(a_.f64[i]);
4359 }
4360
4361 return simde__m128d_from_private(r_);
4362 #endif
4363 }
4364 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4365 #undef _mm_exp2_pd
4366 #define _mm_exp2_pd(a) simde_mm_exp2_pd(a)
4367 #endif
4368
4369 SIMDE_FUNCTION_ATTRIBUTES
4370 simde__m256
simde_mm256_exp2_ps(simde__m256 a)4371 simde_mm256_exp2_ps (simde__m256 a) {
4372 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
4373 return _mm256_exp2_ps(a);
4374 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
4375 return Sleef_exp2f8_u10(a);
4376 #else
4377 simde__m256_private
4378 r_,
4379 a_ = simde__m256_to_private(a);
4380
4381 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
4382 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
4383 r_.m128[i] = simde_mm_exp2_ps(a_.m128[i]);
4384 }
4385 #else
4386 SIMDE_VECTORIZE
4387 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4388 r_.f32[i] = simde_math_exp2f(a_.f32[i]);
4389 }
4390 #endif
4391
4392 return simde__m256_from_private(r_);
4393 #endif
4394 }
4395 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4396 #undef _mm256_exp2_ps
4397 #define _mm256_exp2_ps(a) simde_mm256_exp2_ps(a)
4398 #endif
4399
4400
4401 SIMDE_FUNCTION_ATTRIBUTES
4402 simde__m256d
simde_mm256_exp2_pd(simde__m256d a)4403 simde_mm256_exp2_pd (simde__m256d a) {
4404 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
4405 return _mm256_exp2_pd(a);
4406 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
4407 return Sleef_exp2d4_u10(a);
4408 #else
4409 simde__m256d_private
4410 r_,
4411 a_ = simde__m256d_to_private(a);
4412
4413 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
4414 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
4415 r_.m128d[i] = simde_mm_exp2_pd(a_.m128d[i]);
4416 }
4417 #else
4418 SIMDE_VECTORIZE
4419 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4420 r_.f64[i] = simde_math_exp2(a_.f64[i]);
4421 }
4422 #endif
4423
4424 return simde__m256d_from_private(r_);
4425 #endif
4426 }
4427 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4428 #undef _mm256_exp2_pd
4429 #define _mm256_exp2_pd(a) simde_mm256_exp2_pd(a)
4430 #endif
4431
4432 SIMDE_FUNCTION_ATTRIBUTES
4433 simde__m512
simde_mm512_exp2_ps(simde__m512 a)4434 simde_mm512_exp2_ps (simde__m512 a) {
4435 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4436 return _mm512_exp2_ps(a);
4437 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
4438 return Sleef_exp2f16_u10(a);
4439 #else
4440 simde__m512_private
4441 r_,
4442 a_ = simde__m512_to_private(a);
4443
4444 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
4445 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
4446 r_.m256[i] = simde_mm256_exp2_ps(a_.m256[i]);
4447 }
4448 #else
4449 SIMDE_VECTORIZE
4450 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4451 r_.f32[i] = simde_math_exp2f(a_.f32[i]);
4452 }
4453 #endif
4454
4455 return simde__m512_from_private(r_);
4456 #endif
4457 }
4458 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4459 #undef _mm512_exp2_ps
4460 #define _mm512_exp2_ps(a) simde_mm512_exp2_ps(a)
4461 #endif
4462
4463 SIMDE_FUNCTION_ATTRIBUTES
4464 simde__m512d
simde_mm512_exp2_pd(simde__m512d a)4465 simde_mm512_exp2_pd (simde__m512d a) {
4466 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4467 return _mm512_exp2_pd(a);
4468 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
4469 return Sleef_exp2d8_u10(a);
4470 #else
4471 simde__m512d_private
4472 r_,
4473 a_ = simde__m512d_to_private(a);
4474
4475 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
4476 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
4477 r_.m256d[i] = simde_mm256_exp2_pd(a_.m256d[i]);
4478 }
4479 #else
4480 SIMDE_VECTORIZE
4481 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4482 r_.f64[i] = simde_math_exp2(a_.f64[i]);
4483 }
4484 #endif
4485
4486 return simde__m512d_from_private(r_);
4487 #endif
4488 }
4489 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4490 #undef _mm512_exp2_pd
4491 #define _mm512_exp2_pd(a) simde_mm512_exp2_pd(a)
4492 #endif
4493
4494 SIMDE_FUNCTION_ATTRIBUTES
4495 simde__m512
simde_mm512_mask_exp2_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)4496 simde_mm512_mask_exp2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
4497 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4498 return _mm512_mask_exp2_ps(src, k, a);
4499 #else
4500 return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp2_ps(a));
4501 #endif
4502 }
4503 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4504 #undef _mm512_mask_exp2_ps
4505 #define _mm512_mask_exp2_ps(src, k, a) simde_mm512_mask_exp2_ps(src, k, a)
4506 #endif
4507
4508 SIMDE_FUNCTION_ATTRIBUTES
4509 simde__m512d
simde_mm512_mask_exp2_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)4510 simde_mm512_mask_exp2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
4511 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4512 return _mm512_mask_exp2_pd(src, k, a);
4513 #else
4514 return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp2_pd(a));
4515 #endif
4516 }
4517 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4518 #undef _mm512_mask_exp2_pd
4519 #define _mm512_mask_exp2_pd(src, k, a) simde_mm512_mask_exp2_pd(src, k, a)
4520 #endif
4521
4522 SIMDE_FUNCTION_ATTRIBUTES
4523 simde__m128
simde_mm_exp10_ps(simde__m128 a)4524 simde_mm_exp10_ps (simde__m128 a) {
4525 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
4526 return _mm_exp10_ps(a);
4527 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
4528 return Sleef_exp10f4_u10(a);
4529 #else
4530 simde__m128_private
4531 r_,
4532 a_ = simde__m128_to_private(a);
4533
4534 SIMDE_VECTORIZE
4535 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4536 r_.f32[i] = simde_math_exp10f(a_.f32[i]);
4537 }
4538
4539 return simde__m128_from_private(r_);
4540 #endif
4541 }
4542 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4543 #undef _mm_exp10_ps
4544 #define _mm_exp10_ps(a) simde_mm_exp10_ps(a)
4545 #endif
4546
4547 SIMDE_FUNCTION_ATTRIBUTES
4548 simde__m128d
simde_mm_exp10_pd(simde__m128d a)4549 simde_mm_exp10_pd (simde__m128d a) {
4550 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
4551 return _mm_exp10_pd(a);
4552 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
4553 return Sleef_exp10d2_u10(a);
4554 #else
4555 simde__m128d_private
4556 r_,
4557 a_ = simde__m128d_to_private(a);
4558
4559 SIMDE_VECTORIZE
4560 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4561 r_.f64[i] = simde_math_exp10(a_.f64[i]);
4562 }
4563
4564 return simde__m128d_from_private(r_);
4565 #endif
4566 }
4567 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4568 #undef _mm_exp10_pd
4569 #define _mm_exp10_pd(a) simde_mm_exp10_pd(a)
4570 #endif
4571
4572 SIMDE_FUNCTION_ATTRIBUTES
4573 simde__m256
simde_mm256_exp10_ps(simde__m256 a)4574 simde_mm256_exp10_ps (simde__m256 a) {
4575 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
4576 return _mm256_exp10_ps(a);
4577 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
4578 return Sleef_exp10f8_u10(a);
4579 #else
4580 simde__m256_private
4581 r_,
4582 a_ = simde__m256_to_private(a);
4583
4584 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
4585 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
4586 r_.m128[i] = simde_mm_exp10_ps(a_.m128[i]);
4587 }
4588 #else
4589 SIMDE_VECTORIZE
4590 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4591 r_.f32[i] = simde_math_exp10f(a_.f32[i]);
4592 }
4593 #endif
4594
4595 return simde__m256_from_private(r_);
4596 #endif
4597 }
4598 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4599 #undef _mm256_exp10_ps
4600 #define _mm256_exp10_ps(a) simde_mm256_exp10_ps(a)
4601 #endif
4602
4603
4604 SIMDE_FUNCTION_ATTRIBUTES
4605 simde__m256d
simde_mm256_exp10_pd(simde__m256d a)4606 simde_mm256_exp10_pd (simde__m256d a) {
4607 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
4608 return _mm256_exp10_pd(a);
4609 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
4610 return Sleef_exp10d4_u10(a);
4611 #else
4612 simde__m256d_private
4613 r_,
4614 a_ = simde__m256d_to_private(a);
4615
4616 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
4617 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
4618 r_.m128d[i] = simde_mm_exp10_pd(a_.m128d[i]);
4619 }
4620 #else
4621 SIMDE_VECTORIZE
4622 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4623 r_.f64[i] = simde_math_exp10(a_.f64[i]);
4624 }
4625 #endif
4626
4627 return simde__m256d_from_private(r_);
4628 #endif
4629 }
4630 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4631 #undef _mm256_exp10_pd
4632 #define _mm256_exp10_pd(a) simde_mm256_exp10_pd(a)
4633 #endif
4634
4635 SIMDE_FUNCTION_ATTRIBUTES
4636 simde__m512
simde_mm512_exp10_ps(simde__m512 a)4637 simde_mm512_exp10_ps (simde__m512 a) {
4638 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4639 return _mm512_exp10_ps(a);
4640 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
4641 return Sleef_exp10f16_u10(a);
4642 #else
4643 simde__m512_private
4644 r_,
4645 a_ = simde__m512_to_private(a);
4646
4647 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
4648 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
4649 r_.m256[i] = simde_mm256_exp10_ps(a_.m256[i]);
4650 }
4651 #else
4652 SIMDE_VECTORIZE
4653 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4654 r_.f32[i] = simde_math_exp10f(a_.f32[i]);
4655 }
4656 #endif
4657
4658 return simde__m512_from_private(r_);
4659 #endif
4660 }
4661 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4662 #undef _mm512_exp10_ps
4663 #define _mm512_exp10_ps(a) simde_mm512_exp10_ps(a)
4664 #endif
4665
4666 SIMDE_FUNCTION_ATTRIBUTES
4667 simde__m512d
simde_mm512_exp10_pd(simde__m512d a)4668 simde_mm512_exp10_pd (simde__m512d a) {
4669 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4670 return _mm512_exp10_pd(a);
4671 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
4672 return Sleef_exp10d8_u10(a);
4673 #else
4674 simde__m512d_private
4675 r_,
4676 a_ = simde__m512d_to_private(a);
4677
4678 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
4679 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
4680 r_.m256d[i] = simde_mm256_exp10_pd(a_.m256d[i]);
4681 }
4682 #else
4683 SIMDE_VECTORIZE
4684 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4685 r_.f64[i] = simde_math_exp10(a_.f64[i]);
4686 }
4687 #endif
4688
4689 return simde__m512d_from_private(r_);
4690 #endif
4691 }
4692 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4693 #undef _mm512_exp10_pd
4694 #define _mm512_exp10_pd(a) simde_mm512_exp10_pd(a)
4695 #endif
4696
4697 SIMDE_FUNCTION_ATTRIBUTES
4698 simde__m512
simde_mm512_mask_exp10_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)4699 simde_mm512_mask_exp10_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
4700 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4701 return _mm512_mask_exp10_ps(src, k, a);
4702 #else
4703 return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp10_ps(a));
4704 #endif
4705 }
4706 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4707 #undef _mm512_mask_exp10_ps
4708 #define _mm512_mask_exp10_ps(src, k, a) simde_mm512_mask_exp10_ps(src, k, a)
4709 #endif
4710
4711 SIMDE_FUNCTION_ATTRIBUTES
4712 simde__m512d
simde_mm512_mask_exp10_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)4713 simde_mm512_mask_exp10_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
4714 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4715 return _mm512_mask_exp10_pd(src, k, a);
4716 #else
4717 return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp10_pd(a));
4718 #endif
4719 }
4720 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4721 #undef _mm512_mask_exp10_pd
4722 #define _mm512_mask_exp10_pd(src, k, a) simde_mm512_mask_exp10_pd(src, k, a)
4723 #endif
4724
/* Standard normal CDF, element-wise, over four float lanes.
 * Vector path follows the erf approximation from the linked johndcook
 * article (Abramowitz & Stegun 7.1.26 constants); the exact operation
 * order below matters for floating-point reproducibility. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cdfnorm_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cdfnorm_ps(a);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
    /* https://www.johndcook.com/blog/cpp_phi/ */
    const simde__m128 a1 = simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.254829592));
    const simde__m128 a2 = simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.284496736));
    const simde__m128 a3 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.421413741));
    const simde__m128 a4 = simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.453152027));
    const simde__m128 a5 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.061405429));
    const simde__m128 p = simde_mm_set1_ps(SIMDE_FLOAT32_C(0.3275911));
    const simde__m128 one = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0));

    /* simde_math_fabsf(x) / sqrtf(2.0) */
    const simde__m128 x = simde_mm_div_ps(simde_x_mm_abs_ps(a), simde_mm_sqrt_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0))));

    /* 1.0 / (1.0 + p * x) */
    const simde__m128 t = simde_mm_div_ps(one, simde_mm_add_ps(one, simde_mm_mul_ps(p, x)));

    /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */
    /* Horner evaluation of the degree-5 polynomial in t. */
    simde__m128 y = simde_mm_mul_ps(a5, t);
    y = simde_mm_add_ps(y, a4);
    y = simde_mm_mul_ps(y, t);
    y = simde_mm_add_ps(y, a3);
    y = simde_mm_mul_ps(y, t);
    y = simde_mm_add_ps(y, a2);
    y = simde_mm_mul_ps(y, t);
    y = simde_mm_add_ps(y, a1);
    y = simde_mm_mul_ps(y, t);
    y = simde_mm_mul_ps(y, simde_mm_exp_ps(simde_mm_mul_ps(x, simde_x_mm_negate_ps(x))));
    y = simde_mm_sub_ps(one, y);

    /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) */
    /* xorsign applies the sign of the original input to y. */
    return simde_mm_mul_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm_add_ps(one, simde_x_mm_xorsign_ps(y, a)));
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_cdfnormf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_cdfnorm_ps
  #define _mm_cdfnorm_ps(a) simde_mm_cdfnorm_ps(a)
#endif
4778
4779 SIMDE_FUNCTION_ATTRIBUTES
4780 simde__m128d
simde_mm_cdfnorm_pd(simde__m128d a)4781 simde_mm_cdfnorm_pd (simde__m128d a) {
4782 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
4783 return _mm_cdfnorm_pd(a);
4784 #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
4785 /* https://www.johndcook.com/blog/cpp_phi/ */
4786 const simde__m128d a1 = simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.254829592));
4787 const simde__m128d a2 = simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.284496736));
4788 const simde__m128d a3 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.421413741));
4789 const simde__m128d a4 = simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.453152027));
4790 const simde__m128d a5 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.061405429));
4791 const simde__m128d p = simde_mm_set1_pd(SIMDE_FLOAT64_C(0.6475911));
4792 const simde__m128d one = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0));
4793
4794 /* simde_math_fabs(x) / sqrt(2.0) */
4795 const simde__m128d x = simde_mm_div_pd(simde_x_mm_abs_pd(a), simde_mm_sqrt_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0))));
4796
4797 /* 1.0 / (1.0 + p * x) */
4798 const simde__m128d t = simde_mm_div_pd(one, simde_mm_add_pd(one, simde_mm_mul_pd(p, x)));
4799
4800 /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */
4801 simde__m128d y = simde_mm_mul_pd(a5, t);
4802 y = simde_mm_add_pd(y, a4);
4803 y = simde_mm_mul_pd(y, t);
4804 y = simde_mm_add_pd(y, a3);
4805 y = simde_mm_mul_pd(y, t);
4806 y = simde_mm_add_pd(y, a2);
4807 y = simde_mm_mul_pd(y, t);
4808 y = simde_mm_add_pd(y, a1);
4809 y = simde_mm_mul_pd(y, t);
4810 y = simde_mm_mul_pd(y, simde_mm_exp_pd(simde_mm_mul_pd(x, simde_x_mm_negate_pd(x))));
4811 y = simde_mm_sub_pd(one, y);
4812
4813 /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) */
4814 return simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm_add_pd(one, simde_x_mm_xorsign_pd(y, a)));
4815 #else
4816 simde__m128d_private
4817 r_,
4818 a_ = simde__m128d_to_private(a);
4819
4820 SIMDE_VECTORIZE
4821 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4822 r_.f64[i] = simde_math_cdfnorm(a_.f64[i]);
4823 }
4824
4825 return simde__m128d_from_private(r_);
4826 #endif
4827 }
4828 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4829 #undef _mm_cdfnorm_pd
4830 #define _mm_cdfnorm_pd(a) simde_mm_cdfnorm_pd(a)
4831 #endif
4832
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_cdfnorm_ps (simde__m256 a) {
  /* Standard normal CDF Phi(a), per 32-bit lane. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_cdfnorm_ps(a);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
    /* Abramowitz & Stegun 7.1.26 rational approximation of erf;
     * https://www.johndcook.com/blog/cpp_phi/ */
    const simde__m256 a1 = simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.254829592));
    const simde__m256 a2 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.284496736));
    const simde__m256 a3 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.421413741));
    const simde__m256 a4 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.453152027));
    const simde__m256 a5 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.061405429));
    const simde__m256 p = simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.3275911));
    const simde__m256 one = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0));

    /* simde_math_fabsf(x) / sqrtf(2.0) */
    const simde__m256 x = simde_mm256_div_ps(simde_x_mm256_abs_ps(a), simde_mm256_sqrt_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0))));

    /* 1.0 / (1.0 + p * x) */
    const simde__m256 t = simde_mm256_div_ps(one, simde_mm256_add_ps(one, simde_mm256_mul_ps(p, x)));

    /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x)
     * Horner evaluation of the degree-5 polynomial in t. */
    simde__m256 y = simde_mm256_mul_ps(a5, t);
    y = simde_mm256_add_ps(y, a4);
    y = simde_mm256_mul_ps(y, t);
    y = simde_mm256_add_ps(y, a3);
    y = simde_mm256_mul_ps(y, t);
    y = simde_mm256_add_ps(y, a2);
    y = simde_mm256_mul_ps(y, t);
    y = simde_mm256_add_ps(y, a1);
    y = simde_mm256_mul_ps(y, t);
    y = simde_mm256_mul_ps(y, simde_mm256_exp_ps(simde_mm256_mul_ps(x, simde_x_mm256_negate_ps(x))));
    y = simde_mm256_sub_ps(one, y);

    /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) -- xorsign applies a's sign to y. */
    return simde_mm256_mul_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm256_add_ps(one, simde_x_mm256_xorsign_ps(y, a)));
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      /* Narrow targets: process the two 128-bit halves. */
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_cdfnorm_ps(a_.m128[i]);
      }
    #else
      /* Scalar fallback: one call per lane. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_cdfnormf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
4888 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4889 #undef _mm256_cdfnorm_ps
4890 #define _mm256_cdfnorm_ps(a) simde_mm256_cdfnorm_ps(a)
4891 #endif
4892
4893 SIMDE_FUNCTION_ATTRIBUTES
4894 simde__m256d
simde_mm256_cdfnorm_pd(simde__m256d a)4895 simde_mm256_cdfnorm_pd (simde__m256d a) {
4896 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
4897 return _mm256_cdfnorm_pd(a);
4898 #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
4899 /* https://www.johndcook.com/blog/cpp_phi/ */
4900 const simde__m256d a1 = simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.254829592));
4901 const simde__m256d a2 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.284496736));
4902 const simde__m256d a3 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.421413741));
4903 const simde__m256d a4 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.453152027));
4904 const simde__m256d a5 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.061405429));
4905 const simde__m256d p = simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.6475911));
4906 const simde__m256d one = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0));
4907
4908 /* simde_math_fabs(x) / sqrt(2.0) */
4909 const simde__m256d x = simde_mm256_div_pd(simde_x_mm256_abs_pd(a), simde_mm256_sqrt_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0))));
4910
4911 /* 1.0 / (1.0 + p * x) */
4912 const simde__m256d t = simde_mm256_div_pd(one, simde_mm256_add_pd(one, simde_mm256_mul_pd(p, x)));
4913
4914 /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */
4915 simde__m256d y = simde_mm256_mul_pd(a5, t);
4916 y = simde_mm256_add_pd(y, a4);
4917 y = simde_mm256_mul_pd(y, t);
4918 y = simde_mm256_add_pd(y, a3);
4919 y = simde_mm256_mul_pd(y, t);
4920 y = simde_mm256_add_pd(y, a2);
4921 y = simde_mm256_mul_pd(y, t);
4922 y = simde_mm256_add_pd(y, a1);
4923 y = simde_mm256_mul_pd(y, t);
4924 y = simde_mm256_mul_pd(y, simde_mm256_exp_pd(simde_mm256_mul_pd(x, simde_x_mm256_negate_pd(x))));
4925 y = simde_mm256_sub_pd(one, y);
4926
4927 /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) */
4928 return simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm256_add_pd(one, simde_x_mm256_xorsign_pd(y, a)));
4929 #else
4930 simde__m256d_private
4931 r_,
4932 a_ = simde__m256d_to_private(a);
4933
4934 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
4935 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
4936 r_.m128d[i] = simde_mm_cdfnorm_pd(a_.m128d[i]);
4937 }
4938 #else
4939 SIMDE_VECTORIZE
4940 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4941 r_.f64[i] = simde_math_cdfnorm(a_.f64[i]);
4942 }
4943 #endif
4944
4945 return simde__m256d_from_private(r_);
4946 #endif
4947 }
4948 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4949 #undef _mm256_cdfnorm_pd
4950 #define _mm256_cdfnorm_pd(a) simde_mm256_cdfnorm_pd(a)
4951 #endif
4952
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_cdfnorm_ps (simde__m512 a) {
  /* Standard normal CDF Phi(a), per 32-bit lane. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_cdfnorm_ps(a);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
    /* Abramowitz & Stegun 7.1.26 rational approximation of erf;
     * https://www.johndcook.com/blog/cpp_phi/ */
    const simde__m512 a1 = simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.254829592));
    const simde__m512 a2 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.284496736));
    const simde__m512 a3 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.421413741));
    const simde__m512 a4 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.453152027));
    const simde__m512 a5 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.061405429));
    const simde__m512 p = simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.3275911));
    const simde__m512 one = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0));

    /* simde_math_fabsf(x) / sqrtf(2.0) */
    const simde__m512 x = simde_mm512_div_ps(simde_mm512_abs_ps(a), simde_mm512_sqrt_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0))));

    /* 1.0 / (1.0 + p * x) */
    const simde__m512 t = simde_mm512_div_ps(one, simde_mm512_add_ps(one, simde_mm512_mul_ps(p, x)));

    /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x)
     * Horner evaluation of the degree-5 polynomial in t. */
    simde__m512 y = simde_mm512_mul_ps(a5, t);
    y = simde_mm512_add_ps(y, a4);
    y = simde_mm512_mul_ps(y, t);
    y = simde_mm512_add_ps(y, a3);
    y = simde_mm512_mul_ps(y, t);
    y = simde_mm512_add_ps(y, a2);
    y = simde_mm512_mul_ps(y, t);
    y = simde_mm512_add_ps(y, a1);
    y = simde_mm512_mul_ps(y, t);
    y = simde_mm512_mul_ps(y, simde_mm512_exp_ps(simde_mm512_mul_ps(x, simde_x_mm512_negate_ps(x))));
    y = simde_mm512_sub_ps(one, y);

    /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) -- xorsign applies a's sign to y. */
    return simde_mm512_mul_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm512_add_ps(one, simde_x_mm512_xorsign_ps(y, a)));
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      /* Narrow targets: process the two 256-bit halves. */
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_cdfnorm_ps(a_.m256[i]);
      }
    #else
      /* Scalar fallback: one call per lane. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_cdfnormf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
5008 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5009 #undef _mm512_cdfnorm_ps
5010 #define _mm512_cdfnorm_ps(a) simde_mm512_cdfnorm_ps(a)
5011 #endif
5012
5013 SIMDE_FUNCTION_ATTRIBUTES
5014 simde__m512d
simde_mm512_cdfnorm_pd(simde__m512d a)5015 simde_mm512_cdfnorm_pd (simde__m512d a) {
5016 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
5017 return _mm512_cdfnorm_pd(a);
5018 #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
5019 /* https://www.johndcook.com/blog/cpp_phi/ */
5020 const simde__m512d a1 = simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.254829592));
5021 const simde__m512d a2 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.284496736));
5022 const simde__m512d a3 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.421413741));
5023 const simde__m512d a4 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.453152027));
5024 const simde__m512d a5 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.061405429));
5025 const simde__m512d p = simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.6475911));
5026 const simde__m512d one = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0));
5027
5028 /* simde_math_fabs(x) / sqrt(2.0) */
5029 const simde__m512d x = simde_mm512_div_pd(simde_mm512_abs_pd(a), simde_mm512_sqrt_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0))));
5030
5031 /* 1.0 / (1.0 + p * x) */
5032 const simde__m512d t = simde_mm512_div_pd(one, simde_mm512_add_pd(one, simde_mm512_mul_pd(p, x)));
5033
5034 /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */
5035 simde__m512d y = simde_mm512_mul_pd(a5, t);
5036 y = simde_mm512_add_pd(y, a4);
5037 y = simde_mm512_mul_pd(y, t);
5038 y = simde_mm512_add_pd(y, a3);
5039 y = simde_mm512_mul_pd(y, t);
5040 y = simde_mm512_add_pd(y, a2);
5041 y = simde_mm512_mul_pd(y, t);
5042 y = simde_mm512_add_pd(y, a1);
5043 y = simde_mm512_mul_pd(y, t);
5044 y = simde_mm512_mul_pd(y, simde_mm512_exp_pd(simde_mm512_mul_pd(x, simde_x_mm512_negate_pd(x))));
5045 y = simde_mm512_sub_pd(one, y);
5046
5047 /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) */
5048 return simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm512_add_pd(one, simde_x_mm512_xorsign_pd(y, a)));
5049 #else
5050 simde__m512d_private
5051 r_,
5052 a_ = simde__m512d_to_private(a);
5053
5054 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
5055 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
5056 r_.m256d[i] = simde_mm256_cdfnorm_pd(a_.m256d[i]);
5057 }
5058 #else
5059 SIMDE_VECTORIZE
5060 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
5061 r_.f64[i] = simde_math_cdfnorm(a_.f64[i]);
5062 }
5063 #endif
5064
5065 return simde__m512d_from_private(r_);
5066 #endif
5067 }
5068 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5069 #undef _mm512_cdfnorm_pd
5070 #define _mm512_cdfnorm_pd(a) simde_mm512_cdfnorm_pd(a)
5071 #endif
5072
5073 SIMDE_FUNCTION_ATTRIBUTES
5074 simde__m512
simde_mm512_mask_cdfnorm_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)5075 simde_mm512_mask_cdfnorm_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
5076 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
5077 return _mm512_mask_cdfnorm_ps(src, k, a);
5078 #else
5079 return simde_mm512_mask_mov_ps(src, k, simde_mm512_cdfnorm_ps(a));
5080 #endif
5081 }
5082 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5083 #undef _mm512_mask_cdfnorm_ps
5084 #define _mm512_mask_cdfnorm_ps(src, k, a) simde_mm512_mask_cdfnorm_ps(src, k, a)
5085 #endif
5086
5087 SIMDE_FUNCTION_ATTRIBUTES
5088 simde__m512d
simde_mm512_mask_cdfnorm_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)5089 simde_mm512_mask_cdfnorm_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
5090 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
5091 return _mm512_mask_cdfnorm_pd(src, k, a);
5092 #else
5093 return simde_mm512_mask_mov_pd(src, k, simde_mm512_cdfnorm_pd(a));
5094 #endif
5095 }
5096 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5097 #undef _mm512_mask_cdfnorm_pd
5098 #define _mm512_mask_cdfnorm_pd(src, k, a) simde_mm512_mask_cdfnorm_pd(src, k, a)
5099 #endif
5100
5101 SIMDE_FUNCTION_ATTRIBUTES
5102 simde__m128i
simde_mm_idivrem_epi32(simde__m128i * mem_addr,simde__m128i a,simde__m128i b)5103 simde_mm_idivrem_epi32 (simde__m128i* mem_addr, simde__m128i a, simde__m128i b) {
5104 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
5105 return _mm_idivrem_epi32(HEDLEY_REINTERPRET_CAST(__m128i*, mem_addr), a, b);
5106 #else
5107 simde__m128i r;
5108
5109 r = simde_mm_div_epi32(a, b);
5110 *mem_addr = simde_mm_sub_epi32(a, simde_mm_mullo_epi32(r, b));
5111
5112 return r;
5113 #endif
5114 }
5115 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5116 #undef _mm_idivrem_epi32
5117 #define _mm_idivrem_epi32(mem_addr, a, b) simde_mm_idivrem_epi32((mem_addr),(a), (b))
5118 #endif
5119
5120 SIMDE_FUNCTION_ATTRIBUTES
5121 simde__m256i
simde_mm256_idivrem_epi32(simde__m256i * mem_addr,simde__m256i a,simde__m256i b)5122 simde_mm256_idivrem_epi32 (simde__m256i* mem_addr, simde__m256i a, simde__m256i b) {
5123 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
5124 return _mm256_idivrem_epi32(HEDLEY_REINTERPRET_CAST(__m256i*, mem_addr), a, b);
5125 #else
5126 simde__m256i r;
5127
5128 r = simde_mm256_div_epi32(a, b);
5129 *mem_addr = simde_mm256_sub_epi32(a, simde_mm256_mullo_epi32(r, b));
5130
5131 return r;
5132 #endif
5133 }
5134 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5135 #undef _mm256_idivrem_epi32
5136 #define _mm256_idivrem_epi32(mem_addr, a, b) simde_mm256_idivrem_epi32((mem_addr),(a), (b))
5137 #endif
5138
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_hypot_ps (simde__m128 a, simde__m128 b) {
  /* Lane-wise hypot: sqrt(a*a + b*b) per 32-bit lane. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_hypot_ps(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    /* SLEEF: _u05 is the higher-accuracy variant, _u35 trades accuracy
     * for speed. */
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_hypotf4_u05(a, b);
    #else
      return Sleef_hypotf4_u35(a, b);
    #endif
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    /* Portable fallback: scalar hypotf per lane. */
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
5164 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5165 #undef _mm_hypot_ps
5166 #define _mm_hypot_ps(a, b) simde_mm_hypot_ps(a, b)
5167 #endif
5168
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_hypot_pd (simde__m128d a, simde__m128d b) {
  /* Lane-wise hypot: sqrt(a*a + b*b) per 64-bit lane. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_hypot_pd(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    /* SLEEF: _u05 is the higher-accuracy variant, _u35 trades accuracy
     * for speed. */
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_hypotd2_u05(a, b);
    #else
      return Sleef_hypotd2_u35(a, b);
    #endif
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    /* Portable fallback: scalar hypot per lane. */
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
5194 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5195 #undef _mm_hypot_pd
5196 #define _mm_hypot_pd(a, b) simde_mm_hypot_pd(a, b)
5197 #endif
5198
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_hypot_ps (simde__m256 a, simde__m256 b) {
  /* Lane-wise hypot: sqrt(a*a + b*b) per 32-bit lane. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_hypot_ps(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    /* SLEEF: _u05 is the higher-accuracy variant, _u35 trades accuracy
     * for speed. */
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_hypotf8_u05(a, b);
    #else
      return Sleef_hypotf8_u35(a, b);
    #endif
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a),
      b_ = simde__m256_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      /* Narrow targets: process the two 128-bit halves. */
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_hypot_ps(a_.m128[i], b_.m128[i]);
      }
    #else
      /* Portable fallback: scalar hypotf per lane. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
5230 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5231 #undef _mm256_hypot_ps
5232 #define _mm256_hypot_ps(a, b) simde_mm256_hypot_ps(a, b)
5233 #endif
5234
5235
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_hypot_pd (simde__m256d a, simde__m256d b) {
  /* Lane-wise hypot: sqrt(a*a + b*b) per 64-bit lane. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_hypot_pd(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    /* SLEEF: _u05 is the higher-accuracy variant, _u35 trades accuracy
     * for speed. */
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_hypotd4_u05(a, b);
    #else
      return Sleef_hypotd4_u35(a, b);
    #endif
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a),
      b_ = simde__m256d_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      /* Narrow targets: process the two 128-bit halves. */
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_hypot_pd(a_.m128d[i], b_.m128d[i]);
      }
    #else
      /* Portable fallback: scalar hypot per lane. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
5267 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5268 #undef _mm256_hypot_pd
5269 #define _mm256_hypot_pd(a, b) simde_mm256_hypot_pd(a, b)
5270 #endif
5271
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_hypot_ps (simde__m512 a, simde__m512 b) {
  /* Lane-wise hypot: sqrt(a*a + b*b) per 32-bit lane. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_hypot_ps(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    /* SLEEF: _u05 is the higher-accuracy variant, _u35 trades accuracy
     * for speed. */
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_hypotf16_u05(a, b);
    #else
      return Sleef_hypotf16_u35(a, b);
    #endif
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a),
      b_ = simde__m512_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      /* Narrow targets: process the two 256-bit halves. */
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_hypot_ps(a_.m256[i], b_.m256[i]);
      }
    #else
      /* Portable fallback: scalar hypotf per lane. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
5303 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5304 #undef _mm512_hypot_ps
5305 #define _mm512_hypot_ps(a, b) simde_mm512_hypot_ps(a, b)
5306 #endif
5307
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_hypot_pd (simde__m512d a, simde__m512d b) {
  /* Lane-wise hypot: sqrt(a*a + b*b) per 64-bit lane. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_hypot_pd(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    /* SLEEF: _u05 is the higher-accuracy variant, _u35 trades accuracy
     * for speed. */
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_hypotd8_u05(a, b);
    #else
      return Sleef_hypotd8_u35(a, b);
    #endif
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a),
      b_ = simde__m512d_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      /* Narrow targets: process the two 256-bit halves. */
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_hypot_pd(a_.m256d[i], b_.m256d[i]);
      }
    #else
      /* Portable fallback: scalar hypot per lane. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
5339 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5340 #undef _mm512_hypot_pd
5341 #define _mm512_hypot_pd(a, b) simde_mm512_hypot_pd(a, b)
5342 #endif
5343
5344 SIMDE_FUNCTION_ATTRIBUTES
5345 simde__m512
simde_mm512_mask_hypot_ps(simde__m512 src,simde__mmask16 k,simde__m512 a,simde__m512 b)5346 simde_mm512_mask_hypot_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
5347 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
5348 return _mm512_mask_hypot_ps(src, k, a, b);
5349 #else
5350 return simde_mm512_mask_mov_ps(src, k, simde_mm512_hypot_ps(a, b));
5351 #endif
5352 }
5353 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5354 #undef _mm512_mask_hypot_ps
5355 #define _mm512_mask_hypot_ps(src, k, a, b) simde_mm512_mask_hypot_ps(src, k, a, b)
5356 #endif
5357
5358 SIMDE_FUNCTION_ATTRIBUTES
5359 simde__m512d
simde_mm512_mask_hypot_pd(simde__m512d src,simde__mmask8 k,simde__m512d a,simde__m512d b)5360 simde_mm512_mask_hypot_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
5361 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
5362 return _mm512_mask_hypot_pd(src, k, a, b);
5363 #else
5364 return simde_mm512_mask_mov_pd(src, k, simde_mm512_hypot_pd(a, b));
5365 #endif
5366 }
5367 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5368 #undef _mm512_mask_hypot_pd
5369 #define _mm512_mask_hypot_pd(src, k, a, b) simde_mm512_mask_hypot_pd(src, k, a, b)
5370 #endif
5371
5372 SIMDE_FUNCTION_ATTRIBUTES
5373 simde__m128
simde_mm_invcbrt_ps(simde__m128 a)5374 simde_mm_invcbrt_ps (simde__m128 a) {
5375 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
5376 return _mm_invcbrt_ps(a);
5377 #else
5378 return simde_mm_rcp_ps(simde_mm_cbrt_ps(a));
5379 #endif
5380 }
5381 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5382 #undef _mm_invcbrt_ps
5383 #define _mm_invcbrt_ps(a) simde_mm_invcbrt_ps(a)
5384 #endif
5385
5386 SIMDE_FUNCTION_ATTRIBUTES
5387 simde__m128d
simde_mm_invcbrt_pd(simde__m128d a)5388 simde_mm_invcbrt_pd (simde__m128d a) {
5389 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
5390 return _mm_invcbrt_pd(a);
5391 #else
5392 return simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm_cbrt_pd(a));
5393 #endif
5394 }
5395 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5396 #undef _mm_invcbrt_pd
5397 #define _mm_invcbrt_pd(a) simde_mm_invcbrt_pd(a)
5398 #endif
5399
5400 SIMDE_FUNCTION_ATTRIBUTES
5401 simde__m256
simde_mm256_invcbrt_ps(simde__m256 a)5402 simde_mm256_invcbrt_ps (simde__m256 a) {
5403 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
5404 return _mm256_invcbrt_ps(a);
5405 #else
5406 return simde_mm256_rcp_ps(simde_mm256_cbrt_ps(a));
5407 #endif
5408 }
5409 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5410 #undef _mm256_invcbrt_ps
5411 #define _mm256_invcbrt_ps(a) simde_mm256_invcbrt_ps(a)
5412 #endif
5413
5414 SIMDE_FUNCTION_ATTRIBUTES
5415 simde__m256d
simde_mm256_invcbrt_pd(simde__m256d a)5416 simde_mm256_invcbrt_pd (simde__m256d a) {
5417 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
5418 return _mm256_invcbrt_pd(a);
5419 #else
5420 return simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm256_cbrt_pd(a));
5421 #endif
5422 }
5423 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5424 #undef _mm256_invcbrt_pd
5425 #define _mm256_invcbrt_pd(a) simde_mm256_invcbrt_pd(a)
5426 #endif
5427
5428 SIMDE_FUNCTION_ATTRIBUTES
5429 simde__m128
simde_mm_invsqrt_ps(simde__m128 a)5430 simde_mm_invsqrt_ps (simde__m128 a) {
5431 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
5432 return _mm_invsqrt_ps(a);
5433 #else
5434 return simde_mm_rcp_ps(simde_mm_sqrt_ps(a));
5435 #endif
5436 }
5437 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5438 #undef _mm_invsqrt_ps
5439 #define _mm_invsqrt_ps(a) simde_mm_invsqrt_ps(a)
5440 #endif
5441
5442 SIMDE_FUNCTION_ATTRIBUTES
5443 simde__m128d
simde_mm_invsqrt_pd(simde__m128d a)5444 simde_mm_invsqrt_pd (simde__m128d a) {
5445 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
5446 return _mm_invsqrt_pd(a);
5447 #else
5448 return simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm_sqrt_pd(a));
5449 #endif
5450 }
5451 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5452 #undef _mm_invsqrt_pd
5453 #define _mm_invsqrt_pd(a) simde_mm_invsqrt_pd(a)
5454 #endif
5455
5456 SIMDE_FUNCTION_ATTRIBUTES
5457 simde__m256
simde_mm256_invsqrt_ps(simde__m256 a)5458 simde_mm256_invsqrt_ps (simde__m256 a) {
5459 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
5460 return _mm256_invsqrt_ps(a);
5461 #else
5462 return simde_mm256_rcp_ps(simde_mm256_sqrt_ps(a));
5463 #endif
5464 }
5465 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5466 #undef _mm256_invsqrt_ps
5467 #define _mm256_invsqrt_ps(a) simde_mm256_invsqrt_ps(a)
5468 #endif
5469
5470 SIMDE_FUNCTION_ATTRIBUTES
5471 simde__m256d
simde_mm256_invsqrt_pd(simde__m256d a)5472 simde_mm256_invsqrt_pd (simde__m256d a) {
5473 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
5474 return _mm256_invsqrt_pd(a);
5475 #else
5476 return simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm256_sqrt_pd(a));
5477 #endif
5478 }
5479 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5480 #undef _mm256_invsqrt_pd
5481 #define _mm256_invsqrt_pd(a) simde_mm256_invsqrt_pd(a)
5482 #endif
5483
5484 SIMDE_FUNCTION_ATTRIBUTES
5485 simde__m512
simde_mm512_invsqrt_ps(simde__m512 a)5486 simde_mm512_invsqrt_ps (simde__m512 a) {
5487 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
5488 return _mm512_invsqrt_ps(a);
5489 #else
5490 return simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), simde_mm512_sqrt_ps(a));
5491 #endif
5492 }
5493 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5494 #undef _mm512_invsqrt_ps
5495 #define _mm512_invsqrt_ps(a) simde_mm512_invsqrt_ps(a)
5496 #endif
5497
5498 SIMDE_FUNCTION_ATTRIBUTES
5499 simde__m512d
simde_mm512_invsqrt_pd(simde__m512d a)5500 simde_mm512_invsqrt_pd (simde__m512d a) {
5501 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
5502 return _mm512_invsqrt_pd(a);
5503 #else
5504 return simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm512_sqrt_pd(a));
5505 #endif
5506 }
5507 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5508 #undef _mm512_invsqrt_pd
5509 #define _mm512_invsqrt_pd(a) simde_mm512_invsqrt_pd(a)
5510 #endif
5511
5512 SIMDE_FUNCTION_ATTRIBUTES
5513 simde__m512
simde_mm512_mask_invsqrt_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)5514 simde_mm512_mask_invsqrt_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
5515 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
5516 return _mm512_mask_invsqrt_ps(src, k, a);
5517 #else
5518 return simde_mm512_mask_mov_ps(src, k, simde_mm512_invsqrt_ps(a));
5519 #endif
5520 }
5521 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5522 #undef _mm512_mask_invsqrt_ps
5523 #define _mm512_mask_invsqrt_ps(src, k, a) simde_mm512_mask_invsqrt_ps(src, k, a)
5524 #endif
5525
5526 SIMDE_FUNCTION_ATTRIBUTES
5527 simde__m512d
simde_mm512_mask_invsqrt_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)5528 simde_mm512_mask_invsqrt_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
5529 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
5530 return _mm512_mask_invsqrt_pd(src, k, a);
5531 #else
5532 return simde_mm512_mask_mov_pd(src, k, simde_mm512_invsqrt_pd(a));
5533 #endif
5534 }
5535 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5536 #undef _mm512_mask_invsqrt_pd
5537 #define _mm512_mask_invsqrt_pd(src, k, a) simde_mm512_mask_invsqrt_pd(src, k, a)
5538 #endif
5539
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_log_ps (simde__m128 a) {
  /* Lane-wise natural logarithm. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_log_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    /* SLEEF: _u10 is the higher-accuracy variant, _u35 trades accuracy
     * for speed. */
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_logf4_u10(a);
    #else
      return Sleef_logf4_u35(a);
    #endif
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    /* Portable fallback: scalar logf per lane. */
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_logf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
5564 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5565 #undef _mm_log_ps
5566 #define _mm_log_ps(a) simde_mm_log_ps(a)
5567 #endif
5568
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_log_pd (simde__m128d a) {
  /* Lane-wise natural logarithm. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_log_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    /* SLEEF: _u10 is the higher-accuracy variant, _u35 trades accuracy
     * for speed. */
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_logd2_u10(a);
    #else
      return Sleef_logd2_u35(a);
    #endif
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    /* Portable fallback: scalar log per lane. */
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_log(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
5593 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5594 #undef _mm_log_pd
5595 #define _mm_log_pd(a) simde_mm_log_pd(a)
5596 #endif
5597
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_log_ps (simde__m256 a) {
  /* Lane-wise natural logarithm. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_log_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    /* SLEEF: _u10 is the higher-accuracy variant, _u35 trades accuracy
     * for speed. */
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_logf8_u10(a);
    #else
      return Sleef_logf8_u35(a);
    #endif
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      /* Narrow targets: process the two 128-bit halves. */
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_log_ps(a_.m128[i]);
      }
    #else
      /* Portable fallback: scalar logf per lane. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_logf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
5628 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5629 #undef _mm256_log_ps
5630 #define _mm256_log_ps(a) simde_mm256_log_ps(a)
5631 #endif
5632
5633
5634 SIMDE_FUNCTION_ATTRIBUTES
5635 simde__m256d
simde_mm256_log_pd(simde__m256d a)5636 simde_mm256_log_pd (simde__m256d a) {
5637 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
5638 return _mm256_log_pd(a);
5639 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
5640 #if SIMDE_ACCURACY_PREFERENCE > 1
5641 return Sleef_logd4_u10(a);
5642 #else
5643 return Sleef_logd4_u35(a);
5644 #endif
5645 #else
5646 simde__m256d_private
5647 r_,
5648 a_ = simde__m256d_to_private(a);
5649
5650 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
5651 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
5652 r_.m128d[i] = simde_mm_log_pd(a_.m128d[i]);
5653 }
5654 #else
5655 SIMDE_VECTORIZE
5656 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
5657 r_.f64[i] = simde_math_log(a_.f64[i]);
5658 }
5659 #endif
5660
5661 return simde__m256d_from_private(r_);
5662 #endif
5663 }
5664 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5665 #undef _mm256_log_pd
5666 #define _mm256_log_pd(a) simde_mm256_log_pd(a)
5667 #endif
5668
5669 SIMDE_FUNCTION_ATTRIBUTES
5670 simde__m512
simde_mm512_log_ps(simde__m512 a)5671 simde_mm512_log_ps (simde__m512 a) {
5672 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
5673 return _mm512_log_ps(a);
5674 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
5675 #if SIMDE_ACCURACY_PREFERENCE > 1
5676 return Sleef_logf16_u10(a);
5677 #else
5678 return Sleef_logf16_u35(a);
5679 #endif
5680 #else
5681 simde__m512_private
5682 r_,
5683 a_ = simde__m512_to_private(a);
5684
5685 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
5686 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
5687 r_.m256[i] = simde_mm256_log_ps(a_.m256[i]);
5688 }
5689 #else
5690 SIMDE_VECTORIZE
5691 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
5692 r_.f32[i] = simde_math_logf(a_.f32[i]);
5693 }
5694 #endif
5695
5696 return simde__m512_from_private(r_);
5697 #endif
5698 }
5699 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5700 #undef _mm512_log_ps
5701 #define _mm512_log_ps(a) simde_mm512_log_ps(a)
5702 #endif
5703
5704 SIMDE_FUNCTION_ATTRIBUTES
5705 simde__m512d
simde_mm512_log_pd(simde__m512d a)5706 simde_mm512_log_pd (simde__m512d a) {
5707 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
5708 return _mm512_log_pd(a);
5709 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
5710 #if SIMDE_ACCURACY_PREFERENCE > 1
5711 return Sleef_logd8_u10(a);
5712 #else
5713 return Sleef_logd8_u35(a);
5714 #endif
5715 #else
5716 simde__m512d_private
5717 r_,
5718 a_ = simde__m512d_to_private(a);
5719
5720 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
5721 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
5722 r_.m256d[i] = simde_mm256_log_pd(a_.m256d[i]);
5723 }
5724 #else
5725 SIMDE_VECTORIZE
5726 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
5727 r_.f64[i] = simde_math_log(a_.f64[i]);
5728 }
5729 #endif
5730
5731 return simde__m512d_from_private(r_);
5732 #endif
5733 }
5734 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5735 #undef _mm512_log_pd
5736 #define _mm512_log_pd(a) simde_mm512_log_pd(a)
5737 #endif
5738
5739 SIMDE_FUNCTION_ATTRIBUTES
5740 simde__m512
simde_mm512_mask_log_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)5741 simde_mm512_mask_log_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
5742 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
5743 return _mm512_mask_log_ps(src, k, a);
5744 #else
5745 return simde_mm512_mask_mov_ps(src, k, simde_mm512_log_ps(a));
5746 #endif
5747 }
5748 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5749 #undef _mm512_mask_log_ps
5750 #define _mm512_mask_log_ps(src, k, a) simde_mm512_mask_log_ps(src, k, a)
5751 #endif
5752
5753 SIMDE_FUNCTION_ATTRIBUTES
5754 simde__m512d
simde_mm512_mask_log_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)5755 simde_mm512_mask_log_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
5756 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
5757 return _mm512_mask_log_pd(src, k, a);
5758 #else
5759 return simde_mm512_mask_mov_pd(src, k, simde_mm512_log_pd(a));
5760 #endif
5761 }
5762 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
5763 #undef _mm512_mask_log_pd
5764 #define _mm512_mask_log_pd(src, k, a) simde_mm512_mask_log_pd(src, k, a)
5765 #endif
5766
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cdfnorminv_ps (simde__m128 a) {
  /* Inverse CDF of the standard normal distribution (per lane); the
   * scalar fallback calls simde_math_cdfnorminvf.  The vectorized path
   * evaluates every input class branchlessly with comparison masks:
   * out-of-range -> 0, a==0 -> -inf, a==1 -> +inf, then a rational
   * approximation for the tails and the central region.
   * NOTE(review): the coefficients appear to be Acklam's probit
   * approximation -- confirm against the scalar implementation. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cdfnorminv_ps(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    /* `matched` accumulates lanes already resolved by an earlier case so
     * later cases cannot overwrite them; `retval` accumulates results. */
    simde__m128 matched, retval = simde_mm_setzero_ps();

    { /* if (a < 0 || a > 1) */
      matched = simde_mm_or_ps(simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))), simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0))));

      /* We don't actually need to do anything here since we initialize
       * retval to 0.0. */
    }

    { /* else if (a == 0) */
      simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0)));
      mask = simde_mm_andnot_ps(matched, mask);
      matched = simde_mm_or_ps(matched, mask);

      simde__m128 res = simde_mm_set1_ps(-SIMDE_MATH_INFINITYF);

      retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res));
    }

    { /* else if (a == 1) */
      simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)));
      mask = simde_mm_andnot_ps(matched, mask);
      matched = simde_mm_or_ps(matched, mask);

      simde__m128 res = simde_mm_set1_ps(SIMDE_MATH_INFINITYF);

      retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res));
    }

    { /* Remaining conditions.
       *
       * Including the else case in this complicates things a lot, but
       * we're using cheap operations to get rid of expensive multiply
       * and add functions.  This should be a small improvement on SSE
       * prior to 4.1.  On SSE 4.1 we can use _mm_blendv_ps which is
       * very fast and this becomes a huge win.  NEON, AltiVec, and
       * WASM also have blend operations, so this should be a big win
       * there, too. */

      /* else if (a < 0.02425) */
      simde__m128 mask_lo = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.02425)));
      /* else if (a > 0.97575) */
      simde__m128 mask_hi = simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.97575)));

      simde__m128 mask = simde_mm_or_ps(mask_lo, mask_hi);
      matched = simde_mm_or_ps(matched, mask);

      /* else */
      simde__m128 mask_el = simde_x_mm_not_ps(matched);
      mask = simde_mm_or_ps(mask, mask_el);

      /* r = a - 0.5f */
      simde__m128 r = simde_mm_sub_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5)));

      /* lo: q = a
       * hi: q = (1.0 - a) */
      simde__m128 q = simde_mm_and_ps(mask_lo, a);
      q = simde_mm_or_ps(q, simde_mm_and_ps(mask_hi, simde_mm_sub_ps(simde_mm_set1_ps(1.0f), a)));

      /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */
      q = simde_mm_log_ps(q);
      q = simde_mm_mul_ps(q, simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.0)));
      q = simde_mm_sqrt_ps(q);

      /* el: q = r * r */
      q = simde_x_mm_select_ps(q, simde_mm_mul_ps(r, r), mask_el);

      /* Horner evaluation of both tail (c_c) and central (c_a) numerator
       * polynomials at once; each select picks the coefficient set per lane.
       * lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) *  1.0f); */
      /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */
      /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) *     r); */
      simde__m128 numerator = simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)), mask_el);
      numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)), mask_el));
      numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)), mask_el));
      numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)), mask_el));
      numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)), mask_el));
      numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)), mask_el));
      {
        /* Final factor of the numerator: +1 (lower tail), -1 (upper
         * tail), or r (central region). */
        simde__m128 multiplier;
        multiplier =                          simde_mm_and_ps(mask_lo, simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.0)));
        multiplier = simde_mm_or_ps(multiplier, simde_mm_and_ps(mask_hi, simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.0))));
        multiplier = simde_mm_or_ps(multiplier, simde_mm_and_ps(mask_el, r));
        numerator = simde_mm_mul_ps(numerator, multiplier);
      }

      /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */
      /*    el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */
      simde__m128 denominator = simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)), mask_el);
      denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)), mask_el));
      denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)), mask_el));
      denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)), mask_el));
      /* The tail polynomial is one degree shorter; multiplying by 1 and
       * adding 0 pads it so both variants share the same FMA chain. */
      denominator = simde_mm_fmadd_ps(denominator, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.0)), q, mask_el),
                                                   simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)), mask_el));
      denominator = simde_mm_fmadd_ps(denominator, q, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)));

      /* res = numerator / denominator; */
      simde__m128 res = simde_mm_div_ps(numerator, denominator);

      retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res));
    }

    return retval;
  #else
    /* Scalar fallback: evaluate lane by lane. */
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_cdfnorminvf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_cdfnorminv_ps
  #define _mm_cdfnorminv_ps(a) simde_mm_cdfnorminv_ps(a)
#endif
5891
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_cdfnorminv_pd (simde__m128d a) {
  /* Inverse CDF of the standard normal distribution (per lane); the
   * scalar fallback calls simde_math_cdfnorminv.  Double-precision
   * twin of simde_mm_cdfnorminv_ps: branchless mask selection of the
   * out-of-range / 0 / 1 / tail / central cases.
   * NOTE(review): the coefficients appear to be Acklam's probit
   * approximation -- confirm against the scalar implementation. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cdfnorminv_pd(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    /* `matched` accumulates lanes already resolved by an earlier case so
     * later cases cannot overwrite them; `retval` accumulates results. */
    simde__m128d matched, retval = simde_mm_setzero_pd();

    { /* if (a < 0 || a > 1) */
      matched = simde_mm_or_pd(simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))), simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0))));

      /* We don't actually need to do anything here since we initialize
       * retval to 0.0. */
    }

    { /* else if (a == 0) */
      simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0)));
      mask = simde_mm_andnot_pd(matched, mask);
      matched = simde_mm_or_pd(matched, mask);

      simde__m128d res = simde_mm_set1_pd(-SIMDE_MATH_INFINITY);

      retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res));
    }

    { /* else if (a == 1) */
      simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)));
      mask = simde_mm_andnot_pd(matched, mask);
      matched = simde_mm_or_pd(matched, mask);

      simde__m128d res = simde_mm_set1_pd(SIMDE_MATH_INFINITY);

      retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res));
    }

    { /* Remaining conditions.
       *
       * Including the else case in this complicates things a lot, but
       * we're using cheap operations to get rid of expensive multiply
       * and add functions.  This should be a small improvement on SSE
       * prior to 4.1.  On SSE 4.1 we can use _mm_blendv_pd which is
       * very fast and this becomes a huge win.  NEON, AltiVec, and
       * WASM also have blend operations, so this should be a big win
       * there, too. */

      /* else if (a < 0.02425) */
      simde__m128d mask_lo = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.02425)));
      /* else if (a > 0.97575) */
      simde__m128d mask_hi = simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.97575)));

      simde__m128d mask = simde_mm_or_pd(mask_lo, mask_hi);
      matched = simde_mm_or_pd(matched, mask);

      /* else */
      simde__m128d mask_el = simde_x_mm_not_pd(matched);
      mask = simde_mm_or_pd(mask, mask_el);

      /* r = a - 0.5 */
      simde__m128d r = simde_mm_sub_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5)));

      /* lo: q = a
       * hi: q = (1.0 - a) */
      simde__m128d q = simde_mm_and_pd(mask_lo, a);
      q = simde_mm_or_pd(q, simde_mm_and_pd(mask_hi, simde_mm_sub_pd(simde_mm_set1_pd(1.0), a)));

      /* q = simde_math_sqrt(-2.0 * simde_math_log(q)) */
      q = simde_mm_log_pd(q);
      q = simde_mm_mul_pd(q, simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.0)));
      q = simde_mm_sqrt_pd(q);

      /* el: q = r * r */
      q = simde_x_mm_select_pd(q, simde_mm_mul_pd(r, r), mask_el);

      /* Horner evaluation of both tail (c_c) and central (c_a) numerator
       * polynomials at once; each select picks the coefficient set per lane.
       * lo: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) *  1.0); */
      /* hi: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0); */
      /* el: double numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) *    r); */
      simde__m128d numerator = simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)), mask_el);
      numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)), mask_el));
      numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)), mask_el));
      numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)), mask_el));
      numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)), mask_el));
      numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)), mask_el));
      {
        /* Final factor of the numerator: +1 (lower tail), -1 (upper
         * tail), or r (central region). */
        simde__m128d multiplier;
        multiplier =                          simde_mm_and_pd(mask_lo, simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.0)));
        multiplier = simde_mm_or_pd(multiplier, simde_mm_and_pd(mask_hi, simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.0))));
        multiplier = simde_mm_or_pd(multiplier, simde_mm_and_pd(mask_el, r));
        numerator = simde_mm_mul_pd(numerator, multiplier);
      }

      /* lo/hi: double denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */
      /*    el: double denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */
      simde__m128d denominator = simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)), mask_el);
      denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)), mask_el));
      denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)), mask_el));
      denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)), mask_el));
      /* The tail polynomial is one degree shorter; multiplying by 1 and
       * adding 0 pads it so both variants share the same FMA chain. */
      denominator = simde_mm_fmadd_pd(denominator, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.0)), q, mask_el),
                                                   simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)), mask_el));
      denominator = simde_mm_fmadd_pd(denominator, q, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)));

      /* res = numerator / denominator; */
      simde__m128d res = simde_mm_div_pd(numerator, denominator);

      retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res));
    }

    return retval;
  #else
    /* Scalar fallback: evaluate lane by lane. */
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_cdfnorminv(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_cdfnorminv_pd
  #define _mm_cdfnorminv_pd(a) simde_mm_cdfnorminv_pd(a)
#endif
6016
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_cdfnorminv_ps (simde__m256 a) {
  /* Inverse CDF of the standard normal distribution (per lane) for a
   * 256-bit float vector; mirrors simde_mm_cdfnorminv_ps but uses the
   * AVX-style _mm256_cmp_ps predicates instead of dedicated compare
   * intrinsics.  Falls back to per-128-bit-half or scalar evaluation.
   * NOTE(review): the coefficients appear to be Acklam's probit
   * approximation -- confirm against the scalar implementation. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_cdfnorminv_ps(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256)
    /* `matched` accumulates lanes already resolved by an earlier case so
     * later cases cannot overwrite them; `retval` accumulates results. */
    simde__m256 matched, retval = simde_mm256_setzero_ps();

    { /* if (a < 0 || a > 1) */
      matched = simde_mm256_or_ps(simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ), simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_GT_OQ));

      /* We don't actually need to do anything here since we initialize
       * retval to 0.0. */
    }

    { /* else if (a == 0) */
      simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ);
      mask = simde_mm256_andnot_ps(matched, mask);
      matched = simde_mm256_or_ps(matched, mask);

      simde__m256 res = simde_mm256_set1_ps(-SIMDE_MATH_INFINITYF);

      retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res));
    }

    { /* else if (a == 1) */
      simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_EQ_OQ);
      mask = simde_mm256_andnot_ps(matched, mask);
      matched = simde_mm256_or_ps(matched, mask);

      simde__m256 res = simde_mm256_set1_ps(SIMDE_MATH_INFINITYF);

      retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res));
    }

    { /* Remaining conditions.
       *
       * Including the else case in this complicates things a lot, but
       * we're using cheap operations to get rid of expensive multiply
       * and add functions.  This should be a small improvement on SSE
       * prior to 4.1.  On SSE 4.1 we can use _mm256_blendv_ps which is
       * very fast and this becomes a huge win.  NEON, AltiVec, and
       * WASM also have blend operations, so this should be a big win
       * there, too. */

      /* else if (a < 0.02425) */
      simde__m256 mask_lo = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.02425)), SIMDE_CMP_LT_OQ);
      /* else if (a > 0.97575) */
      simde__m256 mask_hi = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.97575)), SIMDE_CMP_GT_OQ);

      simde__m256 mask = simde_mm256_or_ps(mask_lo, mask_hi);
      matched = simde_mm256_or_ps(matched, mask);

      /* else */
      simde__m256 mask_el = simde_x_mm256_not_ps(matched);
      mask = simde_mm256_or_ps(mask, mask_el);

      /* r = a - 0.5f */
      simde__m256 r = simde_mm256_sub_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5)));

      /* lo: q = a
       * hi: q = (1.0 - a) */
      simde__m256 q = simde_mm256_and_ps(mask_lo, a);
      q = simde_mm256_or_ps(q, simde_mm256_and_ps(mask_hi, simde_mm256_sub_ps(simde_mm256_set1_ps(1.0f), a)));

      /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */
      q = simde_mm256_log_ps(q);
      q = simde_mm256_mul_ps(q, simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.0)));
      q = simde_mm256_sqrt_ps(q);

      /* el: q = r * r */
      q = simde_x_mm256_select_ps(q, simde_mm256_mul_ps(r, r), mask_el);

      /* Horner evaluation of both tail (c_c) and central (c_a) numerator
       * polynomials at once; each select picks the coefficient set per lane.
       * lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) *  1.0f); */
      /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */
      /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) *     r); */
      simde__m256 numerator = simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)), mask_el);
      numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)), mask_el));
      numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)), mask_el));
      numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)), mask_el));
      numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)), mask_el));
      numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)), mask_el));
      {
        /* Final factor of the numerator: +1 (lower tail), -1 (upper
         * tail), or r (central region). */
        simde__m256 multiplier;
        multiplier =                             simde_mm256_and_ps(mask_lo, simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.0)));
        multiplier = simde_mm256_or_ps(multiplier, simde_mm256_and_ps(mask_hi, simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.0))));
        multiplier = simde_mm256_or_ps(multiplier, simde_mm256_and_ps(mask_el, r));
        numerator = simde_mm256_mul_ps(numerator, multiplier);
      }

      /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */
      /*    el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */
      simde__m256 denominator = simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)), mask_el);
      denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)), mask_el));
      denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)), mask_el));
      denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)), mask_el));
      /* The tail polynomial is one degree shorter; multiplying by 1 and
       * adding 0 pads it so both variants share the same FMA chain. */
      denominator = simde_mm256_fmadd_ps(denominator, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.0)), q, mask_el),
                                                      simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)), mask_el));
      denominator = simde_mm256_fmadd_ps(denominator, q, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)));

      /* res = numerator / denominator; */
      simde__m256 res = simde_mm256_div_ps(numerator, denominator);

      retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res));
    }

    return retval;
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      /* Narrow targets: delegate each 128-bit half to the SSE helper. */
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_cdfnorminv_ps(a_.m128[i]);
      }
    #else
      /* Scalar fallback: evaluate lane by lane. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_cdfnorminvf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cdfnorminv_ps
  #define _mm256_cdfnorminv_ps(a) simde_mm256_cdfnorminv_ps(a)
#endif
6147
6148 SIMDE_FUNCTION_ATTRIBUTES
6149 simde__m256d
simde_mm256_cdfnorminv_pd(simde__m256d a)6150 simde_mm256_cdfnorminv_pd (simde__m256d a) {
6151 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
6152 return _mm256_cdfnorminv_pd(a);
6153 #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256)
6154 simde__m256d matched, retval = simde_mm256_setzero_pd();
6155
6156 { /* if (a < 0 || a > 1) */
6157 matched = simde_mm256_or_pd(simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ), simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_GT_OQ));
6158
6159 /* We don't actually need to do anything here since we initialize
6160 * retval to 0.0. */
6161 }
6162
6163 { /* else if (a == 0) */
6164 simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ);
6165 mask = simde_mm256_andnot_pd(matched, mask);
6166 matched = simde_mm256_or_pd(matched, mask);
6167
6168 simde__m256d res = simde_mm256_set1_pd(-SIMDE_MATH_INFINITY);
6169
6170 retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res));
6171 }
6172
6173 { /* else if (a == 1) */
6174 simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_EQ_OQ);
6175 mask = simde_mm256_andnot_pd(matched, mask);
6176 matched = simde_mm256_or_pd(matched, mask);
6177
6178 simde__m256d res = simde_mm256_set1_pd(SIMDE_MATH_INFINITY);
6179
6180 retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res));
6181 }
6182
6183 { /* Remaining conditions.
6184 *
6185 * Including the else case in this complicates things a lot, but
6186 * we're using cheap operations to get rid of expensive multiply
6187 * and add functions. This should be a small improvement on SSE
6188 * prior to 4.1. On SSE 4.1 we can use _mm256_blendv_pd which is
6189 * very fast and this becomes a huge win. NEON, AltiVec, and
6190 * WASM also have blend operations, so this should be a big win
6191 * there, too. */
6192
6193 /* else if (a < 0.02425) */
6194 simde__m256d mask_lo = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.02425)), SIMDE_CMP_LT_OQ);
6195 /* else if (a > 0.97575) */
6196 simde__m256d mask_hi = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.97575)), SIMDE_CMP_GT_OQ);
6197
6198 simde__m256d mask = simde_mm256_or_pd(mask_lo, mask_hi);
6199 matched = simde_mm256_or_pd(matched, mask);
6200
6201 /* else */
6202 simde__m256d mask_el = simde_x_mm256_not_pd(matched);
6203 mask = simde_mm256_or_pd(mask, mask_el);
6204
6205 /* r = a - 0.5 */
6206 simde__m256d r = simde_mm256_sub_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5)));
6207
6208 /* lo: q = a
6209 * hi: q = (1.0 - a) */
6210 simde__m256d q = simde_mm256_and_pd(mask_lo, a);
6211 q = simde_mm256_or_pd(q, simde_mm256_and_pd(mask_hi, simde_mm256_sub_pd(simde_mm256_set1_pd(1.0), a)));
6212
6213 /* q = simde_math_sqrt(-2.0 * simde_math_log(q)) */
6214 q = simde_mm256_log_pd(q);
6215 q = simde_mm256_mul_pd(q, simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.0)));
6216 q = simde_mm256_sqrt_pd(q);
6217
6218 /* el: q = r * r */
6219 q = simde_x_mm256_select_pd(q, simde_mm256_mul_pd(r, r), mask_el);
6220
6221 /* lo: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0); */
6222 /* hi: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0); */
6223 /* el: double numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */
6224 simde__m256d numerator = simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)), mask_el);
6225 numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)), mask_el));
6226 numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)), mask_el));
6227 numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)), mask_el));
6228 numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)), mask_el));
6229 numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)), mask_el));
6230 {
6231 simde__m256d multiplier;
6232 multiplier = simde_mm256_and_pd(mask_lo, simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.0)));
6233 multiplier = simde_mm256_or_pd(multiplier, simde_mm256_and_pd(mask_hi, simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.0))));
6234 multiplier = simde_mm256_or_pd(multiplier, simde_mm256_and_pd(mask_el, r));
6235 numerator = simde_mm256_mul_pd(numerator, multiplier);
6236 }
6237
6238 /* lo/hi: double denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */
6239 /* el: double denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */
6240 simde__m256d denominator = simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)), mask_el);
6241 denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)), mask_el));
6242 denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)), mask_el));
6243 denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)), mask_el));
6244 denominator = simde_mm256_fmadd_pd(denominator, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.0)), q, mask_el),
6245 simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.0)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)), mask_el));
6246 denominator = simde_mm256_fmadd_pd(denominator, q, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)));
6247
6248 /* res = numerator / denominator; */
6249 simde__m256d res = simde_mm256_div_pd(numerator, denominator);
6250
6251 retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res));
6252 }
6253
6254 return retval;
6255 #else
6256 simde__m256d_private
6257 r_,
6258 a_ = simde__m256d_to_private(a);
6259
6260 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
6261 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
6262 r_.m128d[i] = simde_mm_cdfnorminv_pd(a_.m128d[i]);
6263 }
6264 #else
6265 SIMDE_VECTORIZE
6266 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
6267 r_.f64[i] = simde_math_cdfnorminv(a_.f64[i]);
6268 }
6269 #endif
6270
6271 return simde__m256d_from_private(r_);
6272 #endif
6273 }
6274 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6275 #undef _mm256_cdfnorminv_pd
6276 #define _mm256_cdfnorminv_pd(a) simde_mm256_cdfnorminv_pd(a)
6277 #endif
6278
6279 SIMDE_FUNCTION_ATTRIBUTES
6280 simde__m512
simde_mm512_cdfnorminv_ps(simde__m512 a)6281 simde_mm512_cdfnorminv_ps (simde__m512 a) {
6282 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
6283 return _mm512_cdfnorminv_ps(a);
6284 #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256)
6285 simde__m512_private
6286 r_,
6287 a_ = simde__m512_to_private(a);
6288
6289 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
6290 r_.m256[i] = simde_mm256_cdfnorminv_ps(a_.m256[i]);
6291 }
6292
6293 return simde__m512_from_private(r_);
6294 #else
6295
6296 simde__m512 retval = simde_mm512_setzero_ps();
6297 simde__mmask16 matched;
6298
6299 { /* if (a < 0 || a > 1) */
6300 matched = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ);
6301 matched |= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_GT_OQ);
6302
6303 /* We don't actually need to do anything here since we initialize
6304 * retval to 0.0. */
6305 }
6306
6307 { /* else if (a == 0) */
6308 simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ);
6309 matched |= mask;
6310
6311 retval = simde_mm512_mask_mov_ps(retval, mask, simde_mm512_set1_ps(-SIMDE_MATH_INFINITYF));
6312 }
6313
6314 { /* else if (a == 1) */
6315 simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ);
6316 matched |= mask;
6317
6318 retval = simde_mm512_mask_mov_ps(retval, mask, simde_mm512_set1_ps(SIMDE_MATH_INFINITYF));
6319 }
6320
6321 { /* else if (a < 0.02425) */
6322 simde__mmask16 mask_lo = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.02425)), SIMDE_CMP_LT_OQ);
6323 /* else if (a > 0.97575) */
6324 simde__mmask16 mask_hi = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.97575)), SIMDE_CMP_GT_OQ);
6325
6326 simde__mmask16 mask = mask_lo | mask_hi;
6327 matched = matched | mask;
6328
6329 /* else */
6330 simde__mmask16 mask_el = ~matched;
6331 mask = mask | mask_el;
6332
6333 /* r = a - 0.5f */
6334 simde__m512 r = simde_mm512_sub_ps(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)));
6335
6336 /* lo: q = a
6337 * hi: q = (1.0 - a) */
6338 simde__m512 q = simde_mm512_maskz_mov_ps(mask_lo, a);
6339 q = simde_mm512_mask_sub_ps(q, mask_hi, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a);
6340
6341 /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */
6342 q = simde_mm512_log_ps(q);
6343 q = simde_mm512_mul_ps(q, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.0)));
6344 q = simde_mm512_sqrt_ps(q);
6345
6346 /* el: q = r * r */
6347 q = simde_mm512_mask_mul_ps(q, mask_el, r, r);
6348
6349 /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */
6350 /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */
6351 /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */
6352 simde__m512 numerator = simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)));
6353 numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02))));
6354 numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02))));
6355 numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02))));
6356 numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01))));
6357 numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00))));
6358 {
6359 simde__m512 multiplier;
6360 multiplier = simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.0));
6361 multiplier = simde_mm512_mask_mov_ps(multiplier, mask_hi, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.0)));
6362 multiplier = simde_mm512_mask_mov_ps(multiplier, mask_el, r);
6363 numerator = simde_mm512_mul_ps(numerator, multiplier);
6364 }
6365
6366 /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */
6367 /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */
6368 simde__m512 denominator = simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)));
6369 denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02))));
6370 denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02))));
6371 denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01))));
6372 denominator = simde_mm512_fmadd_ps(denominator, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.0)), mask_el, q),
6373 simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.0)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01))));
6374 denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)));
6375
6376 /* res = numerator / denominator; */
6377 retval = simde_mm512_mask_div_ps(retval, mask_lo | mask_hi | mask_el, numerator, denominator);
6378 }
6379
6380 return retval;
6381 #endif
6382 }
6383 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6384 #undef _mm512_cdfnorminv_ps
6385 #define _mm512_cdfnorminv_ps(a) simde_mm512_cdfnorminv_ps(a)
6386 #endif
6387
6388 SIMDE_FUNCTION_ATTRIBUTES
6389 simde__m512d
simde_mm512_cdfnorminv_pd(simde__m512d a)6390 simde_mm512_cdfnorminv_pd (simde__m512d a) {
6391 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
6392 return _mm512_cdfnorminv_pd(a);
6393 #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256)
6394 simde__m512d_private
6395 r_,
6396 a_ = simde__m512d_to_private(a);
6397
6398 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
6399 r_.m256d[i] = simde_mm256_cdfnorminv_pd(a_.m256d[i]);
6400 }
6401
6402 return simde__m512d_from_private(r_);
6403 #else
6404
6405 simde__m512d retval = simde_mm512_setzero_pd();
6406 simde__mmask8 matched;
6407
6408 { /* if (a < 0 || a > 1) */
6409 matched = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ);
6410 matched |= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_GT_OQ);
6411
6412 /* We don't actually need to do anything here since we initialize
6413 * retval to 0.0. */
6414 }
6415
6416 { /* else if (a == 0) */
6417 simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ);
6418 matched |= mask;
6419
6420 retval = simde_mm512_mask_mov_pd(retval, mask, simde_mm512_set1_pd(-SIMDE_MATH_INFINITY));
6421 }
6422
6423 { /* else if (a == 1) */
6424 simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ);
6425 matched |= mask;
6426
6427 retval = simde_mm512_mask_mov_pd(retval, mask, simde_mm512_set1_pd(SIMDE_MATH_INFINITY));
6428 }
6429
6430 { /* else if (a < 0.02425) */
6431 simde__mmask8 mask_lo = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.02425)), SIMDE_CMP_LT_OQ);
6432 /* else if (a > 0.97575) */
6433 simde__mmask8 mask_hi = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.97575)), SIMDE_CMP_GT_OQ);
6434
6435 simde__mmask8 mask = mask_lo | mask_hi;
6436 matched = matched | mask;
6437
6438 /* else */
6439 simde__mmask8 mask_el = ~matched;
6440 mask = mask | mask_el;
6441
6442 /* r = a - 0.5f */
6443 simde__m512d r = simde_mm512_sub_pd(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5)));
6444
6445 /* lo: q = a
6446 * hi: q = (1.0 - a) */
6447 simde__m512d q = a;
6448 q = simde_mm512_mask_sub_pd(q, mask_hi, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a);
6449
6450 /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */
6451 q = simde_mm512_log_pd(q);
6452 q = simde_mm512_mul_pd(q, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.0)));
6453 q = simde_mm512_sqrt_pd(q);
6454
6455 /* el: q = r * r */
6456 q = simde_mm512_mask_mul_pd(q, mask_el, r, r);
6457
6458 /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */
6459 /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */
6460 /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */
6461 simde__m512d numerator = simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)));
6462 numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02))));
6463 numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02))));
6464 numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02))));
6465 numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01))));
6466 numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00))));
6467 {
6468 simde__m512d multiplier;
6469 multiplier = simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.0));
6470 multiplier = simde_mm512_mask_mov_pd(multiplier, mask_hi, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.0)));
6471 multiplier = simde_mm512_mask_mov_pd(multiplier, mask_el, r);
6472 numerator = simde_mm512_mul_pd(numerator, multiplier);
6473 }
6474
6475 /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */
6476 /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */
6477 simde__m512d denominator = simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)));
6478 denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02))));
6479 denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02))));
6480 denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01))));
6481 denominator = simde_mm512_fmadd_pd(denominator, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.0)), mask_el, q),
6482 simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.0)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01))));
6483 denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)));
6484
6485 /* res = numerator / denominator; */
6486 retval = simde_mm512_mask_div_pd(retval, mask_lo | mask_hi | mask_el, numerator, denominator);
6487 }
6488
6489 return retval;
6490 #endif
6491 }
6492 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6493 #undef _mm512_cdfnorminv_pd
6494 #define _mm512_cdfnorminv_pd(a) simde_mm512_cdfnorminv_pd(a)
6495 #endif
6496
/* Masked inverse normal CDF: lanes with the corresponding bit of `k` set
 * receive simde_mm512_cdfnorminv_ps(a); cleared lanes pass through `src`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_cdfnorminv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_cdfnorminv_ps(src, k, a);
  #else
    /* Compute all lanes, then blend with src under k. */
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_cdfnorminv_ps(a));
  #endif
}
6506 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6507 #undef _mm512_mask_cdfnorminv_ps
6508 #define _mm512_mask_cdfnorminv_ps(src, k, a) simde_mm512_mask_cdfnorminv_ps(src, k, a)
6509 #endif
6510
/* Masked inverse normal CDF (double): lanes with the corresponding bit of
 * `k` set receive simde_mm512_cdfnorminv_pd(a); cleared lanes pass through
 * `src`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_cdfnorminv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_cdfnorminv_pd(src, k, a);
  #else
    /* Compute all lanes, then blend with src under k. */
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_cdfnorminv_pd(a));
  #endif
}
6520 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6521 #undef _mm512_mask_cdfnorminv_pd
6522 #define _mm512_mask_cdfnorminv_pd(src, k, a) simde_mm512_mask_cdfnorminv_pd(src, k, a)
6523 #endif
6524
6525 SIMDE_FUNCTION_ATTRIBUTES
6526 simde__m128
simde_mm_erfinv_ps(simde__m128 a)6527 simde_mm_erfinv_ps (simde__m128 a) {
6528 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
6529 return _mm_erfinv_ps(a);
6530 #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
6531 /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c */
6532 simde__m128 one = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0));
6533
6534 simde__m128 lnx = simde_mm_log_ps(simde_mm_mul_ps(simde_mm_sub_ps(one, a), simde_mm_add_ps(one, a)));
6535
6536 simde__m128 tt1 = simde_mm_mul_ps(simde_mm_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.147)));
6537 tt1 = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1);
6538 tt1 = simde_mm_add_ps(tt1, simde_mm_mul_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx));
6539
6540 simde__m128 tt2 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147));
6541 tt2 = simde_mm_mul_ps(tt2, lnx);
6542
6543 simde__m128 r = simde_mm_mul_ps(tt1, tt1);
6544 r = simde_mm_sub_ps(r, tt2);
6545 r = simde_mm_sqrt_ps(r);
6546 r = simde_mm_add_ps(simde_x_mm_negate_ps(tt1), r);
6547 r = simde_mm_sqrt_ps(r);
6548
6549 return simde_x_mm_xorsign_ps(r, a);
6550 #else
6551 simde__m128_private
6552 a_ = simde__m128_to_private(a),
6553 r_;
6554
6555 SIMDE_VECTORIZE
6556 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
6557 r_.f32[i] = simde_math_erfinvf(a_.f32[i]);
6558 }
6559
6560 return simde__m128_from_private(r_);
6561 #endif
6562 }
6563 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6564 #undef _mm_erfinv_ps
6565 #define _mm_erfinv_ps(a) simde_mm_erfinv_ps(a)
6566 #endif
6567
6568 SIMDE_FUNCTION_ATTRIBUTES
6569 simde__m128d
simde_mm_erfinv_pd(simde__m128d a)6570 simde_mm_erfinv_pd (simde__m128d a) {
6571 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
6572 return _mm_erfinv_pd(a);
6573 #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
6574 simde__m128d one = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0));
6575
6576 simde__m128d lnx = simde_mm_log_pd(simde_mm_mul_pd(simde_mm_sub_pd(one, a), simde_mm_add_pd(one, a)));
6577
6578 simde__m128d tt1 = simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_MATH_PI), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.147)));
6579 tt1 = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1);
6580 tt1 = simde_mm_add_pd(tt1, simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx));
6581
6582 simde__m128d tt2 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147));
6583 tt2 = simde_mm_mul_pd(tt2, lnx);
6584
6585 simde__m128d r = simde_mm_mul_pd(tt1, tt1);
6586 r = simde_mm_sub_pd(r, tt2);
6587 r = simde_mm_sqrt_pd(r);
6588 r = simde_mm_add_pd(simde_x_mm_negate_pd(tt1), r);
6589 r = simde_mm_sqrt_pd(r);
6590
6591 return simde_x_mm_xorsign_pd(r, a);
6592 #else
6593 simde__m128d_private
6594 a_ = simde__m128d_to_private(a),
6595 r_;
6596
6597 SIMDE_VECTORIZE
6598 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
6599 r_.f64[i] = simde_math_erfinv(a_.f64[i]);
6600 }
6601
6602 return simde__m128d_from_private(r_);
6603 #endif
6604 }
6605 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6606 #undef _mm_erfinv_pd
6607 #define _mm_erfinv_pd(a) simde_mm_erfinv_pd(a)
6608 #endif
6609
6610 SIMDE_FUNCTION_ATTRIBUTES
6611 simde__m256
simde_mm256_erfinv_ps(simde__m256 a)6612 simde_mm256_erfinv_ps (simde__m256 a) {
6613 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
6614 return _mm256_erfinv_ps(a);
6615 #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
6616 simde__m256 one = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0));
6617 simde__m256 sgn = simde_x_mm256_copysign_ps(one, a);
6618
6619 a = simde_mm256_mul_ps(simde_mm256_sub_ps(one, a), simde_mm256_add_ps(one, a));
6620 simde__m256 lnx = simde_mm256_log_ps(a);
6621
6622 simde__m256 tt1 = simde_mm256_mul_ps(simde_mm256_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.147)));
6623 tt1 = simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1);
6624 tt1 = simde_mm256_add_ps(tt1, simde_mm256_mul_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx));
6625
6626 simde__m256 tt2 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147));
6627 tt2 = simde_mm256_mul_ps(tt2, lnx);
6628
6629 simde__m256 r = simde_mm256_mul_ps(tt1, tt1);
6630 r = simde_mm256_sub_ps(r, tt2);
6631 r = simde_mm256_sqrt_ps(r);
6632 r = simde_mm256_add_ps(simde_x_mm256_negate_ps(tt1), r);
6633 r = simde_mm256_sqrt_ps(r);
6634
6635 return simde_mm256_mul_ps(sgn, r);
6636 #else
6637 simde__m256_private
6638 a_ = simde__m256_to_private(a),
6639 r_;
6640
6641 SIMDE_VECTORIZE
6642 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
6643 r_.f32[i] = simde_math_erfinvf(a_.f32[i]);
6644 }
6645
6646 return simde__m256_from_private(r_);
6647 #endif
6648 }
6649 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6650 #undef _mm256_erfinv_ps
6651 #define _mm256_erfinv_ps(a) simde_mm256_erfinv_ps(a)
6652 #endif
6653
6654 SIMDE_FUNCTION_ATTRIBUTES
6655 simde__m256d
simde_mm256_erfinv_pd(simde__m256d a)6656 simde_mm256_erfinv_pd (simde__m256d a) {
6657 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
6658 return _mm256_erfinv_pd(a);
6659 #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
6660 simde__m256d one = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0));
6661 simde__m256d sgn = simde_x_mm256_copysign_pd(one, a);
6662
6663 a = simde_mm256_mul_pd(simde_mm256_sub_pd(one, a), simde_mm256_add_pd(one, a));
6664 simde__m256d lnx = simde_mm256_log_pd(a);
6665
6666 simde__m256d tt1 = simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_MATH_PI), simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.147)));
6667 tt1 = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1);
6668 tt1 = simde_mm256_add_pd(tt1, simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx));
6669
6670 simde__m256d tt2 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147));
6671 tt2 = simde_mm256_mul_pd(tt2, lnx);
6672
6673 simde__m256d r = simde_mm256_mul_pd(tt1, tt1);
6674 r = simde_mm256_sub_pd(r, tt2);
6675 r = simde_mm256_sqrt_pd(r);
6676 r = simde_mm256_add_pd(simde_x_mm256_negate_pd(tt1), r);
6677 r = simde_mm256_sqrt_pd(r);
6678
6679 return simde_mm256_mul_pd(sgn, r);
6680 #else
6681 simde__m256d_private
6682 a_ = simde__m256d_to_private(a),
6683 r_;
6684
6685 SIMDE_VECTORIZE
6686 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
6687 r_.f64[i] = simde_math_erfinv(a_.f64[i]);
6688 }
6689
6690 return simde__m256d_from_private(r_);
6691 #endif
6692 }
6693 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6694 #undef _mm256_erfinv_pd
6695 #define _mm256_erfinv_pd(a) simde_mm256_erfinv_pd(a)
6696 #endif
6697
6698 SIMDE_FUNCTION_ATTRIBUTES
6699 simde__m512
simde_mm512_erfinv_ps(simde__m512 a)6700 simde_mm512_erfinv_ps (simde__m512 a) {
6701 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
6702 return _mm512_erfinv_ps(a);
6703 #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
6704 simde__m512 one = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0));
6705 simde__m512 sgn = simde_x_mm512_copysign_ps(one, a);
6706
6707 a = simde_mm512_mul_ps(simde_mm512_sub_ps(one, a), simde_mm512_add_ps(one, a));
6708 simde__m512 lnx = simde_mm512_log_ps(a);
6709
6710 simde__m512 tt1 = simde_mm512_mul_ps(simde_mm512_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.147)));
6711 tt1 = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1);
6712 tt1 = simde_mm512_add_ps(tt1, simde_mm512_mul_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx));
6713
6714 simde__m512 tt2 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147));
6715 tt2 = simde_mm512_mul_ps(tt2, lnx);
6716
6717 simde__m512 r = simde_mm512_mul_ps(tt1, tt1);
6718 r = simde_mm512_sub_ps(r, tt2);
6719 r = simde_mm512_sqrt_ps(r);
6720 r = simde_mm512_add_ps(simde_x_mm512_negate_ps(tt1), r);
6721 r = simde_mm512_sqrt_ps(r);
6722
6723 return simde_mm512_mul_ps(sgn, r);
6724 #else
6725 simde__m512_private
6726 a_ = simde__m512_to_private(a),
6727 r_;
6728
6729 SIMDE_VECTORIZE
6730 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
6731 r_.f32[i] = simde_math_erfinvf(a_.f32[i]);
6732 }
6733
6734 return simde__m512_from_private(r_);
6735 #endif
6736 }
6737 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6738 #undef _mm512_erfinv_ps
6739 #define _mm512_erfinv_ps(a) simde_mm512_erfinv_ps(a)
6740 #endif
6741
6742 SIMDE_FUNCTION_ATTRIBUTES
6743 simde__m512d
simde_mm512_erfinv_pd(simde__m512d a)6744 simde_mm512_erfinv_pd (simde__m512d a) {
6745 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
6746 return _mm512_erfinv_pd(a);
6747 #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
6748 simde__m512d one = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0));
6749 simde__m512d sgn = simde_x_mm512_copysign_pd(one, a);
6750
6751 a = simde_mm512_mul_pd(simde_mm512_sub_pd(one, a), simde_mm512_add_pd(one, a));
6752 simde__m512d lnx = simde_mm512_log_pd(a);
6753
6754 simde__m512d tt1 = simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_MATH_PI), simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.147)));
6755 tt1 = simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1);
6756 tt1 = simde_mm512_add_pd(tt1, simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx));
6757
6758 simde__m512d tt2 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147));
6759 tt2 = simde_mm512_mul_pd(tt2, lnx);
6760
6761 simde__m512d r = simde_mm512_mul_pd(tt1, tt1);
6762 r = simde_mm512_sub_pd(r, tt2);
6763 r = simde_mm512_sqrt_pd(r);
6764 r = simde_mm512_add_pd(simde_x_mm512_negate_pd(tt1), r);
6765 r = simde_mm512_sqrt_pd(r);
6766
6767 return simde_mm512_mul_pd(sgn, r);
6768 #else
6769 simde__m512d_private
6770 a_ = simde__m512d_to_private(a),
6771 r_;
6772
6773 SIMDE_VECTORIZE
6774 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
6775 r_.f64[i] = simde_math_erfinv(a_.f64[i]);
6776 }
6777
6778 return simde__m512d_from_private(r_);
6779 #endif
6780 }
6781 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6782 #undef _mm512_erfinv_pd
6783 #define _mm512_erfinv_pd(a) simde_mm512_erfinv_pd(a)
6784 #endif
6785
/* Masked inverse error function: lanes with the corresponding bit of `k`
 * set receive simde_mm512_erfinv_ps(a); cleared lanes pass through `src`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_erfinv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_erfinv_ps(src, k, a);
  #else
    /* Compute all lanes, then blend with src under k. */
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfinv_ps(a));
  #endif
}
6795 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6796 #undef _mm512_mask_erfinv_ps
6797 #define _mm512_mask_erfinv_ps(src, k, a) simde_mm512_mask_erfinv_ps(src, k, a)
6798 #endif
6799
/* Masked inverse error function (double): lanes with the corresponding bit
 * of `k` set receive simde_mm512_erfinv_pd(a); cleared lanes pass through
 * `src`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_erfinv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_erfinv_pd(src, k, a);
  #else
    /* Compute all lanes, then blend with src under k. */
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfinv_pd(a));
  #endif
}
6809 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6810 #undef _mm512_mask_erfinv_pd
6811 #define _mm512_mask_erfinv_pd(src, k, a) simde_mm512_mask_erfinv_pd(src, k, a)
6812 #endif
6813
/* Inverse complementary error function, lane-wise.  Scalar `if`/`else if`
 * chains are translated to lane masks: `matched` accumulates lanes already
 * handled so each later region only fills in the remainder; all-zero /
 * all-ones mask tests allow early exits when every lane falls in one region. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_erfcinv_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_erfcinv_ps(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    simde__m128 matched, retval = simde_mm_setzero_ps();

    { /* if (a < 2.0f && a > 0.0625f) */
      /* Central region: erfcinv(a) == erfinv(1 - a). */
      matched = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)));
      matched = simde_mm_and_ps(matched, simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0625))));

      if (!simde_mm_test_all_zeros(simde_mm_castps_si128(matched), simde_x_mm_setone_si128())) {
        retval = simde_mm_erfinv_ps(simde_mm_sub_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), a));
      }

      if (simde_mm_test_all_ones(simde_mm_castps_si128(matched))) {
        return retval;
      }
    }

    { /* else if (a < 0.0625f && a > 0.0f) */
      /* Small-a tail: rational approximation in t = 1/sqrt(-log(a)). */
      simde__m128 mask = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0625)));
      mask = simde_mm_and_ps(mask, simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
      mask = simde_mm_andnot_ps(matched, mask);

      if (!simde_mm_test_all_zeros(simde_mm_castps_si128(mask), simde_x_mm_setone_si128())) {
        matched = simde_mm_or_ps(matched, mask);

        /* t = 1/(sqrt(-log(a))) */
        simde__m128 t = simde_x_mm_negate_ps(simde_mm_log_ps(a));
        t = simde_mm_sqrt_ps(t);
        t = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), t);

        /* Numerator coefficients, highest-order last (evaluated via Horner). */
        const simde__m128 p[] = {
          simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)),
          simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)),
          simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)),
          simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)),
          simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.680544246825)),
          simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.164441567910))
        };

        /* Denominator coefficients. */
        const simde__m128 q[] = {
          simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)),
          simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)),
          simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.000000000000))
        };

        /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */
        simde__m128 numerator = simde_mm_fmadd_ps(p[5], t, p[4]);
        numerator = simde_mm_fmadd_ps(numerator, t, p[3]);
        numerator = simde_mm_fmadd_ps(numerator, t, p[2]);
        numerator = simde_mm_fmadd_ps(numerator, t, p[1]);
        numerator = simde_mm_add_ps(numerator, simde_mm_div_ps(p[0], t));

        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */
        simde__m128 denominator = simde_mm_fmadd_ps(q[2], t, q[1]);
        denominator = simde_mm_fmadd_ps(denominator, t, q[0]);

        simde__m128 res = simde_mm_div_ps(numerator, denominator);

        /* Merge only the lanes selected by this region's mask. */
        retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res));
      }
    }

    { /* else if (a < 0.0f) */
      /* NOTE(review): for a < 0 the input is outside erfcinv's domain;
       * -log(a) is NaN here, so these lanes appear to propagate NaN through
       * the rational approximation — confirm this matches the intended
       * scalar fallback behavior. */
      simde__m128 mask = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0)));
      mask = simde_mm_andnot_ps(matched, mask);

      if (!simde_mm_test_all_zeros(simde_mm_castps_si128(mask), simde_x_mm_setone_si128())) {
        matched = simde_mm_or_ps(matched, mask);

        /* t = 1/(sqrt(-log(a))) */
        simde__m128 t = simde_x_mm_negate_ps(simde_mm_log_ps(a));
        t = simde_mm_sqrt_ps(t);
        t = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), t);

        /* Numerator coefficients for this region. */
        const simde__m128 p[] = {
          simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)),
          simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)),
          simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.97302949837000)),
          simde_mm_set1_ps(SIMDE_FLOAT32_C( -0.5374947401000))
        };

        /* Denominator coefficients for this region. */
        const simde__m128 q[] = {
          simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)),
          simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)),
          simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000))
        };

        /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */
        simde__m128 numerator = simde_mm_fmadd_ps(p[3], t, p[2]);
        numerator = simde_mm_fmadd_ps(numerator, t, p[1]);
        numerator = simde_mm_add_ps(numerator, simde_mm_div_ps(p[0], t));

        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */
        simde__m128 denominator = simde_mm_fmadd_ps(q[2], t, q[1]);
        denominator = simde_mm_fmadd_ps(denominator, t, q[0]);

        simde__m128 res = simde_mm_div_ps(numerator, denominator);

        retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res));

        if (simde_mm_test_all_ones(simde_mm_castps_si128(matched))) {
          return retval;
        }
      }
    }

    { /* else if (a == 0.0f) */
      /* erfcinv(0) == +INF */
      simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0)));
      mask = simde_mm_andnot_ps(matched, mask);
      matched = simde_mm_or_ps(matched, mask);

      simde__m128 res = simde_mm_set1_ps(SIMDE_MATH_INFINITYF);

      retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res));
    }

    { /* else */
      /* (a >= 2.0f): erfcinv(2) == -INF; all remaining lanes get -INF. */
      retval = simde_mm_or_ps(retval, simde_mm_andnot_ps(matched, simde_mm_set1_ps(-SIMDE_MATH_INFINITYF)));
    }

    return retval;
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_erfcinvf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
6953 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6954 #undef _mm_erfcinv_ps
6955 #define _mm_erfcinv_ps(a) simde_mm_erfcinv_ps(a)
6956 #endif
6957
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_erfcinv_pd (simde__m128d a) {
  /* Element-wise inverse complementary error function erfcinv(a) for two
   * double-precision lanes.
   *
   * The vector path emulates a scalar if/else-if ladder branchlessly: each
   * `{ }` scope below is one branch (the scalar condition is in the comment
   * at its top).  `matched` accumulates a lane mask of elements already
   * assigned a result so later branches exclude them, and `retval`
   * accumulates per-lane results by OR-merging each branch's masked output. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_erfcinv_pd(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    simde__m128d matched, retval = simde_mm_setzero_pd();

    { /* if (a < 2.0 && a > 0.0625) */
      matched = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0)));
      matched = simde_mm_and_pd(matched, simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0625))));

      /* Central range: erfcinv(a) == erfinv(1 - a). */
      if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(matched), simde_x_mm_setone_si128())) {
        retval = simde_mm_erfinv_pd(simde_mm_sub_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), a));
      }

      /* Fast path: every lane handled, skip the remaining branches. */
      if (simde_mm_test_all_ones(simde_mm_castpd_si128(matched))) {
        return retval;
      }
    }

    { /* else if (a < 0.0625 && a > 0.0) */
      simde__m128d mask = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0625)));
      mask = simde_mm_and_pd(mask, simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
      mask = simde_mm_andnot_pd(matched, mask); /* drop lanes already handled */

      if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(mask), simde_x_mm_setone_si128())) {
        matched = simde_mm_or_pd(matched, mask);

        /* t = 1/(sqrt(-log(a))) */
        simde__m128d t = simde_x_mm_negate_pd(simde_mm_log_pd(a));
        t = simde_mm_sqrt_pd(t);
        t = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), t);

        /* Rational-approximation coefficients (numerator p, denominator q)
         * for the small-argument tail, evaluated in t. */
        const simde__m128d p[] = {
          simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.1550470003116)),
          simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)),
          simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)),
          simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)),
          simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)),
          simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.164441567910))
        };

        const simde__m128d q[] = {
          simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)),
          simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)),
          simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.000000000000))
        };

        /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */
        simde__m128d numerator = simde_mm_fmadd_pd(p[5], t, p[4]);
        numerator = simde_mm_fmadd_pd(numerator, t, p[3]);
        numerator = simde_mm_fmadd_pd(numerator, t, p[2]);
        numerator = simde_mm_fmadd_pd(numerator, t, p[1]);
        numerator = simde_mm_add_pd(numerator, simde_mm_div_pd(p[0], t));

        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */
        simde__m128d denominator = simde_mm_fmadd_pd(q[2], t, q[1]);
        denominator = simde_mm_fmadd_pd(denominator, t, q[0]);

        simde__m128d res = simde_mm_div_pd(numerator, denominator);

        retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res));
      }
    }

    { /* else if (a < 0.0) */
      simde__m128d mask = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0)));
      mask = simde_mm_andnot_pd(matched, mask);

      if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(mask), simde_x_mm_setone_si128())) {
        matched = simde_mm_or_pd(matched, mask);

        /* t = 1/(sqrt(-log(a))) */
        simde__m128d t = simde_x_mm_negate_pd(simde_mm_log_pd(a));
        t = simde_mm_sqrt_pd(t);
        t = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), t);

        /* Different (lower-degree) rational approximation for this branch. */
        const simde__m128d p[] = {
          simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)),
          simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)),
          simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.97302949837000)),
          simde_mm_set1_pd(SIMDE_FLOAT64_C( -0.5374947401000))
        };

        const simde__m128d q[] = {
          simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.00980451277802)),
          simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.36369997154400)),
          simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.00000000000000))
        };

        /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */
        simde__m128d numerator = simde_mm_fmadd_pd(p[3], t, p[2]);
        numerator = simde_mm_fmadd_pd(numerator, t, p[1]);
        numerator = simde_mm_add_pd(numerator, simde_mm_div_pd(p[0], t));

        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */
        simde__m128d denominator = simde_mm_fmadd_pd(q[2], t, q[1]);
        denominator = simde_mm_fmadd_pd(denominator, t, q[0]);

        simde__m128d res = simde_mm_div_pd(numerator, denominator);

        retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res));

        if (simde_mm_test_all_ones(simde_mm_castpd_si128(matched))) {
          return retval;
        }
      }
    }

    { /* else if (a == 0.0) */
      simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0)));
      mask = simde_mm_andnot_pd(matched, mask);
      matched = simde_mm_or_pd(matched, mask);

      simde__m128d res = simde_mm_set1_pd(SIMDE_MATH_INFINITY);

      retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res));
    }

    { /* else */
      /* (a >= 2.0): any lane still unmatched gets -infinity. */
      retval = simde_mm_or_pd(retval, simde_mm_andnot_pd(matched, simde_mm_set1_pd(-SIMDE_MATH_INFINITY)));
    }

    return retval;
  #else
    /* Scalar fallback: apply the portable math helper per lane. */
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_erfcinv(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
7097 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7098 #undef _mm_erfcinv_pd
7099 #define _mm_erfcinv_pd(a) simde_mm_erfcinv_pd(a)
7100 #endif
7101
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_erfcinv_ps (simde__m256 a) {
  /* Element-wise inverse complementary error function erfcinv(a) for eight
   * single-precision lanes.
   *
   * Emulates a scalar if/else-if ladder branchlessly: each `{ }` scope is
   * one branch (the scalar condition is in its top comment).  `matched`
   * tracks lanes already assigned a result; `retval` OR-merges each
   * branch's masked output. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_erfcinv_ps(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256)
    simde__m256 matched, retval = simde_mm256_setzero_ps();

    { /* if (a < 2.0f && a > 0.0625f) */
      matched = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)), SIMDE_CMP_LT_OQ);
      matched = simde_mm256_and_ps(matched, simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_GT_OQ));

      /* Central range: erfcinv(a) == erfinv(1 - a). */
      if (!simde_mm256_testz_ps(matched, matched)) {
        retval = simde_mm256_erfinv_ps(simde_mm256_sub_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), a));
      }

      /* Fast path: every lane handled, skip remaining branches. */
      if (simde_x_mm256_test_all_ones(simde_mm256_castps_si256(matched))) {
        return retval;
      }
    }

    { /* else if (a < 0.0625f && a > 0.0f) */
      simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_LT_OQ);
      mask = simde_mm256_and_ps(mask, simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_GT_OQ));
      mask = simde_mm256_andnot_ps(matched, mask); /* drop lanes already handled */

      if (!simde_mm256_testz_ps(mask, mask)) {
        matched = simde_mm256_or_ps(matched, mask);

        /* t = 1/(sqrt(-log(a))) */
        simde__m256 t = simde_x_mm256_negate_ps(simde_mm256_log_ps(a));
        t = simde_mm256_sqrt_ps(t);
        t = simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), t);

        /* Rational-approximation coefficients (numerator p, denominator q)
         * for the small-argument tail, evaluated in t. */
        const simde__m256 p[] = {
          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)),
          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)),
          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)),
          simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)),
          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.680544246825)),
          simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.16444156791))
        };

        const simde__m256 q[] = {
          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)),
          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)),
          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.000000000000))
        };

        /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */
        simde__m256 numerator = simde_mm256_fmadd_ps(p[5], t, p[4]);
        numerator = simde_mm256_fmadd_ps(numerator, t, p[3]);
        numerator = simde_mm256_fmadd_ps(numerator, t, p[2]);
        numerator = simde_mm256_fmadd_ps(numerator, t, p[1]);
        numerator = simde_mm256_add_ps(numerator, simde_mm256_div_ps(p[0], t));

        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */
        simde__m256 denominator = simde_mm256_fmadd_ps(q[2], t, q[1]);
        denominator = simde_mm256_fmadd_ps(denominator, t, q[0]);

        simde__m256 res = simde_mm256_div_ps(numerator, denominator);

        retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res));
      }
    }

    { /* else if (a < 0.0f) */
      simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ);
      mask = simde_mm256_andnot_ps(matched, mask);

      if (!simde_mm256_testz_ps(mask, mask)) {
        matched = simde_mm256_or_ps(matched, mask);

        /* t = 1/(sqrt(-log(a))) */
        simde__m256 t = simde_x_mm256_negate_ps(simde_mm256_log_ps(a));
        t = simde_mm256_sqrt_ps(t);
        t = simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), t);

        /* Different (lower-degree) rational approximation for this branch. */
        const simde__m256 p[] = {
          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)),
          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)),
          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.97302949837000)),
          simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.5374947401000))
        };

        const simde__m256 q[] = {
          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)),
          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)),
          simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000))
        };

        /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */
        simde__m256 numerator = simde_mm256_fmadd_ps(p[3], t, p[2]);
        numerator = simde_mm256_fmadd_ps(numerator, t, p[1]);
        numerator = simde_mm256_add_ps(numerator, simde_mm256_div_ps(p[0], t));

        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */
        simde__m256 denominator = simde_mm256_fmadd_ps(q[2], t, q[1]);
        denominator = simde_mm256_fmadd_ps(denominator, t, q[0]);

        simde__m256 res = simde_mm256_div_ps(numerator, denominator);

        retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res));

        if (simde_x_mm256_test_all_ones(simde_mm256_castps_si256(matched))) {
          return retval;
        }
      }
    }

    { /* else if (a == 0.0f) */
      simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ);
      mask = simde_mm256_andnot_ps(matched, mask);
      matched = simde_mm256_or_ps(matched, mask);

      simde__m256 res = simde_mm256_set1_ps(SIMDE_MATH_INFINITYF);

      retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res));
    }

    { /* else */
      /* (a >= 2.0f): any lane still unmatched gets -infinity. */
      retval = simde_mm256_or_ps(retval, simde_mm256_andnot_ps(matched, simde_mm256_set1_ps(-SIMDE_MATH_INFINITYF)));
    }

    return retval;
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      /* Delegate to the 128-bit implementation, half a vector at a time. */
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_erfcinv_ps(a_.m128[i]);
      }
    #else
      /* Scalar fallback: portable math helper per lane. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_erfcinvf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
7247 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7248 #undef _mm256_erfcinv_ps
7249 #define _mm256_erfcinv_ps(a) simde_mm256_erfcinv_ps(a)
7250 #endif
7251
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_erfcinv_pd (simde__m256d a) {
  /* Element-wise inverse complementary error function erfcinv(a) for four
   * double-precision lanes.
   *
   * Emulates a scalar if/else-if ladder branchlessly: each `{ }` scope is
   * one branch (the scalar condition is in its top comment).  `matched`
   * tracks lanes already assigned a result; `retval` OR-merges each
   * branch's masked output. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_erfcinv_pd(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256)
    simde__m256d matched, retval = simde_mm256_setzero_pd();

    { /* if (a < 2.0 && a > 0.0625) */
      matched = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0)), SIMDE_CMP_LT_OQ);
      matched = simde_mm256_and_pd(matched, simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_GT_OQ));

      /* Central range: erfcinv(a) == erfinv(1 - a). */
      if (!simde_mm256_testz_pd(matched, matched)) {
        retval = simde_mm256_erfinv_pd(simde_mm256_sub_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), a));
      }

      /* Fast path: every lane handled, skip remaining branches. */
      if (simde_x_mm256_test_all_ones(simde_mm256_castpd_si256(matched))) {
        return retval;
      }
    }

    { /* else if (a < 0.0625 && a > 0.0) */
      simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_LT_OQ);
      mask = simde_mm256_and_pd(mask, simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_GT_OQ));
      mask = simde_mm256_andnot_pd(matched, mask); /* drop lanes already handled */

      if (!simde_mm256_testz_pd(mask, mask)) {
        matched = simde_mm256_or_pd(matched, mask);

        /* t = 1/(sqrt(-log(a))) */
        simde__m256d t = simde_x_mm256_negate_pd(simde_mm256_log_pd(a));
        t = simde_mm256_sqrt_pd(t);
        t = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), t);

        /* Rational-approximation coefficients (numerator p, denominator q)
         * for the small-argument tail, evaluated in t. */
        const simde__m256d p[] = {
          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.1550470003116)),
          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)),
          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)),
          simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)),
          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)),
          simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.16444156791))
        };

        const simde__m256d q[] = {
          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)),
          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)),
          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.000000000000))
        };

        /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */
        simde__m256d numerator = simde_mm256_fmadd_pd(p[5], t, p[4]);
        numerator = simde_mm256_fmadd_pd(numerator, t, p[3]);
        numerator = simde_mm256_fmadd_pd(numerator, t, p[2]);
        numerator = simde_mm256_fmadd_pd(numerator, t, p[1]);
        numerator = simde_mm256_add_pd(numerator, simde_mm256_div_pd(p[0], t));

        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */
        simde__m256d denominator = simde_mm256_fmadd_pd(q[2], t, q[1]);
        denominator = simde_mm256_fmadd_pd(denominator, t, q[0]);

        simde__m256d res = simde_mm256_div_pd(numerator, denominator);

        retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res));
      }
    }

    { /* else if (a < 0.0) */
      simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ);
      mask = simde_mm256_andnot_pd(matched, mask);

      if (!simde_mm256_testz_pd(mask, mask)) {
        matched = simde_mm256_or_pd(matched, mask);

        /* t = 1/(sqrt(-log(a))) */
        simde__m256d t = simde_x_mm256_negate_pd(simde_mm256_log_pd(a));
        t = simde_mm256_sqrt_pd(t);
        t = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), t);

        /* Different (lower-degree) rational approximation for this branch. */
        const simde__m256d p[] = {
          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)),
          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)),
          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.97302949837000)),
          simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.5374947401000))
        };

        const simde__m256d q[] = {
          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.00980451277802)),
          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.36369997154400)),
          simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.00000000000000))
        };

        /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */
        simde__m256d numerator = simde_mm256_fmadd_pd(p[3], t, p[2]);
        numerator = simde_mm256_fmadd_pd(numerator, t, p[1]);
        numerator = simde_mm256_add_pd(numerator, simde_mm256_div_pd(p[0], t));

        /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */
        simde__m256d denominator = simde_mm256_fmadd_pd(q[2], t, q[1]);
        denominator = simde_mm256_fmadd_pd(denominator, t, q[0]);

        simde__m256d res = simde_mm256_div_pd(numerator, denominator);

        retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res));

        if (simde_x_mm256_test_all_ones(simde_mm256_castpd_si256(matched))) {
          return retval;
        }
      }
    }

    { /* else if (a == 0.0) */
      simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ);
      mask = simde_mm256_andnot_pd(matched, mask);
      matched = simde_mm256_or_pd(matched, mask);

      simde__m256d res = simde_mm256_set1_pd(SIMDE_MATH_INFINITY);

      retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res));
    }

    { /* else */
      /* (a >= 2.0): any lane still unmatched gets -infinity. */
      retval = simde_mm256_or_pd(retval, simde_mm256_andnot_pd(matched, simde_mm256_set1_pd(-SIMDE_MATH_INFINITY)));
    }

    return retval;
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      /* Delegate to the 128-bit implementation, half a vector at a time. */
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_erfcinv_pd(a_.m128d[i]);
      }
    #else
      /* Scalar fallback: portable math helper per lane. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_erfcinv(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
7397 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7398 #undef _mm256_erfcinv_pd
7399 #define _mm256_erfcinv_pd(a) simde_mm256_erfcinv_pd(a)
7400 #endif
7401
7402 SIMDE_FUNCTION_ATTRIBUTES
7403 simde__m512
simde_mm512_erfcinv_ps(simde__m512 a)7404 simde_mm512_erfcinv_ps (simde__m512 a) {
7405 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
7406 return _mm512_erfcinv_ps(a);
7407 #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && (!defined(SIMDE_ARCH_ARM) || defined(SIMDE_ARCH_AARCH64))
7408 /* The results on Arm are *slightly* off, which causes problems for
7409 * the edge cases; for example, if you pass 2.0 sqrt will be called
7410 * with a value of -0.0 instead of 0.0, resulting in a NaN. */
7411 simde__m512_private
7412 r_,
7413 a_ = simde__m512_to_private(a);
7414
7415 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
7416 r_.m256[i] = simde_mm256_erfcinv_ps(a_.m256[i]);
7417 }
7418 return simde__m512_from_private(r_);
7419 #else
7420 simde__m512 retval = simde_mm512_setzero_ps();
7421 simde__mmask16 matched;
7422
7423 { /* if (a < 2.0f && a > 0.0625f) */
7424 matched = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0)), SIMDE_CMP_LT_OQ);
7425 matched &= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_GT_OQ);
7426
7427 if (matched != 0) {
7428 retval = simde_mm512_erfinv_ps(simde_mm512_sub_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a));
7429 }
7430
7431 if (matched == 1) {
7432 return retval;
7433 }
7434 }
7435
7436 { /* else if (a < 0.0625f && a > 0.0f) */
7437 simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_LT_OQ);
7438 mask &= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_GT_OQ);
7439 mask = ~matched & mask;
7440
7441 if (mask != 0) {
7442 matched = matched | mask;
7443
7444 /* t = 1/(sqrt(-log(a))) */
7445 simde__m512 t = simde_x_mm512_negate_ps(simde_mm512_log_ps(a));
7446 t = simde_mm512_sqrt_ps(t);
7447 t = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), t);
7448
7449 const simde__m512 p[] = {
7450 simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)),
7451 simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)),
7452 simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)),
7453 simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)),
7454 simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.680544246825)),
7455 simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.16444156791))
7456 };
7457
7458 const simde__m512 q[] = {
7459 simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)),
7460 simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)),
7461 simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.000000000000))
7462 };
7463
7464 /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */
7465 simde__m512 numerator = simde_mm512_fmadd_ps(p[5], t, p[4]);
7466 numerator = simde_mm512_fmadd_ps(numerator, t, p[3]);
7467 numerator = simde_mm512_fmadd_ps(numerator, t, p[2]);
7468 numerator = simde_mm512_fmadd_ps(numerator, t, p[1]);
7469 numerator = simde_mm512_add_ps(numerator, simde_mm512_div_ps(p[0], t));
7470
7471 /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */
7472 simde__m512 denominator = simde_mm512_fmadd_ps(q[2], t, q[1]);
7473 denominator = simde_mm512_fmadd_ps(denominator, t, q[0]);
7474
7475 simde__m512 res = simde_mm512_div_ps(numerator, denominator);
7476
7477 retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(mask, res));
7478 }
7479 }
7480
7481 { /* else if (a < 0.0f) */
7482 simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ);
7483 mask = ~matched & mask;
7484
7485 if (mask != 0) {
7486 matched = matched | mask;
7487
7488 /* t = 1/(sqrt(-log(a))) */
7489 simde__m512 t = simde_x_mm512_negate_ps(simde_mm512_log_ps(a));
7490 t = simde_mm512_sqrt_ps(t);
7491 t = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), t);
7492
7493 const simde__m512 p[] = {
7494 simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)),
7495 simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)),
7496 simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.97302949837000)),
7497 simde_mm512_set1_ps(SIMDE_FLOAT32_C( -0.5374947401000))
7498 };
7499
7500 const simde__m512 q[] = {
7501 simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)),
7502 simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)),
7503 simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000))
7504 };
7505
7506 /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */
7507 simde__m512 numerator = simde_mm512_fmadd_ps(p[3], t, p[2]);
7508 numerator = simde_mm512_fmadd_ps(numerator, t, p[1]);
7509 numerator = simde_mm512_add_ps(numerator, simde_mm512_div_ps(p[0], t));
7510
7511 /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */
7512 simde__m512 denominator = simde_mm512_fmadd_ps(q[2], t, q[1]);
7513 denominator = simde_mm512_fmadd_ps(denominator, t, q[0]);
7514
7515 simde__m512 res = simde_mm512_div_ps(numerator, denominator);
7516
7517 retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(mask, res));
7518
7519 if (matched == 1) {
7520 return retval;
7521 }
7522 }
7523 }
7524
7525 { /* else if (a == 0.0f) */
7526 simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ);
7527 mask = ~matched & mask;
7528 matched = matched | mask;
7529
7530 simde__m512 res = simde_mm512_set1_ps(SIMDE_MATH_INFINITYF);
7531
7532 retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(mask, res));
7533 }
7534
7535 { /* else */
7536 /* (a >= 2.0f) */
7537 retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(~matched, simde_mm512_set1_ps(-SIMDE_MATH_INFINITYF)));
7538 }
7539
7540 return retval;
7541 #endif
7542 }
7543 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7544 #undef _mm512_erfcinv_ps
7545 #define _mm512_erfcinv_ps(a) simde_mm512_erfcinv_ps(a)
7546 #endif
7547
7548 SIMDE_FUNCTION_ATTRIBUTES
7549 simde__m512d
simde_mm512_erfcinv_pd(simde__m512d a)7550 simde_mm512_erfcinv_pd (simde__m512d a) {
7551 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
7552 return _mm512_erfcinv_pd(a);
7553 #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256)
7554 simde__m512d_private
7555 r_,
7556 a_ = simde__m512d_to_private(a);
7557
7558 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
7559 r_.m256d[i] = simde_mm256_erfcinv_pd(a_.m256d[i]);
7560 }
7561 return simde__m512d_from_private(r_);
7562 #else
7563 simde__m512d retval = simde_mm512_setzero_pd();
7564 simde__mmask8 matched;
7565
7566 { /* if (a < 2.0f && a > 0.0625f) */
7567 matched = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0)), SIMDE_CMP_LT_OQ);
7568 matched &= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_GT_OQ);
7569
7570 if (matched != 0) {
7571 retval = simde_mm512_erfinv_pd(simde_mm512_sub_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a));
7572 }
7573
7574 if (matched == 1) {
7575 return retval;
7576 }
7577 }
7578
7579 { /* else if (a < 0.0625f && a > 0.0f) */
7580 simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_LT_OQ);
7581 mask &= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_GT_OQ);
7582 mask = ~matched & mask;
7583
7584 if (mask != 0) {
7585 matched = matched | mask;
7586
7587 /* t = 1/(sqrt(-log(a))) */
7588 simde__m512d t = simde_x_mm512_negate_pd(simde_mm512_log_pd(a));
7589 t = simde_mm512_sqrt_pd(t);
7590 t = simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), t);
7591
7592 const simde__m512d p[] = {
7593 simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.1550470003116)),
7594 simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)),
7595 simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)),
7596 simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)),
7597 simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)),
7598 simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.16444156791))
7599 };
7600
7601 const simde__m512d q[] = {
7602 simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)),
7603 simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)),
7604 simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.000000000000))
7605 };
7606
7607 /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */
7608 simde__m512d numerator = simde_mm512_fmadd_pd(p[5], t, p[4]);
7609 numerator = simde_mm512_fmadd_pd(numerator, t, p[3]);
7610 numerator = simde_mm512_fmadd_pd(numerator, t, p[2]);
7611 numerator = simde_mm512_fmadd_pd(numerator, t, p[1]);
7612 numerator = simde_mm512_add_pd(numerator, simde_mm512_div_pd(p[0], t));
7613
7614 /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */
7615 simde__m512d denominator = simde_mm512_fmadd_pd(q[2], t, q[1]);
7616 denominator = simde_mm512_fmadd_pd(denominator, t, q[0]);
7617
7618 simde__m512d res = simde_mm512_div_pd(numerator, denominator);
7619
7620 retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(mask, res));
7621 }
7622 }
7623
7624 { /* else if (a < 0.0f) */
7625 simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ);
7626 mask = ~matched & mask;
7627
7628 if (mask != 0) {
7629 matched = matched | mask;
7630
7631 /* t = 1/(sqrt(-log(a))) */
7632 simde__m512d t = simde_x_mm512_negate_pd(simde_mm512_log_pd(a));
7633 t = simde_mm512_sqrt_pd(t);
7634 t = simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), t);
7635
7636 const simde__m512d p[] = {
7637 simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)),
7638 simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)),
7639 simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.97302949837000)),
7640 simde_mm512_set1_pd(SIMDE_FLOAT64_C( -0.5374947401000))
7641 };
7642
7643 const simde__m512d q[] = {
7644 simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.00980451277802)),
7645 simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.36369997154400)),
7646 simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.00000000000000))
7647 };
7648
7649 /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */
7650 simde__m512d numerator = simde_mm512_fmadd_pd(p[3], t, p[2]);
7651 numerator = simde_mm512_fmadd_pd(numerator, t, p[1]);
7652 numerator = simde_mm512_add_pd(numerator, simde_mm512_div_pd(p[0], t));
7653
7654 /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */
7655 simde__m512d denominator = simde_mm512_fmadd_pd(q[2], t, q[1]);
7656 denominator = simde_mm512_fmadd_pd(denominator, t, q[0]);
7657
7658 simde__m512d res = simde_mm512_div_pd(numerator, denominator);
7659
7660 retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(mask, res));
7661
7662 if (matched == 1) {
7663 return retval;
7664 }
7665 }
7666 }
7667
7668 { /* else if (a == 0.0f) */
7669 simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ);
7670 mask = ~matched & mask;
7671 matched = matched | mask;
7672
7673 simde__m512d res = simde_mm512_set1_pd(SIMDE_MATH_INFINITY);
7674
7675 retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(mask, res));
7676 }
7677
7678 { /* else */
7679 /* (a >= 2.0f) */
7680 retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(~matched, simde_mm512_set1_pd(-SIMDE_MATH_INFINITY)));
7681 }
7682
7683 return retval;
7684 #endif
7685 }
7686 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7687 #undef _mm512_erfcinv_pd
7688 #define _mm512_erfcinv_pd(a) simde_mm512_erfcinv_pd(a)
7689 #endif
7690
7691 SIMDE_FUNCTION_ATTRIBUTES
7692 simde__m512
simde_mm512_mask_erfcinv_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)7693 simde_mm512_mask_erfcinv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
7694 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
7695 return _mm512_mask_erfcinv_ps(src, k, a);
7696 #else
7697 return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfcinv_ps(a));
7698 #endif
7699 }
7700 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7701 #undef _mm512_mask_erfcinv_ps
7702 #define _mm512_mask_erfcinv_ps(src, k, a) simde_mm512_mask_erfcinv_ps(src, k, a)
7703 #endif
7704
7705 SIMDE_FUNCTION_ATTRIBUTES
7706 simde__m512d
simde_mm512_mask_erfcinv_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)7707 simde_mm512_mask_erfcinv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
7708 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
7709 return _mm512_mask_erfcinv_pd(src, k, a);
7710 #else
7711 return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfcinv_pd(a));
7712 #endif
7713 }
7714 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7715 #undef _mm512_mask_erfcinv_pd
7716 #define _mm512_mask_erfcinv_pd(src, k, a) simde_mm512_mask_erfcinv_pd(src, k, a)
7717 #endif
7718
7719 SIMDE_FUNCTION_ATTRIBUTES
7720 simde__m128
simde_mm_logb_ps(simde__m128 a)7721 simde_mm_logb_ps (simde__m128 a) {
7722 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
7723 return _mm_logb_ps(a);
7724 #else
7725 simde__m128_private
7726 r_,
7727 a_ = simde__m128_to_private(a);
7728
7729 SIMDE_VECTORIZE
7730 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
7731 r_.f32[i] = simde_math_logbf(a_.f32[i]);
7732 }
7733
7734 return simde__m128_from_private(r_);
7735 #endif
7736 }
7737 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7738 #undef _mm_logb_ps
7739 #define _mm_logb_ps(a) simde_mm_logb_ps(a)
7740 #endif
7741
7742 SIMDE_FUNCTION_ATTRIBUTES
7743 simde__m128d
simde_mm_logb_pd(simde__m128d a)7744 simde_mm_logb_pd (simde__m128d a) {
7745 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
7746 return _mm_logb_pd(a);
7747 #else
7748 simde__m128d_private
7749 r_,
7750 a_ = simde__m128d_to_private(a);
7751
7752 SIMDE_VECTORIZE
7753 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
7754 r_.f64[i] = simde_math_logb(a_.f64[i]);
7755 }
7756
7757 return simde__m128d_from_private(r_);
7758 #endif
7759 }
7760 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7761 #undef _mm_logb_pd
7762 #define _mm_logb_pd(a) simde_mm_logb_pd(a)
7763 #endif
7764
7765 SIMDE_FUNCTION_ATTRIBUTES
7766 simde__m256
simde_mm256_logb_ps(simde__m256 a)7767 simde_mm256_logb_ps (simde__m256 a) {
7768 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
7769 return _mm256_logb_ps(a);
7770 #else
7771 simde__m256_private
7772 r_,
7773 a_ = simde__m256_to_private(a);
7774
7775 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
7776 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
7777 r_.m128[i] = simde_mm_logb_ps(a_.m128[i]);
7778 }
7779 #else
7780 SIMDE_VECTORIZE
7781 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
7782 r_.f32[i] = simde_math_logbf(a_.f32[i]);
7783 }
7784 #endif
7785
7786 return simde__m256_from_private(r_);
7787 #endif
7788 }
7789 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7790 #undef _mm256_logb_ps
7791 #define _mm256_logb_ps(a) simde_mm256_logb_ps(a)
7792 #endif
7793
7794
7795 SIMDE_FUNCTION_ATTRIBUTES
7796 simde__m256d
simde_mm256_logb_pd(simde__m256d a)7797 simde_mm256_logb_pd (simde__m256d a) {
7798 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
7799 return _mm256_logb_pd(a);
7800 #else
7801 simde__m256d_private
7802 r_,
7803 a_ = simde__m256d_to_private(a);
7804
7805 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
7806 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
7807 r_.m128d[i] = simde_mm_logb_pd(a_.m128d[i]);
7808 }
7809 #else
7810 SIMDE_VECTORIZE
7811 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
7812 r_.f64[i] = simde_math_logb(a_.f64[i]);
7813 }
7814 #endif
7815
7816 return simde__m256d_from_private(r_);
7817 #endif
7818 }
7819 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7820 #undef _mm256_logb_pd
7821 #define _mm256_logb_pd(a) simde_mm256_logb_pd(a)
7822 #endif
7823
7824 SIMDE_FUNCTION_ATTRIBUTES
7825 simde__m512
simde_mm512_logb_ps(simde__m512 a)7826 simde_mm512_logb_ps (simde__m512 a) {
7827 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
7828 return _mm512_logb_ps(a);
7829 #else
7830 simde__m512_private
7831 r_,
7832 a_ = simde__m512_to_private(a);
7833
7834 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
7835 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
7836 r_.m256[i] = simde_mm256_logb_ps(a_.m256[i]);
7837 }
7838 #else
7839 SIMDE_VECTORIZE
7840 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
7841 r_.f32[i] = simde_math_logbf(a_.f32[i]);
7842 }
7843 #endif
7844
7845 return simde__m512_from_private(r_);
7846 #endif
7847 }
7848 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7849 #undef _mm512_logb_ps
7850 #define _mm512_logb_ps(a) simde_mm512_logb_ps(a)
7851 #endif
7852
7853 SIMDE_FUNCTION_ATTRIBUTES
7854 simde__m512d
simde_mm512_logb_pd(simde__m512d a)7855 simde_mm512_logb_pd (simde__m512d a) {
7856 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
7857 return _mm512_logb_pd(a);
7858 #else
7859 simde__m512d_private
7860 r_,
7861 a_ = simde__m512d_to_private(a);
7862
7863 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
7864 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
7865 r_.m256d[i] = simde_mm256_logb_pd(a_.m256d[i]);
7866 }
7867 #else
7868 SIMDE_VECTORIZE
7869 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
7870 r_.f64[i] = simde_math_logb(a_.f64[i]);
7871 }
7872 #endif
7873
7874 return simde__m512d_from_private(r_);
7875 #endif
7876 }
7877 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7878 #undef _mm512_logb_pd
7879 #define _mm512_logb_pd(a) simde_mm512_logb_pd(a)
7880 #endif
7881
7882 SIMDE_FUNCTION_ATTRIBUTES
7883 simde__m512
simde_mm512_mask_logb_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)7884 simde_mm512_mask_logb_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
7885 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
7886 return _mm512_mask_logb_ps(src, k, a);
7887 #else
7888 return simde_mm512_mask_mov_ps(src, k, simde_mm512_logb_ps(a));
7889 #endif
7890 }
7891 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7892 #undef _mm512_mask_logb_ps
7893 #define _mm512_mask_logb_ps(src, k, a) simde_mm512_mask_logb_ps(src, k, a)
7894 #endif
7895
7896 SIMDE_FUNCTION_ATTRIBUTES
7897 simde__m512d
simde_mm512_mask_logb_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)7898 simde_mm512_mask_logb_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
7899 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
7900 return _mm512_mask_logb_pd(src, k, a);
7901 #else
7902 return simde_mm512_mask_mov_pd(src, k, simde_mm512_logb_pd(a));
7903 #endif
7904 }
7905 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7906 #undef _mm512_mask_logb_pd
7907 #define _mm512_mask_logb_pd(src, k, a) simde_mm512_mask_logb_pd(src, k, a)
7908 #endif
7909
7910 SIMDE_FUNCTION_ATTRIBUTES
7911 simde__m128
simde_mm_log2_ps(simde__m128 a)7912 simde_mm_log2_ps (simde__m128 a) {
7913 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
7914 return _mm_log2_ps(a);
7915 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
7916 #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1)
7917 return Sleef_log2f4_u35(a);
7918 #else
7919 return Sleef_log2f4_u10(a);
7920 #endif
7921 #else
7922 simde__m128_private
7923 r_,
7924 a_ = simde__m128_to_private(a);
7925
7926 SIMDE_VECTORIZE
7927 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
7928 r_.f32[i] = simde_math_log2f(a_.f32[i]);
7929 }
7930
7931 return simde__m128_from_private(r_);
7932 #endif
7933 }
7934 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7935 #undef _mm_log2_ps
7936 #define _mm_log2_ps(a) simde_mm_log2_ps(a)
7937 #endif
7938
7939 SIMDE_FUNCTION_ATTRIBUTES
7940 simde__m128d
simde_mm_log2_pd(simde__m128d a)7941 simde_mm_log2_pd (simde__m128d a) {
7942 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
7943 return _mm_log2_pd(a);
7944 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
7945 #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1)
7946 return Sleef_log2d2_u35(a);
7947 #else
7948 return Sleef_log2d2_u10(a);
7949 #endif
7950 #else
7951 simde__m128d_private
7952 r_,
7953 a_ = simde__m128d_to_private(a);
7954
7955 SIMDE_VECTORIZE
7956 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
7957 r_.f64[i] = simde_math_log2(a_.f64[i]);
7958 }
7959
7960 return simde__m128d_from_private(r_);
7961 #endif
7962 }
7963 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7964 #undef _mm_log2_pd
7965 #define _mm_log2_pd(a) simde_mm_log2_pd(a)
7966 #endif
7967
7968 SIMDE_FUNCTION_ATTRIBUTES
7969 simde__m256
simde_mm256_log2_ps(simde__m256 a)7970 simde_mm256_log2_ps (simde__m256 a) {
7971 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
7972 return _mm256_log2_ps(a);
7973 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
7974 #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1)
7975 return Sleef_log2f8_u35(a);
7976 #else
7977 return Sleef_log2f8_u10(a);
7978 #endif
7979 #else
7980 simde__m256_private
7981 r_,
7982 a_ = simde__m256_to_private(a);
7983
7984 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
7985 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
7986 r_.m128[i] = simde_mm_log2_ps(a_.m128[i]);
7987 }
7988 #else
7989 SIMDE_VECTORIZE
7990 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
7991 r_.f32[i] = simde_math_log2f(a_.f32[i]);
7992 }
7993 #endif
7994
7995 return simde__m256_from_private(r_);
7996 #endif
7997 }
7998 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7999 #undef _mm256_log2_ps
8000 #define _mm256_log2_ps(a) simde_mm256_log2_ps(a)
8001 #endif
8002
8003
8004 SIMDE_FUNCTION_ATTRIBUTES
8005 simde__m256d
simde_mm256_log2_pd(simde__m256d a)8006 simde_mm256_log2_pd (simde__m256d a) {
8007 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
8008 return _mm256_log2_pd(a);
8009 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
8010 #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1)
8011 return Sleef_log2d4_u35(a);
8012 #else
8013 return Sleef_log2d4_u10(a);
8014 #endif
8015 #else
8016 simde__m256d_private
8017 r_,
8018 a_ = simde__m256d_to_private(a);
8019
8020 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
8021 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
8022 r_.m128d[i] = simde_mm_log2_pd(a_.m128d[i]);
8023 }
8024 #else
8025 SIMDE_VECTORIZE
8026 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8027 r_.f64[i] = simde_math_log2(a_.f64[i]);
8028 }
8029 #endif
8030
8031 return simde__m256d_from_private(r_);
8032 #endif
8033 }
8034 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8035 #undef _mm256_log2_pd
8036 #define _mm256_log2_pd(a) simde_mm256_log2_pd(a)
8037 #endif
8038
8039 SIMDE_FUNCTION_ATTRIBUTES
8040 simde__m512
simde_mm512_log2_ps(simde__m512 a)8041 simde_mm512_log2_ps (simde__m512 a) {
8042 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8043 return _mm512_log2_ps(a);
8044 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
8045 #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1)
8046 return Sleef_log2f16_u35(a);
8047 #else
8048 return Sleef_log2f16_u10(a);
8049 #endif
8050 #else
8051 simde__m512_private
8052 r_,
8053 a_ = simde__m512_to_private(a);
8054
8055 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
8056 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
8057 r_.m256[i] = simde_mm256_log2_ps(a_.m256[i]);
8058 }
8059 #else
8060 SIMDE_VECTORIZE
8061 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
8062 r_.f32[i] = simde_math_log2f(a_.f32[i]);
8063 }
8064 #endif
8065
8066 return simde__m512_from_private(r_);
8067 #endif
8068 }
8069 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8070 #undef _mm512_log2_ps
8071 #define _mm512_log2_ps(a) simde_mm512_log2_ps(a)
8072 #endif
8073
8074 SIMDE_FUNCTION_ATTRIBUTES
8075 simde__m512d
simde_mm512_log2_pd(simde__m512d a)8076 simde_mm512_log2_pd (simde__m512d a) {
8077 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8078 return _mm512_log2_pd(a);
8079 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
8080 #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1)
8081 return Sleef_log2d8_u35(a);
8082 #else
8083 return Sleef_log2d8_u10(a);
8084 #endif
8085 #else
8086 simde__m512d_private
8087 r_,
8088 a_ = simde__m512d_to_private(a);
8089
8090 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
8091 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
8092 r_.m256d[i] = simde_mm256_log2_pd(a_.m256d[i]);
8093 }
8094 #else
8095 SIMDE_VECTORIZE
8096 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8097 r_.f64[i] = simde_math_log2(a_.f64[i]);
8098 }
8099 #endif
8100
8101 return simde__m512d_from_private(r_);
8102 #endif
8103 }
8104 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8105 #undef _mm512_log2_pd
8106 #define _mm512_log2_pd(a) simde_mm512_log2_pd(a)
8107 #endif
8108
8109 SIMDE_FUNCTION_ATTRIBUTES
8110 simde__m512
simde_mm512_mask_log2_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)8111 simde_mm512_mask_log2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
8112 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8113 return _mm512_mask_log2_ps(src, k, a);
8114 #else
8115 return simde_mm512_mask_mov_ps(src, k, simde_mm512_log2_ps(a));
8116 #endif
8117 }
8118 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8119 #undef _mm512_mask_log2_ps
8120 #define _mm512_mask_log2_ps(src, k, a) simde_mm512_mask_log2_ps(src, k, a)
8121 #endif
8122
8123 SIMDE_FUNCTION_ATTRIBUTES
8124 simde__m512d
simde_mm512_mask_log2_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)8125 simde_mm512_mask_log2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
8126 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8127 return _mm512_mask_log2_pd(src, k, a);
8128 #else
8129 return simde_mm512_mask_mov_pd(src, k, simde_mm512_log2_pd(a));
8130 #endif
8131 }
8132 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8133 #undef _mm512_mask_log2_pd
8134 #define _mm512_mask_log2_pd(src, k, a) simde_mm512_mask_log2_pd(src, k, a)
8135 #endif
8136
8137 SIMDE_FUNCTION_ATTRIBUTES
8138 simde__m128
simde_mm_log1p_ps(simde__m128 a)8139 simde_mm_log1p_ps (simde__m128 a) {
8140 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
8141 return _mm_log1p_ps(a);
8142 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
8143 return Sleef_log1pf4_u10(a);
8144 #else
8145 simde__m128_private
8146 r_,
8147 a_ = simde__m128_to_private(a);
8148
8149 SIMDE_VECTORIZE
8150 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
8151 r_.f32[i] = simde_math_log1pf(a_.f32[i]);
8152 }
8153
8154 return simde__m128_from_private(r_);
8155 #endif
8156 }
8157 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8158 #undef _mm_log1p_ps
8159 #define _mm_log1p_ps(a) simde_mm_log1p_ps(a)
8160 #endif
8161
8162 SIMDE_FUNCTION_ATTRIBUTES
8163 simde__m128d
simde_mm_log1p_pd(simde__m128d a)8164 simde_mm_log1p_pd (simde__m128d a) {
8165 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
8166 return _mm_log1p_pd(a);
8167 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
8168 return Sleef_log1pd2_u10(a);
8169 #else
8170 simde__m128d_private
8171 r_,
8172 a_ = simde__m128d_to_private(a);
8173
8174 SIMDE_VECTORIZE
8175 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8176 r_.f64[i] = simde_math_log1p(a_.f64[i]);
8177 }
8178
8179 return simde__m128d_from_private(r_);
8180 #endif
8181 }
8182 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8183 #undef _mm_log1p_pd
8184 #define _mm_log1p_pd(a) simde_mm_log1p_pd(a)
8185 #endif
8186
8187 SIMDE_FUNCTION_ATTRIBUTES
8188 simde__m256
simde_mm256_log1p_ps(simde__m256 a)8189 simde_mm256_log1p_ps (simde__m256 a) {
8190 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
8191 return _mm256_log1p_ps(a);
8192 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
8193 return Sleef_log1pf8_u10(a);
8194 #else
8195 simde__m256_private
8196 r_,
8197 a_ = simde__m256_to_private(a);
8198
8199 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
8200 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
8201 r_.m128[i] = simde_mm_log1p_ps(a_.m128[i]);
8202 }
8203 #else
8204 SIMDE_VECTORIZE
8205 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
8206 r_.f32[i] = simde_math_log1pf(a_.f32[i]);
8207 }
8208 #endif
8209
8210 return simde__m256_from_private(r_);
8211 #endif
8212 }
8213 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8214 #undef _mm256_log1p_ps
8215 #define _mm256_log1p_ps(a) simde_mm256_log1p_ps(a)
8216 #endif
8217
8218
8219 SIMDE_FUNCTION_ATTRIBUTES
8220 simde__m256d
simde_mm256_log1p_pd(simde__m256d a)8221 simde_mm256_log1p_pd (simde__m256d a) {
8222 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
8223 return _mm256_log1p_pd(a);
8224 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
8225 return Sleef_log1pd4_u10(a);
8226 #else
8227 simde__m256d_private
8228 r_,
8229 a_ = simde__m256d_to_private(a);
8230
8231 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
8232 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
8233 r_.m128d[i] = simde_mm_log1p_pd(a_.m128d[i]);
8234 }
8235 #else
8236 SIMDE_VECTORIZE
8237 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8238 r_.f64[i] = simde_math_log1p(a_.f64[i]);
8239 }
8240 #endif
8241
8242 return simde__m256d_from_private(r_);
8243 #endif
8244 }
8245 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8246 #undef _mm256_log1p_pd
8247 #define _mm256_log1p_pd(a) simde_mm256_log1p_pd(a)
8248 #endif
8249
8250 SIMDE_FUNCTION_ATTRIBUTES
8251 simde__m512
simde_mm512_log1p_ps(simde__m512 a)8252 simde_mm512_log1p_ps (simde__m512 a) {
8253 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8254 return _mm512_log1p_ps(a);
8255 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
8256 return Sleef_log1pf16_u10(a);
8257 #else
8258 simde__m512_private
8259 r_,
8260 a_ = simde__m512_to_private(a);
8261
8262 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
8263 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
8264 r_.m256[i] = simde_mm256_log1p_ps(a_.m256[i]);
8265 }
8266 #else
8267 SIMDE_VECTORIZE
8268 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
8269 r_.f32[i] = simde_math_log1pf(a_.f32[i]);
8270 }
8271 #endif
8272
8273 return simde__m512_from_private(r_);
8274 #endif
8275 }
8276 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8277 #undef _mm512_log1p_ps
8278 #define _mm512_log1p_ps(a) simde_mm512_log1p_ps(a)
8279 #endif
8280
8281 SIMDE_FUNCTION_ATTRIBUTES
8282 simde__m512d
simde_mm512_log1p_pd(simde__m512d a)8283 simde_mm512_log1p_pd (simde__m512d a) {
8284 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8285 return _mm512_log1p_pd(a);
8286 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
8287 return Sleef_log1pd8_u10(a);
8288 #else
8289 simde__m512d_private
8290 r_,
8291 a_ = simde__m512d_to_private(a);
8292
8293 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
8294 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
8295 r_.m256d[i] = simde_mm256_log1p_pd(a_.m256d[i]);
8296 }
8297 #else
8298 SIMDE_VECTORIZE
8299 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8300 r_.f64[i] = simde_math_log1p(a_.f64[i]);
8301 }
8302 #endif
8303
8304 return simde__m512d_from_private(r_);
8305 #endif
8306 }
8307 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8308 #undef _mm512_log1p_pd
8309 #define _mm512_log1p_pd(a) simde_mm512_log1p_pd(a)
8310 #endif
8311
8312 SIMDE_FUNCTION_ATTRIBUTES
8313 simde__m512
simde_mm512_mask_log1p_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)8314 simde_mm512_mask_log1p_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
8315 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8316 return _mm512_mask_log1p_ps(src, k, a);
8317 #else
8318 return simde_mm512_mask_mov_ps(src, k, simde_mm512_log1p_ps(a));
8319 #endif
8320 }
8321 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8322 #undef _mm512_mask_log1p_ps
8323 #define _mm512_mask_log1p_ps(src, k, a) simde_mm512_mask_log1p_ps(src, k, a)
8324 #endif
8325
8326 SIMDE_FUNCTION_ATTRIBUTES
8327 simde__m512d
simde_mm512_mask_log1p_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)8328 simde_mm512_mask_log1p_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
8329 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8330 return _mm512_mask_log1p_pd(src, k, a);
8331 #else
8332 return simde_mm512_mask_mov_pd(src, k, simde_mm512_log1p_pd(a));
8333 #endif
8334 }
8335 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8336 #undef _mm512_mask_log1p_pd
8337 #define _mm512_mask_log1p_pd(src, k, a) simde_mm512_mask_log1p_pd(src, k, a)
8338 #endif
8339
8340 SIMDE_FUNCTION_ATTRIBUTES
8341 simde__m128
simde_mm_log10_ps(simde__m128 a)8342 simde_mm_log10_ps (simde__m128 a) {
8343 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
8344 return _mm_log10_ps(a);
8345 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
8346 return Sleef_log10f4_u10(a);
8347 #else
8348 simde__m128_private
8349 r_,
8350 a_ = simde__m128_to_private(a);
8351
8352 SIMDE_VECTORIZE
8353 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
8354 r_.f32[i] = simde_math_log10f(a_.f32[i]);
8355 }
8356
8357 return simde__m128_from_private(r_);
8358 #endif
8359 }
8360 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8361 #undef _mm_log10_ps
8362 #define _mm_log10_ps(a) simde_mm_log10_ps(a)
8363 #endif
8364
8365 SIMDE_FUNCTION_ATTRIBUTES
8366 simde__m128d
simde_mm_log10_pd(simde__m128d a)8367 simde_mm_log10_pd (simde__m128d a) {
8368 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
8369 return _mm_log10_pd(a);
8370 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
8371 return Sleef_log10d2_u10(a);
8372 #else
8373 simde__m128d_private
8374 r_,
8375 a_ = simde__m128d_to_private(a);
8376
8377 SIMDE_VECTORIZE
8378 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8379 r_.f64[i] = simde_math_log10(a_.f64[i]);
8380 }
8381
8382 return simde__m128d_from_private(r_);
8383 #endif
8384 }
8385 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8386 #undef _mm_log10_pd
8387 #define _mm_log10_pd(a) simde_mm_log10_pd(a)
8388 #endif
8389
8390 SIMDE_FUNCTION_ATTRIBUTES
8391 simde__m256
simde_mm256_log10_ps(simde__m256 a)8392 simde_mm256_log10_ps (simde__m256 a) {
8393 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
8394 return _mm256_log10_ps(a);
8395 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
8396 return Sleef_log10f8_u10(a);
8397 #else
8398 simde__m256_private
8399 r_,
8400 a_ = simde__m256_to_private(a);
8401
8402 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
8403 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
8404 r_.m128[i] = simde_mm_log10_ps(a_.m128[i]);
8405 }
8406 #else
8407 SIMDE_VECTORIZE
8408 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
8409 r_.f32[i] = simde_math_log10f(a_.f32[i]);
8410 }
8411 #endif
8412
8413 return simde__m256_from_private(r_);
8414 #endif
8415 }
8416 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8417 #undef _mm256_log10_ps
8418 #define _mm256_log10_ps(a) simde_mm256_log10_ps(a)
8419 #endif
8420
8421
8422 SIMDE_FUNCTION_ATTRIBUTES
8423 simde__m256d
simde_mm256_log10_pd(simde__m256d a)8424 simde_mm256_log10_pd (simde__m256d a) {
8425 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
8426 return _mm256_log10_pd(a);
8427 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
8428 return Sleef_log10d4_u10(a);
8429 #else
8430 simde__m256d_private
8431 r_,
8432 a_ = simde__m256d_to_private(a);
8433
8434 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
8435 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
8436 r_.m128d[i] = simde_mm_log10_pd(a_.m128d[i]);
8437 }
8438 #else
8439 SIMDE_VECTORIZE
8440 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8441 r_.f64[i] = simde_math_log10(a_.f64[i]);
8442 }
8443 #endif
8444
8445 return simde__m256d_from_private(r_);
8446 #endif
8447 }
8448 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8449 #undef _mm256_log10_pd
8450 #define _mm256_log10_pd(a) simde_mm256_log10_pd(a)
8451 #endif
8452
8453 SIMDE_FUNCTION_ATTRIBUTES
8454 simde__m512
simde_mm512_log10_ps(simde__m512 a)8455 simde_mm512_log10_ps (simde__m512 a) {
8456 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8457 return _mm512_log10_ps(a);
8458 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
8459 return Sleef_log10f16_u10(a);
8460 #else
8461 simde__m512_private
8462 r_,
8463 a_ = simde__m512_to_private(a);
8464
8465 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
8466 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
8467 r_.m256[i] = simde_mm256_log10_ps(a_.m256[i]);
8468 }
8469 #else
8470 SIMDE_VECTORIZE
8471 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
8472 r_.f32[i] = simde_math_log10f(a_.f32[i]);
8473 }
8474 #endif
8475
8476 return simde__m512_from_private(r_);
8477 #endif
8478 }
8479 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8480 #undef _mm512_log10_ps
8481 #define _mm512_log10_ps(a) simde_mm512_log10_ps(a)
8482 #endif
8483
8484 SIMDE_FUNCTION_ATTRIBUTES
8485 simde__m512d
simde_mm512_log10_pd(simde__m512d a)8486 simde_mm512_log10_pd (simde__m512d a) {
8487 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8488 return _mm512_log10_pd(a);
8489 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
8490 return Sleef_log10d8_u10(a);
8491 #else
8492 simde__m512d_private
8493 r_,
8494 a_ = simde__m512d_to_private(a);
8495
8496 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
8497 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
8498 r_.m256d[i] = simde_mm256_log10_pd(a_.m256d[i]);
8499 }
8500 #else
8501 SIMDE_VECTORIZE
8502 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8503 r_.f64[i] = simde_math_log10(a_.f64[i]);
8504 }
8505 #endif
8506
8507 return simde__m512d_from_private(r_);
8508 #endif
8509 }
8510 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8511 #undef _mm512_log10_pd
8512 #define _mm512_log10_pd(a) simde_mm512_log10_pd(a)
8513 #endif
8514
8515 SIMDE_FUNCTION_ATTRIBUTES
8516 simde__m512
simde_mm512_mask_log10_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)8517 simde_mm512_mask_log10_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
8518 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8519 return _mm512_mask_log10_ps(src, k, a);
8520 #else
8521 return simde_mm512_mask_mov_ps(src, k, simde_mm512_log10_ps(a));
8522 #endif
8523 }
8524 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8525 #undef _mm512_mask_log10_ps
8526 #define _mm512_mask_log10_ps(src, k, a) simde_mm512_mask_log10_ps(src, k, a)
8527 #endif
8528
8529 SIMDE_FUNCTION_ATTRIBUTES
8530 simde__m512d
simde_mm512_mask_log10_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)8531 simde_mm512_mask_log10_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
8532 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8533 return _mm512_mask_log10_pd(src, k, a);
8534 #else
8535 return simde_mm512_mask_mov_pd(src, k, simde_mm512_log10_pd(a));
8536 #endif
8537 }
8538 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8539 #undef _mm512_mask_log10_pd
8540 #define _mm512_mask_log10_pd(src, k, a) simde_mm512_mask_log10_pd(src, k, a)
8541 #endif
8542
8543 SIMDE_FUNCTION_ATTRIBUTES
8544 simde__m512
simde_mm512_nearbyint_ps(simde__m512 a)8545 simde_mm512_nearbyint_ps (simde__m512 a) {
8546 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8547 return _mm512_nearbyint_ps(a);
8548 #else
8549 simde__m512_private
8550 r_,
8551 a_ = simde__m512_to_private(a);
8552
8553 SIMDE_VECTORIZE
8554 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
8555 r_.f32[i] = simde_math_nearbyintf(a_.f32[i]);
8556 }
8557
8558 return simde__m512_from_private(r_);
8559 #endif
8560 }
8561 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8562 #undef _mm512_nearbyint_ps
8563 #define _mm512_nearbyint_ps(a) simde_mm512_nearbyint_ps(a)
8564 #endif
8565
8566 SIMDE_FUNCTION_ATTRIBUTES
8567 simde__m512d
simde_mm512_nearbyint_pd(simde__m512d a)8568 simde_mm512_nearbyint_pd (simde__m512d a) {
8569 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8570 return _mm512_nearbyint_pd(a);
8571 #else
8572 simde__m512d_private
8573 r_,
8574 a_ = simde__m512d_to_private(a);
8575
8576 SIMDE_VECTORIZE
8577 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8578 r_.f64[i] = simde_math_nearbyint(a_.f64[i]);
8579 }
8580
8581 return simde__m512d_from_private(r_);
8582 #endif
8583 }
8584 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8585 #undef _mm512_nearbyint_pd
8586 #define _mm512_nearbyint_pd(a) simde_mm512_nearbyint_pd(a)
8587 #endif
8588
8589 SIMDE_FUNCTION_ATTRIBUTES
8590 simde__m512
simde_mm512_mask_nearbyint_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)8591 simde_mm512_mask_nearbyint_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
8592 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8593 return _mm512_mask_nearbyint_ps(src, k, a);
8594 #else
8595 return simde_mm512_mask_mov_ps(src, k, simde_mm512_nearbyint_ps(a));
8596 #endif
8597 }
8598 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8599 #undef _mm512_mask_nearbyint_ps
8600 #define _mm512_mask_nearbyint_ps(src, k, a) simde_mm512_mask_nearbyint_ps(src, k, a)
8601 #endif
8602
8603 SIMDE_FUNCTION_ATTRIBUTES
8604 simde__m512d
simde_mm512_mask_nearbyint_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)8605 simde_mm512_mask_nearbyint_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
8606 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8607 return _mm512_mask_nearbyint_pd(src, k, a);
8608 #else
8609 return simde_mm512_mask_mov_pd(src, k, simde_mm512_nearbyint_pd(a));
8610 #endif
8611 }
8612 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8613 #undef _mm512_mask_nearbyint_pd
8614 #define _mm512_mask_nearbyint_pd(src, k, a) simde_mm512_mask_nearbyint_pd(src, k, a)
8615 #endif
8616
8617 SIMDE_FUNCTION_ATTRIBUTES
8618 simde__m128
simde_mm_pow_ps(simde__m128 a,simde__m128 b)8619 simde_mm_pow_ps (simde__m128 a, simde__m128 b) {
8620 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
8621 return _mm_pow_ps(a, b);
8622 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
8623 return Sleef_powf4_u10(a, b);
8624 #else
8625 simde__m128_private
8626 r_,
8627 a_ = simde__m128_to_private(a),
8628 b_ = simde__m128_to_private(b);
8629
8630 SIMDE_VECTORIZE
8631 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
8632 r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]);
8633 }
8634
8635 return simde__m128_from_private(r_);
8636 #endif
8637 }
8638 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8639 #undef _mm_pow_ps
8640 #define _mm_pow_ps(a, b) simde_mm_pow_ps(a, b)
8641 #endif
8642
8643 SIMDE_FUNCTION_ATTRIBUTES
8644 simde__m128d
simde_mm_pow_pd(simde__m128d a,simde__m128d b)8645 simde_mm_pow_pd (simde__m128d a, simde__m128d b) {
8646 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
8647 return _mm_pow_pd(a, b);
8648 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
8649 return Sleef_powd2_u10(a, b);
8650 #else
8651 simde__m128d_private
8652 r_,
8653 a_ = simde__m128d_to_private(a),
8654 b_ = simde__m128d_to_private(b);
8655
8656 SIMDE_VECTORIZE
8657 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8658 r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]);
8659 }
8660
8661 return simde__m128d_from_private(r_);
8662 #endif
8663 }
8664 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8665 #undef _mm_pow_pd
8666 #define _mm_pow_pd(a, b) simde_mm_pow_pd(a, b)
8667 #endif
8668
8669 SIMDE_FUNCTION_ATTRIBUTES
8670 simde__m256
simde_mm256_pow_ps(simde__m256 a,simde__m256 b)8671 simde_mm256_pow_ps (simde__m256 a, simde__m256 b) {
8672 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
8673 return _mm256_pow_ps(a, b);
8674 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
8675 return Sleef_powf8_u10(a, b);
8676 #else
8677 simde__m256_private
8678 r_,
8679 a_ = simde__m256_to_private(a),
8680 b_ = simde__m256_to_private(b);
8681
8682 SIMDE_VECTORIZE
8683 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
8684 r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]);
8685 }
8686
8687 return simde__m256_from_private(r_);
8688 #endif
8689 }
8690 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8691 #undef _mm256_pow_ps
8692 #define _mm256_pow_ps(a, b) simde_mm256_pow_ps(a, b)
8693 #endif
8694
8695
8696 SIMDE_FUNCTION_ATTRIBUTES
8697 simde__m256d
simde_mm256_pow_pd(simde__m256d a,simde__m256d b)8698 simde_mm256_pow_pd (simde__m256d a, simde__m256d b) {
8699 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
8700 return _mm256_pow_pd(a, b);
8701 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
8702 return Sleef_powd4_u10(a, b);
8703 #else
8704 simde__m256d_private
8705 r_,
8706 a_ = simde__m256d_to_private(a),
8707 b_ = simde__m256d_to_private(b);
8708
8709 SIMDE_VECTORIZE
8710 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8711 r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]);
8712 }
8713
8714 return simde__m256d_from_private(r_);
8715 #endif
8716 }
8717 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8718 #undef _mm256_pow_pd
8719 #define _mm256_pow_pd(a, b) simde_mm256_pow_pd(a, b)
8720 #endif
8721
8722 SIMDE_FUNCTION_ATTRIBUTES
8723 simde__m512
simde_mm512_pow_ps(simde__m512 a,simde__m512 b)8724 simde_mm512_pow_ps (simde__m512 a, simde__m512 b) {
8725 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8726 return _mm512_pow_ps(a, b);
8727 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
8728 return Sleef_powf16_u10(a, b);
8729 #else
8730 simde__m512_private
8731 r_,
8732 a_ = simde__m512_to_private(a),
8733 b_ = simde__m512_to_private(b);
8734
8735 SIMDE_VECTORIZE
8736 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
8737 r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]);
8738 }
8739
8740 return simde__m512_from_private(r_);
8741 #endif
8742 }
8743 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8744 #undef _mm512_pow_ps
8745 #define _mm512_pow_ps(a, b) simde_mm512_pow_ps(a, b)
8746 #endif
8747
8748 SIMDE_FUNCTION_ATTRIBUTES
8749 simde__m512d
simde_mm512_pow_pd(simde__m512d a,simde__m512d b)8750 simde_mm512_pow_pd (simde__m512d a, simde__m512d b) {
8751 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8752 return _mm512_pow_pd(a, b);
8753 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
8754 return Sleef_powd8_u10(a, b);
8755 #else
8756 simde__m512d_private
8757 r_,
8758 a_ = simde__m512d_to_private(a),
8759 b_ = simde__m512d_to_private(b);
8760
8761 SIMDE_VECTORIZE
8762 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8763 r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]);
8764 }
8765
8766 return simde__m512d_from_private(r_);
8767 #endif
8768 }
8769 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8770 #undef _mm512_pow_pd
8771 #define _mm512_pow_pd(a, b) simde_mm512_pow_pd(a, b)
8772 #endif
8773
8774 SIMDE_FUNCTION_ATTRIBUTES
8775 simde__m512
simde_mm512_mask_pow_ps(simde__m512 src,simde__mmask16 k,simde__m512 a,simde__m512 b)8776 simde_mm512_mask_pow_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
8777 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8778 return _mm512_mask_pow_ps(src, k, a, b);
8779 #else
8780 return simde_mm512_mask_mov_ps(src, k, simde_mm512_pow_ps(a, b));
8781 #endif
8782 }
8783 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8784 #undef _mm512_mask_pow_ps
8785 #define _mm512_mask_pow_ps(src, k, a, b) simde_mm512_mask_pow_ps(src, k, a, b)
8786 #endif
8787
8788 SIMDE_FUNCTION_ATTRIBUTES
8789 simde__m512d
simde_mm512_mask_pow_pd(simde__m512d src,simde__mmask8 k,simde__m512d a,simde__m512d b)8790 simde_mm512_mask_pow_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
8791 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8792 return _mm512_mask_pow_pd(src, k, a, b);
8793 #else
8794 return simde_mm512_mask_mov_pd(src, k, simde_mm512_pow_pd(a, b));
8795 #endif
8796 }
8797 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8798 #undef _mm512_mask_pow_pd
8799 #define _mm512_mask_pow_pd(src, k, a, b) simde_mm512_mask_pow_pd(src, k, a, b)
8800 #endif
8801
8802 SIMDE_FUNCTION_ATTRIBUTES
8803 simde__m128
simde_mm_clog_ps(simde__m128 a)8804 simde_mm_clog_ps (simde__m128 a) {
8805 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
8806 return _mm_clog_ps(a);
8807 #else
8808 simde__m128_private
8809 r_,
8810 a_ = simde__m128_to_private(a);
8811
8812 simde__m128_private pow_res_ = simde__m128_to_private(simde_mm_pow_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0))));
8813 SIMDE_VECTORIZE
8814 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {
8815 r_.f32[ i ] = simde_math_logf(simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i+1]));
8816 r_.f32[i + 1] = simde_math_atan2f(a_.f32[i + 1], a_.f32[i]);
8817 }
8818
8819 return simde__m128_from_private(r_);
8820 #endif
8821 }
8822 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8823 #undef _mm_clog_ps
8824 #define _mm_clog_ps(a) simde_mm_clog_ps(a)
8825 #endif
8826
8827 SIMDE_FUNCTION_ATTRIBUTES
8828 simde__m256
simde_mm256_clog_ps(simde__m256 a)8829 simde_mm256_clog_ps (simde__m256 a) {
8830 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
8831 return _mm256_clog_ps(a);
8832 #else
8833 simde__m256_private
8834 r_,
8835 a_ = simde__m256_to_private(a);
8836
8837 simde__m256_private pow_res_ = simde__m256_to_private(simde_mm256_pow_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0))));
8838 SIMDE_VECTORIZE
8839 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {
8840 r_.f32[ i ] = simde_math_logf(simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i + 1]));
8841 r_.f32[i + 1] = simde_math_atan2f(a_.f32[i + 1], a_.f32[i]);
8842 }
8843
8844 return simde__m256_from_private(r_);
8845 #endif
8846 }
8847 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8848 #undef _mm256_clog_ps
8849 #define _mm256_clog_ps(a) simde_mm256_clog_ps(a)
8850 #endif
8851
8852 SIMDE_FUNCTION_ATTRIBUTES
8853 simde__m128
simde_mm_csqrt_ps(simde__m128 a)8854 simde_mm_csqrt_ps (simde__m128 a) {
8855 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
8856 return _mm_csqrt_ps(a);
8857 #else
8858 simde__m128_private
8859 r_,
8860 a_ = simde__m128_to_private(a);
8861
8862 simde__m128 pow_res= simde_mm_pow_ps(a,simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)));
8863 simde__m128_private pow_res_=simde__m128_to_private(pow_res);
8864
8865 SIMDE_VECTORIZE
8866 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) {
8867 simde_float32 sign = simde_math_copysignf(SIMDE_FLOAT32_C(1.0), a_.f32[i + 1]);
8868 simde_float32 temp = simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i+1]);
8869
8870 r_.f32[ i ] = simde_math_sqrtf(( a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0));
8871 r_.f32[i + 1] = sign * simde_math_sqrtf((-a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0));
8872 }
8873
8874 return simde__m128_from_private(r_);
8875 #endif
8876 }
8877 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8878 #undef _mm_csqrt_ps
8879 #define _mm_csqrt_ps(a) simde_mm_csqrt_ps(a)
8880 #endif
8881
8882 SIMDE_FUNCTION_ATTRIBUTES
8883 simde__m256
simde_mm256_csqrt_ps(simde__m256 a)8884 simde_mm256_csqrt_ps (simde__m256 a) {
8885 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
8886 return _mm256_csqrt_ps(a);
8887 #else
8888 simde__m256_private
8889 r_,
8890 a_ = simde__m256_to_private(a);
8891
8892 simde__m256 pow_res= simde_mm256_pow_ps(a,simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)));
8893 simde__m256_private pow_res_=simde__m256_to_private(pow_res);
8894
8895 SIMDE_VECTORIZE
8896 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) {
8897 simde_float32 sign = simde_math_copysignf(SIMDE_FLOAT32_C(1.0), a_.f32[i + 1]);
8898 simde_float32 temp = simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i+1]);
8899
8900 r_.f32[ i ] = simde_math_sqrtf(( a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0));
8901 r_.f32[i + 1] = sign * simde_math_sqrtf((-a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0));
8902 }
8903
8904 return simde__m256_from_private(r_);
8905 #endif
8906 }
8907 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8908 #undef _mm256_csqrt_ps
8909 #define _mm256_csqrt_ps(a) simde_mm256_csqrt_ps(a)
8910 #endif
8911
8912 SIMDE_FUNCTION_ATTRIBUTES
8913 simde__m128i
simde_mm_rem_epi8(simde__m128i a,simde__m128i b)8914 simde_mm_rem_epi8 (simde__m128i a, simde__m128i b) {
8915 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
8916 return _mm_rem_epi8(a, b);
8917 #else
8918 simde__m128i_private
8919 r_,
8920 a_ = simde__m128i_to_private(a),
8921 b_ = simde__m128i_to_private(b);
8922
8923 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
8924 r_.i8 = a_.i8 % b_.i8;
8925 #else
8926 SIMDE_VECTORIZE
8927 for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
8928 r_.i8[i] = a_.i8[i] % b_.i8[i];
8929 }
8930 #endif
8931
8932 return simde__m128i_from_private(r_);
8933 #endif
8934 }
8935 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8936 #undef _mm_rem_epi8
8937 #define _mm_rem_epi8(a, b) simde_mm_rem_epi8((a), (b))
8938 #endif
8939
8940 SIMDE_FUNCTION_ATTRIBUTES
8941 simde__m128i
simde_mm_rem_epi16(simde__m128i a,simde__m128i b)8942 simde_mm_rem_epi16 (simde__m128i a, simde__m128i b) {
8943 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
8944 return _mm_rem_epi16(a, b);
8945 #else
8946 simde__m128i_private
8947 r_,
8948 a_ = simde__m128i_to_private(a),
8949 b_ = simde__m128i_to_private(b);
8950
8951 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
8952 r_.i16 = a_.i16 % b_.i16;
8953 #else
8954 SIMDE_VECTORIZE
8955 for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
8956 r_.i16[i] = a_.i16[i] % b_.i16[i];
8957 }
8958 #endif
8959
8960 return simde__m128i_from_private(r_);
8961 #endif
8962 }
8963 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8964 #undef _mm_rem_epi16
8965 #define _mm_rem_epi16(a, b) simde_mm_rem_epi16((a), (b))
8966 #endif
8967
8968 SIMDE_FUNCTION_ATTRIBUTES
8969 simde__m128i
simde_mm_rem_epi32(simde__m128i a,simde__m128i b)8970 simde_mm_rem_epi32 (simde__m128i a, simde__m128i b) {
8971 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
8972 return _mm_rem_epi32(a, b);
8973 #else
8974 simde__m128i_private
8975 r_,
8976 a_ = simde__m128i_to_private(a),
8977 b_ = simde__m128i_to_private(b);
8978
8979 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
8980 r_.i32 = a_.i32 % b_.i32;
8981 #else
8982 SIMDE_VECTORIZE
8983 for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
8984 r_.i32[i] = a_.i32[i] % b_.i32[i];
8985 }
8986 #endif
8987
8988 return simde__m128i_from_private(r_);
8989 #endif
8990 }
8991 #define simde_mm_irem_epi32(a, b) simde_mm_rem_epi32(a, b)
8992 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8993 #undef _mm_rem_epi32
8994 #define _mm_rem_epi32(a, b) simde_mm_rem_epi32(a, b)
8995 #undef _mm_irem_epi32
8996 #define _mm_irem_epi32(a, b) simde_mm_rem_epi32(a, b)
8997 #endif
8998
8999 SIMDE_FUNCTION_ATTRIBUTES
9000 simde__m128i
simde_mm_rem_epi64(simde__m128i a,simde__m128i b)9001 simde_mm_rem_epi64 (simde__m128i a, simde__m128i b) {
9002 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
9003 return _mm_rem_epi64(a, b);
9004 #else
9005 simde__m128i_private
9006 r_,
9007 a_ = simde__m128i_to_private(a),
9008 b_ = simde__m128i_to_private(b);
9009
9010 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9011 r_.i64 = a_.i64 % b_.i64;
9012 #else
9013 SIMDE_VECTORIZE
9014 for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
9015 r_.i64[i] = a_.i64[i] % b_.i64[i];
9016 }
9017 #endif
9018
9019 return simde__m128i_from_private(r_);
9020 #endif
9021 }
9022 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9023 #undef _mm_rem_epi64
9024 #define _mm_rem_epi64(a, b) simde_mm_rem_epi64((a), (b))
9025 #endif
9026
9027 SIMDE_FUNCTION_ATTRIBUTES
9028 simde__m128i
simde_mm_rem_epu8(simde__m128i a,simde__m128i b)9029 simde_mm_rem_epu8 (simde__m128i a, simde__m128i b) {
9030 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
9031 return _mm_rem_epu8(a, b);
9032 #else
9033 simde__m128i_private
9034 r_,
9035 a_ = simde__m128i_to_private(a),
9036 b_ = simde__m128i_to_private(b);
9037
9038 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9039 r_.u8 = a_.u8 % b_.u8;
9040 #else
9041 SIMDE_VECTORIZE
9042 for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
9043 r_.u8[i] = a_.u8[i] % b_.u8[i];
9044 }
9045 #endif
9046
9047 return simde__m128i_from_private(r_);
9048 #endif
9049 }
9050 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9051 #undef _mm_rem_epu8
9052 #define _mm_rem_epu8(a, b) simde_mm_rem_epu8((a), (b))
9053 #endif
9054
9055 SIMDE_FUNCTION_ATTRIBUTES
9056 simde__m128i
simde_mm_rem_epu16(simde__m128i a,simde__m128i b)9057 simde_mm_rem_epu16 (simde__m128i a, simde__m128i b) {
9058 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
9059 return _mm_rem_epu16(a, b);
9060 #else
9061 simde__m128i_private
9062 r_,
9063 a_ = simde__m128i_to_private(a),
9064 b_ = simde__m128i_to_private(b);
9065
9066 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9067 r_.u16 = a_.u16 % b_.u16;
9068 #else
9069 SIMDE_VECTORIZE
9070 for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
9071 r_.u16[i] = a_.u16[i] % b_.u16[i];
9072 }
9073 #endif
9074
9075 return simde__m128i_from_private(r_);
9076 #endif
9077 }
9078 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9079 #undef _mm_rem_epu16
9080 #define _mm_rem_epu16(a, b) simde_mm_rem_epu16((a), (b))
9081 #endif
9082
9083 SIMDE_FUNCTION_ATTRIBUTES
9084 simde__m128i
simde_mm_rem_epu32(simde__m128i a,simde__m128i b)9085 simde_mm_rem_epu32 (simde__m128i a, simde__m128i b) {
9086 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
9087 return _mm_rem_epu32(a, b);
9088 #else
9089 simde__m128i_private
9090 r_,
9091 a_ = simde__m128i_to_private(a),
9092 b_ = simde__m128i_to_private(b);
9093
9094 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9095 r_.u32 = a_.u32 % b_.u32;
9096 #else
9097 SIMDE_VECTORIZE
9098 for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
9099 r_.u32[i] = a_.u32[i] % b_.u32[i];
9100 }
9101 #endif
9102
9103 return simde__m128i_from_private(r_);
9104 #endif
9105 }
9106 #define simde_mm_urem_epi32(a, b) simde_mm_rem_epu32(a, b)
9107 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9108 #undef _mm_rem_epu32
9109 #define _mm_rem_epu32(a, b) simde_mm_rem_epu32(a, b)
9110 #undef _mm_urem_epi32
9111 #define _mm_urem_epi32(a, b) simde_mm_rem_epu32(a, b)
9112 #endif
9113
9114 SIMDE_FUNCTION_ATTRIBUTES
9115 simde__m128i
simde_mm_rem_epu64(simde__m128i a,simde__m128i b)9116 simde_mm_rem_epu64 (simde__m128i a, simde__m128i b) {
9117 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
9118 return _mm_rem_epu64(a, b);
9119 #else
9120 simde__m128i_private
9121 r_,
9122 a_ = simde__m128i_to_private(a),
9123 b_ = simde__m128i_to_private(b);
9124
9125 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9126 r_.u64 = a_.u64 % b_.u64;
9127 #else
9128 SIMDE_VECTORIZE
9129 for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
9130 r_.u64[i] = a_.u64[i] % b_.u64[i];
9131 }
9132 #endif
9133
9134 return simde__m128i_from_private(r_);
9135 #endif
9136 }
9137 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9138 #undef _mm_rem_epu64
9139 #define _mm_rem_epu64(a, b) simde_mm_rem_epu64((a), (b))
9140 #endif
9141
9142 SIMDE_FUNCTION_ATTRIBUTES
9143 simde__m256i
simde_mm256_rem_epi8(simde__m256i a,simde__m256i b)9144 simde_mm256_rem_epi8 (simde__m256i a, simde__m256i b) {
9145 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
9146 return _mm256_rem_epi8(a, b);
9147 #else
9148 simde__m256i_private
9149 r_,
9150 a_ = simde__m256i_to_private(a),
9151 b_ = simde__m256i_to_private(b);
9152
9153 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9154 r_.i8 = a_.i8 % b_.i8;
9155 #else
9156 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
9157 for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
9158 r_.m128i[i] = simde_mm_rem_epi8(a_.m128i[i], b_.m128i[i]);
9159 }
9160 #else
9161 SIMDE_VECTORIZE
9162 for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
9163 r_.i8[i] = a_.i8[i] % b_.i8[i];
9164 }
9165 #endif
9166 #endif
9167
9168 return simde__m256i_from_private(r_);
9169 #endif
9170 }
9171 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9172 #undef _mm256_rem_epi8
9173 #define _mm256_rem_epi8(a, b) simde_mm256_rem_epi8((a), (b))
9174 #endif
9175
9176 SIMDE_FUNCTION_ATTRIBUTES
9177 simde__m256i
simde_mm256_rem_epi16(simde__m256i a,simde__m256i b)9178 simde_mm256_rem_epi16 (simde__m256i a, simde__m256i b) {
9179 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
9180 return _mm256_rem_epi16(a, b);
9181 #else
9182 simde__m256i_private
9183 r_,
9184 a_ = simde__m256i_to_private(a),
9185 b_ = simde__m256i_to_private(b);
9186
9187 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9188 r_.i16 = a_.i16 % b_.i16;
9189 #else
9190 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
9191 for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
9192 r_.m128i[i] = simde_mm_rem_epi16(a_.m128i[i], b_.m128i[i]);
9193 }
9194 #else
9195 SIMDE_VECTORIZE
9196 for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
9197 r_.i16[i] = a_.i16[i] % b_.i16[i];
9198 }
9199 #endif
9200 #endif
9201
9202 return simde__m256i_from_private(r_);
9203 #endif
9204 }
9205 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9206 #undef _mm256_rem_epi16
9207 #define _mm256_rem_epi16(a, b) simde_mm256_rem_epi16((a), (b))
9208 #endif
9209
9210 SIMDE_FUNCTION_ATTRIBUTES
9211 simde__m256i
simde_mm256_rem_epi32(simde__m256i a,simde__m256i b)9212 simde_mm256_rem_epi32 (simde__m256i a, simde__m256i b) {
9213 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
9214 return _mm256_rem_epi32(a, b);
9215 #else
9216 simde__m256i_private
9217 r_,
9218 a_ = simde__m256i_to_private(a),
9219 b_ = simde__m256i_to_private(b);
9220
9221 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9222 r_.i32 = a_.i32 % b_.i32;
9223 #else
9224 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
9225 for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
9226 r_.m128i[i] = simde_mm_rem_epi32(a_.m128i[i], b_.m128i[i]);
9227 }
9228 #else
9229 SIMDE_VECTORIZE
9230 for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
9231 r_.i32[i] = a_.i32[i] % b_.i32[i];
9232 }
9233 #endif
9234 #endif
9235
9236 return simde__m256i_from_private(r_);
9237 #endif
9238 }
9239 #define simde_mm256_irem_epi32(a, b) simde_mm256_rem_epi32(a, b)
9240 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9241 #undef _mm256_rem_epi32
9242 #define _mm256_rem_epi32(a, b) simde_mm256_rem_epi32(a, b)
9243 #undef _mm256_irem_epi32
9244 #define _mm256_irem_epi32(a, b) simde_mm256_rem_epi32(a, b)
9245 #endif
9246
9247 SIMDE_FUNCTION_ATTRIBUTES
9248 simde__m256i
simde_mm256_rem_epi64(simde__m256i a,simde__m256i b)9249 simde_mm256_rem_epi64 (simde__m256i a, simde__m256i b) {
9250 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
9251 return _mm256_rem_epi64(a, b);
9252 #else
9253 simde__m256i_private
9254 r_,
9255 a_ = simde__m256i_to_private(a),
9256 b_ = simde__m256i_to_private(b);
9257
9258 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9259 r_.i64 = a_.i64 % b_.i64;
9260 #else
9261 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
9262 for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
9263 r_.m128i[i] = simde_mm_rem_epi64(a_.m128i[i], b_.m128i[i]);
9264 }
9265 #else
9266 SIMDE_VECTORIZE
9267 for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
9268 r_.i64[i] = a_.i64[i] % b_.i64[i];
9269 }
9270 #endif
9271 #endif
9272
9273 return simde__m256i_from_private(r_);
9274 #endif
9275 }
9276 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9277 #undef _mm256_rem_epi64
9278 #define _mm256_rem_epi64(a, b) simde_mm256_rem_epi64((a), (b))
9279 #endif
9280
9281 SIMDE_FUNCTION_ATTRIBUTES
9282 simde__m256i
simde_mm256_rem_epu8(simde__m256i a,simde__m256i b)9283 simde_mm256_rem_epu8 (simde__m256i a, simde__m256i b) {
9284 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
9285 return _mm256_rem_epu8(a, b);
9286 #else
9287 simde__m256i_private
9288 r_,
9289 a_ = simde__m256i_to_private(a),
9290 b_ = simde__m256i_to_private(b);
9291
9292 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9293 r_.u8 = a_.u8 % b_.u8;
9294 #else
9295 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
9296 for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
9297 r_.m128i[i] = simde_mm_rem_epu8(a_.m128i[i], b_.m128i[i]);
9298 }
9299 #else
9300 SIMDE_VECTORIZE
9301 for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
9302 r_.u8[i] = a_.u8[i] % b_.u8[i];
9303 }
9304 #endif
9305 #endif
9306
9307 return simde__m256i_from_private(r_);
9308 #endif
9309 }
9310 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9311 #undef _mm256_rem_epu8
9312 #define _mm256_rem_epu8(a, b) simde_mm256_rem_epu8((a), (b))
9313 #endif
9314
9315 SIMDE_FUNCTION_ATTRIBUTES
9316 simde__m256i
simde_mm256_rem_epu16(simde__m256i a,simde__m256i b)9317 simde_mm256_rem_epu16 (simde__m256i a, simde__m256i b) {
9318 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
9319 return _mm256_rem_epu16(a, b);
9320 #else
9321 simde__m256i_private
9322 r_,
9323 a_ = simde__m256i_to_private(a),
9324 b_ = simde__m256i_to_private(b);
9325
9326 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9327 r_.u16 = a_.u16 % b_.u16;
9328 #else
9329 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
9330 for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
9331 r_.m128i[i] = simde_mm_rem_epu16(a_.m128i[i], b_.m128i[i]);
9332 }
9333 #else
9334 SIMDE_VECTORIZE
9335 for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
9336 r_.u16[i] = a_.u16[i] % b_.u16[i];
9337 }
9338 #endif
9339 #endif
9340
9341 return simde__m256i_from_private(r_);
9342 #endif
9343 }
9344 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9345 #undef _mm256_rem_epu16
9346 #define _mm256_rem_epu16(a, b) simde_mm256_rem_epu16((a), (b))
9347 #endif
9348
9349 SIMDE_FUNCTION_ATTRIBUTES
9350 simde__m256i
simde_mm256_rem_epu32(simde__m256i a,simde__m256i b)9351 simde_mm256_rem_epu32 (simde__m256i a, simde__m256i b) {
9352 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
9353 return _mm256_rem_epu32(a, b);
9354 #else
9355 simde__m256i_private
9356 r_,
9357 a_ = simde__m256i_to_private(a),
9358 b_ = simde__m256i_to_private(b);
9359
9360 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9361 r_.u32 = a_.u32 % b_.u32;
9362 #else
9363 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
9364 for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
9365 r_.m128i[i] = simde_mm_rem_epu32(a_.m128i[i], b_.m128i[i]);
9366 }
9367 #else
9368 SIMDE_VECTORIZE
9369 for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
9370 r_.u32[i] = a_.u32[i] % b_.u32[i];
9371 }
9372 #endif
9373 #endif
9374
9375 return simde__m256i_from_private(r_);
9376 #endif
9377 }
9378 #define simde_mm256_urem_epi32(a, b) simde_mm256_rem_epu32(a, b)
9379 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9380 #undef _mm256_rem_epu32
9381 #define _mm256_rem_epu32(a, b) simde_mm256_rem_epu32(a, b)
9382 #undef _mm256_urem_epi32
9383 #define _mm256_urem_epi32(a, b) simde_mm256_rem_epu32(a, b)
9384 #endif
9385
9386 SIMDE_FUNCTION_ATTRIBUTES
9387 simde__m256i
simde_mm256_rem_epu64(simde__m256i a,simde__m256i b)9388 simde_mm256_rem_epu64 (simde__m256i a, simde__m256i b) {
9389 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
9390 return _mm256_rem_epu64(a, b);
9391 #else
9392 simde__m256i_private
9393 r_,
9394 a_ = simde__m256i_to_private(a),
9395 b_ = simde__m256i_to_private(b);
9396
9397 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9398 r_.u64 = a_.u64 % b_.u64;
9399 #else
9400 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
9401 for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
9402 r_.m128i[i] = simde_mm_rem_epu64(a_.m128i[i], b_.m128i[i]);
9403 }
9404 #else
9405 SIMDE_VECTORIZE
9406 for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
9407 r_.u64[i] = a_.u64[i] % b_.u64[i];
9408 }
9409 #endif
9410 #endif
9411
9412 return simde__m256i_from_private(r_);
9413 #endif
9414 }
9415 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9416 #undef _mm256_rem_epu64
9417 #define _mm256_rem_epu64(a, b) simde_mm256_rem_epu64((a), (b))
9418 #endif
9419
9420 SIMDE_FUNCTION_ATTRIBUTES
9421 simde__m512i
simde_mm512_rem_epi8(simde__m512i a,simde__m512i b)9422 simde_mm512_rem_epi8 (simde__m512i a, simde__m512i b) {
9423 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
9424 return _mm512_rem_epi8(a, b);
9425 #else
9426 simde__m512i_private
9427 r_,
9428 a_ = simde__m512i_to_private(a),
9429 b_ = simde__m512i_to_private(b);
9430
9431 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9432 r_.i8 = a_.i8 % b_.i8;
9433 #else
9434 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
9435 for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
9436 r_.m256i[i] = simde_mm256_rem_epi8(a_.m256i[i], b_.m256i[i]);
9437 }
9438 #else
9439 SIMDE_VECTORIZE
9440 for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
9441 r_.i8[i] = a_.i8[i] % b_.i8[i];
9442 }
9443 #endif
9444 #endif
9445
9446 return simde__m512i_from_private(r_);
9447 #endif
9448 }
9449 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9450 #undef _mm512_rem_epi8
9451 #define _mm512_rem_epi8(a, b) simde_mm512_rem_epi8((a), (b))
9452 #endif
9453
9454 SIMDE_FUNCTION_ATTRIBUTES
9455 simde__m512i
simde_mm512_rem_epi16(simde__m512i a,simde__m512i b)9456 simde_mm512_rem_epi16 (simde__m512i a, simde__m512i b) {
9457 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
9458 return _mm512_rem_epi16(a, b);
9459 #else
9460 simde__m512i_private
9461 r_,
9462 a_ = simde__m512i_to_private(a),
9463 b_ = simde__m512i_to_private(b);
9464
9465 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9466 r_.i16 = a_.i16 % b_.i16;
9467 #else
9468 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
9469 for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
9470 r_.m256i[i] = simde_mm256_rem_epi16(a_.m256i[i], b_.m256i[i]);
9471 }
9472 #else
9473 SIMDE_VECTORIZE
9474 for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
9475 r_.i16[i] = a_.i16[i] % b_.i16[i];
9476 }
9477 #endif
9478 #endif
9479
9480 return simde__m512i_from_private(r_);
9481 #endif
9482 }
9483 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9484 #undef _mm512_rem_epi16
9485 #define _mm512_rem_epi16(a, b) simde_mm512_rem_epi16((a), (b))
9486 #endif
9487
9488 SIMDE_FUNCTION_ATTRIBUTES
9489 simde__m512i
simde_mm512_rem_epi32(simde__m512i a,simde__m512i b)9490 simde_mm512_rem_epi32 (simde__m512i a, simde__m512i b) {
9491 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
9492 return _mm512_rem_epi32(a, b);
9493 #else
9494 simde__m512i_private
9495 r_,
9496 a_ = simde__m512i_to_private(a),
9497 b_ = simde__m512i_to_private(b);
9498
9499 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9500 r_.i32 = a_.i32 % b_.i32;
9501 #else
9502 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
9503 for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
9504 r_.m256i[i] = simde_mm256_rem_epi32(a_.m256i[i], b_.m256i[i]);
9505 }
9506 #else
9507 SIMDE_VECTORIZE
9508 for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
9509 r_.i32[i] = a_.i32[i] % b_.i32[i];
9510 }
9511 #endif
9512 #endif
9513
9514 return simde__m512i_from_private(r_);
9515 #endif
9516 }
9517 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9518 #undef _mm512_rem_epi32
9519 #define _mm512_rem_epi32(a, b) simde_mm512_rem_epi32((a), (b))
9520 #endif
9521
9522 SIMDE_FUNCTION_ATTRIBUTES
9523 simde__m512i
simde_mm512_mask_rem_epi32(simde__m512i src,simde__mmask16 k,simde__m512i a,simde__m512i b)9524 simde_mm512_mask_rem_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
9525 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
9526 return _mm512_mask_rem_epi32(src, k, a, b);
9527 #else
9528 return simde_mm512_mask_mov_epi32(src, k, simde_mm512_rem_epi32(a, b));
9529 #endif
9530 }
9531 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9532 #undef _mm512_mask_rem_epi32
9533 #define _mm512_mask_rem_epi32(src, k, a, b) simde_mm512_mask_rem_epi32(src, k, a, b)
9534 #endif
9535
9536 SIMDE_FUNCTION_ATTRIBUTES
9537 simde__m512i
simde_mm512_rem_epi64(simde__m512i a,simde__m512i b)9538 simde_mm512_rem_epi64 (simde__m512i a, simde__m512i b) {
9539 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
9540 return _mm512_rem_epi64(a, b);
9541 #else
9542 simde__m512i_private
9543 r_,
9544 a_ = simde__m512i_to_private(a),
9545 b_ = simde__m512i_to_private(b);
9546
9547 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9548 r_.i64 = a_.i64 % b_.i64;
9549 #else
9550 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
9551 for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
9552 r_.m256i[i] = simde_mm256_rem_epi64(a_.m256i[i], b_.m256i[i]);
9553 }
9554 #else
9555 SIMDE_VECTORIZE
9556 for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
9557 r_.i64[i] = a_.i64[i] % b_.i64[i];
9558 }
9559 #endif
9560 #endif
9561
9562 return simde__m512i_from_private(r_);
9563 #endif
9564 }
9565 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9566 #undef _mm512_rem_epi64
9567 #define _mm512_rem_epi64(a, b) simde_mm512_rem_epi64((a), (b))
9568 #endif
9569
9570 SIMDE_FUNCTION_ATTRIBUTES
9571 simde__m512i
simde_mm512_rem_epu8(simde__m512i a,simde__m512i b)9572 simde_mm512_rem_epu8 (simde__m512i a, simde__m512i b) {
9573 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
9574 return _mm512_rem_epu8(a, b);
9575 #else
9576 simde__m512i_private
9577 r_,
9578 a_ = simde__m512i_to_private(a),
9579 b_ = simde__m512i_to_private(b);
9580
9581 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9582 r_.u8 = a_.u8 % b_.u8;
9583 #else
9584 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
9585 for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
9586 r_.m256i[i] = simde_mm256_rem_epu8(a_.m256i[i], b_.m256i[i]);
9587 }
9588 #else
9589 SIMDE_VECTORIZE
9590 for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
9591 r_.u8[i] = a_.u8[i] % b_.u8[i];
9592 }
9593 #endif
9594 #endif
9595
9596 return simde__m512i_from_private(r_);
9597 #endif
9598 }
9599 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9600 #undef _mm512_rem_epu8
9601 #define _mm512_rem_epu8(a, b) simde_mm512_rem_epu8((a), (b))
9602 #endif
9603
9604 SIMDE_FUNCTION_ATTRIBUTES
9605 simde__m512i
simde_mm512_rem_epu16(simde__m512i a,simde__m512i b)9606 simde_mm512_rem_epu16 (simde__m512i a, simde__m512i b) {
9607 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
9608 return _mm512_rem_epu16(a, b);
9609 #else
9610 simde__m512i_private
9611 r_,
9612 a_ = simde__m512i_to_private(a),
9613 b_ = simde__m512i_to_private(b);
9614
9615 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9616 r_.u16 = a_.u16 % b_.u16;
9617 #else
9618 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
9619 for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
9620 r_.m256i[i] = simde_mm256_rem_epu16(a_.m256i[i], b_.m256i[i]);
9621 }
9622 #else
9623 SIMDE_VECTORIZE
9624 for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
9625 r_.u16[i] = a_.u16[i] % b_.u16[i];
9626 }
9627 #endif
9628 #endif
9629
9630 return simde__m512i_from_private(r_);
9631 #endif
9632 }
9633 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9634 #undef _mm512_rem_epu16
9635 #define _mm512_rem_epu16(a, b) simde_mm512_rem_epu16((a), (b))
9636 #endif
9637
9638 SIMDE_FUNCTION_ATTRIBUTES
9639 simde__m512i
simde_mm512_rem_epu32(simde__m512i a,simde__m512i b)9640 simde_mm512_rem_epu32 (simde__m512i a, simde__m512i b) {
9641 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
9642 return _mm512_rem_epu32(a, b);
9643 #else
9644 simde__m512i_private
9645 r_,
9646 a_ = simde__m512i_to_private(a),
9647 b_ = simde__m512i_to_private(b);
9648
9649 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
9650 r_.u32 = a_.u32 % b_.u32;
9651 #else
9652 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
9653 for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
9654 r_.m256i[i] = simde_mm256_rem_epu32(a_.m256i[i], b_.m256i[i]);
9655 }
9656 #else
9657 SIMDE_VECTORIZE
9658 for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
9659 r_.u32[i] = a_.u32[i] % b_.u32[i];
9660 }
9661 #endif
9662 #endif
9663
9664 return simde__m512i_from_private(r_);
9665 #endif
9666 }
9667 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9668 #undef _mm512_rem_epu32
9669 #define _mm512_rem_epu32(a, b) simde_mm512_rem_epu32((a), (b))
9670 #endif
9671
9672 SIMDE_FUNCTION_ATTRIBUTES
9673 simde__m512i
simde_mm512_mask_rem_epu32(simde__m512i src,simde__mmask16 k,simde__m512i a,simde__m512i b)9674 simde_mm512_mask_rem_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
9675 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
9676 return _mm512_mask_rem_epu32(src, k, a, b);
9677 #else
9678 return simde_mm512_mask_mov_epi32(src, k, simde_mm512_rem_epu32(a, b));
9679 #endif
9680 }
9681 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9682 #undef _mm512_mask_rem_epu32
9683 #define _mm512_mask_rem_epu32(src, k, a, b) simde_mm512_mask_rem_epu32(src, k, a, b)
9684 #endif
9685
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_rem_epu64 (simde__m512i a, simde__m512i b) {
  /* Element-wise unsigned 64-bit remainder: r[i] = a[i] % b[i].
   * NOTE(review): a zero element in b is undefined behavior (C '%'). */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rem_epu64(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* GNU-style vector extensions: one vector '%' covers all lanes. */
      r_.u64 = a_.u64 % b_.u64;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
        /* Narrow targets: delegate each 256-bit half to the AVX-width helper. */
        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
          r_.m256i[i] = simde_mm256_rem_epu64(a_.m256i[i], b_.m256i[i]);
        }
      #else
        /* Scalar fallback over all 8 lanes. */
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
          r_.u64[i] = a_.u64[i] % b_.u64[i];
        }
      #endif
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rem_epu64
  #define _mm512_rem_epu64(a, b) simde_mm512_rem_epu64((a), (b))
#endif
9719
9720 SIMDE_FUNCTION_ATTRIBUTES
9721 simde__m512
simde_mm512_recip_ps(simde__m512 a)9722 simde_mm512_recip_ps (simde__m512 a) {
9723 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
9724 return _mm512_recip_ps(a);
9725 #else
9726 return simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a);
9727 #endif
9728 }
9729 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9730 #undef _mm512_recip_ps
9731 #define _mm512_recip_ps(a) simde_mm512_recip_ps(a)
9732 #endif
9733
9734 SIMDE_FUNCTION_ATTRIBUTES
9735 simde__m512d
simde_mm512_recip_pd(simde__m512d a)9736 simde_mm512_recip_pd (simde__m512d a) {
9737 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
9738 return _mm512_recip_pd(a);
9739 #else
9740 return simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a);
9741 #endif
9742 }
9743 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9744 #undef _mm512_recip_pd
9745 #define _mm512_recip_pd(a) simde_mm512_recip_pd(a)
9746 #endif
9747
9748 SIMDE_FUNCTION_ATTRIBUTES
9749 simde__m512
simde_mm512_mask_recip_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)9750 simde_mm512_mask_recip_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
9751 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
9752 return _mm512_mask_recip_ps(src, k, a);
9753 #else
9754 return simde_mm512_mask_mov_ps(src, k, simde_mm512_recip_ps(a));
9755 #endif
9756 }
9757 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9758 #undef _mm512_mask_recip_ps
9759 #define _mm512_mask_recip_ps(src, k, a) simde_mm512_mask_recip_ps(src, k, a)
9760 #endif
9761
9762 SIMDE_FUNCTION_ATTRIBUTES
9763 simde__m512d
simde_mm512_mask_recip_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)9764 simde_mm512_mask_recip_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
9765 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
9766 return _mm512_mask_recip_pd(src, k, a);
9767 #else
9768 return simde_mm512_mask_mov_pd(src, k, simde_mm512_recip_pd(a));
9769 #endif
9770 }
9771 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9772 #undef _mm512_mask_recip_pd
9773 #define _mm512_mask_recip_pd(src, k, a) simde_mm512_mask_recip_pd(src, k, a)
9774 #endif
9775
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_rint_ps (simde__m512 a) {
  /* Element-wise round-to-integral of a, using the current rounding mode
   * (via rintf in the portable path). */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rint_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_rintf16(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    /* Scalar fallback over all 16 lanes. */
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_rintf(a_.f32[i]);
    }

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rint_ps
  #define _mm512_rint_ps(a) simde_mm512_rint_ps(a)
#endif
9800
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_rint_pd (simde__m512d a) {
  /* Element-wise round-to-integral of a, using the current rounding mode
   * (via rint in the portable path). */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rint_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_rintd8(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    /* Scalar fallback over all 8 lanes. */
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_rint(a_.f64[i]);
    }

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rint_pd
  #define _mm512_rint_pd(a) simde_mm512_rint_pd(a)
#endif
9825
9826 SIMDE_FUNCTION_ATTRIBUTES
9827 simde__m512
simde_mm512_mask_rint_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)9828 simde_mm512_mask_rint_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
9829 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
9830 return _mm512_mask_rint_ps(src, k, a);
9831 #else
9832 return simde_mm512_mask_mov_ps(src, k, simde_mm512_rint_ps(a));
9833 #endif
9834 }
9835 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9836 #undef _mm512_mask_rint_ps
9837 #define _mm512_mask_rint_ps(src, k, a) simde_mm512_mask_rint_ps(src, k, a)
9838 #endif
9839
9840 SIMDE_FUNCTION_ATTRIBUTES
9841 simde__m512d
simde_mm512_mask_rint_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)9842 simde_mm512_mask_rint_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
9843 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
9844 return _mm512_mask_rint_pd(src, k, a);
9845 #else
9846 return simde_mm512_mask_mov_pd(src, k, simde_mm512_rint_pd(a));
9847 #endif
9848 }
9849 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9850 #undef _mm512_mask_rint_pd
9851 #define _mm512_mask_rint_pd(src, k, a) simde_mm512_mask_rint_pd(src, k, a)
9852 #endif
9853
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_sin_ps (simde__m128 a) {
  /* Element-wise single-precision sine of a (radians).
   * Dispatch: native SVML -> Sleef -> scalar sinf fallback. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_sin_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    /* Sleef suffixes: _u10 = higher accuracy, _u35 = faster, lower accuracy. */
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sinf4_u10(a);
    #else
      return Sleef_sinf4_u35(a);
    #endif
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_sinf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_sin_ps
  #define _mm_sin_ps(a) simde_mm_sin_ps(a)
#endif
9882
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_sin_pd (simde__m128d a) {
  /* Element-wise double-precision sine of a (radians).
   * Dispatch: native SVML -> Sleef -> scalar sin fallback. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_sin_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    /* Sleef suffixes: _u10 = higher accuracy, _u35 = faster, lower accuracy. */
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sind2_u10(a);
    #else
      return Sleef_sind2_u35(a);
    #endif
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_sin(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_sin_pd
  #define _mm_sin_pd(a) simde_mm_sin_pd(a)
#endif
9911
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_sin_ps (simde__m256 a) {
  /* Element-wise single-precision sine of a (radians).
   * Dispatch: native SVML -> Sleef -> 128-bit halves or scalar fallback. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_sin_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    /* Sleef suffixes: _u10 = higher accuracy, _u35 = faster, lower accuracy. */
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sinf8_u10(a);
    #else
      return Sleef_sinf8_u35(a);
    #endif
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      /* Narrow targets: process as two 128-bit halves. */
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_sin_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_sinf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_sin_ps
  #define _mm256_sin_ps(a) simde_mm256_sin_ps(a)
#endif
9946
9947
9948 SIMDE_FUNCTION_ATTRIBUTES
9949 simde__m256d
simde_mm256_sin_pd(simde__m256d a)9950 simde_mm256_sin_pd (simde__m256d a) {
9951 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
9952 return _mm256_sin_pd(a);
9953 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
9954 #if SIMDE_ACCURACY_PREFERENCE > 1
9955 return Sleef_sind4_u10(a);
9956 #else
9957 return Sleef_sind4_u35(a);
9958 #endif
9959 #else
9960 simde__m256d_private
9961 r_,
9962 a_ = simde__m256d_to_private(a);
9963
9964 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
9965 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
9966 r_.m128d[i] = simde_mm_sin_pd(a_.m128d[i]);
9967 }
9968 #else
9969 SIMDE_VECTORIZE
9970 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
9971 r_.f64[i] = simde_math_sin(a_.f64[i]);
9972 }
9973 #endif
9974
9975 return simde__m256d_from_private(r_);
9976 #endif
9977 }
9978 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
9979 #undef _mm256_sin_pd
9980 #define _mm256_sin_pd(a) simde_mm256_sin_pd(a)
9981 #endif
9982
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_sin_ps (simde__m512 a) {
  /* Element-wise single-precision sine of a (radians).
   * Dispatch: native SVML -> Sleef -> 256-bit halves or scalar fallback. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_sin_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    /* Sleef suffixes: _u10 = higher accuracy, _u35 = faster, lower accuracy. */
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sinf16_u10(a);
    #else
      return Sleef_sinf16_u35(a);
    #endif
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      /* Narrow targets: process as two 256-bit halves. */
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_sin_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_sinf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_sin_ps
  #define _mm512_sin_ps(a) simde_mm512_sin_ps(a)
#endif
10017
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_sin_pd (simde__m512d a) {
  /* Element-wise double-precision sine of a (radians).
   * Dispatch: native SVML -> Sleef -> 256-bit halves or scalar fallback. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_sin_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    /* Sleef suffixes: _u10 = higher accuracy, _u35 = faster, lower accuracy. */
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sind8_u10(a);
    #else
      return Sleef_sind8_u35(a);
    #endif
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      /* Narrow targets: process as two 256-bit halves. */
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_sin_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_sin(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_sin_pd
  #define _mm512_sin_pd(a) simde_mm512_sin_pd(a)
#endif
10052
10053 SIMDE_FUNCTION_ATTRIBUTES
10054 simde__m512
simde_mm512_mask_sin_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)10055 simde_mm512_mask_sin_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
10056 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
10057 return _mm512_mask_sin_ps(src, k, a);
10058 #else
10059 return simde_mm512_mask_mov_ps(src, k, simde_mm512_sin_ps(a));
10060 #endif
10061 }
10062 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
10063 #undef _mm512_mask_sin_ps
10064 #define _mm512_mask_sin_ps(src, k, a) simde_mm512_mask_sin_ps(src, k, a)
10065 #endif
10066
10067 SIMDE_FUNCTION_ATTRIBUTES
10068 simde__m512d
simde_mm512_mask_sin_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)10069 simde_mm512_mask_sin_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
10070 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
10071 return _mm512_mask_sin_pd(src, k, a);
10072 #else
10073 return simde_mm512_mask_mov_pd(src, k, simde_mm512_sin_pd(a));
10074 #endif
10075 }
10076 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
10077 #undef _mm512_mask_sin_pd
10078 #define _mm512_mask_sin_pd(src, k, a) simde_mm512_mask_sin_pd(src, k, a)
10079 #endif
10080
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_sincos_ps (simde__m128* mem_addr, simde__m128 a) {
  /* Computes both sine (returned) and cosine (stored to *mem_addr) of a.
   * mem_addr must be a valid, writable pointer. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_sincos_ps(HEDLEY_REINTERPRET_CAST(__m128*, mem_addr), a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    /* Sleef returns a struct: .x = sine, .y = cosine. */
    Sleef___m128_2 temp;

    #if SIMDE_ACCURACY_PREFERENCE > 1
      temp = Sleef_sincosf4_u10(a);
    #else
      temp = Sleef_sincosf4_u35(a);
    #endif

    *mem_addr = temp.y;
    return temp.x;
  #else
    /* Portable path: two independent passes over a. */
    simde__m128 r;

    r = simde_mm_sin_ps(a);
    *mem_addr = simde_mm_cos_ps(a);

    return r;
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_sincos_ps
  #define _mm_sincos_ps(mem_addr, a) simde_mm_sincos_ps((mem_addr),(a))
#endif
10110
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_sincos_pd (simde__m128d* mem_addr, simde__m128d a) {
  /* Computes both sine (returned) and cosine (stored to *mem_addr) of a.
   * mem_addr must be a valid, writable pointer. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_sincos_pd(HEDLEY_REINTERPRET_CAST(__m128d*, mem_addr), a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    /* Sleef returns a struct: .x = sine, .y = cosine. */
    Sleef___m128d_2 temp;

    #if SIMDE_ACCURACY_PREFERENCE > 1
      temp = Sleef_sincosd2_u10(a);
    #else
      temp = Sleef_sincosd2_u35(a);
    #endif

    *mem_addr = temp.y;
    return temp.x;
  #else
    /* Portable path: two independent passes over a. */
    simde__m128d r;

    r = simde_mm_sin_pd(a);
    *mem_addr = simde_mm_cos_pd(a);

    return r;
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_sincos_pd
  #define _mm_sincos_pd(mem_addr, a) simde_mm_sincos_pd((mem_addr),(a))
#endif
10140
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_sincos_ps (simde__m256* mem_addr, simde__m256 a) {
  /* Computes both sine (returned) and cosine (stored to *mem_addr) of a.
   * mem_addr must be a valid, writable pointer. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_sincos_ps(HEDLEY_REINTERPRET_CAST(__m256*, mem_addr), a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    /* Sleef returns a struct: .x = sine, .y = cosine. */
    Sleef___m256_2 temp;

    #if SIMDE_ACCURACY_PREFERENCE > 1
      temp = Sleef_sincosf8_u10(a);
    #else
      temp = Sleef_sincosf8_u35(a);
    #endif

    *mem_addr = temp.y;
    return temp.x;
  #else
    /* Portable path: two independent passes over a. */
    simde__m256 r;

    r = simde_mm256_sin_ps(a);
    *mem_addr = simde_mm256_cos_ps(a);

    return r;
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_sincos_ps
  #define _mm256_sincos_ps(mem_addr, a) simde_mm256_sincos_ps((mem_addr),(a))
#endif
10170
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_sincos_pd (simde__m256d* mem_addr, simde__m256d a) {
  /* Computes both sine (returned) and cosine (stored to *mem_addr) of a.
   * mem_addr must be a valid, writable pointer. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_sincos_pd(HEDLEY_REINTERPRET_CAST(__m256d*, mem_addr), a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    /* Sleef returns a struct: .x = sine, .y = cosine. */
    Sleef___m256d_2 temp;

    #if SIMDE_ACCURACY_PREFERENCE > 1
      temp = Sleef_sincosd4_u10(a);
    #else
      temp = Sleef_sincosd4_u35(a);
    #endif

    *mem_addr = temp.y;
    return temp.x;
  #else
    /* Portable path: two independent passes over a. */
    simde__m256d r;

    r = simde_mm256_sin_pd(a);
    *mem_addr = simde_mm256_cos_pd(a);

    return r;
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_sincos_pd
  #define _mm256_sincos_pd(mem_addr, a) simde_mm256_sincos_pd((mem_addr),(a))
#endif
10200
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_sincos_ps (simde__m512* mem_addr, simde__m512 a) {
  /* Computes both sine (returned) and cosine (stored to *mem_addr) of a.
   * mem_addr must be a valid, writable pointer. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_sincos_ps(HEDLEY_REINTERPRET_CAST(__m512*, mem_addr), a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    /* Sleef returns a struct: .x = sine, .y = cosine. */
    Sleef___m512_2 temp;

    #if SIMDE_ACCURACY_PREFERENCE > 1
      temp = Sleef_sincosf16_u10(a);
    #else
      temp = Sleef_sincosf16_u35(a);
    #endif

    *mem_addr = temp.y;
    return temp.x;
  #else
    /* Portable path: two independent passes over a. */
    simde__m512 r;

    r = simde_mm512_sin_ps(a);
    *mem_addr = simde_mm512_cos_ps(a);

    return r;
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_sincos_ps
  #define _mm512_sincos_ps(mem_addr, a) simde_mm512_sincos_ps((mem_addr),(a))
#endif
10230
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_sincos_pd (simde__m512d* mem_addr, simde__m512d a) {
  /* Computes both sine (returned) and cosine (stored to *mem_addr) of a.
   * mem_addr must be a valid, writable pointer. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_sincos_pd(HEDLEY_REINTERPRET_CAST(__m512d*, mem_addr), a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    /* Sleef returns a struct: .x = sine, .y = cosine. */
    Sleef___m512d_2 temp;

    #if SIMDE_ACCURACY_PREFERENCE > 1
      temp = Sleef_sincosd8_u10(a);
    #else
      temp = Sleef_sincosd8_u35(a);
    #endif

    *mem_addr = temp.y;
    return temp.x;
  #else
    /* Portable path: two independent passes over a. */
    simde__m512d r;

    r = simde_mm512_sin_pd(a);
    *mem_addr = simde_mm512_cos_pd(a);

    return r;
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_sincos_pd
  #define _mm512_sincos_pd(mem_addr, a) simde_mm512_sincos_pd((mem_addr),(a))
#endif
10260
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_sincos_ps(simde__m512* mem_addr, simde__m512 sin_src, simde__m512 cos_src, simde__mmask16 k, simde__m512 a) {
  /* Masked sincos: for lanes selected by k, returns sin(a) and stores
   * cos(a) to *mem_addr; unselected lanes come from sin_src / cos_src. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a);
  #else
    simde__m512 cos_res, sin_res;
    /* sincos returns sine and writes cosine through the pointer. */
    sin_res = simde_mm512_sincos_ps(&cos_res, a);
    *mem_addr = simde_mm512_mask_mov_ps(cos_src, k, cos_res);
    return simde_mm512_mask_mov_ps(sin_src, k, sin_res);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_sincos_ps
  #define _mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a) simde_mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a)
#endif
10277
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_sincos_pd(simde__m512d* mem_addr, simde__m512d sin_src, simde__m512d cos_src, simde__mmask8 k, simde__m512d a) {
  /* Masked sincos: for lanes selected by k, returns sin(a) and stores
   * cos(a) to *mem_addr; unselected lanes come from sin_src / cos_src. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a);
  #else
    simde__m512d cos_res, sin_res;
    /* sincos returns sine and writes cosine through the pointer. */
    sin_res = simde_mm512_sincos_pd(&cos_res, a);
    *mem_addr = simde_mm512_mask_mov_pd(cos_src, k, cos_res);
    return simde_mm512_mask_mov_pd(sin_src, k, sin_res);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_sincos_pd
  #define _mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a) simde_mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a)
#endif
10294
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_sind_ps (simde__m128 a) {
  /* Element-wise sine of a given in DEGREES (converted to radians first). */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_sind_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sinf4_u10(simde_x_mm_deg2rad_ps(a));
    #else
      return Sleef_sinf4_u35(simde_x_mm_deg2rad_ps(a));
    #endif
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i]));
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_sind_ps
  #define _mm_sind_ps(a) simde_mm_sind_ps(a)
#endif
10323
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_sind_pd (simde__m128d a) {
  /* Element-wise sine of a given in DEGREES (converted to radians first). */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_sind_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sind2_u10(simde_x_mm_deg2rad_pd(a));
    #else
      return Sleef_sind2_u35(simde_x_mm_deg2rad_pd(a));
    #endif
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i]));
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_sind_pd
  #define _mm_sind_pd(a) simde_mm_sind_pd(a)
#endif
10352
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_sind_ps (simde__m256 a) {
  /* Element-wise sine of a given in DEGREES (converted to radians first). */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_sind_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sinf8_u10(simde_x_mm256_deg2rad_ps(a));
    #else
      return Sleef_sinf8_u35(simde_x_mm256_deg2rad_ps(a));
    #endif
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      /* Narrow targets: process as two 128-bit halves. */
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_sind_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i]));
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_sind_ps
  #define _mm256_sind_ps(a) simde_mm256_sind_ps(a)
#endif
10387
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_sind_pd (simde__m256d a) {
  /* Element-wise sine of a given in DEGREES (converted to radians first). */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_sind_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sind4_u10(simde_x_mm256_deg2rad_pd(a));
    #else
      return Sleef_sind4_u35(simde_x_mm256_deg2rad_pd(a));
    #endif
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      /* Narrow targets: process as two 128-bit halves. */
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_sind_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i]));
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_sind_pd
  #define _mm256_sind_pd(a) simde_mm256_sind_pd(a)
#endif
10422
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_sind_ps (simde__m512 a) {
  /* Element-wise sine of a given in DEGREES (converted to radians first). */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_sind_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sinf16_u10(simde_x_mm512_deg2rad_ps(a));
    #else
      return Sleef_sinf16_u35(simde_x_mm512_deg2rad_ps(a));
    #endif
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      /* Narrow targets: process as two 256-bit halves. */
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_sind_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i]));
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_sind_ps
  #define _mm512_sind_ps(a) simde_mm512_sind_ps(a)
#endif
10457
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_sind_pd (simde__m512d a) {
  /* Element-wise sine of a given in DEGREES (converted to radians first). */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_sind_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sind8_u10(simde_x_mm512_deg2rad_pd(a));
    #else
      return Sleef_sind8_u35(simde_x_mm512_deg2rad_pd(a));
    #endif
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      /* Narrow targets: process as two 256-bit halves. */
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_sind_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i]));
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_sind_pd
  #define _mm512_sind_pd(a) simde_mm512_sind_pd(a)
#endif
10492
10493
10494 SIMDE_FUNCTION_ATTRIBUTES
10495 simde__m512
simde_mm512_mask_sind_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)10496 simde_mm512_mask_sind_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
10497 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
10498 return _mm512_mask_sind_ps(src, k, a);
10499 #else
10500 return simde_mm512_mask_mov_ps(src, k, simde_mm512_sind_ps(a));
10501 #endif
10502 }
10503 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
10504 #undef _mm512_mask_sind_ps
10505 #define _mm512_mask_sind_ps(src, k, a) simde_mm512_mask_sind_ps(src, k, a)
10506 #endif
10507
10508 SIMDE_FUNCTION_ATTRIBUTES
10509 simde__m512d
simde_mm512_mask_sind_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)10510 simde_mm512_mask_sind_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
10511 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
10512 return _mm512_mask_sind_pd(src, k, a);
10513 #else
10514 return simde_mm512_mask_mov_pd(src, k, simde_mm512_sind_pd(a));
10515 #endif
10516 }
10517 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
10518 #undef _mm512_mask_sind_pd
10519 #define _mm512_mask_sind_pd(src, k, a) simde_mm512_mask_sind_pd(src, k, a)
10520 #endif
10521
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_sinh_ps (simde__m128 a) {
  /* Element-wise single-precision hyperbolic sine of a. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_sinh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_sinhf4_u10(a);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_sinhf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_sinh_ps
  #define _mm_sinh_ps(a) simde_mm_sinh_ps(a)
#endif
10546
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_sinh_pd (simde__m128d a) {
  /* Element-wise double-precision hyperbolic sine of a. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_sinh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_sinhd2_u10(a);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_sinh(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_sinh_pd
  #define _mm_sinh_pd(a) simde_mm_sinh_pd(a)
#endif
10571
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_sinh_ps (simde__m256 a) {
  /* Element-wise single-precision hyperbolic sine of a. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_sinh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_sinhf8_u10(a);
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      /* Narrow targets: process as two 128-bit halves. */
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_sinh_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_sinhf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_sinh_ps
  #define _mm256_sinh_ps(a) simde_mm256_sinh_ps(a)
#endif
10602
10603
/* Hyperbolic sine of each of the four double lanes of a.
 * Dispatch: SVML intrinsic -> Sleef -> split into 128-bit halves ->
 * scalar simde_math_sinh fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_sinh_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_sinh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_sinhd4_u10(a);
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_sinh_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_sinh(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_sinh_pd
  #define _mm256_sinh_pd(a) simde_mm256_sinh_pd(a)
#endif
10634
/* Hyperbolic sine of each of the sixteen float lanes of a.
 * Dispatch: SVML intrinsic -> Sleef -> split into 256-bit halves (when the
 * natural vector size is <= 256 bits) -> scalar simde_math_sinhf fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_sinh_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_sinh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_sinhf16_u10(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_sinh_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_sinhf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_sinh_ps
  #define _mm512_sinh_ps(a) simde_mm512_sinh_ps(a)
#endif
10665
/* Hyperbolic sine of each of the eight double lanes of a.
 * Dispatch: SVML intrinsic -> Sleef -> split into 256-bit halves ->
 * scalar simde_math_sinh fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_sinh_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_sinh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_sinhd8_u10(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_sinh_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_sinh(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_sinh_pd
  #define _mm512_sinh_pd(a) simde_mm512_sinh_pd(a)
#endif
10696
/* Masked sinh: lanes whose bit in k is set get sinh(a); the rest are
 * copied from src (implemented via mask_mov over the unmasked result). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_sinh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_sinh_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_sinh_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_sinh_ps
  #define _mm512_mask_sinh_ps(src, k, a) simde_mm512_mask_sinh_ps(src, k, a)
#endif
10710
/* Masked sinh (double lanes): lanes whose bit in k is set get sinh(a);
 * the rest are copied from src. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_sinh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_sinh_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_sinh_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_sinh_pd
  #define _mm512_mask_sinh_pd(src, k, a) simde_mm512_mask_sinh_pd(src, k, a)
#endif
10724
/* Per-lane ceil; the portable path reuses the SSE4.1-style round with
 * round-toward-positive-infinity. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_svml_ceil_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_ceil_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_ceilf4(a);
  #else
    return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_ceil_ps
  #define _mm_svml_ceil_ps(a) simde_mm_svml_ceil_ps(a)
#endif
10740
/* Per-lane ceil (double); portable path delegates to round with
 * round-toward-positive-infinity. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_svml_ceil_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_ceil_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_ceild2(a);
  #else
    return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_ceil_pd
  #define _mm_svml_ceil_pd(a) simde_mm_svml_ceil_pd(a)
#endif
10756
/* Per-lane ceil over eight floats; portable path delegates to
 * simde_mm256_round_ps with round-toward-positive-infinity. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_svml_ceil_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_svml_ceil_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_ceilf8(a);
  #else
    return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_ceil_ps
  #define _mm256_svml_ceil_ps(a) simde_mm256_svml_ceil_ps(a)
#endif
10772
/* Per-lane ceil over four doubles; portable path delegates to
 * simde_mm256_round_pd with round-toward-positive-infinity. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_svml_ceil_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_svml_ceil_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_ceild4(a);
  #else
    return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_ceil_pd
  #define _mm256_svml_ceil_pd(a) simde_mm256_svml_ceil_pd(a)
#endif
10788
/* Per-lane ceil over sixteen floats.
 * Dispatch: AVX-512 intrinsic -> Sleef -> split into 256-bit halves ->
 * scalar simde_math_ceilf fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_ceil_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_ceil_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_ceilf16(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_ceil_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_ceilf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_ceil_ps
  #define _mm512_ceil_ps(a) simde_mm512_ceil_ps(a)
#endif
10819
/* Per-lane ceil over eight doubles.
 * Dispatch: AVX-512 intrinsic -> Sleef -> split into 256-bit halves ->
 * scalar simde_math_ceil fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_ceil_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_ceil_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_ceild8(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_ceil_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_ceil(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_ceil_pd
  #define _mm512_ceil_pd(a) simde_mm512_ceil_pd(a)
#endif
10850
/* Masked ceil: lanes whose bit in k is set get ceil(a); the rest are
 * copied from src. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_ceil_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_ceil_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_ceil_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_ceil_ps
  #define _mm512_mask_ceil_ps(src, k, a) simde_mm512_mask_ceil_ps(src, k, a)
#endif
10864
/* Masked ceil (double lanes): lanes whose bit in k is set get ceil(a);
 * the rest are copied from src. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_ceil_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_ceil_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_ceil_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_ceil_pd
  #define _mm512_mask_ceil_pd(src, k, a) simde_mm512_mask_ceil_pd(src, k, a)
#endif
10878
/* Per-lane floor; portable path delegates to round with
 * round-toward-negative-infinity. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_svml_floor_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_floor_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_floorf4(a);
  #else
    return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_floor_ps
  #define _mm_svml_floor_ps(a) simde_mm_svml_floor_ps(a)
#endif
10894
/* Per-lane floor (double); portable path delegates to round with
 * round-toward-negative-infinity. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_svml_floor_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_floor_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_floord2(a);
  #else
    return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_floor_pd
  #define _mm_svml_floor_pd(a) simde_mm_svml_floor_pd(a)
#endif
10910
/* Per-lane floor over eight floats; portable path delegates to
 * simde_mm256_round_ps with round-toward-negative-infinity. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_svml_floor_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_svml_floor_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_floorf8(a);
  #else
    return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_floor_ps
  #define _mm256_svml_floor_ps(a) simde_mm256_svml_floor_ps(a)
#endif
10926
/* Per-lane floor over four doubles; portable path delegates to
 * simde_mm256_round_pd with round-toward-negative-infinity. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_svml_floor_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_svml_floor_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_floord4(a);
  #else
    return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_floor_pd
  #define _mm256_svml_floor_pd(a) simde_mm256_svml_floor_pd(a)
#endif
10942
/* Per-lane floor over sixteen floats.
 * Dispatch: AVX-512 intrinsic -> Sleef -> split into 256-bit halves ->
 * scalar simde_math_floorf fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_floor_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_floor_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_floorf16(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_floor_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_floorf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_floor_ps
  #define _mm512_floor_ps(a) simde_mm512_floor_ps(a)
#endif
10973
/* Per-lane floor over eight doubles.
 * Dispatch: AVX-512 intrinsic -> Sleef -> split into 256-bit halves ->
 * scalar simde_math_floor fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_floor_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_floor_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_floord8(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_floor_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_floor(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_floor_pd
  #define _mm512_floor_pd(a) simde_mm512_floor_pd(a)
#endif
11004
/* Masked floor: lanes whose bit in k is set get floor(a); the rest are
 * copied from src. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_floor_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_floor_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_floor_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_floor_ps
  #define _mm512_mask_floor_ps(src, k, a) simde_mm512_mask_floor_ps(src, k, a)
#endif
11018
/* Masked floor (double lanes): lanes whose bit in k is set get floor(a);
 * the rest are copied from src. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_floor_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_floor_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_floor_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_floor_pd
  #define _mm512_mask_floor_pd(src, k, a) simde_mm512_mask_floor_pd(src, k, a)
#endif
11032
/* Round each float lane to nearest, halfway cases away from zero
 * (C round() semantics — NOT banker's rounding, so no _mm_round_ps here).
 * Dispatch: SVML intrinsic -> Sleef -> scalar simde_math_roundf. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_svml_round_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_round_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_roundf4(a);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_roundf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_round_ps
  #define _mm_svml_round_ps(a) simde_mm_svml_round_ps(a)
#endif
11057
/* Round each double lane to nearest, halfway cases away from zero
 * (C round() semantics).
 * Dispatch: SVML intrinsic -> Sleef -> scalar simde_math_round. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_svml_round_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_round_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_roundd2(a);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_round(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_round_pd
  #define _mm_svml_round_pd(a) simde_mm_svml_round_pd(a)
#endif
11082
/* Round each of eight float lanes to nearest, halfway cases away from zero.
 * Dispatch: SVML intrinsic -> Sleef -> split into 128-bit halves ->
 * scalar simde_math_roundf fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_svml_round_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_svml_round_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_roundf8(a);
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_svml_round_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_roundf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_round_ps
  #define _mm256_svml_round_ps(a) simde_mm256_svml_round_ps(a)
#endif
11113
11114
11115 SIMDE_FUNCTION_ATTRIBUTES
11116 simde__m256d
simde_mm256_svml_round_pd(simde__m256d a)11117 simde_mm256_svml_round_pd (simde__m256d a) {
11118 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
11119 return _mm256_svml_round_pd(a);
11120 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
11121 return Sleef_roundd4(a);
11122 #else
11123 simde__m256d_private
11124 r_,
11125 a_ = simde__m256d_to_private(a);
11126
11127 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
11128 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
11129 r_.m128d[i] = simde_mm_svml_round_pd(a_.m128d[i]);
11130 }
11131 #else
11132 SIMDE_VECTORIZE
11133 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
11134 r_.f64[i] = simde_math_round(a_.f64[i]);
11135 }
11136 #endif
11137
11138 return simde__m256d_from_private(r_);
11139 #endif
11140 }
11141 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11142 #undef _mm256_svml_round_pd
11143 #define _mm256_svml_round_pd(a) simde_mm256_svml_round_pd(a)
11144 #endif
11145
/* Round each of eight double lanes to nearest, halfway cases away from zero.
 * Dispatch: SVML intrinsic -> Sleef -> split into 256-bit halves ->
 * scalar simde_math_round fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_svml_round_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_svml_round_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_roundd8(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_svml_round_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_round(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_svml_round_pd
  #define _mm512_svml_round_pd(a) simde_mm512_svml_round_pd(a)
#endif
11176
/* Masked round: lanes whose bit in k is set get round(a); the rest are
 * copied from src. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_svml_round_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_svml_round_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_svml_round_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_svml_round_pd
  #define _mm512_mask_svml_round_pd(src, k, a) simde_mm512_mask_svml_round_pd(src, k, a)
#endif
11190
/* Per-lane square root; thin wrapper over simde_mm_sqrt_ps. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_svml_sqrt_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_sqrt_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_sqrtf4(a);
  #else
    return simde_mm_sqrt_ps(a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_sqrt_ps
  #define _mm_svml_sqrt_ps(a) simde_mm_svml_sqrt_ps(a)
#endif
11206
/* Per-lane square root (double); thin wrapper over simde_mm_sqrt_pd. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_svml_sqrt_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_sqrt_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_sqrtd2(a);
  #else
    return simde_mm_sqrt_pd(a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_sqrt_pd
  #define _mm_svml_sqrt_pd(a) simde_mm_svml_sqrt_pd(a)
#endif
11222
/* Per-lane square root over eight floats; wrapper over simde_mm256_sqrt_ps. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_svml_sqrt_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_svml_sqrt_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_sqrtf8(a);
  #else
    return simde_mm256_sqrt_ps(a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_sqrt_ps
  #define _mm256_svml_sqrt_ps(a) simde_mm256_svml_sqrt_ps(a)
#endif
11238
/* Per-lane square root over four doubles; wrapper over simde_mm256_sqrt_pd. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_svml_sqrt_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_svml_sqrt_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_sqrtd4(a);
  #else
    return simde_mm256_sqrt_pd(a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_sqrt_pd
  #define _mm256_svml_sqrt_pd(a) simde_mm256_svml_sqrt_pd(a)
#endif
11254
/* Per-lane square root over sixteen floats; wrapper over simde_mm512_sqrt_ps. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_svml_sqrt_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_svml_sqrt_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_sqrtf16(a);
  #else
    return simde_mm512_sqrt_ps(a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_svml_sqrt_ps
  #define _mm512_svml_sqrt_ps(a) simde_mm512_svml_sqrt_ps(a)
#endif
11270
/* Per-lane square root over eight doubles; wrapper over simde_mm512_sqrt_pd. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_svml_sqrt_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_svml_sqrt_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_sqrtd8(a);
  #else
    return simde_mm512_sqrt_pd(a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_svml_sqrt_pd
  #define _mm512_svml_sqrt_pd(a) simde_mm512_svml_sqrt_pd(a)
#endif
11286
/* Tangent of each float lane (radians).
 * Dispatch: SVML intrinsic -> Sleef (1.0 or 3.5 ULP, chosen by
 * SIMDE_ACCURACY_PREFERENCE) -> scalar simde_math_tanf fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_tan_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_tan_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tanf4_u10(a);
    #else
      return Sleef_tanf4_u35(a);
    #endif
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_tanf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_tan_ps
  #define _mm_tan_ps(a) simde_mm_tan_ps(a)
#endif
11315
/* Tangent of each double lane (radians).
 * Dispatch: SVML intrinsic -> Sleef (accuracy chosen by
 * SIMDE_ACCURACY_PREFERENCE) -> scalar simde_math_tan fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_tan_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_tan_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tand2_u10(a);
    #else
      return Sleef_tand2_u35(a);
    #endif
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_tan(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_tan_pd
  #define _mm_tan_pd(a) simde_mm_tan_pd(a)
#endif
11344
/* Tangent of each of eight float lanes (radians).
 * Dispatch: SVML intrinsic -> Sleef -> split into 128-bit halves ->
 * scalar simde_math_tanf fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_tan_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_tan_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tanf8_u10(a);
    #else
      return Sleef_tanf8_u35(a);
    #endif
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_tan_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_tanf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_tan_ps
  #define _mm256_tan_ps(a) simde_mm256_tan_ps(a)
#endif
11379
11380
11381 SIMDE_FUNCTION_ATTRIBUTES
11382 simde__m256d
simde_mm256_tan_pd(simde__m256d a)11383 simde_mm256_tan_pd (simde__m256d a) {
11384 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
11385 return _mm256_tan_pd(a);
11386 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
11387 #if SIMDE_ACCURACY_PREFERENCE > 1
11388 return Sleef_tand4_u10(a);
11389 #else
11390 return Sleef_tand4_u35(a);
11391 #endif
11392 #else
11393 simde__m256d_private
11394 r_,
11395 a_ = simde__m256d_to_private(a);
11396
11397 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
11398 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
11399 r_.m128d[i] = simde_mm_tan_pd(a_.m128d[i]);
11400 }
11401 #else
11402 SIMDE_VECTORIZE
11403 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
11404 r_.f64[i] = simde_math_tan(a_.f64[i]);
11405 }
11406 #endif
11407
11408 return simde__m256d_from_private(r_);
11409 #endif
11410 }
11411 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11412 #undef _mm256_tan_pd
11413 #define _mm256_tan_pd(a) simde_mm256_tan_pd(a)
11414 #endif
11415
/* Tangent of each of sixteen float lanes (radians).
 * Dispatch: SVML intrinsic -> Sleef -> split into 256-bit halves ->
 * scalar simde_math_tanf fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_tan_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_tan_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tanf16_u10(a);
    #else
      return Sleef_tanf16_u35(a);
    #endif
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_tan_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_tanf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_tan_ps
  #define _mm512_tan_ps(a) simde_mm512_tan_ps(a)
#endif
11450
/* Tangent of each of eight double lanes (radians).
 * Dispatch: SVML intrinsic -> Sleef -> split into 256-bit halves ->
 * scalar simde_math_tan fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_tan_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_tan_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tand8_u10(a);
    #else
      return Sleef_tand8_u35(a);
    #endif
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_tan_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_tan(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_tan_pd
  #define _mm512_tan_pd(a) simde_mm512_tan_pd(a)
#endif
11485
/* Masked tan: lanes whose bit in k is set get tan(a); the rest are
 * copied from src. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_tan_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_tan_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_tan_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_tan_ps
  #define _mm512_mask_tan_ps(src, k, a) simde_mm512_mask_tan_ps(src, k, a)
#endif
11499
11500 SIMDE_FUNCTION_ATTRIBUTES
11501 simde__m512d
simde_mm512_mask_tan_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)11502 simde_mm512_mask_tan_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
11503 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
11504 return _mm512_mask_tan_pd(src, k, a);
11505 #else
11506 return simde_mm512_mask_mov_pd(src, k, simde_mm512_tan_pd(a));
11507 #endif
11508 }
11509 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11510 #undef _mm512_mask_tan_pd
11511 #define _mm512_mask_tan_pd(src, k, a) simde_mm512_mask_tan_pd(src, k, a)
11512 #endif
11513
11514 SIMDE_FUNCTION_ATTRIBUTES
11515 simde__m128
simde_mm_tand_ps(simde__m128 a)11516 simde_mm_tand_ps (simde__m128 a) {
11517 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
11518 return _mm_tand_ps(a);
11519 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
11520 #if SIMDE_ACCURACY_PREFERENCE > 1
11521 return Sleef_tanf4_u10(simde_x_mm_deg2rad_ps(a));
11522 #else
11523 return Sleef_tanf4_u35(simde_x_mm_deg2rad_ps(a));
11524 #endif
11525 #else
11526 simde__m128_private
11527 r_,
11528 a_ = simde__m128_to_private(a);
11529
11530 SIMDE_VECTORIZE
11531 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
11532 r_.f32[i] = simde_math_tanf(simde_math_deg2radf(a_.f32[i]));
11533 }
11534
11535 return simde__m128_from_private(r_);
11536 #endif
11537 }
11538 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11539 #undef _mm_tand_ps
11540 #define _mm_tand_ps(a) simde_mm_tand_ps(a)
11541 #endif
11542
11543 SIMDE_FUNCTION_ATTRIBUTES
11544 simde__m128d
simde_mm_tand_pd(simde__m128d a)11545 simde_mm_tand_pd (simde__m128d a) {
11546 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
11547 return _mm_tand_pd(a);
11548 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
11549 #if SIMDE_ACCURACY_PREFERENCE > 1
11550 return Sleef_tand2_u10(simde_x_mm_deg2rad_pd(a));
11551 #else
11552 return Sleef_tand2_u35(simde_x_mm_deg2rad_pd(a));
11553 #endif
11554 #else
11555 simde__m128d_private
11556 r_,
11557 a_ = simde__m128d_to_private(a);
11558
11559 SIMDE_VECTORIZE
11560 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
11561 r_.f64[i] = simde_math_tan(simde_math_deg2rad(a_.f64[i]));
11562 }
11563
11564 return simde__m128d_from_private(r_);
11565 #endif
11566 }
11567 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11568 #undef _mm_tand_pd
11569 #define _mm_tand_pd(a) simde_mm_tand_pd(a)
11570 #endif
11571
11572 SIMDE_FUNCTION_ATTRIBUTES
11573 simde__m256
simde_mm256_tand_ps(simde__m256 a)11574 simde_mm256_tand_ps (simde__m256 a) {
11575 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
11576 return _mm256_tand_ps(a);
11577 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
11578 #if SIMDE_ACCURACY_PREFERENCE > 1
11579 return Sleef_tanf8_u10(simde_x_mm256_deg2rad_ps(a));
11580 #else
11581 return Sleef_tanf8_u35(simde_x_mm256_deg2rad_ps(a));
11582 #endif
11583 #else
11584 simde__m256_private
11585 r_,
11586 a_ = simde__m256_to_private(a);
11587
11588 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
11589 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
11590 r_.m128[i] = simde_mm_tand_ps(a_.m128[i]);
11591 }
11592 #else
11593 SIMDE_VECTORIZE
11594 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
11595 r_.f32[i] = simde_math_tanf(simde_math_deg2radf(a_.f32[i]));
11596 }
11597 #endif
11598
11599 return simde__m256_from_private(r_);
11600 #endif
11601 }
11602 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11603 #undef _mm256_tand_ps
11604 #define _mm256_tand_ps(a) simde_mm256_tand_ps(a)
11605 #endif
11606
11607 SIMDE_FUNCTION_ATTRIBUTES
11608 simde__m256d
simde_mm256_tand_pd(simde__m256d a)11609 simde_mm256_tand_pd (simde__m256d a) {
11610 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
11611 return _mm256_tand_pd(a);
11612 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
11613 #if SIMDE_ACCURACY_PREFERENCE > 1
11614 return Sleef_tand4_u10(simde_x_mm256_deg2rad_pd(a));
11615 #else
11616 return Sleef_tand4_u35(simde_x_mm256_deg2rad_pd(a));
11617 #endif
11618 #else
11619 simde__m256d_private
11620 r_,
11621 a_ = simde__m256d_to_private(a);
11622
11623 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
11624 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
11625 r_.m128d[i] = simde_mm_tand_pd(a_.m128d[i]);
11626 }
11627 #else
11628 SIMDE_VECTORIZE
11629 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
11630 r_.f64[i] = simde_math_tan(simde_math_deg2rad(a_.f64[i]));
11631 }
11632 #endif
11633
11634 return simde__m256d_from_private(r_);
11635 #endif
11636 }
11637 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11638 #undef _mm256_tand_pd
11639 #define _mm256_tand_pd(a) simde_mm256_tand_pd(a)
11640 #endif
11641
11642 SIMDE_FUNCTION_ATTRIBUTES
11643 simde__m512
simde_mm512_tand_ps(simde__m512 a)11644 simde_mm512_tand_ps (simde__m512 a) {
11645 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
11646 return _mm512_tand_ps(a);
11647 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
11648 #if SIMDE_ACCURACY_PREFERENCE > 1
11649 return Sleef_tanf16_u10(simde_x_mm512_deg2rad_ps(a));
11650 #else
11651 return Sleef_tanf16_u35(simde_x_mm512_deg2rad_ps(a));
11652 #endif
11653 #else
11654 simde__m512_private
11655 r_,
11656 a_ = simde__m512_to_private(a);
11657
11658 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
11659 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
11660 r_.m256[i] = simde_mm256_tand_ps(a_.m256[i]);
11661 }
11662 #else
11663 SIMDE_VECTORIZE
11664 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
11665 r_.f32[i] = simde_math_tanf(simde_math_deg2radf(a_.f32[i]));
11666 }
11667 #endif
11668
11669 return simde__m512_from_private(r_);
11670 #endif
11671 }
11672 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11673 #undef _mm512_tand_ps
11674 #define _mm512_tand_ps(a) simde_mm512_tand_ps(a)
11675 #endif
11676
11677 SIMDE_FUNCTION_ATTRIBUTES
11678 simde__m512d
simde_mm512_tand_pd(simde__m512d a)11679 simde_mm512_tand_pd (simde__m512d a) {
11680 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
11681 return _mm512_tand_pd(a);
11682 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
11683 #if SIMDE_ACCURACY_PREFERENCE > 1
11684 return Sleef_tand8_u10(simde_x_mm512_deg2rad_pd(a));
11685 #else
11686 return Sleef_tand8_u35(simde_x_mm512_deg2rad_pd(a));
11687 #endif
11688 #else
11689 simde__m512d_private
11690 r_,
11691 a_ = simde__m512d_to_private(a);
11692
11693 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
11694 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
11695 r_.m256d[i] = simde_mm256_tand_pd(a_.m256d[i]);
11696 }
11697 #else
11698 SIMDE_VECTORIZE
11699 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
11700 r_.f64[i] = simde_math_tan(simde_math_deg2rad(a_.f64[i]));
11701 }
11702 #endif
11703
11704 return simde__m512d_from_private(r_);
11705 #endif
11706 }
11707 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11708 #undef _mm512_tand_pd
11709 #define _mm512_tand_pd(a) simde_mm512_tand_pd(a)
11710 #endif
11711
11712 SIMDE_FUNCTION_ATTRIBUTES
11713 simde__m512
simde_mm512_mask_tand_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)11714 simde_mm512_mask_tand_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
11715 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
11716 return _mm512_mask_tand_ps(src, k, a);
11717 #else
11718 return simde_mm512_mask_mov_ps(src, k, simde_mm512_tand_ps(a));
11719 #endif
11720 }
11721 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11722 #undef _mm512_mask_tand_ps
11723 #define _mm512_mask_tand_ps(src, k, a) simde_mm512_mask_tand_ps(src, k, a)
11724 #endif
11725
11726 SIMDE_FUNCTION_ATTRIBUTES
11727 simde__m512d
simde_mm512_mask_tand_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)11728 simde_mm512_mask_tand_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
11729 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
11730 return _mm512_mask_tand_pd(src, k, a);
11731 #else
11732 return simde_mm512_mask_mov_pd(src, k, simde_mm512_tand_pd(a));
11733 #endif
11734 }
11735 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11736 #undef _mm512_mask_tand_pd
11737 #define _mm512_mask_tand_pd(src, k, a) simde_mm512_mask_tand_pd(src, k, a)
11738 #endif
11739
11740 SIMDE_FUNCTION_ATTRIBUTES
11741 simde__m128
simde_mm_tanh_ps(simde__m128 a)11742 simde_mm_tanh_ps (simde__m128 a) {
11743 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
11744 return _mm_tanh_ps(a);
11745 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
11746 return Sleef_tanhf4_u10(a);
11747 #else
11748 simde__m128_private
11749 r_,
11750 a_ = simde__m128_to_private(a);
11751
11752 SIMDE_VECTORIZE
11753 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
11754 r_.f32[i] = simde_math_tanhf(a_.f32[i]);
11755 }
11756
11757 return simde__m128_from_private(r_);
11758 #endif
11759 }
11760 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11761 #undef _mm_tanh_ps
11762 #define _mm_tanh_ps(a) simde_mm_tanh_ps(a)
11763 #endif
11764
11765 SIMDE_FUNCTION_ATTRIBUTES
11766 simde__m128d
simde_mm_tanh_pd(simde__m128d a)11767 simde_mm_tanh_pd (simde__m128d a) {
11768 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
11769 return _mm_tanh_pd(a);
11770 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
11771 return Sleef_tanhd2_u10(a);
11772 #else
11773 simde__m128d_private
11774 r_,
11775 a_ = simde__m128d_to_private(a);
11776
11777 SIMDE_VECTORIZE
11778 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
11779 r_.f64[i] = simde_math_tanh(a_.f64[i]);
11780 }
11781
11782 return simde__m128d_from_private(r_);
11783 #endif
11784 }
11785 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11786 #undef _mm_tanh_pd
11787 #define _mm_tanh_pd(a) simde_mm_tanh_pd(a)
11788 #endif
11789
11790 SIMDE_FUNCTION_ATTRIBUTES
11791 simde__m256
simde_mm256_tanh_ps(simde__m256 a)11792 simde_mm256_tanh_ps (simde__m256 a) {
11793 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
11794 return _mm256_tanh_ps(a);
11795 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
11796 return Sleef_tanhf8_u10(a);
11797 #else
11798 simde__m256_private
11799 r_,
11800 a_ = simde__m256_to_private(a);
11801
11802 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
11803 for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
11804 r_.m128[i] = simde_mm_tanh_ps(a_.m128[i]);
11805 }
11806 #else
11807 SIMDE_VECTORIZE
11808 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
11809 r_.f32[i] = simde_math_tanhf(a_.f32[i]);
11810 }
11811 #endif
11812
11813 return simde__m256_from_private(r_);
11814 #endif
11815 }
11816 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11817 #undef _mm256_tanh_ps
11818 #define _mm256_tanh_ps(a) simde_mm256_tanh_ps(a)
11819 #endif
11820
11821
11822 SIMDE_FUNCTION_ATTRIBUTES
11823 simde__m256d
simde_mm256_tanh_pd(simde__m256d a)11824 simde_mm256_tanh_pd (simde__m256d a) {
11825 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
11826 return _mm256_tanh_pd(a);
11827 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
11828 return Sleef_tanhd4_u10(a);
11829 #else
11830 simde__m256d_private
11831 r_,
11832 a_ = simde__m256d_to_private(a);
11833
11834 #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
11835 for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
11836 r_.m128d[i] = simde_mm_tanh_pd(a_.m128d[i]);
11837 }
11838 #else
11839 SIMDE_VECTORIZE
11840 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
11841 r_.f64[i] = simde_math_tanh(a_.f64[i]);
11842 }
11843 #endif
11844
11845 return simde__m256d_from_private(r_);
11846 #endif
11847 }
11848 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11849 #undef _mm256_tanh_pd
11850 #define _mm256_tanh_pd(a) simde_mm256_tanh_pd(a)
11851 #endif
11852
11853 SIMDE_FUNCTION_ATTRIBUTES
11854 simde__m512
simde_mm512_tanh_ps(simde__m512 a)11855 simde_mm512_tanh_ps (simde__m512 a) {
11856 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
11857 return _mm512_tanh_ps(a);
11858 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
11859 return Sleef_tanhf16_u10(a);
11860 #else
11861 simde__m512_private
11862 r_,
11863 a_ = simde__m512_to_private(a);
11864
11865 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
11866 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
11867 r_.m256[i] = simde_mm256_tanh_ps(a_.m256[i]);
11868 }
11869 #else
11870 SIMDE_VECTORIZE
11871 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
11872 r_.f32[i] = simde_math_tanhf(a_.f32[i]);
11873 }
11874 #endif
11875
11876 return simde__m512_from_private(r_);
11877 #endif
11878 }
11879 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11880 #undef _mm512_tanh_ps
11881 #define _mm512_tanh_ps(a) simde_mm512_tanh_ps(a)
11882 #endif
11883
11884 SIMDE_FUNCTION_ATTRIBUTES
11885 simde__m512d
simde_mm512_tanh_pd(simde__m512d a)11886 simde_mm512_tanh_pd (simde__m512d a) {
11887 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
11888 return _mm512_tanh_pd(a);
11889 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
11890 return Sleef_tanhd8_u10(a);
11891 #else
11892 simde__m512d_private
11893 r_,
11894 a_ = simde__m512d_to_private(a);
11895
11896 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
11897 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
11898 r_.m256d[i] = simde_mm256_tanh_pd(a_.m256d[i]);
11899 }
11900 #else
11901 SIMDE_VECTORIZE
11902 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
11903 r_.f64[i] = simde_math_tanh(a_.f64[i]);
11904 }
11905 #endif
11906
11907 return simde__m512d_from_private(r_);
11908 #endif
11909 }
11910 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11911 #undef _mm512_tanh_pd
11912 #define _mm512_tanh_pd(a) simde_mm512_tanh_pd(a)
11913 #endif
11914
11915 SIMDE_FUNCTION_ATTRIBUTES
11916 simde__m512
simde_mm512_mask_tanh_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)11917 simde_mm512_mask_tanh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
11918 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
11919 return _mm512_mask_tanh_ps(src, k, a);
11920 #else
11921 return simde_mm512_mask_mov_ps(src, k, simde_mm512_tanh_ps(a));
11922 #endif
11923 }
11924 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11925 #undef _mm512_mask_tanh_ps
11926 #define _mm512_mask_tanh_ps(src, k, a) simde_mm512_mask_tanh_ps(src, k, a)
11927 #endif
11928
11929 SIMDE_FUNCTION_ATTRIBUTES
11930 simde__m512d
simde_mm512_mask_tanh_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)11931 simde_mm512_mask_tanh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
11932 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
11933 return _mm512_mask_tanh_pd(src, k, a);
11934 #else
11935 return simde_mm512_mask_mov_pd(src, k, simde_mm512_tanh_pd(a));
11936 #endif
11937 }
11938 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11939 #undef _mm512_mask_tanh_pd
11940 #define _mm512_mask_tanh_pd(src, k, a) simde_mm512_mask_tanh_pd(src, k, a)
11941 #endif
11942
11943 SIMDE_FUNCTION_ATTRIBUTES
11944 simde__m128
simde_mm_trunc_ps(simde__m128 a)11945 simde_mm_trunc_ps (simde__m128 a) {
11946 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
11947 return _mm_trunc_ps(a);
11948 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
11949 return Sleef_truncf4(a);
11950 #else
11951 return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_ZERO);
11952 #endif
11953 }
11954 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11955 #undef _mm_trunc_ps
11956 #define _mm_trunc_ps(a) simde_mm_trunc_ps(a)
11957 #endif
11958
11959 SIMDE_FUNCTION_ATTRIBUTES
11960 simde__m128d
simde_mm_trunc_pd(simde__m128d a)11961 simde_mm_trunc_pd (simde__m128d a) {
11962 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
11963 return _mm_trunc_pd(a);
11964 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
11965 return Sleef_truncd2(a);
11966 #else
11967 return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_ZERO);
11968 #endif
11969 }
11970 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11971 #undef _mm_trunc_pd
11972 #define _mm_trunc_pd(a) simde_mm_trunc_pd(a)
11973 #endif
11974
11975 SIMDE_FUNCTION_ATTRIBUTES
11976 simde__m256
simde_mm256_trunc_ps(simde__m256 a)11977 simde_mm256_trunc_ps (simde__m256 a) {
11978 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
11979 return _mm256_trunc_ps(a);
11980 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
11981 return Sleef_truncf8(a);
11982 #else
11983 return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_ZERO);
11984 #endif
11985 }
11986 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
11987 #undef _mm256_trunc_ps
11988 #define _mm256_trunc_ps(a) simde_mm256_trunc_ps(a)
11989 #endif
11990
11991 SIMDE_FUNCTION_ATTRIBUTES
11992 simde__m256d
simde_mm256_trunc_pd(simde__m256d a)11993 simde_mm256_trunc_pd (simde__m256d a) {
11994 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
11995 return _mm256_trunc_pd(a);
11996 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
11997 return Sleef_truncd4(a);
11998 #else
11999 return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_ZERO);
12000 #endif
12001 }
12002 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
12003 #undef _mm256_trunc_pd
12004 #define _mm256_trunc_pd(a) simde_mm256_trunc_pd(a)
12005 #endif
12006
12007 SIMDE_FUNCTION_ATTRIBUTES
12008 simde__m512
simde_mm512_trunc_ps(simde__m512 a)12009 simde_mm512_trunc_ps (simde__m512 a) {
12010 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
12011 return _mm512_trunc_ps(a);
12012 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
12013 return Sleef_truncf16(a);
12014 #else
12015 simde__m512_private
12016 r_,
12017 a_ = simde__m512_to_private(a);
12018
12019 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
12020 for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
12021 r_.m256[i] = simde_mm256_trunc_ps(a_.m256[i]);
12022 }
12023 #else
12024 SIMDE_VECTORIZE
12025 for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
12026 r_.f32[i] = simde_math_truncf(a_.f32[i]);
12027 }
12028 #endif
12029
12030 return simde__m512_from_private(r_);
12031 #endif
12032 }
12033 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
12034 #undef _mm512_trunc_ps
12035 #define _mm512_trunc_ps(a) simde_mm512_trunc_ps(a)
12036 #endif
12037
12038 SIMDE_FUNCTION_ATTRIBUTES
12039 simde__m512d
simde_mm512_trunc_pd(simde__m512d a)12040 simde_mm512_trunc_pd (simde__m512d a) {
12041 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
12042 return _mm512_trunc_pd(a);
12043 #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
12044 return Sleef_truncd8(a);
12045 #else
12046 simde__m512d_private
12047 r_,
12048 a_ = simde__m512d_to_private(a);
12049
12050 #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
12051 for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
12052 r_.m256d[i] = simde_mm256_trunc_pd(a_.m256d[i]);
12053 }
12054 #else
12055 SIMDE_VECTORIZE
12056 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
12057 r_.f64[i] = simde_math_trunc(a_.f64[i]);
12058 }
12059 #endif
12060
12061 return simde__m512d_from_private(r_);
12062 #endif
12063 }
12064 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
12065 #undef _mm512_trunc_pd
12066 #define _mm512_trunc_pd(a) simde_mm512_trunc_pd(a)
12067 #endif
12068
12069 SIMDE_FUNCTION_ATTRIBUTES
12070 simde__m512
simde_mm512_mask_trunc_ps(simde__m512 src,simde__mmask16 k,simde__m512 a)12071 simde_mm512_mask_trunc_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
12072 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
12073 return _mm512_mask_trunc_ps(src, k, a);
12074 #else
12075 return simde_mm512_mask_mov_ps(src, k, simde_mm512_trunc_ps(a));
12076 #endif
12077 }
12078 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
12079 #undef _mm512_mask_trunc_ps
12080 #define _mm512_mask_trunc_ps(src, k, a) simde_mm512_mask_trunc_ps(src, k, a)
12081 #endif
12082
12083 SIMDE_FUNCTION_ATTRIBUTES
12084 simde__m512d
simde_mm512_mask_trunc_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)12085 simde_mm512_mask_trunc_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
12086 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
12087 return _mm512_mask_trunc_pd(src, k, a);
12088 #else
12089 return simde_mm512_mask_mov_pd(src, k, simde_mm512_trunc_pd(a));
12090 #endif
12091 }
12092 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
12093 #undef _mm512_mask_trunc_pd
12094 #define _mm512_mask_trunc_pd(src, k, a) simde_mm512_mask_trunc_pd(src, k, a)
12095 #endif
12096
12097 SIMDE_FUNCTION_ATTRIBUTES
12098 simde__m128i
simde_mm_udivrem_epi32(simde__m128i * mem_addr,simde__m128i a,simde__m128i b)12099 simde_mm_udivrem_epi32 (simde__m128i * mem_addr, simde__m128i a, simde__m128i b) {
12100 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
12101 return _mm_udivrem_epi32(mem_addr, a, b);
12102 #else
12103 simde__m128i r;
12104
12105 r = simde_mm_div_epu32(a, b);
12106 *mem_addr = simde_x_mm_sub_epu32(a, simde_x_mm_mullo_epu32(r, b));
12107
12108 return r;
12109 #endif
12110 }
12111 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
12112 #undef _mm_udivrem_epi32
12113 #define _mm_udivrem_epi32(mem_addr, a, b) simde_mm_udivrem_epi32((mem_addr),(a), (b))
12114 #endif
12115
12116 SIMDE_FUNCTION_ATTRIBUTES
12117 simde__m256i
simde_mm256_udivrem_epi32(simde__m256i * mem_addr,simde__m256i a,simde__m256i b)12118 simde_mm256_udivrem_epi32 (simde__m256i* mem_addr, simde__m256i a, simde__m256i b) {
12119 #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
12120 return _mm256_udivrem_epi32(HEDLEY_REINTERPRET_CAST(__m256i*, mem_addr), a, b);
12121 #else
12122 simde__m256i r;
12123
12124 r = simde_mm256_div_epu32(a, b);
12125 *mem_addr = simde_x_mm256_sub_epu32(a, simde_x_mm256_mullo_epu32(r, b));
12126
12127 return r;
12128 #endif
12129 }
12130 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
12131 #undef _mm256_udivrem_epi32
12132 #define _mm256_udivrem_epi32(mem_addr, a, b) simde_mm256_udivrem_epi32((mem_addr),(a), (b))
12133 #endif
12134
12135 SIMDE_END_DECLS_
12136
12137 HEDLEY_DIAGNOSTIC_POP
12138
12139 #endif /* !defined(SIMDE_X86_SVML_H) */
12140