/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 *   2020      Evan Nemerson <evan@nemerson.com>
 *   2020      Himanshi Mathur <himanshi18037@iiitd.ac.in>
 */

#if !defined(SIMDE_X86_SVML_H)
#define SIMDE_X86_SVML_H

#include "fma.h"
#include "avx2.h"
#include "avx512/abs.h"
#include "avx512/add.h"
#include "avx512/cmp.h"
#include "avx512/copysign.h"
#include "avx512/xorsign.h"
#include "avx512/div.h"
#include "avx512/fmadd.h"
#include "avx512/mov.h"
#include "avx512/mul.h"
#include "avx512/negate.h"
#include "avx512/or.h"
#include "avx512/set1.h"
#include "avx512/setone.h"
#include "avx512/setzero.h"
#include "avx512/sqrt.h"
#include "avx512/sub.h"

#include "../simde-complex.h"

#if !defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
#  define SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES
#endif

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
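/* Implementation note: every function in this file dispatches through
 * the same three tiers:
 *
 *   1. If Intel's SVML is available (SIMDE_X86_SVML_NATIVE) together
 *      with the required ISA extension, call the native intrinsic.
 *   2. Otherwise, if Sleef is enabled (SIMDE_MATH_SLEEF_ENABLE) and the
 *      ISA extension is available natively, call the corresponding
 *      Sleef vector function.  Where Sleef offers both a 1.0-ULP
 *      ("u10") and a 3.5-ULP ("u35") variant, SIMDE_ACCURACY_PREFERENCE
 *      greater than 1 selects the more accurate u10 version.
 *   3. Otherwise, fall back to a portable loop applying the scalar
 *      simde_math_* function to each lane. */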
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_acos_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_acos_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_acosf4_u10(a);
    #else
      return Sleef_acosf4_u35(a);
    #endif
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_acosf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_acos_ps
  #define _mm_acos_ps(a) simde_mm_acos_ps(a)
#endif
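
/* Illustrative usage (simde_mm_set1_ps comes from SIMDe's SSE header,
 * not from this file):
 *
 *   simde__m128 x = simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5));
 *   simde__m128 y = simde_mm_acos_ps(x);  // each lane ~= acosf(0.5f)
 */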

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_acos_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_acos_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_acosd2_u10(a);
    #else
      return Sleef_acosd2_u35(a);
    #endif
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_acos(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_acos_pd
  #define _mm_acos_pd(a) simde_mm_acos_pd(a)
#endif

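/* For the wider types, when the target's natural vector size is smaller
 * than the operation's width (SIMDE_NATURAL_VECTOR_SIZE_LE), the
 * portable fallback recurses into the narrower implementation (e.g. a
 * 256-bit acos becomes two 128-bit acos calls) instead of looping over
 * individual lanes. */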
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_acos_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_acos_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_acosf8_u10(a);
    #else
      return Sleef_acosf8_u35(a);
    #endif
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_acos_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_acosf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_acos_ps
  #define _mm256_acos_ps(a) simde_mm256_acos_ps(a)
#endif


SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_acos_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_acos_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_acosd4_u10(a);
    #else
      return Sleef_acosd4_u35(a);
    #endif
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_acos_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_acos(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_acos_pd
  #define _mm256_acos_pd(a) simde_mm256_acos_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_acos_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_acos_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_acosf16_u10(a);
    #else
      return Sleef_acosf16_u35(a);
    #endif
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_acos_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_acosf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_acos_ps
  #define _mm512_acos_ps(a) simde_mm512_acos_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_acos_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_acos_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_acosd8_u10(a);
    #else
      return Sleef_acosd8_u35(a);
    #endif
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_acos_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_acos(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_acos_pd
  #define _mm512_acos_pd(a) simde_mm512_acos_pd(a)
#endif

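/* The masked variants, absent native SVML support, compute the
 * full-width result and blend it with src under the mask k via
 * simde_mm512_mask_mov_ps/_pd, so lanes with a zero mask bit keep the
 * corresponding value from src. */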
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_acos_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_acos_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_acos_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_acos_ps
  #define _mm512_mask_acos_ps(src, k, a) simde_mm512_mask_acos_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_acos_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_acos_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_acos_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_acos_pd
  #define _mm512_mask_acos_pd(src, k, a) simde_mm512_mask_acos_pd(src, k, a)
#endif

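/* Sleef ships only 1.0-ULP ("u10") versions of the inverse hyperbolic
 * functions, so the acosh/asinh/atanh families below call them
 * unconditionally rather than consulting SIMDE_ACCURACY_PREFERENCE. */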
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_acosh_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_acosh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_acoshf4_u10(a);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_acoshf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_acosh_ps
  #define _mm_acosh_ps(a) simde_mm_acosh_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_acosh_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_acosh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_acoshd2_u10(a);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_acosh(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_acosh_pd
  #define _mm_acosh_pd(a) simde_mm_acosh_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_acosh_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_acosh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_acoshf8_u10(a);
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_acosh_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_acoshf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_acosh_ps
  #define _mm256_acosh_ps(a) simde_mm256_acosh_ps(a)
#endif


SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_acosh_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_acosh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_acoshd4_u10(a);
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_acosh_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_acosh(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_acosh_pd
  #define _mm256_acosh_pd(a) simde_mm256_acosh_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_acosh_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_acosh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_acoshf16_u10(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_acosh_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_acoshf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_acosh_ps
  #define _mm512_acosh_ps(a) simde_mm512_acosh_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_acosh_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_acosh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_acoshd8_u10(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_acosh_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_acosh(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_acosh_pd
  #define _mm512_acosh_pd(a) simde_mm512_acosh_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_acosh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_acosh_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_acosh_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_acosh_ps
  #define _mm512_mask_acosh_ps(src, k, a) simde_mm512_mask_acosh_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_acosh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_acosh_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_acosh_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_acosh_pd
  #define _mm512_mask_acosh_pd(src, k, a) simde_mm512_mask_acosh_pd(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_asin_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_asin_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_asinf4_u10(a);
    #else
      return Sleef_asinf4_u35(a);
    #endif
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_asinf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_asin_ps
  #define _mm_asin_ps(a) simde_mm_asin_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_asin_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_asin_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_asind2_u10(a);
    #else
      return Sleef_asind2_u35(a);
    #endif
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_asin(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_asin_pd
  #define _mm_asin_pd(a) simde_mm_asin_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_asin_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_asin_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_asinf8_u10(a);
    #else
      return Sleef_asinf8_u35(a);
    #endif
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_asin_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_asinf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_asin_ps
  #define _mm256_asin_ps(a) simde_mm256_asin_ps(a)
#endif


SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_asin_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_asin_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_asind4_u10(a);
    #else
      return Sleef_asind4_u35(a);
    #endif
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_asin_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_asin(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_asin_pd
  #define _mm256_asin_pd(a) simde_mm256_asin_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_asin_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_asin_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_asinf16_u10(a);
    #else
      return Sleef_asinf16_u35(a);
    #endif
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_asin_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_asinf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_asin_ps
  #define _mm512_asin_ps(a) simde_mm512_asin_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_asin_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_asin_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_asind8_u10(a);
    #else
      return Sleef_asind8_u35(a);
    #endif
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_asin_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_asin(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_asin_pd
  #define _mm512_asin_pd(a) simde_mm512_asin_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_asin_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_asin_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_asin_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_asin_ps
  #define _mm512_mask_asin_ps(src, k, a) simde_mm512_mask_asin_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_asin_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_asin_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_asin_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_asin_pd
  #define _mm512_mask_asin_pd(src, k, a) simde_mm512_mask_asin_pd(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_asinh_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_asinh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_asinhf4_u10(a);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_asinhf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_asinh_ps
  #define _mm_asinh_ps(a) simde_mm_asinh_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_asinh_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_asinh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_asinhd2_u10(a);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_asinh(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_asinh_pd
  #define _mm_asinh_pd(a) simde_mm_asinh_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_asinh_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_asinh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_asinhf8_u10(a);
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_asinh_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_asinhf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_asinh_ps
  #define _mm256_asinh_ps(a) simde_mm256_asinh_ps(a)
#endif


SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_asinh_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_asinh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_asinhd4_u10(a);
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_asinh_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_asinh(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_asinh_pd
  #define _mm256_asinh_pd(a) simde_mm256_asinh_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_asinh_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_asinh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_asinhf16_u10(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_asinh_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_asinhf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_asinh_ps
  #define _mm512_asinh_ps(a) simde_mm512_asinh_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_asinh_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_asinh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_asinhd8_u10(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_asinh_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_asinh(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_asinh_pd
  #define _mm512_asinh_pd(a) simde_mm512_asinh_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_asinh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_asinh_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_asinh_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_asinh_ps
  #define _mm512_mask_asinh_ps(src, k, a) simde_mm512_mask_asinh_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_asinh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_asinh_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_asinh_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_asinh_pd
  #define _mm512_mask_asinh_pd(src, k, a) simde_mm512_mask_asinh_pd(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_atan_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_atan_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_atanf4_u10(a);
    #else
      return Sleef_atanf4_u35(a);
    #endif
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_atanf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_atan_ps
  #define _mm_atan_ps(a) simde_mm_atan_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_atan_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_atan_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_atand2_u10(a);
    #else
      return Sleef_atand2_u35(a);
    #endif
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_atan(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_atan_pd
  #define _mm_atan_pd(a) simde_mm_atan_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_atan_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_atan_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_atanf8_u10(a);
    #else
      return Sleef_atanf8_u35(a);
    #endif
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_atan_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_atanf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_atan_ps
  #define _mm256_atan_ps(a) simde_mm256_atan_ps(a)
#endif


SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_atan_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_atan_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_atand4_u10(a);
    #else
      return Sleef_atand4_u35(a);
    #endif
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_atan_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_atan(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_atan_pd
  #define _mm256_atan_pd(a) simde_mm256_atan_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_atan_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_atan_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_atanf16_u10(a);
    #else
      return Sleef_atanf16_u35(a);
    #endif
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_atan_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_atanf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_atan_ps
  #define _mm512_atan_ps(a) simde_mm512_atan_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_atan_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_atan_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_atand8_u10(a);
    #else
      return Sleef_atand8_u35(a);
    #endif
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_atan_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_atan(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_atan_pd
  #define _mm512_atan_pd(a) simde_mm512_atan_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_atan_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_atan_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_atan_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_atan_ps
  #define _mm512_mask_atan_ps(src, k, a) simde_mm512_mask_atan_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_atan_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_atan_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_atan_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_atan_pd
  #define _mm512_mask_atan_pd(src, k, a) simde_mm512_mask_atan_pd(src, k, a)
#endif

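/* atan2 is the first two-argument function in this file; it follows the
 * same dispatch pattern, with the scalar fallback computing
 * atan2(a[i], b[i]) lane by lane. */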
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_atan2_ps (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_atan2_ps(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_atan2f4_u10(a, b);
    #else
      return Sleef_atan2f4_u35(a, b);
    #endif
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_atan2_ps
  #define _mm_atan2_ps(a, b) simde_mm_atan2_ps(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_atan2_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_atan2_pd(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_atan2d2_u10(a, b);
    #else
      return Sleef_atan2d2_u35(a, b);
    #endif
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_atan2_pd
  #define _mm_atan2_pd(a, b) simde_mm_atan2_pd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_atan2_ps (simde__m256 a, simde__m256 b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_atan2_ps(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_atan2f8_u10(a, b);
    #else
      return Sleef_atan2f8_u35(a, b);
    #endif
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a),
      b_ = simde__m256_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_atan2_ps(a_.m128[i], b_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_atan2_ps
  #define _mm256_atan2_ps(a, b) simde_mm256_atan2_ps(a, b)
#endif


SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_atan2_pd (simde__m256d a, simde__m256d b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_atan2_pd(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_atan2d4_u10(a, b);
    #else
      return Sleef_atan2d4_u35(a, b);
    #endif
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a),
      b_ = simde__m256d_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_atan2_pd(a_.m128d[i], b_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_atan2_pd
  #define _mm256_atan2_pd(a, b) simde_mm256_atan2_pd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_atan2_ps (simde__m512 a, simde__m512 b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_atan2_ps(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_atan2f16_u10(a, b);
    #else
      return Sleef_atan2f16_u35(a, b);
    #endif
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a),
      b_ = simde__m512_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_atan2_ps(a_.m256[i], b_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_atan2_ps
  #define _mm512_atan2_ps(a, b) simde_mm512_atan2_ps(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_atan2_pd (simde__m512d a, simde__m512d b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_atan2_pd(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_atan2d8_u10(a, b);
    #else
      return Sleef_atan2d8_u35(a, b);
    #endif
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a),
      b_ = simde__m512d_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_atan2_pd(a_.m256d[i], b_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_atan2_pd
  #define _mm512_atan2_pd(a, b) simde_mm512_atan2_pd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_atan2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_atan2_ps(src, k, a, b);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_atan2_ps(a, b));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_atan2_ps
  #define _mm512_mask_atan2_ps(src, k, a, b) simde_mm512_mask_atan2_ps(src, k, a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_atan2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_atan2_pd(src, k, a, b);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_atan2_pd(a, b));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_atan2_pd
  #define _mm512_mask_atan2_pd(src, k, a, b) simde_mm512_mask_atan2_pd(src, k, a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_atanh_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_atanh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_atanhf4_u10(a);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_atanhf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_atanh_ps
  #define _mm_atanh_ps(a) simde_mm_atanh_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_atanh_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_atanh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_atanhd2_u10(a);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_atanh(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_atanh_pd
  #define _mm_atanh_pd(a) simde_mm_atanh_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_atanh_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_atanh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_atanhf8_u10(a);
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_atanh_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_atanhf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_atanh_ps
  #define _mm256_atanh_ps(a) simde_mm256_atanh_ps(a)
#endif


SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_atanh_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_atanh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_atanhd4_u10(a);
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_atanh_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_atanh(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_atanh_pd
  #define _mm256_atanh_pd(a) simde_mm256_atanh_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_atanh_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_atanh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_atanhf16_u10(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_atanh_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_atanhf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_atanh_ps
  #define _mm512_atanh_ps(a) simde_mm512_atanh_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_atanh_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_atanh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_atanhd8_u10(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_atanh_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_atanh(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_atanh_pd
  #define _mm512_atanh_pd(a) simde_mm512_atanh_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_atanh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_atanh_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_atanh_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_atanh_ps
  #define _mm512_mask_atanh_ps(src, k, a) simde_mm512_mask_atanh_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_atanh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_atanh_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_atanh_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_atanh_pd
  #define _mm512_mask_atanh_pd(src, k, a) simde_mm512_mask_atanh_pd(src, k, a)
#endif

1587 SIMDE_FUNCTION_ATTRIBUTES
1588 simde__m128
simde_mm_cbrt_ps(simde__m128 a)1589 simde_mm_cbrt_ps (simde__m128 a) {
1590   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
1591     return _mm_cbrt_ps(a);
1592   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
1593     return Sleef_cbrtf4_u10(a);
1594   #else
1595     simde__m128_private
1596       r_,
1597       a_ = simde__m128_to_private(a);
1598 
1599     SIMDE_VECTORIZE
1600     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1601       r_.f32[i] = simde_math_cbrtf(a_.f32[i]);
1602     }
1603 
1604     return simde__m128_from_private(r_);
1605   #endif
1606 }
1607 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1608   #undef _mm_cbrt_ps
1609   #define _mm_cbrt_ps(a) simde_mm_cbrt_ps(a)
1610 #endif
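
/* Unlike simde_math_powf(x, 1.0f / 3.0f), which would produce NaN for
 * negative inputs, the cube root is defined on the whole real line.
 * Illustrative check:
 *
 *   simde__m128 r = simde_mm_cbrt_ps(simde_mm_set1_ps(-8.0f));
 *   // every lane holds -2.0f
 */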
1611 
1612 SIMDE_FUNCTION_ATTRIBUTES
1613 simde__m128d
1614 simde_mm_cbrt_pd (simde__m128d a) {
1615   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
1616     return _mm_cbrt_pd(a);
1617   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
1618     return Sleef_cbrtd2_u10(a);
1619   #else
1620     simde__m128d_private
1621       r_,
1622       a_ = simde__m128d_to_private(a);
1623 
1624     SIMDE_VECTORIZE
1625     for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1626       r_.f64[i] = simde_math_cbrt(a_.f64[i]);
1627     }
1628 
1629     return simde__m128d_from_private(r_);
1630   #endif
1631 }
1632 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1633   #undef _mm_cbrt_pd
1634   #define _mm_cbrt_pd(a) simde_mm_cbrt_pd(a)
1635 #endif
1636 
1637 SIMDE_FUNCTION_ATTRIBUTES
1638 simde__m256
1639 simde_mm256_cbrt_ps (simde__m256 a) {
1640   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
1641     return _mm256_cbrt_ps(a);
1642   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
1643     return Sleef_cbrtf8_u10(a);
1644   #else
1645     simde__m256_private
1646       r_,
1647       a_ = simde__m256_to_private(a);
1648 
1649     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
1650       for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
1651         r_.m128[i] = simde_mm_cbrt_ps(a_.m128[i]);
1652       }
1653     #else
1654       SIMDE_VECTORIZE
1655       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1656         r_.f32[i] = simde_math_cbrtf(a_.f32[i]);
1657       }
1658     #endif
1659 
1660     return simde__m256_from_private(r_);
1661   #endif
1662 }
1663 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1664   #undef _mm256_cbrt_ps
1665   #define _mm256_cbrt_ps(a) simde_mm256_cbrt_ps(a)
1666 #endif
1667 
1668 
1669 SIMDE_FUNCTION_ATTRIBUTES
1670 simde__m256d
1671 simde_mm256_cbrt_pd (simde__m256d a) {
1672   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
1673     return _mm256_cbrt_pd(a);
1674   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
1675     return Sleef_cbrtd4_u10(a);
1676   #else
1677     simde__m256d_private
1678       r_,
1679       a_ = simde__m256d_to_private(a);
1680 
1681     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
1682       for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
1683         r_.m128d[i] = simde_mm_cbrt_pd(a_.m128d[i]);
1684       }
1685     #else
1686       SIMDE_VECTORIZE
1687       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1688         r_.f64[i] = simde_math_cbrt(a_.f64[i]);
1689       }
1690     #endif
1691 
1692     return simde__m256d_from_private(r_);
1693   #endif
1694 }
1695 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1696   #undef _mm256_cbrt_pd
1697   #define _mm256_cbrt_pd(a) simde_mm256_cbrt_pd(a)
1698 #endif
1699 
1700 SIMDE_FUNCTION_ATTRIBUTES
1701 simde__m512
1702 simde_mm512_cbrt_ps (simde__m512 a) {
1703   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1704     return _mm512_cbrt_ps(a);
1705   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
1706     return Sleef_cbrtf16_u10(a);
1707   #else
1708     simde__m512_private
1709       r_,
1710       a_ = simde__m512_to_private(a);
1711 
1712     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
1713       for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
1714         r_.m256[i] = simde_mm256_cbrt_ps(a_.m256[i]);
1715       }
1716     #else
1717       SIMDE_VECTORIZE
1718       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1719         r_.f32[i] = simde_math_cbrtf(a_.f32[i]);
1720       }
1721     #endif
1722 
1723     return simde__m512_from_private(r_);
1724   #endif
1725 }
1726 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1727   #undef _mm512_cbrt_ps
1728   #define _mm512_cbrt_ps(a) simde_mm512_cbrt_ps(a)
1729 #endif
1730 
1731 SIMDE_FUNCTION_ATTRIBUTES
1732 simde__m512d
1733 simde_mm512_cbrt_pd (simde__m512d a) {
1734   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1735     return _mm512_cbrt_pd(a);
1736   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
1737     return Sleef_cbrtd8_u10(a);
1738   #else
1739     simde__m512d_private
1740       r_,
1741       a_ = simde__m512d_to_private(a);
1742 
1743     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
1744       for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
1745         r_.m256d[i] = simde_mm256_cbrt_pd(a_.m256d[i]);
1746       }
1747     #else
1748       SIMDE_VECTORIZE
1749       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1750         r_.f64[i] = simde_math_cbrt(a_.f64[i]);
1751       }
1752     #endif
1753 
1754     return simde__m512d_from_private(r_);
1755   #endif
1756 }
1757 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1758   #undef _mm512_cbrt_pd
1759   #define _mm512_cbrt_pd(a) simde_mm512_cbrt_pd(a)
1760 #endif
1761 
1762 SIMDE_FUNCTION_ATTRIBUTES
1763 simde__m512
1764 simde_mm512_mask_cbrt_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
1765   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1766     return _mm512_mask_cbrt_ps(src, k, a);
1767   #else
1768     return simde_mm512_mask_mov_ps(src, k, simde_mm512_cbrt_ps(a));
1769   #endif
1770 }
1771 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1772   #undef _mm512_mask_cbrt_ps
1773   #define _mm512_mask_cbrt_ps(src, k, a) simde_mm512_mask_cbrt_ps(src, k, a)
1774 #endif
1775 
1776 SIMDE_FUNCTION_ATTRIBUTES
1777 simde__m512d
1778 simde_mm512_mask_cbrt_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
1779   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1780     return _mm512_mask_cbrt_pd(src, k, a);
1781   #else
1782     return simde_mm512_mask_mov_pd(src, k, simde_mm512_cbrt_pd(a));
1783   #endif
1784 }
1785 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1786   #undef _mm512_mask_cbrt_pd
1787   #define _mm512_mask_cbrt_pd(src, k, a) simde_mm512_mask_cbrt_pd(src, k, a)
1788 #endif
1789 
1790 SIMDE_FUNCTION_ATTRIBUTES
1791 simde__m128
1792 simde_mm_cexp_ps (simde__m128 a) {
1793   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
1794     return _mm_cexp_ps(a);
1795   #else
1796     simde__m128_private
1797       r_,
1798       a_ = simde__m128_to_private(a);
1799 
1800     SIMDE_VECTORIZE
1801     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) {
1802       simde_cfloat32 val = simde_math_cexpf(SIMDE_MATH_CMPLXF(a_.f32[i], a_.f32[i+1]));
1803       r_.f32[  i  ] = simde_math_crealf(val);
1804       r_.f32[i + 1] = simde_math_cimagf(val);
1805     }
1806 
1807     return simde__m128_from_private(r_);
1808   #endif
1809 }
1810 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1811   #undef _mm_cexp_ps
1812   #define _mm_cexp_ps(a) simde_mm_cexp_ps(a)
1813 #endif
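
/* The cexp functions treat the vector as interleaved single-precision
 * complex numbers -- even lanes hold the real parts, odd lanes the
 * imaginary parts -- and compute cexp(x + iy) = e^x * (cos(y) + i*sin(y)).
 * Illustrative check (values approximate), using Euler's identity
 * e^(i*pi) = -1:
 *
 *   simde__m128 z = simde_mm_setr_ps(0.0f, SIMDE_MATH_PIF, 0.0f, SIMDE_MATH_PIF);
 *   simde__m128 r = simde_mm_cexp_ps(z);   // ~{ -1.0f, 0.0f, -1.0f, 0.0f }
 */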
1814 
1815 SIMDE_FUNCTION_ATTRIBUTES
1816 simde__m256
1817 simde_mm256_cexp_ps (simde__m256 a) {
1818   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
1819     return _mm256_cexp_ps(a);
1820   #else
1821     simde__m256_private
1822       r_,
1823       a_ = simde__m256_to_private(a);
1824 
1825     SIMDE_VECTORIZE
1826     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) {
1827       simde_cfloat32 val = simde_math_cexpf(SIMDE_MATH_CMPLXF(a_.f32[i], a_.f32[i+1]));
1828       r_.f32[  i  ] = simde_math_crealf(val);
1829       r_.f32[i + 1] = simde_math_cimagf(val);
1830     }
1831 
1832     return simde__m256_from_private(r_);
1833   #endif
1834 }
1835 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1836   #undef _mm256_cexp_ps
1837   #define _mm256_cexp_ps(a) simde_mm256_cexp_ps(a)
1838 #endif
1839 
1840 SIMDE_FUNCTION_ATTRIBUTES
1841 simde__m128
1842 simde_mm_cos_ps (simde__m128 a) {
1843   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
1844     return _mm_cos_ps(a);
1845   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
1846     #if SIMDE_ACCURACY_PREFERENCE > 1
1847       return Sleef_cosf4_u10(a);
1848     #else
1849       return Sleef_cosf4_u35(a);
1850     #endif
1851   #else
1852     simde__m128_private
1853       r_,
1854       a_ = simde__m128_to_private(a);
1855 
1856     SIMDE_VECTORIZE
1857     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1858       r_.f32[i] = simde_math_cosf(a_.f32[i]);
1859     }
1860 
1861     return simde__m128_from_private(r_);
1862   #endif
1863 }
1864 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1865   #undef _mm_cos_ps
1866   #define _mm_cos_ps(a) simde_mm_cos_ps(a)
1867 #endif
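
/* When Sleef is available, SIMDE_ACCURACY_PREFERENCE picks between its
 * 1.0-ULP ("_u10") and faster 3.5-ULP ("_u35") kernels, as in the
 * trigonometric functions above and below; the portable fallback always
 * defers to the C library.  A sketch of how a translation unit might ask
 * for the more accurate kernels (assuming the usual include layout):
 *
 *   #define SIMDE_ACCURACY_PREFERENCE 2
 *   #include "simde/x86/svml.h"
 */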
1868 
1869 SIMDE_FUNCTION_ATTRIBUTES
1870 simde__m128d
1871 simde_mm_cos_pd (simde__m128d a) {
1872   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
1873     return _mm_cos_pd(a);
1874   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
1875     #if SIMDE_ACCURACY_PREFERENCE > 1
1876       return Sleef_cosd2_u10(a);
1877     #else
1878       return Sleef_cosd2_u35(a);
1879     #endif
1880   #else
1881     simde__m128d_private
1882       r_,
1883       a_ = simde__m128d_to_private(a);
1884 
1885     SIMDE_VECTORIZE
1886     for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1887       r_.f64[i] = simde_math_cos(a_.f64[i]);
1888     }
1889 
1890     return simde__m128d_from_private(r_);
1891   #endif
1892 }
1893 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1894   #undef _mm_cos_pd
1895   #define _mm_cos_pd(a) simde_mm_cos_pd(a)
1896 #endif
1897 
1898 SIMDE_FUNCTION_ATTRIBUTES
1899 simde__m256
1900 simde_mm256_cos_ps (simde__m256 a) {
1901   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
1902     return _mm256_cos_ps(a);
1903   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
1904     #if SIMDE_ACCURACY_PREFERENCE > 1
1905       return Sleef_cosf8_u10(a);
1906     #else
1907       return Sleef_cosf8_u35(a);
1908     #endif
1909   #else
1910     simde__m256_private
1911       r_,
1912       a_ = simde__m256_to_private(a);
1913 
1914     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
1915       for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
1916         r_.m128[i] = simde_mm_cos_ps(a_.m128[i]);
1917       }
1918     #else
1919       SIMDE_VECTORIZE
1920       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1921         r_.f32[i] = simde_math_cosf(a_.f32[i]);
1922       }
1923     #endif
1924 
1925     return simde__m256_from_private(r_);
1926   #endif
1927 }
1928 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1929   #undef _mm256_cos_ps
1930   #define _mm256_cos_ps(a) simde_mm256_cos_ps(a)
1931 #endif
1932 
1933 
1934 SIMDE_FUNCTION_ATTRIBUTES
1935 simde__m256d
1936 simde_mm256_cos_pd (simde__m256d a) {
1937   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
1938     return _mm256_cos_pd(a);
1939   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
1940     #if SIMDE_ACCURACY_PREFERENCE > 1
1941       return Sleef_cosd4_u10(a);
1942     #else
1943       return Sleef_cosd4_u35(a);
1944     #endif
1945   #else
1946     simde__m256d_private
1947       r_,
1948       a_ = simde__m256d_to_private(a);
1949 
1950     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
1951       for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
1952         r_.m128d[i] = simde_mm_cos_pd(a_.m128d[i]);
1953       }
1954     #else
1955       SIMDE_VECTORIZE
1956       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
1957         r_.f64[i] = simde_math_cos(a_.f64[i]);
1958       }
1959     #endif
1960 
1961     return simde__m256d_from_private(r_);
1962   #endif
1963 }
1964 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
1965   #undef _mm256_cos_pd
1966   #define _mm256_cos_pd(a) simde_mm256_cos_pd(a)
1967 #endif
1968 
1969 SIMDE_FUNCTION_ATTRIBUTES
1970 simde__m512
1971 simde_mm512_cos_ps (simde__m512 a) {
1972   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
1973     return _mm512_cos_ps(a);
1974   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
1975     #if SIMDE_ACCURACY_PREFERENCE > 1
1976       return Sleef_cosf16_u10(a);
1977     #else
1978       return Sleef_cosf16_u35(a);
1979     #endif
1980   #else
1981     simde__m512_private
1982       r_,
1983       a_ = simde__m512_to_private(a);
1984 
1985     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
1986       for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
1987         r_.m256[i] = simde_mm256_cos_ps(a_.m256[i]);
1988       }
1989     #else
1990       SIMDE_VECTORIZE
1991       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
1992         r_.f32[i] = simde_math_cosf(a_.f32[i]);
1993       }
1994     #endif
1995 
1996     return simde__m512_from_private(r_);
1997   #endif
1998 }
1999 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2000   #undef _mm512_cos_ps
2001   #define _mm512_cos_ps(a) simde_mm512_cos_ps(a)
2002 #endif
2003 
2004 SIMDE_FUNCTION_ATTRIBUTES
2005 simde__m512d
2006 simde_mm512_cos_pd (simde__m512d a) {
2007   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
2008     return _mm512_cos_pd(a);
2009   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
2010     #if SIMDE_ACCURACY_PREFERENCE > 1
2011       return Sleef_cosd8_u10(a);
2012     #else
2013       return Sleef_cosd8_u35(a);
2014     #endif
2015   #else
2016     simde__m512d_private
2017       r_,
2018       a_ = simde__m512d_to_private(a);
2019 
2020     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
2021       for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
2022         r_.m256d[i] = simde_mm256_cos_pd(a_.m256d[i]);
2023       }
2024     #else
2025       SIMDE_VECTORIZE
2026       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
2027         r_.f64[i] = simde_math_cos(a_.f64[i]);
2028       }
2029     #endif
2030 
2031     return simde__m512d_from_private(r_);
2032   #endif
2033 }
2034 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2035   #undef _mm512_cos_pd
2036   #define _mm512_cos_pd(a) simde_mm512_cos_pd(a)
2037 #endif
2038 
2039 SIMDE_FUNCTION_ATTRIBUTES
2040 simde__m512
2041 simde_mm512_mask_cos_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
2042   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
2043     return _mm512_mask_cos_ps(src, k, a);
2044   #else
2045     return simde_mm512_mask_mov_ps(src, k, simde_mm512_cos_ps(a));
2046   #endif
2047 }
2048 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2049   #undef _mm512_mask_cos_ps
2050   #define _mm512_mask_cos_ps(src, k, a) simde_mm512_mask_cos_ps(src, k, a)
2051 #endif
2052 
2053 SIMDE_FUNCTION_ATTRIBUTES
2054 simde__m512d
2055 simde_mm512_mask_cos_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
2056   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
2057     return _mm512_mask_cos_pd(src, k, a);
2058   #else
2059     return simde_mm512_mask_mov_pd(src, k, simde_mm512_cos_pd(a));
2060   #endif
2061 }
2062 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2063   #undef _mm512_mask_cos_pd
2064   #define _mm512_mask_cos_pd(src, k, a) simde_mm512_mask_cos_pd(src, k, a)
2065 #endif
2066 
2067 SIMDE_FUNCTION_ATTRIBUTES
2068 simde__m128
2069 simde_x_mm_deg2rad_ps(simde__m128 a) {
2070   #if SIMDE_NATURAL_VECTOR_SIZE_GE(128)
2071     return simde_mm_mul_ps(a, simde_mm_set1_ps(SIMDE_MATH_PI_OVER_180F));
2072   #else
2073     simde__m128_private
2074       r_,
2075       a_ = simde__m128_to_private(a);
2076 
2077     #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
2078       r_.neon_f32 = vmulq_n_f32(a_.neon_f32, SIMDE_MATH_PI_OVER_180F);
2079     #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
2080       r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F;
2081     #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2082       const __typeof__(r_.f32) tmp = { SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F };
2083       r_.f32 = a_.f32 * tmp;
2084     #else
2085       SIMDE_VECTORIZE
2086       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
2087         r_.f32[i] = simde_math_deg2radf(a_.f32[i]);
2088       }
2089 
2090     #endif
2091     return simde__m128_from_private(r_);
2092   #endif
2093 }
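
/* simde_x_mm_deg2rad_ps is an internal helper (the simde_x_ prefix marks
 * functions with no native-intrinsic counterpart) implementing
 * rad = deg * (pi / 180).  Illustrative check:
 *
 *   simde__m128 r = simde_x_mm_deg2rad_ps(simde_mm_set1_ps(180.0f));
 *   // every lane ~= 3.14159274f (pi rounded to float)
 */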
2094 
2095 SIMDE_FUNCTION_ATTRIBUTES
2096 simde__m128d
2097 simde_x_mm_deg2rad_pd(simde__m128d a) {
2098   #if SIMDE_NATURAL_VECTOR_SIZE_GE(128)
2099     return simde_mm_mul_pd(a, simde_mm_set1_pd(SIMDE_MATH_PI_OVER_180));
2100   #else
2101     simde__m128d_private
2102       r_,
2103       a_ = simde__m128d_to_private(a);
2104 
2105     #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
2106       r_.neon_f64 = vmulq_n_f64(a_.neon_f64, SIMDE_MATH_PI_OVER_180);
2107     #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
2108       r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180;
2109     #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2110       const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 };
2111       r_.f64 = a_.f64 * tmp;
2112     #else
2113       SIMDE_VECTORIZE
2114       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
2115         r_.f64[i] = simde_math_deg2rad(a_.f64[i]);
2116       }
2117 
2118     #endif
2119     return simde__m128d_from_private(r_);
2120   #endif
2121 }
2122 
2123 SIMDE_FUNCTION_ATTRIBUTES
2124 simde__m256
2125 simde_x_mm256_deg2rad_ps(simde__m256 a) {
2126   #if SIMDE_NATURAL_VECTOR_SIZE_GE(256)
2127     return simde_mm256_mul_ps(a, simde_mm256_set1_ps(SIMDE_MATH_PI_OVER_180F));
2128   #else
2129     simde__m256_private
2130       r_,
2131       a_ = simde__m256_to_private(a);
2132 
2133     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
2134       for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
2135         r_.m128[i] = simde_x_mm_deg2rad_ps(a_.m128[i]);
2136       }
2137     #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
2138       r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F;
2139     #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2140       const __typeof__(r_.f32) tmp = {
2141         SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F,
2142         SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F
2143       };
2144       r_.f32 = a_.f32 * tmp;
2145     #else
2146       SIMDE_VECTORIZE
2147       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
2148         r_.f32[i] = simde_math_deg2radf(a_.f32[i]);
2149       }
2150 
2151     #endif
2152     return simde__m256_from_private(r_);
2153   #endif
2154 }
2155 
2156 SIMDE_FUNCTION_ATTRIBUTES
2157 simde__m256d
2158 simde_x_mm256_deg2rad_pd(simde__m256d a) {
2159   #if SIMDE_NATURAL_VECTOR_SIZE_GE(256)
2160     return simde_mm256_mul_pd(a, simde_mm256_set1_pd(SIMDE_MATH_PI_OVER_180));
2161   #else
2162     simde__m256d_private
2163       r_,
2164       a_ = simde__m256d_to_private(a);
2165 
2166     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
2167       for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
2168         r_.m128d[i] = simde_x_mm_deg2rad_pd(a_.m128d[i]);
2169       }
2170     #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
2171       r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180;
2172     #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2173       const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 };
2174       r_.f64 = a_.f64 * tmp;
2175     #else
2176       SIMDE_VECTORIZE
2177       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
2178         r_.f64[i] = simde_math_deg2rad(a_.f64[i]);
2179       }
2180 
2181     #endif
2182     return simde__m256d_from_private(r_);
2183   #endif
2184 }
2185 
2186 SIMDE_FUNCTION_ATTRIBUTES
2187 simde__m512
2188 simde_x_mm512_deg2rad_ps(simde__m512 a) {
2189   #if SIMDE_NATURAL_VECTOR_SIZE_GE(512)
2190     return simde_mm512_mul_ps(a, simde_mm512_set1_ps(SIMDE_MATH_PI_OVER_180F));
2191   #else
2192     simde__m512_private
2193       r_,
2194       a_ = simde__m512_to_private(a);
2195 
2196     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
2197       for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
2198         r_.m256[i] = simde_x_mm256_deg2rad_ps(a_.m256[i]);
2199       }
2200     #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
2201       r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F;
2202     #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2203       const __typeof__(r_.f32) tmp = {
2204         SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F,
2205         SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F,
2206         SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F,
2207         SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F
2208       };
2209       r_.f32 = a_.f32 * tmp;
2210     #else
2211       SIMDE_VECTORIZE
2212       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
2213         r_.f32[i] = simde_math_deg2radf(a_.f32[i]);
2214       }
2215 
2216     #endif
2217     return simde__m512_from_private(r_);
2218   #endif
2219 }
2220 
2221 SIMDE_FUNCTION_ATTRIBUTES
2222 simde__m512d
2223 simde_x_mm512_deg2rad_pd(simde__m512d a) {
2224   #if SIMDE_NATURAL_VECTOR_SIZE_GE(512)
2225     return simde_mm512_mul_pd(a, simde_mm512_set1_pd(SIMDE_MATH_PI_OVER_180));
2226   #else
2227     simde__m512d_private
2228       r_,
2229       a_ = simde__m512d_to_private(a);
2230 
2231     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
2232       for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
2233         r_.m256d[i] = simde_x_mm256_deg2rad_pd(a_.m256d[i]);
2234       }
2235     #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784)
2236       r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180;
2237     #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2238       const __typeof__(r_.f64) tmp = {
2239         SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180,
2240         SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180
2241       };
2242       r_.f64 = a_.f64 * tmp;
2243     #else
2244       SIMDE_VECTORIZE
2245       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
2246         r_.f64[i] = simde_math_deg2rad(a_.f64[i]);
2247       }
2248 
2249     #endif
2250     return simde__m512d_from_private(r_);
2251   #endif
2252 }
2253 
2254 SIMDE_FUNCTION_ATTRIBUTES
2255 simde__m128
2256 simde_mm_cosd_ps (simde__m128 a) {
2257   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
2258     return _mm_cosd_ps(a);
2259   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
2260     #if SIMDE_ACCURACY_PREFERENCE > 1
2261       return Sleef_cosf4_u10(simde_x_mm_deg2rad_ps(a));
2262     #else
2263       return Sleef_cosf4_u35(simde_x_mm_deg2rad_ps(a));
2264     #endif
2265   #else
2266     simde__m128_private
2267       r_,
2268       a_ = simde__m128_to_private(a);
2269 
2270     SIMDE_VECTORIZE
2271     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
2272       r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i]));
2273     }
2274 
2275     return simde__m128_from_private(r_);
2276   #endif
2277 }
2278 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2279   #undef _mm_cosd_ps
2280   #define _mm_cosd_ps(a) simde_mm_cosd_ps(a)
2281 #endif
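
/* The cosd family takes its argument in degrees; the input is converted
 * with the deg2rad helpers above before the cosine is evaluated.
 * Illustrative check:
 *
 *   simde__m128 r = simde_mm_cosd_ps(simde_mm_set1_ps(60.0f));
 *   // every lane ~= 0.5f, i.e. cos(60 degrees)
 */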
2282 
2283 SIMDE_FUNCTION_ATTRIBUTES
2284 simde__m128d
2285 simde_mm_cosd_pd (simde__m128d a) {
2286   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
2287     return _mm_cosd_pd(a);
2288   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
2289     #if SIMDE_ACCURACY_PREFERENCE > 1
2290       return Sleef_cosd2_u10(simde_x_mm_deg2rad_pd(a));
2291     #else
2292       return Sleef_cosd2_u35(simde_x_mm_deg2rad_pd(a));
2293     #endif
2294   #else
2295     simde__m128d_private
2296       r_,
2297       a_ = simde__m128d_to_private(a);
2298 
2299     SIMDE_VECTORIZE
2300     for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
2301       r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i]));
2302     }
2303 
2304     return simde__m128d_from_private(r_);
2305   #endif
2306 }
2307 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2308   #undef _mm_cosd_pd
2309   #define _mm_cosd_pd(a) simde_mm_cosd_pd(a)
2310 #endif
2311 
2312 SIMDE_FUNCTION_ATTRIBUTES
2313 simde__m256
2314 simde_mm256_cosd_ps (simde__m256 a) {
2315   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
2316     return _mm256_cosd_ps(a);
2317   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
2318     #if SIMDE_ACCURACY_PREFERENCE > 1
2319       return Sleef_cosf8_u10(simde_x_mm256_deg2rad_ps(a));
2320     #else
2321       return Sleef_cosf8_u35(simde_x_mm256_deg2rad_ps(a));
2322     #endif
2323   #else
2324     simde__m256_private
2325       r_,
2326       a_ = simde__m256_to_private(a);
2327 
2328     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
2329       for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
2330         r_.m128[i] = simde_mm_cosd_ps(a_.m128[i]);
2331       }
2332     #else
2333       SIMDE_VECTORIZE
2334       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
2335         r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i]));
2336       }
2337     #endif
2338 
2339     return simde__m256_from_private(r_);
2340   #endif
2341 }
2342 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2343   #undef _mm256_cosd_ps
2344   #define _mm256_cosd_ps(a) simde_mm256_cosd_ps(a)
2345 #endif
2346 
2347 SIMDE_FUNCTION_ATTRIBUTES
2348 simde__m256d
2349 simde_mm256_cosd_pd (simde__m256d a) {
2350   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
2351     return _mm256_cosd_pd(a);
2352   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
2353     #if SIMDE_ACCURACY_PREFERENCE > 1
2354       return Sleef_cosd4_u10(simde_x_mm256_deg2rad_pd(a));
2355     #else
2356       return Sleef_cosd4_u35(simde_x_mm256_deg2rad_pd(a));
2357     #endif
2358   #else
2359     simde__m256d_private
2360       r_,
2361       a_ = simde__m256d_to_private(a);
2362 
2363     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
2364       for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
2365         r_.m128d[i] = simde_mm_cosd_pd(a_.m128d[i]);
2366       }
2367     #else
2368       SIMDE_VECTORIZE
2369       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
2370         r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i]));
2371       }
2372     #endif
2373 
2374     return simde__m256d_from_private(r_);
2375   #endif
2376 }
2377 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2378   #undef _mm256_cosd_pd
2379   #define _mm256_cosd_pd(a) simde_mm256_cosd_pd(a)
2380 #endif
2381 
2382 SIMDE_FUNCTION_ATTRIBUTES
2383 simde__m512
2384 simde_mm512_cosd_ps (simde__m512 a) {
2385   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
2386     return _mm512_cosd_ps(a);
2387   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
2388     #if SIMDE_ACCURACY_PREFERENCE > 1
2389       return Sleef_cosf16_u10(simde_x_mm512_deg2rad_ps(a));
2390     #else
2391       return Sleef_cosf16_u35(simde_x_mm512_deg2rad_ps(a));
2392     #endif
2393   #else
2394     simde__m512_private
2395       r_,
2396       a_ = simde__m512_to_private(a);
2397 
2398     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
2399       for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
2400         r_.m256[i] = simde_mm256_cosd_ps(a_.m256[i]);
2401       }
2402     #else
2403       SIMDE_VECTORIZE
2404       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
2405         r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i]));
2406       }
2407     #endif
2408 
2409     return simde__m512_from_private(r_);
2410   #endif
2411 }
2412 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2413   #undef _mm512_cosd_ps
2414   #define _mm512_cosd_ps(a) simde_mm512_cosd_ps(a)
2415 #endif
2416 
2417 SIMDE_FUNCTION_ATTRIBUTES
2418 simde__m512d
2419 simde_mm512_cosd_pd (simde__m512d a) {
2420   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
2421     return _mm512_cosd_pd(a);
2422   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
2423     #if SIMDE_ACCURACY_PREFERENCE > 1
2424       return Sleef_cosd8_u10(simde_x_mm512_deg2rad_pd(a));
2425     #else
2426       return Sleef_cosd8_u35(simde_x_mm512_deg2rad_pd(a));
2427     #endif
2428   #else
2429     simde__m512d_private
2430       r_,
2431       a_ = simde__m512d_to_private(a);
2432 
2433     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
2434       for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
2435         r_.m256d[i] = simde_mm256_cosd_pd(a_.m256d[i]);
2436       }
2437     #else
2438       SIMDE_VECTORIZE
2439       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
2440         r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i]));
2441       }
2442     #endif
2443 
2444     return simde__m512d_from_private(r_);
2445   #endif
2446 }
2447 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2448   #undef _mm512_cosd_pd
2449   #define _mm512_cosd_pd(a) simde_mm512_cosd_pd(a)
2450 #endif
2451 
2452 SIMDE_FUNCTION_ATTRIBUTES
2453 simde__m512
2454 simde_mm512_mask_cosd_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
2455   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
2456     return _mm512_mask_cosd_ps(src, k, a);
2457   #else
2458     return simde_mm512_mask_mov_ps(src, k, simde_mm512_cosd_ps(a));
2459   #endif
2460 }
2461 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2462   #undef _mm512_mask_cosd_ps
2463   #define _mm512_mask_cosd_ps(src, k, a) simde_mm512_mask_cosd_ps(src, k, a)
2464 #endif
2465 
2466 SIMDE_FUNCTION_ATTRIBUTES
2467 simde__m512d
2468 simde_mm512_mask_cosd_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
2469   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
2470     return _mm512_mask_cosd_pd(src, k, a);
2471   #else
2472     return simde_mm512_mask_mov_pd(src, k, simde_mm512_cosd_pd(a));
2473   #endif
2474 }
2475 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2476   #undef _mm512_mask_cosd_pd
2477   #define _mm512_mask_cosd_pd(src, k, a) simde_mm512_mask_cosd_pd(src, k, a)
2478 #endif
2479 
2480 SIMDE_FUNCTION_ATTRIBUTES
2481 simde__m128
2482 simde_mm_cosh_ps (simde__m128 a) {
2483   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
2484     return _mm_cosh_ps(a);
2485   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
2486     return Sleef_coshf4_u10(a);
2487   #else
2488     simde__m128_private
2489       r_,
2490       a_ = simde__m128_to_private(a);
2491 
2492     SIMDE_VECTORIZE
2493     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
2494       r_.f32[i] = simde_math_coshf(a_.f32[i]);
2495     }
2496 
2497     return simde__m128_from_private(r_);
2498   #endif
2499 }
2500 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2501   #undef _mm_cosh_ps
2502   #define _mm_cosh_ps(a) simde_mm_cosh_ps(a)
2503 #endif
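
/* cosh(x) = (e^x + e^(-x)) / 2 grows exponentially, so the
 * single-precision variants overflow to +infinity once |x| exceeds
 * roughly 89 (about ln(2 * FLT_MAX)).  Illustrative check:
 *
 *   simde__m128 r = simde_mm_cosh_ps(simde_mm_set1_ps(1.0f));
 *   // every lane ~= 1.5430806f
 */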
2504 
2505 SIMDE_FUNCTION_ATTRIBUTES
2506 simde__m128d
2507 simde_mm_cosh_pd (simde__m128d a) {
2508   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
2509     return _mm_cosh_pd(a);
2510   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
2511     return Sleef_coshd2_u10(a);
2512   #else
2513     simde__m128d_private
2514       r_,
2515       a_ = simde__m128d_to_private(a);
2516 
2517     SIMDE_VECTORIZE
2518     for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
2519       r_.f64[i] = simde_math_cosh(a_.f64[i]);
2520     }
2521 
2522     return simde__m128d_from_private(r_);
2523   #endif
2524 }
2525 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2526   #undef _mm_cosh_pd
2527   #define _mm_cosh_pd(a) simde_mm_cosh_pd(a)
2528 #endif
2529 
2530 SIMDE_FUNCTION_ATTRIBUTES
2531 simde__m256
2532 simde_mm256_cosh_ps (simde__m256 a) {
2533   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
2534     return _mm256_cosh_ps(a);
2535   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
2536     return Sleef_coshf8_u10(a);
2537   #else
2538     simde__m256_private
2539       r_,
2540       a_ = simde__m256_to_private(a);
2541 
2542     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
2543       for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
2544         r_.m128[i] = simde_mm_cosh_ps(a_.m128[i]);
2545       }
2546     #else
2547       SIMDE_VECTORIZE
2548       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
2549         r_.f32[i] = simde_math_coshf(a_.f32[i]);
2550       }
2551     #endif
2552 
2553     return simde__m256_from_private(r_);
2554   #endif
2555 }
2556 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2557   #undef _mm256_cosh_ps
2558   #define _mm256_cosh_ps(a) simde_mm256_cosh_ps(a)
2559 #endif
2560 
2561 
2562 SIMDE_FUNCTION_ATTRIBUTES
2563 simde__m256d
2564 simde_mm256_cosh_pd (simde__m256d a) {
2565   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
2566     return _mm256_cosh_pd(a);
2567   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
2568     return Sleef_coshd4_u10(a);
2569   #else
2570     simde__m256d_private
2571       r_,
2572       a_ = simde__m256d_to_private(a);
2573 
2574     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
2575       for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
2576         r_.m128d[i] = simde_mm_cosh_pd(a_.m128d[i]);
2577       }
2578     #else
2579       SIMDE_VECTORIZE
2580       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
2581         r_.f64[i] = simde_math_cosh(a_.f64[i]);
2582       }
2583     #endif
2584 
2585     return simde__m256d_from_private(r_);
2586   #endif
2587 }
2588 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2589   #undef _mm256_cosh_pd
2590   #define _mm256_cosh_pd(a) simde_mm256_cosh_pd(a)
2591 #endif
2592 
2593 SIMDE_FUNCTION_ATTRIBUTES
2594 simde__m512
2595 simde_mm512_cosh_ps (simde__m512 a) {
2596   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
2597     return _mm512_cosh_ps(a);
2598   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
2599     return Sleef_coshf16_u10(a);
2600   #else
2601     simde__m512_private
2602       r_,
2603       a_ = simde__m512_to_private(a);
2604 
2605     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
2606       for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
2607         r_.m256[i] = simde_mm256_cosh_ps(a_.m256[i]);
2608       }
2609     #else
2610       SIMDE_VECTORIZE
2611       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
2612         r_.f32[i] = simde_math_coshf(a_.f32[i]);
2613       }
2614     #endif
2615 
2616     return simde__m512_from_private(r_);
2617   #endif
2618 }
2619 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2620   #undef _mm512_cosh_ps
2621   #define _mm512_cosh_ps(a) simde_mm512_cosh_ps(a)
2622 #endif
2623 
2624 SIMDE_FUNCTION_ATTRIBUTES
2625 simde__m512d
2626 simde_mm512_cosh_pd (simde__m512d a) {
2627   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
2628     return _mm512_cosh_pd(a);
2629   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
2630     return Sleef_coshd8_u10(a);
2631   #else
2632     simde__m512d_private
2633       r_,
2634       a_ = simde__m512d_to_private(a);
2635 
2636     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
2637       for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
2638         r_.m256d[i] = simde_mm256_cosh_pd(a_.m256d[i]);
2639       }
2640     #else
2641       SIMDE_VECTORIZE
2642       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
2643         r_.f64[i] = simde_math_cosh(a_.f64[i]);
2644       }
2645     #endif
2646 
2647     return simde__m512d_from_private(r_);
2648   #endif
2649 }
2650 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2651   #undef _mm512_cosh_pd
2652   #define _mm512_cosh_pd(a) simde_mm512_cosh_pd(a)
2653 #endif
2654 
2655 SIMDE_FUNCTION_ATTRIBUTES
2656 simde__m512
2657 simde_mm512_mask_cosh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
2658   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
2659     return _mm512_mask_cosh_ps(src, k, a);
2660   #else
2661     return simde_mm512_mask_mov_ps(src, k, simde_mm512_cosh_ps(a));
2662   #endif
2663 }
2664 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2665   #undef _mm512_mask_cosh_ps
2666   #define _mm512_mask_cosh_ps(src, k, a) simde_mm512_mask_cosh_ps(src, k, a)
2667 #endif
2668 
2669 SIMDE_FUNCTION_ATTRIBUTES
2670 simde__m512d
2671 simde_mm512_mask_cosh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
2672   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
2673     return _mm512_mask_cosh_pd(src, k, a);
2674   #else
2675     return simde_mm512_mask_mov_pd(src, k, simde_mm512_cosh_pd(a));
2676   #endif
2677 }
2678 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2679   #undef _mm512_mask_cosh_pd
2680   #define _mm512_mask_cosh_pd(src, k, a) simde_mm512_mask_cosh_pd(src, k, a)
2681 #endif
2682 
2683 SIMDE_FUNCTION_ATTRIBUTES
2684 simde__m128i
2685 simde_mm_div_epi8 (simde__m128i a, simde__m128i b) {
2686   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
2687     return _mm_div_epi8(a, b);
2688   #else
2689     simde__m128i_private
2690       r_,
2691       a_ = simde__m128i_to_private(a),
2692       b_ = simde__m128i_to_private(b);
2693 
2694     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2695       r_.i8 = a_.i8 / b_.i8;
2698     #else
2699       SIMDE_VECTORIZE
2700       for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
2701         r_.i8[i] = a_.i8[i] / b_.i8[i];
2702       }
2703     #endif
2704 
2705     return simde__m128i_from_private(r_);
2706   #endif
2707 }
2708 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2709   #undef _mm_div_epi8
2710   #define _mm_div_epi8(a, b) simde_mm_div_epi8((a), (b))
2711 #endif
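
/* The integer division family follows C's `/` operator: quotients are
 * truncated toward zero, and a zero divisor is undefined behavior, just
 * as it is for scalar integer division.  Illustrative check:
 *
 *   simde__m128i r = simde_mm_div_epi8(simde_mm_set1_epi8(-7), simde_mm_set1_epi8(2));
 *   // every lane holds -3 (truncation toward zero, not floor)
 */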
2712 
2713 SIMDE_FUNCTION_ATTRIBUTES
2714 simde__m128i
2715 simde_mm_div_epi16 (simde__m128i a, simde__m128i b) {
2716   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
2717     return _mm_div_epi16(a, b);
2718   #else
2719     simde__m128i_private
2720       r_,
2721       a_ = simde__m128i_to_private(a),
2722       b_ = simde__m128i_to_private(b);
2723 
2724     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2725       r_.i16 = a_.i16 / b_.i16;
2728     #else
2729       SIMDE_VECTORIZE
2730       for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
2731         r_.i16[i] = a_.i16[i] / b_.i16[i];
2732       }
2733     #endif
2734 
2735     return simde__m128i_from_private(r_);
2736   #endif
2737 }
2738 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2739   #undef _mm_div_epi16
2740   #define _mm_div_epi16(a, b) simde_mm_div_epi16((a), (b))
2741 #endif
2742 
2743 SIMDE_FUNCTION_ATTRIBUTES
2744 simde__m128i
2745 simde_mm_div_epi32 (simde__m128i a, simde__m128i b) {
2746   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
2747     return _mm_div_epi32(a, b);
2748   #else
2749     simde__m128i_private
2750       r_,
2751       a_ = simde__m128i_to_private(a),
2752       b_ = simde__m128i_to_private(b);
2753 
2754     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2755       r_.i32 = a_.i32 / b_.i32;
2758     #else
2759       SIMDE_VECTORIZE
2760       for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
2761         r_.i32[i] = a_.i32[i] / b_.i32[i];
2762       }
2763     #endif
2764 
2765     return simde__m128i_from_private(r_);
2766   #endif
2767 }
2768 #define simde_mm_idiv_epi32(a, b) simde_mm_div_epi32(a, b)
2769 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2770   #undef _mm_div_epi32
2771   #define _mm_div_epi32(a, b) simde_mm_div_epi32(a, b)
2772   #undef _mm_idiv_epi32
2773   #define _mm_idiv_epi32(a, b) simde_mm_div_epi32(a, b)
2774 #endif
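
/* SVML's _mm_idiv_epi32 is just another name for truncated signed 32-bit
 * division, so it is defined above as an alias of simde_mm_div_epi32.
 * Illustrative check:
 *
 *   simde__m128i r = simde_mm_idiv_epi32(simde_mm_set1_epi32(-9), simde_mm_set1_epi32(4));
 *   // every lane holds -2 (-9 / 4 truncated toward zero)
 */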
2775 
2776 SIMDE_FUNCTION_ATTRIBUTES
2777 simde__m128i
2778 simde_mm_div_epi64 (simde__m128i a, simde__m128i b) {
2779   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
2780     return _mm_div_epi64(a, b);
2781   #else
2782     simde__m128i_private
2783       r_,
2784       a_ = simde__m128i_to_private(a),
2785       b_ = simde__m128i_to_private(b);
2786 
2787     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2788       r_.i64 = a_.i64 / b_.i64;
2791     #else
2792       SIMDE_VECTORIZE
2793       for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
2794         r_.i64[i] = a_.i64[i] / b_.i64[i];
2795       }
2796     #endif
2797 
2798     return simde__m128i_from_private(r_);
2799   #endif
2800 }
2801 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2802   #undef _mm_div_epi64
2803   #define _mm_div_epi64(a, b) simde_mm_div_epi64((a), (b))
2804 #endif
2805 
2806 SIMDE_FUNCTION_ATTRIBUTES
2807 simde__m128i
2808 simde_mm_div_epu8 (simde__m128i a, simde__m128i b) {
2809   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
2810     return _mm_div_epu8(a, b);
2811   #else
2812     simde__m128i_private
2813       r_,
2814       a_ = simde__m128i_to_private(a),
2815       b_ = simde__m128i_to_private(b);
2816 
2817     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2818       r_.u8 = a_.u8 / b_.u8;
2821     #else
2822       SIMDE_VECTORIZE
2823       for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
2824         r_.u8[i] = a_.u8[i] / b_.u8[i];
2825       }
2826     #endif
2827 
2828     return simde__m128i_from_private(r_);
2829   #endif
2830 }
2831 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2832   #undef _mm_div_epu8
2833   #define _mm_div_epu8(a, b) simde_mm_div_epu8((a), (b))
2834 #endif
2835 
2836 SIMDE_FUNCTION_ATTRIBUTES
2837 simde__m128i
2838 simde_mm_div_epu16 (simde__m128i a, simde__m128i b) {
2839   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
2840     return _mm_div_epu16(a, b);
2841   #else
2842     simde__m128i_private
2843       r_,
2844       a_ = simde__m128i_to_private(a),
2845       b_ = simde__m128i_to_private(b);
2846 
2847     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2848       r_.u16 = a_.u16 / b_.u16;
2851     #else
2852       SIMDE_VECTORIZE
2853       for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
2854         r_.u16[i] = a_.u16[i] / b_.u16[i];
2855       }
2856     #endif
2857 
2858     return simde__m128i_from_private(r_);
2859   #endif
2860 }
2861 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2862   #undef _mm_div_epu16
2863   #define _mm_div_epu16(a, b) simde_mm_div_epu16((a), (b))
2864 #endif
2865 
2866 SIMDE_FUNCTION_ATTRIBUTES
2867 simde__m128i
2868 simde_mm_div_epu32 (simde__m128i a, simde__m128i b) {
2869   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
2870     return _mm_div_epu32(a, b);
2871   #else
2872     simde__m128i_private
2873       r_,
2874       a_ = simde__m128i_to_private(a),
2875       b_ = simde__m128i_to_private(b);
2876 
2877     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2878       r_.u32 = a_.u32 / b_.u32;
2881     #else
2882       SIMDE_VECTORIZE
2883       for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
2884         r_.u32[i] = a_.u32[i] / b_.u32[i];
2885       }
2886     #endif
2887 
2888     return simde__m128i_from_private(r_);
2889   #endif
2890 }
2891 #define simde_mm_udiv_epi32(a, b) simde_mm_div_epu32(a, b)
2892 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2893   #undef _mm_div_epu32
2894   #define _mm_div_epu32(a, b) simde_mm_div_epu32(a, b)
2895   #undef _mm_udiv_epi32
2896   #define _mm_udiv_epi32(a, b) simde_mm_div_epu32(a, b)
2897 #endif
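
/* _mm_udiv_epi32 is likewise an alias, for unsigned 32-bit division,
 * which differs from the signed form whenever the high bit is set.
 * Illustrative check:
 *
 *   simde__m128i a = simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, UINT32_C(0x80000000)));
 *   simde__m128i r = simde_mm_udiv_epi32(a, simde_mm_set1_epi32(2));
 *   // every lane holds 0x40000000 (the signed form would give -0x40000000)
 */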
2898 
2899 SIMDE_FUNCTION_ATTRIBUTES
2900 simde__m128i
2901 simde_mm_div_epu64 (simde__m128i a, simde__m128i b) {
2902   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
2903     return _mm_div_epu64(a, b);
2904   #else
2905     simde__m128i_private
2906       r_,
2907       a_ = simde__m128i_to_private(a),
2908       b_ = simde__m128i_to_private(b);
2909 
2910     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2911       r_.u64 = a_.u64 / b_.u64;
2914     #else
2915       SIMDE_VECTORIZE
2916       for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
2917         r_.u64[i] = a_.u64[i] / b_.u64[i];
2918       }
2919     #endif
2920 
2921     return simde__m128i_from_private(r_);
2922   #endif
2923 }
2924 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2925   #undef _mm_div_epu64
2926   #define _mm_div_epu64(a, b) simde_mm_div_epu64((a), (b))
2927 #endif
2928 
2929 SIMDE_FUNCTION_ATTRIBUTES
2930 simde__m256i
2931 simde_mm256_div_epi8 (simde__m256i a, simde__m256i b) {
2932   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
2933     return _mm256_div_epi8(a, b);
2934   #else
2935     simde__m256i_private
2936       r_,
2937       a_ = simde__m256i_to_private(a),
2938       b_ = simde__m256i_to_private(b);
2939 
2940     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2941       r_.i8 = a_.i8 / b_.i8;
2942     #else
2943       #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
2944         for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
2945           r_.m128i[i] = simde_mm_div_epi8(a_.m128i[i], b_.m128i[i]);
2946         }
2947       #else
2948         SIMDE_VECTORIZE
2949         for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
2950           r_.i8[i] = a_.i8[i] / b_.i8[i];
2951         }
2952       #endif
2953     #endif
2954 
2955     return simde__m256i_from_private(r_);
2956   #endif
2957 }
2958 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2959   #undef _mm256_div_epi8
2960   #define _mm256_div_epi8(a, b) simde_mm256_div_epi8((a), (b))
2961 #endif
2962 
2963 SIMDE_FUNCTION_ATTRIBUTES
2964 simde__m256i
2965 simde_mm256_div_epi16 (simde__m256i a, simde__m256i b) {
2966   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
2967     return _mm256_div_epi16(a, b);
2968   #else
2969     simde__m256i_private
2970       r_,
2971       a_ = simde__m256i_to_private(a),
2972       b_ = simde__m256i_to_private(b);
2973 
2974     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
2975       r_.i16 = a_.i16 / b_.i16;
2976     #else
2977       #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
2978         for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
2979           r_.m128i[i] = simde_mm_div_epi16(a_.m128i[i], b_.m128i[i]);
2980         }
2981       #else
2982         SIMDE_VECTORIZE
2983         for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
2984           r_.i16[i] = a_.i16[i] / b_.i16[i];
2985         }
2986       #endif
2987     #endif
2988 
2989     return simde__m256i_from_private(r_);
2990   #endif
2991 }
2992 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
2993   #undef _mm256_div_epi16
2994   #define _mm256_div_epi16(a, b) simde_mm256_div_epi16((a), (b))
2995 #endif
2996 
2997 SIMDE_FUNCTION_ATTRIBUTES
2998 simde__m256i
2999 simde_mm256_div_epi32 (simde__m256i a, simde__m256i b) {
3000   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3001     return _mm256_div_epi32(a, b);
3002   #else
3003     simde__m256i_private
3004       r_,
3005       a_ = simde__m256i_to_private(a),
3006       b_ = simde__m256i_to_private(b);
3007 
3008     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3009       r_.i32 = a_.i32 / b_.i32;
3010     #else
3011       #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3012         for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
3013           r_.m128i[i] = simde_mm_div_epi32(a_.m128i[i], b_.m128i[i]);
3014         }
3015       #else
3016         SIMDE_VECTORIZE
3017         for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
3018           r_.i32[i] = a_.i32[i] / b_.i32[i];
3019         }
3020       #endif
3021     #endif
3022 
3023     return simde__m256i_from_private(r_);
3024   #endif
3025 }
3026 #define simde_mm256_idiv_epi32(a, b) simde_mm256_div_epi32(a, b)
3027 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3028   #undef _mm256_div_epi32
3029   #define _mm256_div_epi32(a, b) simde_mm256_div_epi32(a, b)
3030   #undef _mm256_idiv_epi32
3031   #define _mm256_idiv_epi32(a, b) simde_mm256_div_epi32(a, b)
3032 #endif
3033 
3034 SIMDE_FUNCTION_ATTRIBUTES
3035 simde__m256i
3036 simde_mm256_div_epi64 (simde__m256i a, simde__m256i b) {
3037   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3038     return _mm256_div_epi64(a, b);
3039   #else
3040     simde__m256i_private
3041       r_,
3042       a_ = simde__m256i_to_private(a),
3043       b_ = simde__m256i_to_private(b);
3044 
3045     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3046       r_.i64 = a_.i64 / b_.i64;
3047     #else
3048       #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3049         for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
3050           r_.m128i[i] = simde_mm_div_epi64(a_.m128i[i], b_.m128i[i]);
3051         }
3052       #else
3053         SIMDE_VECTORIZE
3054         for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
3055           r_.i64[i] = a_.i64[i] / b_.i64[i];
3056         }
3057       #endif
3058     #endif
3059 
3060     return simde__m256i_from_private(r_);
3061   #endif
3062 }
3063 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3064   #undef _mm256_div_epi64
3065   #define _mm256_div_epi64(a, b) simde_mm256_div_epi64((a), (b))
3066 #endif
3067 
3068 SIMDE_FUNCTION_ATTRIBUTES
3069 simde__m256i
3070 simde_mm256_div_epu8 (simde__m256i a, simde__m256i b) {
3071   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3072     return _mm256_div_epu8(a, b);
3073   #else
3074     simde__m256i_private
3075       r_,
3076       a_ = simde__m256i_to_private(a),
3077       b_ = simde__m256i_to_private(b);
3078 
3079     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3080       r_.u8 = a_.u8 / b_.u8;
3081     #else
3082       #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3083         for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
3084           r_.m128i[i] = simde_mm_div_epu8(a_.m128i[i], b_.m128i[i]);
3085         }
3086       #else
3087         SIMDE_VECTORIZE
3088         for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
3089           r_.u8[i] = a_.u8[i] / b_.u8[i];
3090         }
3091       #endif
3092     #endif
3093 
3094     return simde__m256i_from_private(r_);
3095   #endif
3096 }
3097 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3098   #undef _mm256_div_epu8
3099   #define _mm256_div_epu8(a, b) simde_mm256_div_epu8((a), (b))
3100 #endif
3101 
3102 SIMDE_FUNCTION_ATTRIBUTES
3103 simde__m256i
3104 simde_mm256_div_epu16 (simde__m256i a, simde__m256i b) {
3105   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3106     return _mm256_div_epu16(a, b);
3107   #else
3108     simde__m256i_private
3109       r_,
3110       a_ = simde__m256i_to_private(a),
3111       b_ = simde__m256i_to_private(b);
3112 
3113     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3114       r_.u16 = a_.u16 / b_.u16;
3115     #else
3116       #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3117         for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
3118           r_.m128i[i] = simde_mm_div_epu16(a_.m128i[i], b_.m128i[i]);
3119         }
3120       #else
3121         SIMDE_VECTORIZE
3122         for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
3123           r_.u16[i] = a_.u16[i] / b_.u16[i];
3124         }
3125       #endif
3126     #endif
3127 
3128     return simde__m256i_from_private(r_);
3129   #endif
3130 }
3131 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3132   #undef _mm256_div_epu16
3133   #define _mm256_div_epu16(a, b) simde_mm256_div_epu16((a), (b))
3134 #endif
3135 
3136 SIMDE_FUNCTION_ATTRIBUTES
3137 simde__m256i
3138 simde_mm256_div_epu32 (simde__m256i a, simde__m256i b) {
3139   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3140     return _mm256_div_epu32(a, b);
3141   #else
3142     simde__m256i_private
3143       r_,
3144       a_ = simde__m256i_to_private(a),
3145       b_ = simde__m256i_to_private(b);
3146 
3147     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3148       r_.u32 = a_.u32 / b_.u32;
3149     #else
3150       #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3151         for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
3152           r_.m128i[i] = simde_mm_div_epu32(a_.m128i[i], b_.m128i[i]);
3153         }
3154       #else
3155         SIMDE_VECTORIZE
3156         for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
3157           r_.u32[i] = a_.u32[i] / b_.u32[i];
3158         }
3159       #endif
3160     #endif
3161 
3162     return simde__m256i_from_private(r_);
3163   #endif
3164 }
3165 #define simde_mm256_udiv_epi32(a, b) simde_mm256_div_epu32(a, b)
3166 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3167   #undef _mm256_div_epu32
3168   #define _mm256_div_epu32(a, b) simde_mm256_div_epu32((a), (b))
3169   #undef _mm256_udiv_epi32
3170   #define _mm256_udiv_epi32(a, b) simde_mm256_udiv_epi32((a), (b))
3171 #endif
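
/* _mm256_udiv_epi32 is SVML's alternate spelling of unsigned 32-bit division;
 * it is kept here as a plain alias of simde_mm256_div_epu32 via the #define
 * above. */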
3172 
3173 SIMDE_FUNCTION_ATTRIBUTES
3174 simde__m256i
3175 simde_mm256_div_epu64 (simde__m256i a, simde__m256i b) {
3176   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3177     return _mm256_div_epu64(a, b);
3178   #else
3179     simde__m256i_private
3180       r_,
3181       a_ = simde__m256i_to_private(a),
3182       b_ = simde__m256i_to_private(b);
3183 
3184     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3185       r_.u64 = a_.u64 / b_.u64;
3186     #else
3187       #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3188         for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
3189           r_.m128i[i] = simde_mm_div_epu64(a_.m128i[i], b_.m128i[i]);
3190         }
3191       #else
3192         SIMDE_VECTORIZE
3193         for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
3194           r_.u64[i] = a_.u64[i] / b_.u64[i];
3195         }
3196       #endif
3197     #endif
3198 
3199     return simde__m256i_from_private(r_);
3200   #endif
3201 }
3202 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3203   #undef _mm256_div_epu64
3204   #define _mm256_div_epu64(a, b) simde_mm256_div_epu64((a), (b))
3205 #endif
3206 
3207 SIMDE_FUNCTION_ATTRIBUTES
3208 simde__m512i
3209 simde_mm512_div_epi8 (simde__m512i a, simde__m512i b) {
3210   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3211     return _mm512_div_epi8(a, b);
3212   #else
3213     simde__m512i_private
3214       r_,
3215       a_ = simde__m512i_to_private(a),
3216       b_ = simde__m512i_to_private(b);
3217 
3218     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3219       r_.i8 = a_.i8 / b_.i8;
3220     #else
3221       #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3222         for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3223           r_.m256i[i] = simde_mm256_div_epi8(a_.m256i[i], b_.m256i[i]);
3224         }
3225       #else
3226         SIMDE_VECTORIZE
3227         for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
3228           r_.i8[i] = a_.i8[i] / b_.i8[i];
3229         }
3230       #endif
3231     #endif
3232 
3233     return simde__m512i_from_private(r_);
3234   #endif
3235 }
3236 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3237   #undef _mm512_div_epi8
3238   #define _mm512_div_epi8(a, b) simde_mm512_div_epi8((a), (b))
3239 #endif
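
/* The 512-bit wrappers below follow the same cascade as the 256-bit versions
 * above: call the native SVML routine when available, else use GCC-style
 * vector division, else split into two 256-bit halves (which may themselves
 * split again), else fall back to a scalar SIMDE_VECTORIZE loop. */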
3240 
3241 SIMDE_FUNCTION_ATTRIBUTES
3242 simde__m512i
3243 simde_mm512_div_epi16 (simde__m512i a, simde__m512i b) {
3244   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3245     return _mm512_div_epi16(a, b);
3246   #else
3247     simde__m512i_private
3248       r_,
3249       a_ = simde__m512i_to_private(a),
3250       b_ = simde__m512i_to_private(b);
3251 
3252     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3253       r_.i16 = a_.i16 / b_.i16;
3254     #else
3255       #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3256         for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3257           r_.m256i[i] = simde_mm256_div_epi16(a_.m256i[i], b_.m256i[i]);
3258         }
3259       #else
3260         SIMDE_VECTORIZE
3261         for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
3262           r_.i16[i] = a_.i16[i] / b_.i16[i];
3263         }
3264       #endif
3265     #endif
3266 
3267     return simde__m512i_from_private(r_);
3268   #endif
3269 }
3270 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3271   #undef _mm512_div_epi16
3272   #define _mm512_div_epi16(a, b) simde_mm512_div_epi16((a), (b))
3273 #endif
3274 
3275 SIMDE_FUNCTION_ATTRIBUTES
3276 simde__m512i
3277 simde_mm512_div_epi32 (simde__m512i a, simde__m512i b) {
3278   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3279     return _mm512_div_epi32(a, b);
3280   #else
3281     simde__m512i_private
3282       r_,
3283       a_ = simde__m512i_to_private(a),
3284       b_ = simde__m512i_to_private(b);
3285 
3286     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3287       r_.i32 = a_.i32 / b_.i32;
3288     #else
3289       #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3290         for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3291           r_.m256i[i] = simde_mm256_div_epi32(a_.m256i[i], b_.m256i[i]);
3292         }
3293       #else
3294         SIMDE_VECTORIZE
3295         for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
3296           r_.i32[i] = a_.i32[i] / b_.i32[i];
3297         }
3298       #endif
3299     #endif
3300 
3301     return simde__m512i_from_private(r_);
3302   #endif
3303 }
3304 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3305   #undef _mm512_div_epi32
3306   #define _mm512_div_epi32(a, b) simde_mm512_div_epi32((a), (b))
3307 #endif
3308 
3309 SIMDE_FUNCTION_ATTRIBUTES
3310 simde__m512i
3311 simde_mm512_mask_div_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
3312   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3313     return _mm512_mask_div_epi32(src, k, a, b);
3314   #else
3315     return simde_mm512_mask_mov_epi32(src, k, simde_mm512_div_epi32(a, b));
3316   #endif
3317 }
3318 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3319   #undef _mm512_mask_div_epi32
3320   #define _mm512_mask_div_epi32(src, k, a, b) simde_mm512_mask_div_epi32(src, k, a, b)
3321 #endif
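
/* Selection semantics of the masked variants, as a hypothetical scalar model
 * (not part of the API): lane i of the result is a[i] / b[i] where bit i of k
 * is set, and src's lane otherwise. Note the portable fallback above divides
 * every lane before blending, so even masked-off lanes need nonzero divisors. */
#if 0
static void
mask_div_epi32_model (int32_t r[16], const int32_t src[16], simde__mmask16 k,
                      const int32_t a[16], const int32_t b[16]) {
  for (size_t i = 0 ; i < 16 ; i++) {
    r[i] = ((k >> i) & 1) ? (a[i] / b[i]) : src[i];
  }
}
#endif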
3322 
3323 SIMDE_FUNCTION_ATTRIBUTES
3324 simde__m512i
3325 simde_mm512_div_epi64 (simde__m512i a, simde__m512i b) {
3326   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3327     return _mm512_div_epi64(a, b);
3328   #else
3329     simde__m512i_private
3330       r_,
3331       a_ = simde__m512i_to_private(a),
3332       b_ = simde__m512i_to_private(b);
3333 
3334     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3335       r_.i64 = a_.i64 / b_.i64;
3336     #else
3337       #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3338         for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3339           r_.m256i[i] = simde_mm256_div_epi64(a_.m256i[i], b_.m256i[i]);
3340         }
3341       #else
3342         SIMDE_VECTORIZE
3343         for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
3344           r_.i64[i] = a_.i64[i] / b_.i64[i];
3345         }
3346       #endif
3347     #endif
3348 
3349     return simde__m512i_from_private(r_);
3350   #endif
3351 }
3352 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3353   #undef _mm512_div_epi64
3354   #define _mm512_div_epi64(a, b) simde_mm512_div_epi64((a), (b))
3355 #endif
3356 
3357 SIMDE_FUNCTION_ATTRIBUTES
3358 simde__m512i
3359 simde_mm512_div_epu8 (simde__m512i a, simde__m512i b) {
3360   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3361     return _mm512_div_epu8(a, b);
3362   #else
3363     simde__m512i_private
3364       r_,
3365       a_ = simde__m512i_to_private(a),
3366       b_ = simde__m512i_to_private(b);
3367 
3368     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3369       r_.u8 = a_.u8 / b_.u8;
3370     #else
3371       #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3372         for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3373           r_.m256i[i] = simde_mm256_div_epu8(a_.m256i[i], b_.m256i[i]);
3374         }
3375       #else
3376         SIMDE_VECTORIZE
3377         for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
3378           r_.u8[i] = a_.u8[i] / b_.u8[i];
3379         }
3380       #endif
3381     #endif
3382 
3383     return simde__m512i_from_private(r_);
3384   #endif
3385 }
3386 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3387   #undef _mm512_div_epu8
3388   #define _mm512_div_epu8(a, b) simde_mm512_div_epu8((a), (b))
3389 #endif
3390 
3391 SIMDE_FUNCTION_ATTRIBUTES
3392 simde__m512i
3393 simde_mm512_div_epu16 (simde__m512i a, simde__m512i b) {
3394   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3395     return _mm512_div_epu16(a, b);
3396   #else
3397     simde__m512i_private
3398       r_,
3399       a_ = simde__m512i_to_private(a),
3400       b_ = simde__m512i_to_private(b);
3401 
3402     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3403       r_.u16 = a_.u16 / b_.u16;
3404     #else
3405       #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3406         for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3407           r_.m256i[i] = simde_mm256_div_epu16(a_.m256i[i], b_.m256i[i]);
3408         }
3409       #else
3410         SIMDE_VECTORIZE
3411         for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
3412           r_.u16[i] = a_.u16[i] / b_.u16[i];
3413         }
3414       #endif
3415     #endif
3416 
3417     return simde__m512i_from_private(r_);
3418   #endif
3419 }
3420 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3421   #undef _mm512_div_epu16
3422   #define _mm512_div_epu16(a, b) simde_mm512_div_epu16((a), (b))
3423 #endif
3424 
3425 SIMDE_FUNCTION_ATTRIBUTES
3426 simde__m512i
3427 simde_mm512_div_epu32 (simde__m512i a, simde__m512i b) {
3428   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3429     return _mm512_div_epu32(a, b);
3430   #else
3431     simde__m512i_private
3432       r_,
3433       a_ = simde__m512i_to_private(a),
3434       b_ = simde__m512i_to_private(b);
3435 
3436     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3437       r_.u32 = a_.u32 / b_.u32;
3438     #else
3439       #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3440         for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3441           r_.m256i[i] = simde_mm256_div_epu32(a_.m256i[i], b_.m256i[i]);
3442         }
3443       #else
3444         SIMDE_VECTORIZE
3445         for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
3446           r_.u32[i] = a_.u32[i] / b_.u32[i];
3447         }
3448       #endif
3449     #endif
3450 
3451     return simde__m512i_from_private(r_);
3452   #endif
3453 }
3454 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3455   #undef _mm512_div_epu32
3456   #define _mm512_div_epu32(a, b) simde_mm512_div_epu32((a), (b))
3457 #endif
3458 
3459 SIMDE_FUNCTION_ATTRIBUTES
3460 simde__m512i
3461 simde_mm512_mask_div_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
3462   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3463     return _mm512_mask_div_epu32(src, k, a, b);
3464   #else
3465     return simde_mm512_mask_mov_epi32(src, k, simde_mm512_div_epu32(a, b));
3466   #endif
3467 }
3468 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3469   #undef _mm512_mask_div_epu32
3470   #define _mm512_mask_div_epu32(src, k, a, b) simde_mm512_mask_div_epu32(src, k, a, b)
3471 #endif
3472 
3473 SIMDE_FUNCTION_ATTRIBUTES
3474 simde__m512i
3475 simde_mm512_div_epu64 (simde__m512i a, simde__m512i b) {
3476   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3477     return _mm512_div_epu64(a, b);
3478   #else
3479     simde__m512i_private
3480       r_,
3481       a_ = simde__m512i_to_private(a),
3482       b_ = simde__m512i_to_private(b);
3483 
3484     #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
3485       r_.u64 = a_.u64 / b_.u64;
3486     #else
3487       #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3488         for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
3489           r_.m256i[i] = simde_mm256_div_epu64(a_.m256i[i], b_.m256i[i]);
3490         }
3491       #else
3492         SIMDE_VECTORIZE
3493         for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
3494           r_.u64[i] = a_.u64[i] / b_.u64[i];
3495         }
3496       #endif
3497     #endif
3498 
3499     return simde__m512i_from_private(r_);
3500   #endif
3501 }
3502 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3503   #undef _mm512_div_epu64
3504   #define _mm512_div_epu64(a, b) simde_mm512_div_epu64((a), (b))
3505 #endif
3506 
3507 SIMDE_FUNCTION_ATTRIBUTES
3508 simde__m128
3509 simde_mm_erf_ps (simde__m128 a) {
3510   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
3511     return _mm_erf_ps(a);
3512   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
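    /* Sleef encodes its accuracy bound in the suffix, in tenths of a ULP:
     * _u10 means results within 1.0 ULP (the erfc variants below use _u15). */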
3513     return Sleef_erff4_u10(a);
3514   #else
3515     simde__m128_private
3516       r_,
3517       a_ = simde__m128_to_private(a);
3518 
3519     SIMDE_VECTORIZE
3520     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3521       r_.f32[i] = simde_math_erff(a_.f32[i]);
3522     }
3523 
3524     return simde__m128_from_private(r_);
3525   #endif
3526 }
3527 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3528   #undef _mm_erf_ps
3529   #define _mm_erf_ps(a) simde_mm_erf_ps(a)
3530 #endif
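
/* For reference, erf(x) = (2/sqrt(pi)) * integral from 0 to x of exp(-t*t) dt.
 * The portable fallbacks in this group defer to simde_math_erff/simde_math_erf,
 * which normally resolve to the C library's erff/erf. */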
3531 
3532 SIMDE_FUNCTION_ATTRIBUTES
3533 simde__m128d
3534 simde_mm_erf_pd (simde__m128d a) {
3535   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
3536     return _mm_erf_pd(a);
3537   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
3538     return Sleef_erfd2_u10(a);
3539   #else
3540     simde__m128d_private
3541       r_,
3542       a_ = simde__m128d_to_private(a);
3543 
3544     SIMDE_VECTORIZE
3545     for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
3546       r_.f64[i] = simde_math_erf(a_.f64[i]);
3547     }
3548 
3549     return simde__m128d_from_private(r_);
3550   #endif
3551 }
3552 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3553   #undef _mm_erf_pd
3554   #define _mm_erf_pd(a) simde_mm_erf_pd(a)
3555 #endif
3556 
3557 SIMDE_FUNCTION_ATTRIBUTES
3558 simde__m256
3559 simde_mm256_erf_ps (simde__m256 a) {
3560   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3561     return _mm256_erf_ps(a);
3562   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
3563     return Sleef_erff8_u10(a);
3564   #else
3565     simde__m256_private
3566       r_,
3567       a_ = simde__m256_to_private(a);
3568 
3569     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3570       for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
3571         r_.m128[i] = simde_mm_erf_ps(a_.m128[i]);
3572       }
3573     #else
3574       SIMDE_VECTORIZE
3575       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3576         r_.f32[i] = simde_math_erff(a_.f32[i]);
3577       }
3578     #endif
3579 
3580     return simde__m256_from_private(r_);
3581   #endif
3582 }
3583 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3584   #undef _mm256_erf_ps
3585   #define _mm256_erf_ps(a) simde_mm256_erf_ps(a)
3586 #endif
3587 
3588 
3589 SIMDE_FUNCTION_ATTRIBUTES
3590 simde__m256d
3591 simde_mm256_erf_pd (simde__m256d a) {
3592   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3593     return _mm256_erf_pd(a);
3594   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
3595     return Sleef_erfd4_u10(a);
3596   #else
3597     simde__m256d_private
3598       r_,
3599       a_ = simde__m256d_to_private(a);
3600 
3601     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3602       for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
3603         r_.m128d[i] = simde_mm_erf_pd(a_.m128d[i]);
3604       }
3605     #else
3606       SIMDE_VECTORIZE
3607       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
3608         r_.f64[i] = simde_math_erf(a_.f64[i]);
3609       }
3610     #endif
3611 
3612     return simde__m256d_from_private(r_);
3613   #endif
3614 }
3615 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3616   #undef _mm256_erf_pd
3617   #define _mm256_erf_pd(a) simde_mm256_erf_pd(a)
3618 #endif
3619 
3620 SIMDE_FUNCTION_ATTRIBUTES
3621 simde__m512
3622 simde_mm512_erf_ps (simde__m512 a) {
3623   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3624     return _mm512_erf_ps(a);
3625   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
3626     return Sleef_erff16_u10(a);
3627   #else
3628     simde__m512_private
3629       r_,
3630       a_ = simde__m512_to_private(a);
3631 
3632     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3633       for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
3634         r_.m256[i] = simde_mm256_erf_ps(a_.m256[i]);
3635       }
3636     #else
3637       SIMDE_VECTORIZE
3638       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3639         r_.f32[i] = simde_math_erff(a_.f32[i]);
3640       }
3641     #endif
3642 
3643     return simde__m512_from_private(r_);
3644   #endif
3645 }
3646 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3647   #undef _mm512_erf_ps
3648   #define _mm512_erf_ps(a) simde_mm512_erf_ps(a)
3649 #endif
3650 
3651 SIMDE_FUNCTION_ATTRIBUTES
3652 simde__m512d
3653 simde_mm512_erf_pd (simde__m512d a) {
3654   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3655     return _mm512_erf_pd(a);
3656   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
3657     return Sleef_erfd8_u10(a);
3658   #else
3659     simde__m512d_private
3660       r_,
3661       a_ = simde__m512d_to_private(a);
3662 
3663     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3664       for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
3665         r_.m256d[i] = simde_mm256_erf_pd(a_.m256d[i]);
3666       }
3667     #else
3668       SIMDE_VECTORIZE
3669       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
3670         r_.f64[i] = simde_math_erf(a_.f64[i]);
3671       }
3672     #endif
3673 
3674     return simde__m512d_from_private(r_);
3675   #endif
3676 }
3677 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3678   #undef _mm512_erf_pd
3679   #define _mm512_erf_pd(a) simde_mm512_erf_pd(a)
3680 #endif
3681 
3682 SIMDE_FUNCTION_ATTRIBUTES
3683 simde__m512
3684 simde_mm512_mask_erf_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
3685   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3686     return _mm512_mask_erf_ps(src, k, a);
3687   #else
3688     return simde_mm512_mask_mov_ps(src, k, simde_mm512_erf_ps(a));
3689   #endif
3690 }
3691 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3692   #undef _mm512_mask_erf_ps
3693   #define _mm512_mask_erf_ps(src, k, a) simde_mm512_mask_erf_ps(src, k, a)
3694 #endif
3695 
3696 SIMDE_FUNCTION_ATTRIBUTES
3697 simde__m512d
3698 simde_mm512_mask_erf_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
3699   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3700     return _mm512_mask_erf_pd(src, k, a);
3701   #else
3702     return simde_mm512_mask_mov_pd(src, k, simde_mm512_erf_pd(a));
3703   #endif
3704 }
3705 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3706   #undef _mm512_mask_erf_pd
3707   #define _mm512_mask_erf_pd(src, k, a) simde_mm512_mask_erf_pd(src, k, a)
3708 #endif
3709 
3710 SIMDE_FUNCTION_ATTRIBUTES
3711 simde__m128
3712 simde_mm_erfc_ps (simde__m128 a) {
3713   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
3714     return _mm_erfc_ps(a);
3715   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
3716     return Sleef_erfcf4_u15(a);
3717   #else
3718     simde__m128_private
3719       r_,
3720       a_ = simde__m128_to_private(a);
3721 
3722     SIMDE_VECTORIZE
3723     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3724       r_.f32[i] = simde_math_erfcf(a_.f32[i]);
3725     }
3726 
3727     return simde__m128_from_private(r_);
3728   #endif
3729 }
3730 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3731   #undef _mm_erfc_ps
3732   #define _mm_erfc_ps(a) simde_mm_erfc_ps(a)
3733 #endif
3734 
3735 SIMDE_FUNCTION_ATTRIBUTES
3736 simde__m128d
3737 simde_mm_erfc_pd (simde__m128d a) {
3738   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
3739     return _mm_erfc_pd(a);
3740   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
3741     return Sleef_erfcd2_u15(a);
3742   #else
3743     simde__m128d_private
3744       r_,
3745       a_ = simde__m128d_to_private(a);
3746 
3747     SIMDE_VECTORIZE
3748     for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
3749       r_.f64[i] = simde_math_erfc(a_.f64[i]);
3750     }
3751 
3752     return simde__m128d_from_private(r_);
3753   #endif
3754 }
3755 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3756   #undef _mm_erfc_pd
3757   #define _mm_erfc_pd(a) simde_mm_erfc_pd(a)
3758 #endif
3759 
3760 SIMDE_FUNCTION_ATTRIBUTES
3761 simde__m256
3762 simde_mm256_erfc_ps (simde__m256 a) {
3763   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3764     return _mm256_erfc_ps(a);
3765   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
3766     return Sleef_erfcf8_u15(a);
3767   #else
3768     simde__m256_private
3769       r_,
3770       a_ = simde__m256_to_private(a);
3771 
3772     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3773       for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
3774         r_.m128[i] = simde_mm_erfc_ps(a_.m128[i]);
3775       }
3776     #else
3777       SIMDE_VECTORIZE
3778       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3779         r_.f32[i] = simde_math_erfcf(a_.f32[i]);
3780       }
3781     #endif
3782 
3783     return simde__m256_from_private(r_);
3784   #endif
3785 }
3786 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3787   #undef _mm256_erfc_ps
3788   #define _mm256_erfc_ps(a) simde_mm256_erfc_ps(a)
3789 #endif
3790 
3791 
3792 SIMDE_FUNCTION_ATTRIBUTES
3793 simde__m256d
3794 simde_mm256_erfc_pd (simde__m256d a) {
3795   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3796     return _mm256_erfc_pd(a);
3797   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
3798     return Sleef_erfcd4_u15(a);
3799   #else
3800     simde__m256d_private
3801       r_,
3802       a_ = simde__m256d_to_private(a);
3803 
3804     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3805       for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
3806         r_.m128d[i] = simde_mm_erfc_pd(a_.m128d[i]);
3807       }
3808     #else
3809       SIMDE_VECTORIZE
3810       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
3811         r_.f64[i] = simde_math_erfc(a_.f64[i]);
3812       }
3813     #endif
3814 
3815     return simde__m256d_from_private(r_);
3816   #endif
3817 }
3818 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3819   #undef _mm256_erfc_pd
3820   #define _mm256_erfc_pd(a) simde_mm256_erfc_pd(a)
3821 #endif
3822 
3823 SIMDE_FUNCTION_ATTRIBUTES
3824 simde__m512
3825 simde_mm512_erfc_ps (simde__m512 a) {
3826   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3827     return _mm512_erfc_ps(a);
3828   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
3829     return Sleef_erfcf16_u15(a);
3830   #else
3831     simde__m512_private
3832       r_,
3833       a_ = simde__m512_to_private(a);
3834 
3835     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3836       for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
3837         r_.m256[i] = simde_mm256_erfc_ps(a_.m256[i]);
3838       }
3839     #else
3840       SIMDE_VECTORIZE
3841       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3842         r_.f32[i] = simde_math_erfcf(a_.f32[i]);
3843       }
3844     #endif
3845 
3846     return simde__m512_from_private(r_);
3847   #endif
3848 }
3849 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3850   #undef _mm512_erfc_ps
3851   #define _mm512_erfc_ps(a) simde_mm512_erfc_ps(a)
3852 #endif
3853 
3854 SIMDE_FUNCTION_ATTRIBUTES
3855 simde__m512d
3856 simde_mm512_erfc_pd (simde__m512d a) {
3857   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3858     return _mm512_erfc_pd(a);
3859   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
3860     return Sleef_erfcd8_u15(a);
3861   #else
3862     simde__m512d_private
3863       r_,
3864       a_ = simde__m512d_to_private(a);
3865 
3866     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
3867       for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
3868         r_.m256d[i] = simde_mm256_erfc_pd(a_.m256d[i]);
3869       }
3870     #else
3871       SIMDE_VECTORIZE
3872       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
3873         r_.f64[i] = simde_math_erfc(a_.f64[i]);
3874       }
3875     #endif
3876 
3877     return simde__m512d_from_private(r_);
3878   #endif
3879 }
3880 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3881   #undef _mm512_erfc_pd
3882   #define _mm512_erfc_pd(a) simde_mm512_erfc_pd(a)
3883 #endif
3884 
3885 SIMDE_FUNCTION_ATTRIBUTES
3886 simde__m512
3887 simde_mm512_mask_erfc_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
3888   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3889     return _mm512_mask_erfc_ps(src, k, a);
3890   #else
3891     return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfc_ps(a));
3892   #endif
3893 }
3894 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3895   #undef _mm512_mask_erfc_ps
3896   #define _mm512_mask_erfc_ps(src, k, a) simde_mm512_mask_erfc_ps(src, k, a)
3897 #endif
3898 
3899 SIMDE_FUNCTION_ATTRIBUTES
3900 simde__m512d
3901 simde_mm512_mask_erfc_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
3902   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
3903     return _mm512_mask_erfc_pd(src, k, a);
3904   #else
3905     return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfc_pd(a));
3906   #endif
3907 }
3908 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3909   #undef _mm512_mask_erfc_pd
3910   #define _mm512_mask_erfc_pd(src, k, a) simde_mm512_mask_erfc_pd(src, k, a)
3911 #endif
3912 
3913 SIMDE_FUNCTION_ATTRIBUTES
3914 simde__m128
3915 simde_mm_exp_ps (simde__m128 a) {
3916   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
3917     return _mm_exp_ps(a);
3918   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
3919     return Sleef_expf4_u10(a);
3920   #else
3921     simde__m128_private
3922       r_,
3923       a_ = simde__m128_to_private(a);
3924 
3925     SIMDE_VECTORIZE
3926     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3927       r_.f32[i] = simde_math_expf(a_.f32[i]);
3928     }
3929 
3930     return simde__m128_from_private(r_);
3931   #endif
3932 }
3933 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3934   #undef _mm_exp_ps
3935   #define _mm_exp_ps(a) simde_mm_exp_ps(a)
3936 #endif
3937 
3938 SIMDE_FUNCTION_ATTRIBUTES
3939 simde__m128d
3940 simde_mm_exp_pd (simde__m128d a) {
3941   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
3942     return _mm_exp_pd(a);
3943   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
3944     return Sleef_expd2_u10(a);
3945   #else
3946     simde__m128d_private
3947       r_,
3948       a_ = simde__m128d_to_private(a);
3949 
3950     SIMDE_VECTORIZE
3951     for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
3952       r_.f64[i] = simde_math_exp(a_.f64[i]);
3953     }
3954 
3955     return simde__m128d_from_private(r_);
3956   #endif
3957 }
3958 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3959   #undef _mm_exp_pd
3960   #define _mm_exp_pd(a) simde_mm_exp_pd(a)
3961 #endif
3962 
3963 SIMDE_FUNCTION_ATTRIBUTES
3964 simde__m256
3965 simde_mm256_exp_ps (simde__m256 a) {
3966   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3967     return _mm256_exp_ps(a);
3968   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
3969     return Sleef_expf8_u10(a);
3970   #else
3971     simde__m256_private
3972       r_,
3973       a_ = simde__m256_to_private(a);
3974 
3975     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3976       for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
3977         r_.m128[i] = simde_mm_exp_ps(a_.m128[i]);
3978       }
3979     #else
3980       SIMDE_VECTORIZE
3981       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
3982         r_.f32[i] = simde_math_expf(a_.f32[i]);
3983       }
3984     #endif
3985 
3986     return simde__m256_from_private(r_);
3987   #endif
3988 }
3989 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
3990   #undef _mm256_exp_ps
3991   #define _mm256_exp_ps(a) simde_mm256_exp_ps(a)
3992 #endif
3993 
3994 
3995 SIMDE_FUNCTION_ATTRIBUTES
3996 simde__m256d
3997 simde_mm256_exp_pd (simde__m256d a) {
3998   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
3999     return _mm256_exp_pd(a);
4000   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
4001     return Sleef_expd4_u10(a);
4002   #else
4003     simde__m256d_private
4004       r_,
4005       a_ = simde__m256d_to_private(a);
4006 
4007     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
4008       for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
4009         r_.m128d[i] = simde_mm_exp_pd(a_.m128d[i]);
4010       }
4011     #else
4012       SIMDE_VECTORIZE
4013       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4014         r_.f64[i] = simde_math_exp(a_.f64[i]);
4015       }
4016     #endif
4017 
4018     return simde__m256d_from_private(r_);
4019   #endif
4020 }
4021 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4022   #undef _mm256_exp_pd
4023   #define _mm256_exp_pd(a) simde_mm256_exp_pd(a)
4024 #endif
4025 
4026 SIMDE_FUNCTION_ATTRIBUTES
4027 simde__m512
4028 simde_mm512_exp_ps (simde__m512 a) {
4029   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4030     return _mm512_exp_ps(a);
4031   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
4032     return Sleef_expf16_u10(a);
4033   #else
4034     simde__m512_private
4035       r_,
4036       a_ = simde__m512_to_private(a);
4037 
4038     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
4039       for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
4040         r_.m256[i] = simde_mm256_exp_ps(a_.m256[i]);
4041       }
4042     #else
4043       SIMDE_VECTORIZE
4044       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4045         r_.f32[i] = simde_math_expf(a_.f32[i]);
4046       }
4047     #endif
4048 
4049     return simde__m512_from_private(r_);
4050   #endif
4051 }
4052 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4053   #undef _mm512_exp_ps
4054   #define _mm512_exp_ps(a) simde_mm512_exp_ps(a)
4055 #endif
4056 
4057 SIMDE_FUNCTION_ATTRIBUTES
4058 simde__m512d
4059 simde_mm512_exp_pd (simde__m512d a) {
4060   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4061     return _mm512_exp_pd(a);
4062   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
4063     return Sleef_expd8_u10(a);
4064   #else
4065     simde__m512d_private
4066       r_,
4067       a_ = simde__m512d_to_private(a);
4068 
4069     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
4070       for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
4071         r_.m256d[i] = simde_mm256_exp_pd(a_.m256d[i]);
4072       }
4073     #else
4074       SIMDE_VECTORIZE
4075       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4076         r_.f64[i] = simde_math_exp(a_.f64[i]);
4077       }
4078     #endif
4079 
4080     return simde__m512d_from_private(r_);
4081   #endif
4082 }
4083 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4084   #undef _mm512_exp_pd
4085   #define _mm512_exp_pd(a) simde_mm512_exp_pd(a)
4086 #endif
4087 
4088 SIMDE_FUNCTION_ATTRIBUTES
4089 simde__m512
4090 simde_mm512_mask_exp_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
4091   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4092     return _mm512_mask_exp_ps(src, k, a);
4093   #else
4094     return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp_ps(a));
4095   #endif
4096 }
4097 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4098   #undef _mm512_mask_exp_ps
4099   #define _mm512_mask_exp_ps(src, k, a) simde_mm512_mask_exp_ps(src, k, a)
4100 #endif
4101 
4102 SIMDE_FUNCTION_ATTRIBUTES
4103 simde__m512d
4104 simde_mm512_mask_exp_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
4105   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4106     return _mm512_mask_exp_pd(src, k, a);
4107   #else
4108     return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp_pd(a));
4109   #endif
4110 }
4111 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4112   #undef _mm512_mask_exp_pd
4113   #define _mm512_mask_exp_pd(src, k, a) simde_mm512_mask_exp_pd(src, k, a)
4114 #endif
4115 
4116 SIMDE_FUNCTION_ATTRIBUTES
4117 simde__m128
4118 simde_mm_expm1_ps (simde__m128 a) {
4119   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
4120     return _mm_expm1_ps(a);
4121   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
4122     return Sleef_expm1f4_u10(a);
4123   #else
4124     simde__m128_private
4125       r_,
4126       a_ = simde__m128_to_private(a);
4127 
4128     SIMDE_VECTORIZE
4129     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4130       r_.f32[i] = simde_math_expm1f(a_.f32[i]);
4131     }
4132 
4133     return simde__m128_from_private(r_);
4134   #endif
4135 }
4136 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4137   #undef _mm_expm1_ps
4138   #define _mm_expm1_ps(a) simde_mm_expm1_ps(a)
4139 #endif
4140 
4141 SIMDE_FUNCTION_ATTRIBUTES
4142 simde__m128d
4143 simde_mm_expm1_pd (simde__m128d a) {
4144   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
4145     return _mm_expm1_pd(a);
4146   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
4147     return Sleef_expm1d2_u10(a);
4148   #else
4149     simde__m128d_private
4150       r_,
4151       a_ = simde__m128d_to_private(a);
4152 
4153     SIMDE_VECTORIZE
4154     for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4155       r_.f64[i] = simde_math_expm1(a_.f64[i]);
4156     }
4157 
4158     return simde__m128d_from_private(r_);
4159   #endif
4160 }
4161 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4162   #undef _mm_expm1_pd
4163   #define _mm_expm1_pd(a) simde_mm_expm1_pd(a)
4164 #endif
4165 
4166 SIMDE_FUNCTION_ATTRIBUTES
4167 simde__m256
4168 simde_mm256_expm1_ps (simde__m256 a) {
4169   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
4170     return _mm256_expm1_ps(a);
4171   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
4172     return Sleef_expm1f8_u10(a);
4173   #else
4174     simde__m256_private
4175       r_,
4176       a_ = simde__m256_to_private(a);
4177 
4178     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
4179       for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
4180         r_.m128[i] = simde_mm_expm1_ps(a_.m128[i]);
4181       }
4182     #else
4183       SIMDE_VECTORIZE
4184       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4185         r_.f32[i] = simde_math_expm1f(a_.f32[i]);
4186       }
4187     #endif
4188 
4189     return simde__m256_from_private(r_);
4190   #endif
4191 }
4192 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4193   #undef _mm256_expm1_ps
4194   #define _mm256_expm1_ps(a) simde_mm256_expm1_ps(a)
4195 #endif
4196 
4197 
4198 SIMDE_FUNCTION_ATTRIBUTES
4199 simde__m256d
4200 simde_mm256_expm1_pd (simde__m256d a) {
4201   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
4202     return _mm256_expm1_pd(a);
4203   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
4204     return Sleef_expm1d4_u10(a);
4205   #else
4206     simde__m256d_private
4207       r_,
4208       a_ = simde__m256d_to_private(a);
4209 
4210     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
4211       for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
4212         r_.m128d[i] = simde_mm_expm1_pd(a_.m128d[i]);
4213       }
4214     #else
4215       SIMDE_VECTORIZE
4216       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4217         r_.f64[i] = simde_math_expm1(a_.f64[i]);
4218       }
4219     #endif
4220 
4221     return simde__m256d_from_private(r_);
4222   #endif
4223 }
4224 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4225   #undef _mm256_expm1_pd
4226   #define _mm256_expm1_pd(a) simde_mm256_expm1_pd(a)
4227 #endif
4228 
4229 SIMDE_FUNCTION_ATTRIBUTES
4230 simde__m512
4231 simde_mm512_expm1_ps (simde__m512 a) {
4232   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4233     return _mm512_expm1_ps(a);
4234   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
4235     return Sleef_expm1f16_u10(a);
4236   #else
4237     simde__m512_private
4238       r_,
4239       a_ = simde__m512_to_private(a);
4240 
4241     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
4242       for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
4243         r_.m256[i] = simde_mm256_expm1_ps(a_.m256[i]);
4244       }
4245     #else
4246       SIMDE_VECTORIZE
4247       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4248         r_.f32[i] = simde_math_expm1f(a_.f32[i]);
4249       }
4250     #endif
4251 
4252     return simde__m512_from_private(r_);
4253   #endif
4254 }
4255 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4256   #undef _mm512_expm1_ps
4257   #define _mm512_expm1_ps(a) simde_mm512_expm1_ps(a)
4258 #endif
4259 
4260 SIMDE_FUNCTION_ATTRIBUTES
4261 simde__m512d
4262 simde_mm512_expm1_pd (simde__m512d a) {
4263   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4264     return _mm512_expm1_pd(a);
4265   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
4266     return Sleef_expm1d8_u10(a);
4267   #else
4268     simde__m512d_private
4269       r_,
4270       a_ = simde__m512d_to_private(a);
4271 
4272     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
4273       for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
4274         r_.m256d[i] = simde_mm256_expm1_pd(a_.m256d[i]);
4275       }
4276     #else
4277       SIMDE_VECTORIZE
4278       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4279         r_.f64[i] = simde_math_expm1(a_.f64[i]);
4280       }
4281     #endif
4282 
4283     return simde__m512d_from_private(r_);
4284   #endif
4285 }
4286 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4287   #undef _mm512_expm1_pd
4288   #define _mm512_expm1_pd(a) simde_mm512_expm1_pd(a)
4289 #endif
4290 
4291 SIMDE_FUNCTION_ATTRIBUTES
4292 simde__m512
4293 simde_mm512_mask_expm1_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
4294   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4295     return _mm512_mask_expm1_ps(src, k, a);
4296   #else
4297     return simde_mm512_mask_mov_ps(src, k, simde_mm512_expm1_ps(a));
4298   #endif
4299 }
4300 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4301   #undef _mm512_mask_expm1_ps
4302   #define _mm512_mask_expm1_ps(src, k, a) simde_mm512_mask_expm1_ps(src, k, a)
4303 #endif
4304 
4305 SIMDE_FUNCTION_ATTRIBUTES
4306 simde__m512d
4307 simde_mm512_mask_expm1_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
4308   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4309     return _mm512_mask_expm1_pd(src, k, a);
4310   #else
4311     return simde_mm512_mask_mov_pd(src, k, simde_mm512_expm1_pd(a));
4312   #endif
4313 }
4314 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4315   #undef _mm512_mask_expm1_pd
4316   #define _mm512_mask_expm1_pd(src, k, a) simde_mm512_mask_expm1_pd(src, k, a)
4317 #endif
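
/* Why expm1 gets a dedicated entry point: for |x| near zero, exp(x) - 1.0
 * cancels catastrophically, while expm1 keeps full precision. Illustrative
 * comparison (compiled out; printed values are approximate): */
#if 0
#include <stdio.h>
static void
example_expm1 (void) {
  simde__m128d x = simde_mm_set1_pd(1e-12);
  double naive[2], precise[2];
  simde_mm_storeu_pd(naive, simde_mm_sub_pd(simde_mm_exp_pd(x), simde_mm_set1_pd(1.0)));
  simde_mm_storeu_pd(precise, simde_mm_expm1_pd(x));
  printf("%.17g vs %.17g\n", naive[0], precise[0]);
  /* roughly 1.000088900582341e-12 vs 1.0000000000005e-12 */
}
#endif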
4318 
4319 SIMDE_FUNCTION_ATTRIBUTES
4320 simde__m128
4321 simde_mm_exp2_ps (simde__m128 a) {
4322   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
4323     return _mm_exp2_ps(a);
4324   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
4325     return Sleef_exp2f4_u10(a);
4326   #else
4327     simde__m128_private
4328       r_,
4329       a_ = simde__m128_to_private(a);
4330 
4331     SIMDE_VECTORIZE
4332     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4333       r_.f32[i] = simde_math_exp2f(a_.f32[i]);
4334     }
4335 
4336     return simde__m128_from_private(r_);
4337   #endif
4338 }
4339 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4340   #undef _mm_exp2_ps
4341   #define _mm_exp2_ps(a) simde_mm_exp2_ps(a)
4342 #endif
4343 
4344 SIMDE_FUNCTION_ATTRIBUTES
4345 simde__m128d
4346 simde_mm_exp2_pd (simde__m128d a) {
4347   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
4348     return _mm_exp2_pd(a);
4349   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
4350     return Sleef_exp2d2_u10(a);
4351   #else
4352     simde__m128d_private
4353       r_,
4354       a_ = simde__m128d_to_private(a);
4355 
4356     SIMDE_VECTORIZE
4357     for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4358       r_.f64[i] = simde_math_exp2(a_.f64[i]);
4359     }
4360 
4361     return simde__m128d_from_private(r_);
4362   #endif
4363 }
4364 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4365   #undef _mm_exp2_pd
4366   #define _mm_exp2_pd(a) simde_mm_exp2_pd(a)
4367 #endif
4368 
4369 SIMDE_FUNCTION_ATTRIBUTES
4370 simde__m256
4371 simde_mm256_exp2_ps (simde__m256 a) {
4372   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
4373     return _mm256_exp2_ps(a);
4374   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
4375     return Sleef_exp2f8_u10(a);
4376   #else
4377     simde__m256_private
4378       r_,
4379       a_ = simde__m256_to_private(a);
4380 
4381     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
4382       for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
4383         r_.m128[i] = simde_mm_exp2_ps(a_.m128[i]);
4384       }
4385     #else
4386       SIMDE_VECTORIZE
4387       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4388         r_.f32[i] = simde_math_exp2f(a_.f32[i]);
4389       }
4390     #endif
4391 
4392     return simde__m256_from_private(r_);
4393   #endif
4394 }
4395 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4396   #undef _mm256_exp2_ps
4397   #define _mm256_exp2_ps(a) simde_mm256_exp2_ps(a)
4398 #endif
4399 
4400 
4401 SIMDE_FUNCTION_ATTRIBUTES
4402 simde__m256d
4403 simde_mm256_exp2_pd (simde__m256d a) {
4404   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
4405     return _mm256_exp2_pd(a);
4406   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
4407     return Sleef_exp2d4_u10(a);
4408   #else
4409     simde__m256d_private
4410       r_,
4411       a_ = simde__m256d_to_private(a);
4412 
4413     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
4414       for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
4415         r_.m128d[i] = simde_mm_exp2_pd(a_.m128d[i]);
4416       }
4417     #else
4418       SIMDE_VECTORIZE
4419       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4420         r_.f64[i] = simde_math_exp2(a_.f64[i]);
4421       }
4422     #endif
4423 
4424     return simde__m256d_from_private(r_);
4425   #endif
4426 }
4427 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4428   #undef _mm256_exp2_pd
4429   #define _mm256_exp2_pd(a) simde_mm256_exp2_pd(a)
4430 #endif
4431 
4432 SIMDE_FUNCTION_ATTRIBUTES
4433 simde__m512
4434 simde_mm512_exp2_ps (simde__m512 a) {
4435   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4436     return _mm512_exp2_ps(a);
4437   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
4438     return Sleef_exp2f16_u10(a);
4439   #else
4440     simde__m512_private
4441       r_,
4442       a_ = simde__m512_to_private(a);
4443 
4444     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
4445       for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
4446         r_.m256[i] = simde_mm256_exp2_ps(a_.m256[i]);
4447       }
4448     #else
4449       SIMDE_VECTORIZE
4450       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4451         r_.f32[i] = simde_math_exp2f(a_.f32[i]);
4452       }
4453     #endif
4454 
4455     return simde__m512_from_private(r_);
4456   #endif
4457 }
4458 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4459   #undef _mm512_exp2_ps
4460   #define _mm512_exp2_ps(a) simde_mm512_exp2_ps(a)
4461 #endif
4462 
4463 SIMDE_FUNCTION_ATTRIBUTES
4464 simde__m512d
4465 simde_mm512_exp2_pd (simde__m512d a) {
4466   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4467     return _mm512_exp2_pd(a);
4468   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
4469     return Sleef_exp2d8_u10(a);
4470   #else
4471     simde__m512d_private
4472       r_,
4473       a_ = simde__m512d_to_private(a);
4474 
4475     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
4476       for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
4477         r_.m256d[i] = simde_mm256_exp2_pd(a_.m256d[i]);
4478       }
4479     #else
4480       SIMDE_VECTORIZE
4481       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4482         r_.f64[i] = simde_math_exp2(a_.f64[i]);
4483       }
4484     #endif
4485 
4486     return simde__m512d_from_private(r_);
4487   #endif
4488 }
4489 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4490   #undef _mm512_exp2_pd
4491   #define _mm512_exp2_pd(a) simde_mm512_exp2_pd(a)
4492 #endif
4493 
4494 SIMDE_FUNCTION_ATTRIBUTES
4495 simde__m512
4496 simde_mm512_mask_exp2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
4497   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4498     return _mm512_mask_exp2_ps(src, k, a);
4499   #else
4500     return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp2_ps(a));
4501   #endif
4502 }
4503 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4504   #undef _mm512_mask_exp2_ps
4505   #define _mm512_mask_exp2_ps(src, k, a) simde_mm512_mask_exp2_ps(src, k, a)
4506 #endif
4507 
4508 SIMDE_FUNCTION_ATTRIBUTES
4509 simde__m512d
4510 simde_mm512_mask_exp2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
4511   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4512     return _mm512_mask_exp2_pd(src, k, a);
4513   #else
4514     return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp2_pd(a));
4515   #endif
4516 }
4517 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4518   #undef _mm512_mask_exp2_pd
4519   #define _mm512_mask_exp2_pd(src, k, a) simde_mm512_mask_exp2_pd(src, k, a)
4520 #endif
4521 
4522 SIMDE_FUNCTION_ATTRIBUTES
4523 simde__m128
4524 simde_mm_exp10_ps (simde__m128 a) {
4525   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
4526     return _mm_exp10_ps(a);
4527   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
4528     return Sleef_exp10f4_u10(a);
4529   #else
4530     simde__m128_private
4531       r_,
4532       a_ = simde__m128_to_private(a);
4533 
4534     SIMDE_VECTORIZE
4535     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4536       r_.f32[i] = simde_math_exp10f(a_.f32[i]);
4537     }
4538 
4539     return simde__m128_from_private(r_);
4540   #endif
4541 }
4542 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4543   #undef _mm_exp10_ps
4544   #define _mm_exp10_ps(a) simde_mm_exp10_ps(a)
4545 #endif
4546 
4547 SIMDE_FUNCTION_ATTRIBUTES
4548 simde__m128d
4549 simde_mm_exp10_pd (simde__m128d a) {
4550   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
4551     return _mm_exp10_pd(a);
4552   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
4553     return Sleef_exp10d2_u10(a);
4554   #else
4555     simde__m128d_private
4556       r_,
4557       a_ = simde__m128d_to_private(a);
4558 
4559     SIMDE_VECTORIZE
4560     for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4561       r_.f64[i] = simde_math_exp10(a_.f64[i]);
4562     }
4563 
4564     return simde__m128d_from_private(r_);
4565   #endif
4566 }
4567 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4568   #undef _mm_exp10_pd
4569   #define _mm_exp10_pd(a) simde_mm_exp10_pd(a)
4570 #endif
4571 
4572 SIMDE_FUNCTION_ATTRIBUTES
4573 simde__m256
4574 simde_mm256_exp10_ps (simde__m256 a) {
4575   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
4576     return _mm256_exp10_ps(a);
4577   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
4578     return Sleef_exp10f8_u10(a);
4579   #else
4580     simde__m256_private
4581       r_,
4582       a_ = simde__m256_to_private(a);
4583 
4584     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
4585       for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
4586         r_.m128[i] = simde_mm_exp10_ps(a_.m128[i]);
4587       }
4588     #else
4589       SIMDE_VECTORIZE
4590       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4591         r_.f32[i] = simde_math_exp10f(a_.f32[i]);
4592       }
4593     #endif
4594 
4595     return simde__m256_from_private(r_);
4596   #endif
4597 }
4598 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4599   #undef _mm256_exp10_ps
4600   #define _mm256_exp10_ps(a) simde_mm256_exp10_ps(a)
4601 #endif
4602 
4603 
4604 SIMDE_FUNCTION_ATTRIBUTES
4605 simde__m256d
4606 simde_mm256_exp10_pd (simde__m256d a) {
4607   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
4608     return _mm256_exp10_pd(a);
4609   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
4610     return Sleef_exp10d4_u10(a);
4611   #else
4612     simde__m256d_private
4613       r_,
4614       a_ = simde__m256d_to_private(a);
4615 
4616     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
4617       for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
4618         r_.m128d[i] = simde_mm_exp10_pd(a_.m128d[i]);
4619       }
4620     #else
4621       SIMDE_VECTORIZE
4622       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4623         r_.f64[i] = simde_math_exp10(a_.f64[i]);
4624       }
4625     #endif
4626 
4627     return simde__m256d_from_private(r_);
4628   #endif
4629 }
4630 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4631   #undef _mm256_exp10_pd
4632   #define _mm256_exp10_pd(a) simde_mm256_exp10_pd(a)
4633 #endif
4634 
4635 SIMDE_FUNCTION_ATTRIBUTES
4636 simde__m512
4637 simde_mm512_exp10_ps (simde__m512 a) {
4638   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4639     return _mm512_exp10_ps(a);
4640   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
4641     return Sleef_exp10f16_u10(a);
4642   #else
4643     simde__m512_private
4644       r_,
4645       a_ = simde__m512_to_private(a);
4646 
4647     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
4648       for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
4649         r_.m256[i] = simde_mm256_exp10_ps(a_.m256[i]);
4650       }
4651     #else
4652       SIMDE_VECTORIZE
4653       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
4654         r_.f32[i] = simde_math_exp10f(a_.f32[i]);
4655       }
4656     #endif
4657 
4658     return simde__m512_from_private(r_);
4659   #endif
4660 }
4661 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4662   #undef _mm512_exp10_ps
4663   #define _mm512_exp10_ps(a) simde_mm512_exp10_ps(a)
4664 #endif
4665 
4666 SIMDE_FUNCTION_ATTRIBUTES
4667 simde__m512d
4668 simde_mm512_exp10_pd (simde__m512d a) {
4669   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4670     return _mm512_exp10_pd(a);
4671   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
4672     return Sleef_exp10d8_u10(a);
4673   #else
4674     simde__m512d_private
4675       r_,
4676       a_ = simde__m512d_to_private(a);
4677 
4678     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
4679       for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
4680         r_.m256d[i] = simde_mm256_exp10_pd(a_.m256d[i]);
4681       }
4682     #else
4683       SIMDE_VECTORIZE
4684       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
4685         r_.f64[i] = simde_math_exp10(a_.f64[i]);
4686       }
4687     #endif
4688 
4689     return simde__m512d_from_private(r_);
4690   #endif
4691 }
4692 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4693   #undef _mm512_exp10_pd
4694   #define _mm512_exp10_pd(a) simde_mm512_exp10_pd(a)
4695 #endif
4696 
4697 SIMDE_FUNCTION_ATTRIBUTES
4698 simde__m512
4699 simde_mm512_mask_exp10_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
4700   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4701     return _mm512_mask_exp10_ps(src, k, a);
4702   #else
4703     return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp10_ps(a));
4704   #endif
4705 }
4706 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4707   #undef _mm512_mask_exp10_ps
4708   #define _mm512_mask_exp10_ps(src, k, a) simde_mm512_mask_exp10_ps(src, k, a)
4709 #endif
4710 
4711 SIMDE_FUNCTION_ATTRIBUTES
4712 simde__m512d
simde_mm512_mask_exp10_pd(simde__m512d src,simde__mmask8 k,simde__m512d a)4713 simde_mm512_mask_exp10_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
4714   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
4715     return _mm512_mask_exp10_pd(src, k, a);
4716   #else
4717     return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp10_pd(a));
4718   #endif
4719 }
4720 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
4721   #undef _mm512_mask_exp10_pd
4722   #define _mm512_mask_exp10_pd(src, k, a) simde_mm512_mask_exp10_pd(src, k, a)
4723 #endif
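
/* The mask_* fallbacks above compute exp10 on every lane and then blend
 * with simde_mm512_mask_mov_*: lane i of the result comes from the
 * computed value when bit i of k is set, and from src otherwise.
 * Scalar sketch of the merge (illustrative only):
 *
 *   for (size_t i = 0 ; i < 8 ; i++)
 *     r.f64[i] = ((k >> i) & 1) ? simde_math_exp10(a.f64[i]) : src.f64[i];
 */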

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cdfnorm_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cdfnorm_ps(a);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
    /* https://www.johndcook.com/blog/cpp_phi/ */
    const simde__m128 a1 = simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.254829592));
    const simde__m128 a2 = simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.284496736));
    const simde__m128 a3 = simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.421413741));
    const simde__m128 a4 = simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.453152027));
    const simde__m128 a5 = simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.061405429));
    const simde__m128 p = simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.3275911));
    const simde__m128 one = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0));

    /* simde_math_fabsf(x) / sqrtf(2.0) */
    const simde__m128 x = simde_mm_div_ps(simde_x_mm_abs_ps(a), simde_mm_sqrt_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0))));

    /* 1.0 / (1.0 + p * x) */
    const simde__m128 t = simde_mm_div_ps(one, simde_mm_add_ps(one, simde_mm_mul_ps(p, x)));

    /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */
    simde__m128 y = simde_mm_mul_ps(a5, t);
    y = simde_mm_add_ps(y, a4);
    y = simde_mm_mul_ps(y, t);
    y = simde_mm_add_ps(y, a3);
    y = simde_mm_mul_ps(y, t);
    y = simde_mm_add_ps(y, a2);
    y = simde_mm_mul_ps(y, t);
    y = simde_mm_add_ps(y, a1);
    y = simde_mm_mul_ps(y, t);
    y = simde_mm_mul_ps(y, simde_mm_exp_ps(simde_mm_mul_ps(x, simde_x_mm_negate_ps(x))));
    y = simde_mm_sub_ps(one, y);

    /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) */
    return simde_mm_mul_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm_add_ps(one, simde_x_mm_xorsign_ps(y, a)));
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_cdfnormf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_cdfnorm_ps
  #define _mm_cdfnorm_ps(a) simde_mm_cdfnorm_ps(a)
#endif
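
/* The vector path above is the Abramowitz & Stegun 7.1.26-style erf
 * approximation from the John D. Cook post linked in the comment:
 * Phi(x) = 0.5 * (1 + sign(x) * erf(|x| / sqrt(2))), with erf replaced
 * by a degree-5 polynomial in t = 1 / (1 + p * |x| / sqrt(2)).  Scalar
 * sketch of the same computation (illustrative, not part of the API):
 *
 *   double phi(double x) {
 *     double s = (x < 0.0) ? -1.0 : 1.0;
 *     double v = fabs(x) / sqrt(2.0);
 *     double t = 1.0 / (1.0 + 0.3275911 * v);
 *     double y = 1.0 - (((((1.061405429 * t - 1.453152027) * t + 1.421413741)
 *                * t - 0.284496736) * t + 0.254829592) * t) * exp(-v * v);
 *     return 0.5 * (1.0 + s * y);
 *   }
 */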

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_cdfnorm_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cdfnorm_pd(a);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
    /* https://www.johndcook.com/blog/cpp_phi/ */
    const simde__m128d a1 = simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.254829592));
    const simde__m128d a2 = simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.284496736));
    const simde__m128d a3 = simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.421413741));
    const simde__m128d a4 = simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.453152027));
    const simde__m128d a5 = simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.061405429));
    const simde__m128d p = simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.3275911));
    const simde__m128d one = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0));

    /* simde_math_fabs(x) / sqrt(2.0) */
    const simde__m128d x = simde_mm_div_pd(simde_x_mm_abs_pd(a), simde_mm_sqrt_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0))));

    /* 1.0 / (1.0 + p * x) */
    const simde__m128d t = simde_mm_div_pd(one, simde_mm_add_pd(one, simde_mm_mul_pd(p, x)));

    /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */
    simde__m128d y = simde_mm_mul_pd(a5, t);
    y = simde_mm_add_pd(y, a4);
    y = simde_mm_mul_pd(y, t);
    y = simde_mm_add_pd(y, a3);
    y = simde_mm_mul_pd(y, t);
    y = simde_mm_add_pd(y, a2);
    y = simde_mm_mul_pd(y, t);
    y = simde_mm_add_pd(y, a1);
    y = simde_mm_mul_pd(y, t);
    y = simde_mm_mul_pd(y, simde_mm_exp_pd(simde_mm_mul_pd(x, simde_x_mm_negate_pd(x))));
    y = simde_mm_sub_pd(one, y);

    /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) */
    return simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm_add_pd(one, simde_x_mm_xorsign_pd(y, a)));
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_cdfnorm(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_cdfnorm_pd
  #define _mm_cdfnorm_pd(a) simde_mm_cdfnorm_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_cdfnorm_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_cdfnorm_ps(a);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
    /* https://www.johndcook.com/blog/cpp_phi/ */
    const simde__m256 a1 = simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.254829592));
    const simde__m256 a2 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.284496736));
    const simde__m256 a3 = simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.421413741));
    const simde__m256 a4 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.453152027));
    const simde__m256 a5 = simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.061405429));
    const simde__m256 p = simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.3275911));
    const simde__m256 one = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0));

    /* simde_math_fabsf(x) / sqrtf(2.0) */
    const simde__m256 x = simde_mm256_div_ps(simde_x_mm256_abs_ps(a), simde_mm256_sqrt_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0))));

    /* 1.0 / (1.0 + p * x) */
    const simde__m256 t = simde_mm256_div_ps(one, simde_mm256_add_ps(one, simde_mm256_mul_ps(p, x)));

    /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */
    simde__m256 y = simde_mm256_mul_ps(a5, t);
    y = simde_mm256_add_ps(y, a4);
    y = simde_mm256_mul_ps(y, t);
    y = simde_mm256_add_ps(y, a3);
    y = simde_mm256_mul_ps(y, t);
    y = simde_mm256_add_ps(y, a2);
    y = simde_mm256_mul_ps(y, t);
    y = simde_mm256_add_ps(y, a1);
    y = simde_mm256_mul_ps(y, t);
    y = simde_mm256_mul_ps(y, simde_mm256_exp_ps(simde_mm256_mul_ps(x, simde_x_mm256_negate_ps(x))));
    y = simde_mm256_sub_ps(one, y);

    /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) */
    return simde_mm256_mul_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm256_add_ps(one, simde_x_mm256_xorsign_ps(y, a)));
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_cdfnorm_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_cdfnormf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cdfnorm_ps
  #define _mm256_cdfnorm_ps(a) simde_mm256_cdfnorm_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_cdfnorm_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_cdfnorm_pd(a);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
    /* https://www.johndcook.com/blog/cpp_phi/ */
    const simde__m256d a1 = simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.254829592));
    const simde__m256d a2 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.284496736));
    const simde__m256d a3 = simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.421413741));
    const simde__m256d a4 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.453152027));
    const simde__m256d a5 = simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.061405429));
    const simde__m256d p = simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.3275911));
    const simde__m256d one = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0));

    /* simde_math_fabs(x) / sqrt(2.0) */
    const simde__m256d x = simde_mm256_div_pd(simde_x_mm256_abs_pd(a), simde_mm256_sqrt_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0))));

    /* 1.0 / (1.0 + p * x) */
    const simde__m256d t = simde_mm256_div_pd(one, simde_mm256_add_pd(one, simde_mm256_mul_pd(p, x)));

    /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */
    simde__m256d y = simde_mm256_mul_pd(a5, t);
    y = simde_mm256_add_pd(y, a4);
    y = simde_mm256_mul_pd(y, t);
    y = simde_mm256_add_pd(y, a3);
    y = simde_mm256_mul_pd(y, t);
    y = simde_mm256_add_pd(y, a2);
    y = simde_mm256_mul_pd(y, t);
    y = simde_mm256_add_pd(y, a1);
    y = simde_mm256_mul_pd(y, t);
    y = simde_mm256_mul_pd(y, simde_mm256_exp_pd(simde_mm256_mul_pd(x, simde_x_mm256_negate_pd(x))));
    y = simde_mm256_sub_pd(one, y);

    /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) */
    return simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm256_add_pd(one, simde_x_mm256_xorsign_pd(y, a)));
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_cdfnorm_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_cdfnorm(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cdfnorm_pd
  #define _mm256_cdfnorm_pd(a) simde_mm256_cdfnorm_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_cdfnorm_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_cdfnorm_ps(a);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
    /* https://www.johndcook.com/blog/cpp_phi/ */
    const simde__m512 a1 = simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.254829592));
    const simde__m512 a2 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.284496736));
    const simde__m512 a3 = simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.421413741));
    const simde__m512 a4 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.453152027));
    const simde__m512 a5 = simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.061405429));
    const simde__m512 p = simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.3275911));
    const simde__m512 one = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0));

    /* simde_math_fabsf(x) / sqrtf(2.0) */
    const simde__m512 x = simde_mm512_div_ps(simde_mm512_abs_ps(a), simde_mm512_sqrt_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0))));

    /* 1.0 / (1.0 + p * x) */
    const simde__m512 t = simde_mm512_div_ps(one, simde_mm512_add_ps(one, simde_mm512_mul_ps(p, x)));

    /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */
    simde__m512 y = simde_mm512_mul_ps(a5, t);
    y = simde_mm512_add_ps(y, a4);
    y = simde_mm512_mul_ps(y, t);
    y = simde_mm512_add_ps(y, a3);
    y = simde_mm512_mul_ps(y, t);
    y = simde_mm512_add_ps(y, a2);
    y = simde_mm512_mul_ps(y, t);
    y = simde_mm512_add_ps(y, a1);
    y = simde_mm512_mul_ps(y, t);
    y = simde_mm512_mul_ps(y, simde_mm512_exp_ps(simde_mm512_mul_ps(x, simde_x_mm512_negate_ps(x))));
    y = simde_mm512_sub_ps(one, y);

    /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) */
    return simde_mm512_mul_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm512_add_ps(one, simde_x_mm512_xorsign_ps(y, a)));
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_cdfnorm_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_cdfnormf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_cdfnorm_ps
  #define _mm512_cdfnorm_ps(a) simde_mm512_cdfnorm_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_cdfnorm_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_cdfnorm_pd(a);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
    /* https://www.johndcook.com/blog/cpp_phi/ */
    const simde__m512d a1 = simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.254829592));
    const simde__m512d a2 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.284496736));
    const simde__m512d a3 = simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.421413741));
    const simde__m512d a4 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.453152027));
    const simde__m512d a5 = simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.061405429));
    const simde__m512d p = simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.3275911));
    const simde__m512d one = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0));

    /* simde_math_fabs(x) / sqrt(2.0) */
    const simde__m512d x = simde_mm512_div_pd(simde_mm512_abs_pd(a), simde_mm512_sqrt_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0))));

    /* 1.0 / (1.0 + p * x) */
    const simde__m512d t = simde_mm512_div_pd(one, simde_mm512_add_pd(one, simde_mm512_mul_pd(p, x)));

    /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */
    simde__m512d y = simde_mm512_mul_pd(a5, t);
    y = simde_mm512_add_pd(y, a4);
    y = simde_mm512_mul_pd(y, t);
    y = simde_mm512_add_pd(y, a3);
    y = simde_mm512_mul_pd(y, t);
    y = simde_mm512_add_pd(y, a2);
    y = simde_mm512_mul_pd(y, t);
    y = simde_mm512_add_pd(y, a1);
    y = simde_mm512_mul_pd(y, t);
    y = simde_mm512_mul_pd(y, simde_mm512_exp_pd(simde_mm512_mul_pd(x, simde_x_mm512_negate_pd(x))));
    y = simde_mm512_sub_pd(one, y);

    /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) */
    return simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm512_add_pd(one, simde_x_mm512_xorsign_pd(y, a)));
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_cdfnorm_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_cdfnorm(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_cdfnorm_pd
  #define _mm512_cdfnorm_pd(a) simde_mm512_cdfnorm_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_cdfnorm_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_cdfnorm_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_cdfnorm_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_cdfnorm_ps
  #define _mm512_mask_cdfnorm_ps(src, k, a) simde_mm512_mask_cdfnorm_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_cdfnorm_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_cdfnorm_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_cdfnorm_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_cdfnorm_pd
  #define _mm512_mask_cdfnorm_pd(src, k, a) simde_mm512_mask_cdfnorm_pd(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_idivrem_epi32 (simde__m128i* mem_addr, simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_idivrem_epi32(HEDLEY_REINTERPRET_CAST(__m128i*, mem_addr), a, b);
  #else
    simde__m128i r;

    r = simde_mm_div_epi32(a, b);
    *mem_addr = simde_mm_sub_epi32(a, simde_mm_mullo_epi32(r, b));

    return r;
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_idivrem_epi32
  #define _mm_idivrem_epi32(mem_addr, a, b) simde_mm_idivrem_epi32((mem_addr),(a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_idivrem_epi32 (simde__m256i* mem_addr, simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_idivrem_epi32(HEDLEY_REINTERPRET_CAST(__m256i*, mem_addr), a, b);
  #else
    simde__m256i r;

    r = simde_mm256_div_epi32(a, b);
    *mem_addr = simde_mm256_sub_epi32(a, simde_mm256_mullo_epi32(r, b));

    return r;
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_idivrem_epi32
  #define _mm256_idivrem_epi32(mem_addr, a, b) simde_mm256_idivrem_epi32((mem_addr),(a), (b))
#endif
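
/* idivrem returns the element-wise truncated quotient and stores the
 * remainder through mem_addr using the identity rem = a - (a / b) * b,
 * so the two results are always consistent.  Per-element sketch:
 *
 *   int32_t q   = a / b;       // truncated quotient
 *   int32_t rem = a - q * b;   // same sign as a, |rem| < |b|
 *
 * As with the native intrinsic, elements of b must be non-zero. */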

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_hypot_ps (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_hypot_ps(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_hypotf4_u05(a, b);
    #else
      return Sleef_hypotf4_u35(a, b);
    #endif
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_hypot_ps
  #define _mm_hypot_ps(a, b) simde_mm_hypot_ps(a, b)
#endif
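
/* Like most Sleef-backed paths in this file, the dispatch above picks
 * between a tighter error bound (_u05, <= 0.5 ULP) and a faster variant
 * (_u35, <= 3.5 ULP) based on SIMDE_ACCURACY_PREFERENCE.  For example,
 * to request the higher-accuracy variants (define before including):
 *
 *   #define SIMDE_ACCURACY_PREFERENCE 2
 *   // hypot then maps to Sleef_hypotf4_u05 instead of Sleef_hypotf4_u35
 */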

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_hypot_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_hypot_pd(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_hypotd2_u05(a, b);
    #else
      return Sleef_hypotd2_u35(a, b);
    #endif
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_hypot_pd
  #define _mm_hypot_pd(a, b) simde_mm_hypot_pd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_hypot_ps (simde__m256 a, simde__m256 b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_hypot_ps(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_hypotf8_u05(a, b);
    #else
      return Sleef_hypotf8_u35(a, b);
    #endif
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a),
      b_ = simde__m256_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_hypot_ps(a_.m128[i], b_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_hypot_ps
  #define _mm256_hypot_ps(a, b) simde_mm256_hypot_ps(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_hypot_pd (simde__m256d a, simde__m256d b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_hypot_pd(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_hypotd4_u05(a, b);
    #else
      return Sleef_hypotd4_u35(a, b);
    #endif
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a),
      b_ = simde__m256d_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_hypot_pd(a_.m128d[i], b_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_hypot_pd
  #define _mm256_hypot_pd(a, b) simde_mm256_hypot_pd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_hypot_ps (simde__m512 a, simde__m512 b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_hypot_ps(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_hypotf16_u05(a, b);
    #else
      return Sleef_hypotf16_u35(a, b);
    #endif
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a),
      b_ = simde__m512_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_hypot_ps(a_.m256[i], b_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_hypot_ps
  #define _mm512_hypot_ps(a, b) simde_mm512_hypot_ps(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_hypot_pd (simde__m512d a, simde__m512d b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_hypot_pd(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_hypotd8_u05(a, b);
    #else
      return Sleef_hypotd8_u35(a, b);
    #endif
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a),
      b_ = simde__m512d_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_hypot_pd(a_.m256d[i], b_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_hypot_pd
  #define _mm512_hypot_pd(a, b) simde_mm512_hypot_pd(a, b)
#endif
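
/* The portable fallbacks defer to hypot()/hypotf() rather than computing
 * sqrt(a*a + b*b) directly: the library routine avoids spurious overflow
 * and underflow in the squared intermediates.  Sketch of the failure
 * mode the libm call sidesteps (illustrative only):
 *
 *   float a = 1e30f, b = 1e30f;
 *   sqrtf(a * a + b * b);   // a*a overflows float, result is +inf
 *   hypotf(a, b);           // ~1.414e30f, as expected
 */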

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_hypot_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_hypot_ps(src, k, a, b);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_hypot_ps(a, b));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_hypot_ps
  #define _mm512_mask_hypot_ps(src, k, a, b) simde_mm512_mask_hypot_ps(src, k, a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_hypot_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_hypot_pd(src, k, a, b);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_hypot_pd(a, b));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_hypot_pd
  #define _mm512_mask_hypot_pd(src, k, a, b) simde_mm512_mask_hypot_pd(src, k, a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_invcbrt_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_invcbrt_ps(a);
  #else
    return simde_mm_rcp_ps(simde_mm_cbrt_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_invcbrt_ps
  #define _mm_invcbrt_ps(a) simde_mm_invcbrt_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_invcbrt_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_invcbrt_pd(a);
  #else
    return simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm_cbrt_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_invcbrt_pd
  #define _mm_invcbrt_pd(a) simde_mm_invcbrt_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_invcbrt_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_invcbrt_ps(a);
  #else
    return simde_mm256_rcp_ps(simde_mm256_cbrt_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_invcbrt_ps
  #define _mm256_invcbrt_ps(a) simde_mm256_invcbrt_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_invcbrt_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_invcbrt_pd(a);
  #else
    return simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm256_cbrt_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_invcbrt_pd
  #define _mm256_invcbrt_pd(a) simde_mm256_invcbrt_pd(a)
#endif
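
/* Note the asymmetry above: the _ps fallbacks use rcp(cbrt(x)), where
 * _mm_rcp_ps/_mm256_rcp_ps are only ~12-bit reciprocal approximations,
 * while the _pd fallbacks use an exact 1.0 / cbrt(x) divide (there is no
 * double-precision rcp instruction).  If full single precision matters
 * more than speed, a division-based alternative is straightforward
 * (sketch, ours, not what the library does):
 *
 *   simde__m128 invcbrt = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)),
 *                                         simde_mm_cbrt_ps(a));
 */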

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_invsqrt_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_invsqrt_ps(a);
  #else
    return simde_mm_rcp_ps(simde_mm_sqrt_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_invsqrt_ps
  #define _mm_invsqrt_ps(a) simde_mm_invsqrt_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_invsqrt_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_invsqrt_pd(a);
  #else
    return simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm_sqrt_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_invsqrt_pd
  #define _mm_invsqrt_pd(a) simde_mm_invsqrt_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_invsqrt_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_invsqrt_ps(a);
  #else
    return simde_mm256_rcp_ps(simde_mm256_sqrt_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_invsqrt_ps
  #define _mm256_invsqrt_ps(a) simde_mm256_invsqrt_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_invsqrt_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_invsqrt_pd(a);
  #else
    return simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm256_sqrt_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_invsqrt_pd
  #define _mm256_invsqrt_pd(a) simde_mm256_invsqrt_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_invsqrt_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_invsqrt_ps(a);
  #else
    return simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), simde_mm512_sqrt_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_invsqrt_ps
  #define _mm512_invsqrt_ps(a) simde_mm512_invsqrt_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_invsqrt_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_invsqrt_pd(a);
  #else
    return simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm512_sqrt_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_invsqrt_pd
  #define _mm512_invsqrt_pd(a) simde_mm512_invsqrt_pd(a)
#endif
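
/* As with invcbrt, the 128/256-bit _ps paths trade accuracy for speed via
 * the ~12-bit rcp approximation, while the _pd and 512-bit paths use a
 * true divide.  A single-instruction alternative with similar accuracy
 * would be the hardware reciprocal square root (sketch, ours, not what
 * the library does):
 *
 *   simde__m128 approx = simde_mm_rsqrt_ps(a);   // ~12-bit 1/sqrt(x)
 */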

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_invsqrt_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_invsqrt_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_invsqrt_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_invsqrt_ps
  #define _mm512_mask_invsqrt_ps(src, k, a) simde_mm512_mask_invsqrt_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_invsqrt_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_invsqrt_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_invsqrt_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_invsqrt_pd
  #define _mm512_mask_invsqrt_pd(src, k, a) simde_mm512_mask_invsqrt_pd(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_log_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_log_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_logf4_u10(a);
    #else
      return Sleef_logf4_u35(a);
    #endif
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_logf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_log_ps
  #define _mm_log_ps(a) simde_mm_log_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_log_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_log_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_logd2_u10(a);
    #else
      return Sleef_logd2_u35(a);
    #endif
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_log(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_log_pd
  #define _mm_log_pd(a) simde_mm_log_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_log_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_log_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_logf8_u10(a);
    #else
      return Sleef_logf8_u35(a);
    #endif
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_log_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_logf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_log_ps
  #define _mm256_log_ps(a) simde_mm256_log_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_log_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_log_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_logd4_u10(a);
    #else
      return Sleef_logd4_u35(a);
    #endif
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_log_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_log(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_log_pd
  #define _mm256_log_pd(a) simde_mm256_log_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_log_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_log_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_logf16_u10(a);
    #else
      return Sleef_logf16_u35(a);
    #endif
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_log_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_logf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_log_ps
  #define _mm512_log_ps(a) simde_mm512_log_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_log_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_log_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_logd8_u10(a);
    #else
      return Sleef_logd8_u35(a);
    #endif
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_log_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_log(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_log_pd
  #define _mm512_log_pd(a) simde_mm512_log_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_log_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_log_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_log_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_log_ps
  #define _mm512_mask_log_ps(src, k, a) simde_mm512_mask_log_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_log_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_log_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_log_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_log_pd
  #define _mm512_mask_log_pd(src, k, a) simde_mm512_mask_log_pd(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_cdfnorminv_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cdfnorminv_ps(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    simde__m128 matched, retval = simde_mm_setzero_ps();

    { /* if (a < 0 || a > 1) */
      matched = simde_mm_or_ps(simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))), simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0))));

      /* We don't actually need to do anything here since we initialize
       * retval to 0.0. */
    }

    { /* else if (a == 0) */
      simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0)));
      mask = simde_mm_andnot_ps(matched, mask);
      matched = simde_mm_or_ps(matched, mask);

      simde__m128 res = simde_mm_set1_ps(-SIMDE_MATH_INFINITYF);

      retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res));
    }

    { /* else if (a == 1) */
      simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)));
      mask = simde_mm_andnot_ps(matched, mask);
      matched = simde_mm_or_ps(matched, mask);

      simde__m128 res = simde_mm_set1_ps(SIMDE_MATH_INFINITYF);

      retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res));
    }

    { /* Remaining conditions.
       *
       * Including the else case in this complicates things a lot, but
       * we're using cheap operations to get rid of expensive multiply
       * and add functions.  This should be a small improvement on SSE
       * prior to 4.1.  On SSE 4.1 we can use _mm_blendv_ps which is
       * very fast and this becomes a huge win.  NEON, AltiVec, and
       * WASM also have blend operations, so this should be a big win
       * there, too. */

      /* else if (a < 0.02425) */
      simde__m128 mask_lo = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.02425)));
      /* else if (a > 0.97575) */
      simde__m128 mask_hi = simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.97575)));

      simde__m128 mask = simde_mm_or_ps(mask_lo, mask_hi);
      matched = simde_mm_or_ps(matched, mask);

      /* else */
      simde__m128 mask_el = simde_x_mm_not_ps(matched);
      mask = simde_mm_or_ps(mask, mask_el);

      /* r = a - 0.5f */
      simde__m128 r = simde_mm_sub_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5)));

      /* lo: q = a
       * hi: q = (1.0 - a) */
      simde__m128 q = simde_mm_and_ps(mask_lo, a);
      q = simde_mm_or_ps(q, simde_mm_and_ps(mask_hi, simde_mm_sub_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), a)));

      /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */
      q = simde_mm_log_ps(q);
      q = simde_mm_mul_ps(q, simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.0)));
      q = simde_mm_sqrt_ps(q);

      /* el: q = r * r */
      q = simde_x_mm_select_ps(q, simde_mm_mul_ps(r, r), mask_el);

      /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) *  1.0f); */
      /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */
      /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) *  r); */
      simde__m128 numerator = simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)), mask_el);
      numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)), mask_el));
      numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)), mask_el));
      numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)), mask_el));
      numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)), mask_el));
      numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)), mask_el));
      {
        simde__m128 multiplier;
        multiplier =                            simde_mm_and_ps(mask_lo, simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.0)));
        multiplier = simde_mm_or_ps(multiplier, simde_mm_and_ps(mask_hi, simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.0))));
        multiplier = simde_mm_or_ps(multiplier, simde_mm_and_ps(mask_el, r));
        numerator = simde_mm_mul_ps(numerator, multiplier);
      }

      /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 +   0.0f) * q + 1); */
      /*    el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */
      simde__m128 denominator = simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)), mask_el);
      denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)), mask_el));
      denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)), mask_el));
      denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)), mask_el));
      denominator = simde_mm_fmadd_ps(denominator, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.0)), q, mask_el),
                                                   simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)), mask_el));
      denominator = simde_mm_fmadd_ps(denominator, q, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)));

      /* res = numerator / denominator; */
      simde__m128 res = simde_mm_div_ps(numerator, denominator);

      retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res));
    }

    return retval;
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_cdfnorminvf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_cdfnorminv_ps
  #define _mm_cdfnorminv_ps(a) simde_mm_cdfnorminv_ps(a)
#endif
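
/* The branchless path above is Peter Acklam's rational approximation to
 * the inverse normal CDF, with its three-way branch (lower tail
 * a < 0.02425, upper tail a > 0.97575, central region) folded into mask
 * selects so every lane runs the same fmadd chain.  Scalar shape of the
 * algorithm (illustrative; poly_a..poly_d are the fixed polynomials
 * whose coefficients appear in the selects above):
 *
 *   if (a < 0.02425) {                 // lower tail
 *     q = sqrt(-2.0 * log(a));
 *     x = poly_c(q) / poly_d(q);
 *   } else if (a > 0.97575) {          // upper tail
 *     q = sqrt(-2.0 * log(1.0 - a));
 *     x = -poly_c(q) / poly_d(q);
 *   } else {                           // central region
 *     r = a - 0.5;  q = r * r;
 *     x = r * poly_a(q) / poly_b(q);
 *   }
 */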

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_cdfnorminv_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_cdfnorminv_pd(a);
  #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    simde__m128d matched, retval = simde_mm_setzero_pd();

    { /* if (a < 0 || a > 1) */
      matched = simde_mm_or_pd(simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))), simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0))));

      /* We don't actually need to do anything here since we initialize
       * retval to 0.0. */
    }

    { /* else if (a == 0) */
      simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0)));
      mask = simde_mm_andnot_pd(matched, mask);
      matched = simde_mm_or_pd(matched, mask);

      simde__m128d res = simde_mm_set1_pd(-SIMDE_MATH_INFINITY);

      retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res));
    }

    { /* else if (a == 1) */
      simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)));
      mask = simde_mm_andnot_pd(matched, mask);
      matched = simde_mm_or_pd(matched, mask);

      simde__m128d res = simde_mm_set1_pd(SIMDE_MATH_INFINITY);

      retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res));
    }

    { /* Remaining conditions.
       *
       * Including the else case in this complicates things a lot, but
       * we're using cheap operations to get rid of expensive multiply
       * and add functions.  This should be a small improvement on SSE
       * prior to 4.1.  On SSE 4.1 we can use _mm_blendv_pd which is
       * very fast and this becomes a huge win.  NEON, AltiVec, and
       * WASM also have blend operations, so this should be a big win
       * there, too. */

      /* else if (a < 0.02425) */
      simde__m128d mask_lo = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.02425)));
      /* else if (a > 0.97575) */
      simde__m128d mask_hi = simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.97575)));

      simde__m128d mask = simde_mm_or_pd(mask_lo, mask_hi);
      matched = simde_mm_or_pd(matched, mask);

      /* else */
      simde__m128d mask_el = simde_x_mm_not_pd(matched);
      mask = simde_mm_or_pd(mask, mask_el);

      /* r = a - 0.5 */
      simde__m128d r = simde_mm_sub_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5)));

      /* lo: q = a
       * hi: q = (1.0 - a) */
      simde__m128d q = simde_mm_and_pd(mask_lo, a);
      q = simde_mm_or_pd(q, simde_mm_and_pd(mask_hi, simde_mm_sub_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), a)));

      /* q = simde_math_sqrt(-2.0 * simde_math_log(q)) */
      q = simde_mm_log_pd(q);
      q = simde_mm_mul_pd(q, simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.0)));
      q = simde_mm_sqrt_pd(q);

      /* el: q = r * r */
      q = simde_x_mm_select_pd(q, simde_mm_mul_pd(r, r), mask_el);

      /* lo: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) *  1.0); */
      /* hi: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0); */
      /* el: double numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) *  r); */
      simde__m128d numerator = simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)), mask_el);
      numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)), mask_el));
      numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)), mask_el));
      numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)), mask_el));
      numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)), mask_el));
      numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)), mask_el));
      {
        simde__m128d multiplier;
        multiplier =                            simde_mm_and_pd(mask_lo, simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.0)));
        multiplier = simde_mm_or_pd(multiplier, simde_mm_and_pd(mask_hi, simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.0))));
        multiplier = simde_mm_or_pd(multiplier, simde_mm_and_pd(mask_el, r));
        numerator = simde_mm_mul_pd(numerator, multiplier);
      }

      /* lo/hi: double denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 +   0.0) * q + 1); */
      /*    el: double denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */
      simde__m128d denominator = simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)), mask_el);
      denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)), mask_el));
      denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)), mask_el));
      denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)), mask_el));
      denominator = simde_mm_fmadd_pd(denominator, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.0)), q, mask_el),
                                                   simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)), mask_el));
      denominator = simde_mm_fmadd_pd(denominator, q, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)));

      /* res = numerator / denominator; */
      simde__m128d res = simde_mm_div_pd(numerator, denominator);

      retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res));
    }

    return retval;
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_cdfnorminv(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_cdfnorminv_pd
  #define _mm_cdfnorminv_pd(a) simde_mm_cdfnorminv_pd(a)
#endif
6016 
6017 SIMDE_FUNCTION_ATTRIBUTES
6018 simde__m256
6019 simde_mm256_cdfnorminv_ps (simde__m256 a) {
6020   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
6021     return _mm256_cdfnorminv_ps(a);
6022   #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256)
6023     simde__m256 matched, retval = simde_mm256_setzero_ps();
6024 
6025     { /* if (a < 0 || a > 1) */
6026       matched = simde_mm256_or_ps(simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ), simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_GT_OQ));
6027 
6028       /* We don't actually need to do anything here since we initialize
6029        * retval to 0.0. */
6030     }
6031 
6032     { /* else if (a == 0) */
6033       simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ);
6034       mask = simde_mm256_andnot_ps(matched, mask);
6035       matched = simde_mm256_or_ps(matched, mask);
6036 
6037       simde__m256 res = simde_mm256_set1_ps(-SIMDE_MATH_INFINITYF);
6038 
6039       retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res));
6040     }
6041 
6042     { /* else if (a == 1) */
6043       simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_EQ_OQ);
6044       mask = simde_mm256_andnot_ps(matched, mask);
6045       matched = simde_mm256_or_ps(matched, mask);
6046 
6047       simde__m256 res = simde_mm256_set1_ps(SIMDE_MATH_INFINITYF);
6048 
6049       retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res));
6050     }
6051 
6052     { /* Remaining conditions.
6053        *
6054        * Including the else case here complicates things a lot, but
6055        * we're using cheap operations to get rid of expensive multiply
6056        * and add functions.  This should be a small improvement on
6057        * targets without a native blend.  With AVX we can use
6058        * _mm256_blendv_ps, which is very fast, and this becomes a
6059        * huge win.  NEON, AltiVec, and WASM also have blend
6060        * operations, so this should be a big win there, too. */
6061 
6062       /* else if (a < 0.02425) */
6063       simde__m256 mask_lo = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.02425)), SIMDE_CMP_LT_OQ);
6064       /* else if (a > 0.97575) */
6065       simde__m256 mask_hi = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.97575)), SIMDE_CMP_GT_OQ);
6066 
6067       simde__m256 mask = simde_mm256_or_ps(mask_lo, mask_hi);
6068       matched = simde_mm256_or_ps(matched, mask);
6069 
6070       /* else */
6071       simde__m256 mask_el = simde_x_mm256_not_ps(matched);
6072       mask = simde_mm256_or_ps(mask, mask_el);
6073 
6074       /* r = a - 0.5f */
6075       simde__m256 r = simde_mm256_sub_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5)));
6076 
6077       /* lo: q = a
6078        * hi: q = (1.0 - a) */
6079       simde__m256 q = simde_mm256_and_ps(mask_lo, a);
6080       q = simde_mm256_or_ps(q, simde_mm256_and_ps(mask_hi, simde_mm256_sub_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), a)));
6081 
6082       /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */
6083       q = simde_mm256_log_ps(q);
6084       q = simde_mm256_mul_ps(q, simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.0)));
6085       q = simde_mm256_sqrt_ps(q);
6086 
6087       /* el: q = r * r */
6088       q = simde_x_mm256_select_ps(q, simde_mm256_mul_ps(r, r), mask_el);
6089 
6090       /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) *  1.0f); */
6091       /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */
6092       /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) *  r); */
6093       simde__m256 numerator = simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)), mask_el);
6094       numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)), mask_el));
6095       numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)), mask_el));
6096       numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)), mask_el));
6097       numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)), mask_el));
6098       numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)), mask_el));
6099       {
6100         simde__m256 multiplier;
6101         multiplier =                            simde_mm256_and_ps(mask_lo, simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.0)));
6102         multiplier = simde_mm256_or_ps(multiplier, simde_mm256_and_ps(mask_hi, simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.0))));
6103         multiplier = simde_mm256_or_ps(multiplier, simde_mm256_and_ps(mask_el, r));
6104         numerator = simde_mm256_mul_ps(numerator, multiplier);
6105       }
6106 
6107       /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 +   0.0f) * q + 1); */
6108       /*    el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */
6109       simde__m256 denominator = simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)), mask_el);
6110       denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)), mask_el));
6111       denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)), mask_el));
6112       denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)), mask_el));
6113       denominator = simde_mm256_fmadd_ps(denominator, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.0)), q, mask_el),
6114                                                    simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)), mask_el));
6115       denominator = simde_mm256_fmadd_ps(denominator, q, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)));
6116 
6117       /* res = numerator / denominator; */
6118       simde__m256 res = simde_mm256_div_ps(numerator, denominator);
6119 
6120       retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res));
6121     }
6122 
6123     return retval;
6124   #else
6125     simde__m256_private
6126       r_,
6127       a_ = simde__m256_to_private(a);
6128 
6129     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
6130       for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
6131         r_.m128[i] = simde_mm_cdfnorminv_ps(a_.m128[i]);
6132       }
6133     #else
6134       SIMDE_VECTORIZE
6135       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
6136         r_.f32[i] = simde_math_cdfnorminvf(a_.f32[i]);
6137       }
6138     #endif
6139 
6140     return simde__m256_from_private(r_);
6141   #endif
6142 }
6143 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6144   #undef _mm256_cdfnorminv_ps
6145   #define _mm256_cdfnorminv_ps(a) simde_mm256_cdfnorminv_ps(a)
6146 #endif
6147 
6148 SIMDE_FUNCTION_ATTRIBUTES
6149 simde__m256d
6150 simde_mm256_cdfnorminv_pd (simde__m256d a) {
6151   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
6152     return _mm256_cdfnorminv_pd(a);
6153   #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256)
6154     simde__m256d matched, retval = simde_mm256_setzero_pd();
6155 
6156     { /* if (a < 0 || a > 1) */
6157       matched = simde_mm256_or_pd(simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ), simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_GT_OQ));
6158 
6159       /* We don't actually need to do anything here since we initialize
6160        * retval to 0.0. */
6161     }
6162 
6163     { /* else if (a == 0) */
6164       simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ);
6165       mask = simde_mm256_andnot_pd(matched, mask);
6166       matched = simde_mm256_or_pd(matched, mask);
6167 
6168       simde__m256d res = simde_mm256_set1_pd(-SIMDE_MATH_INFINITY);
6169 
6170       retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res));
6171     }
6172 
6173     { /* else if (a == 1) */
6174       simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_EQ_OQ);
6175       mask = simde_mm256_andnot_pd(matched, mask);
6176       matched = simde_mm256_or_pd(matched, mask);
6177 
6178       simde__m256d res = simde_mm256_set1_pd(SIMDE_MATH_INFINITY);
6179 
6180       retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res));
6181     }
6182 
6183     { /* Remaining conditions.
6184        *
6185        * Including the else case here complicates things a lot, but
6186        * we're using cheap operations to get rid of expensive multiply
6187        * and add functions.  This should be a small improvement on
6188        * targets without a native blend.  With AVX we can use
6189        * _mm256_blendv_pd, which is very fast, and this becomes a
6190        * huge win.  NEON, AltiVec, and WASM also have blend
6191        * operations, so this should be a big win there, too. */
6192 
6193       /* else if (a < 0.02425) */
6194       simde__m256d mask_lo = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.02425)), SIMDE_CMP_LT_OQ);
6195       /* else if (a > 0.97575) */
6196       simde__m256d mask_hi = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.97575)), SIMDE_CMP_GT_OQ);
6197 
6198       simde__m256d mask = simde_mm256_or_pd(mask_lo, mask_hi);
6199       matched = simde_mm256_or_pd(matched, mask);
6200 
6201       /* else */
6202       simde__m256d mask_el = simde_x_mm256_not_pd(matched);
6203       mask = simde_mm256_or_pd(mask, mask_el);
6204 
6205       /* r = a - 0.5 */
6206       simde__m256d r = simde_mm256_sub_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5)));
6207 
6208       /* lo: q = a
6209        * hi: q = (1.0 - a) */
6210       simde__m256d q = simde_mm256_and_pd(mask_lo, a);
6211       q = simde_mm256_or_pd(q, simde_mm256_and_pd(mask_hi, simde_mm256_sub_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), a)));
6212 
6213       /* q = simde_math_sqrt(-2.0 * simde_math_log(q)) */
6214       q = simde_mm256_log_pd(q);
6215       q = simde_mm256_mul_pd(q, simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.0)));
6216       q = simde_mm256_sqrt_pd(q);
6217 
6218       /* el: q = r * r */
6219       q = simde_x_mm256_select_pd(q, simde_mm256_mul_pd(r, r), mask_el);
6220 
6221       /* lo: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) *  1.0); */
6222       /* hi: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0); */
6223       /* el: double numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) *  r); */
6224       simde__m256d numerator = simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)), mask_el);
6225       numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)), mask_el));
6226       numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)), mask_el));
6227       numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)), mask_el));
6228       numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)), mask_el));
6229       numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)), mask_el));
6230       {
6231         simde__m256d multiplier;
6232         multiplier =                            simde_mm256_and_pd(mask_lo, simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.0)));
6233         multiplier = simde_mm256_or_pd(multiplier, simde_mm256_and_pd(mask_hi, simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.0))));
6234         multiplier = simde_mm256_or_pd(multiplier, simde_mm256_and_pd(mask_el, r));
6235         numerator = simde_mm256_mul_pd(numerator, multiplier);
6236       }
6237 
6238       /* lo/hi: double denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 +   0.0) * q + 1); */
6239       /*    el: double denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */
6240       simde__m256d denominator = simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)), mask_el);
6241       denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)), mask_el));
6242       denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)), mask_el));
6243       denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)), mask_el));
6244       denominator = simde_mm256_fmadd_pd(denominator, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.0)), q, mask_el),
6245                                                    simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.0)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)), mask_el));
6246       denominator = simde_mm256_fmadd_pd(denominator, q, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)));
6247 
6248       /* res = numerator / denominator; */
6249       simde__m256d res = simde_mm256_div_pd(numerator, denominator);
6250 
6251       retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res));
6252     }
6253 
6254     return retval;
6255   #else
6256     simde__m256d_private
6257       r_,
6258       a_ = simde__m256d_to_private(a);
6259 
6260     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
6261       for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
6262         r_.m128d[i] = simde_mm_cdfnorminv_pd(a_.m128d[i]);
6263       }
6264     #else
6265       SIMDE_VECTORIZE
6266       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
6267         r_.f64[i] = simde_math_cdfnorminv(a_.f64[i]);
6268       }
6269     #endif
6270 
6271     return simde__m256d_from_private(r_);
6272   #endif
6273 }
6274 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6275   #undef _mm256_cdfnorminv_pd
6276   #define _mm256_cdfnorminv_pd(a) simde_mm256_cdfnorminv_pd(a)
6277 #endif
6278 
6279 SIMDE_FUNCTION_ATTRIBUTES
6280 simde__m512
6281 simde_mm512_cdfnorminv_ps (simde__m512 a) {
6282   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
6283     return _mm512_cdfnorminv_ps(a);
6284   #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256)
6285     simde__m512_private
6286       r_,
6287       a_ = simde__m512_to_private(a);
6288 
6289     for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
6290       r_.m256[i] = simde_mm256_cdfnorminv_ps(a_.m256[i]);
6291     }
6292 
6293     return simde__m512_from_private(r_);
6294   #else
6295 
6296     simde__m512 retval = simde_mm512_setzero_ps();
6297     simde__mmask16 matched;
6298 
6299     { /* if (a < 0 || a > 1) */
6300       matched  = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ);
6301       matched |= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_GT_OQ);
6302 
6303       /* We don't actually need to do anything here since we initialize
6304        * retval to 0.0. */
6305     }
6306 
6307     { /* else if (a == 0) */
6308       simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ);
6309       matched |= mask;
6310 
6311       retval = simde_mm512_mask_mov_ps(retval, mask, simde_mm512_set1_ps(-SIMDE_MATH_INFINITYF));
6312     }
6313 
6314     { /* else if (a == 1) */
6315       simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_EQ_OQ);
6316       matched |= mask;
6317 
6318       retval = simde_mm512_mask_mov_ps(retval, mask, simde_mm512_set1_ps(SIMDE_MATH_INFINITYF));
6319     }
6320 
6321     { /* else if (a < 0.02425) */
6322       simde__mmask16 mask_lo = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.02425)), SIMDE_CMP_LT_OQ);
6323       /* else if (a > 0.97575) */
6324       simde__mmask16 mask_hi = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.97575)), SIMDE_CMP_GT_OQ);
6325 
6326       simde__mmask16 mask = mask_lo | mask_hi;
6327       matched = matched | mask;
6328 
6329       /* else */
6330       simde__mmask16 mask_el = ~matched;
6331       mask = mask | mask_el;
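      /* With AVX-512 kmasks the "else" lanes are just the complement of
       * everything matched so far, so no vector-register masks are
       * needed here. */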
6332 
6333       /* r = a - 0.5f */
6334       simde__m512 r = simde_mm512_sub_ps(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)));
6335 
6336       /* lo: q = a
6337        * hi: q = (1.0 - a) */
6338       simde__m512 q = simde_mm512_maskz_mov_ps(mask_lo, a);
6339       q = simde_mm512_mask_sub_ps(q, mask_hi, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a);
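      /* maskz_mov zeroes every lane outside mask_lo, then mask_sub
       * writes (1.0 - a) into the mask_hi lanes and leaves the rest
       * untouched; the mask_el lanes are overwritten with r * r below. */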
6340 
6341       /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */
6342       q = simde_mm512_log_ps(q);
6343       q = simde_mm512_mul_ps(q, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.0)));
6344       q = simde_mm512_sqrt_ps(q);
6345 
6346       /* el: q = r * r */
6347       q = simde_mm512_mask_mul_ps(q, mask_el, r, r);
6348 
6349       /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) *  1.0f); */
6350       /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */
6351       /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) *  r); */
6352       simde__m512 numerator = simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)));
6353       numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02))));
6354       numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02))));
6355       numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02))));
6356       numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01))));
6357       numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00))));
6358       {
6359         simde__m512 multiplier;
6360         multiplier =                                              simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.0));
6361         multiplier = simde_mm512_mask_mov_ps(multiplier, mask_hi, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.0)));
6362         multiplier = simde_mm512_mask_mov_ps(multiplier, mask_el, r);
6363         numerator = simde_mm512_mul_ps(numerator, multiplier);
6364       }
6365 
6366       /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 +   0.0f) * q + 1); */
6367       /*    el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */
6368       simde__m512 denominator = simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)));
6369       denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02))));
6370       denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02))));
6371       denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01))));
6372       denominator = simde_mm512_fmadd_ps(denominator, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.0)), mask_el, q),
6373                                                       simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.0)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01))));
6374       denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)));
6375 
6376       /* res = numerator / denominator; */
6377       retval = simde_mm512_mask_div_ps(retval, mask_lo | mask_hi | mask_el, numerator, denominator);
6378     }
6379 
6380     return retval;
6381   #endif
6382 }
6383 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6384   #undef _mm512_cdfnorminv_ps
6385   #define _mm512_cdfnorminv_ps(a) simde_mm512_cdfnorminv_ps(a)
6386 #endif
6387 
6388 SIMDE_FUNCTION_ATTRIBUTES
6389 simde__m512d
6390 simde_mm512_cdfnorminv_pd (simde__m512d a) {
6391   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
6392     return _mm512_cdfnorminv_pd(a);
6393   #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256)
6394     simde__m512d_private
6395       r_,
6396       a_ = simde__m512d_to_private(a);
6397 
6398     for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
6399       r_.m256d[i] = simde_mm256_cdfnorminv_pd(a_.m256d[i]);
6400     }
6401 
6402     return simde__m512d_from_private(r_);
6403   #else
6404 
6405     simde__m512d retval = simde_mm512_setzero_pd();
6406     simde__mmask8 matched;
6407 
6408     { /* if (a < 0 || a > 1) */
6409       matched  = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ);
6410       matched |= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_GT_OQ);
6411 
6412       /* We don't actually need to do anything here since we initialize
6413        * retval to 0.0. */
6414     }
6415 
6416     { /* else if (a == 0) */
6417       simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ);
6418       matched |= mask;
6419 
6420       retval = simde_mm512_mask_mov_pd(retval, mask, simde_mm512_set1_pd(-SIMDE_MATH_INFINITY));
6421     }
6422 
6423     { /* else if (a == 1) */
6424       simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_EQ_OQ);
6425       matched |= mask;
6426 
6427       retval = simde_mm512_mask_mov_pd(retval, mask, simde_mm512_set1_pd(SIMDE_MATH_INFINITY));
6428     }
6429 
6430     { /* else if (a < 0.02425) */
6431       simde__mmask8 mask_lo = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.02425)), SIMDE_CMP_LT_OQ);
6432       /* else if (a > 0.97575) */
6433       simde__mmask8 mask_hi = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.97575)), SIMDE_CMP_GT_OQ);
6434 
6435       simde__mmask8 mask = mask_lo | mask_hi;
6436       matched = matched | mask;
6437 
6438       /* else */
6439       simde__mmask8 mask_el = ~matched;
6440       mask = mask | mask_el;
6441 
6442       /* r = a - 0.5 */
6443       simde__m512d r = simde_mm512_sub_pd(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5)));
6444 
6445       /* lo: q = a
6446        * hi: q = (1.0 - a) */
6447       simde__m512d q = simde_mm512_maskz_mov_pd(mask_lo, a);
6448       q = simde_mm512_mask_sub_pd(q, mask_hi, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a);
6449 
6450       /* q = simde_math_sqrt(-2.0 * simde_math_log(q)) */
6451       q = simde_mm512_log_pd(q);
6452       q = simde_mm512_mul_pd(q, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.0)));
6453       q = simde_mm512_sqrt_pd(q);
6454 
6455       /* el: q = r * r */
6456       q = simde_mm512_mask_mul_pd(q, mask_el, r, r);
6457 
6458       /* lo: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) *  1.0); */
6459       /* hi: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0); */
6460       /* el: double numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) *  r); */
6461       simde__m512d numerator = simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)));
6462       numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02))));
6463       numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02))));
6464       numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02))));
6465       numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01))));
6466       numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00))));
6467       {
6468         simde__m512d multiplier;
6469         multiplier =                                              simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.0));
6470         multiplier = simde_mm512_mask_mov_pd(multiplier, mask_hi, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.0)));
6471         multiplier = simde_mm512_mask_mov_pd(multiplier, mask_el, r);
6472         numerator = simde_mm512_mul_pd(numerator, multiplier);
6473       }
6474 
6475       /* lo/hi: double denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 +   0.0) * q + 1); */
6476       /*    el: double denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */
6477       simde__m512d denominator = simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)));
6478       denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02))));
6479       denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02))));
6480       denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01))));
6481       denominator = simde_mm512_fmadd_pd(denominator, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.0)), mask_el, q),
6482                                                       simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.0)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01))));
6483       denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)));
6484 
6485       /* res = numerator / denominator; */
6486       retval = simde_mm512_mask_div_pd(retval, mask_lo | mask_hi | mask_el, numerator, denominator);
6487     }
6488 
6489     return retval;
6490   #endif
6491 }
6492 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6493   #undef _mm512_cdfnorminv_pd
6494   #define _mm512_cdfnorminv_pd(a) simde_mm512_cdfnorminv_pd(a)
6495 #endif
6496 
6497 SIMDE_FUNCTION_ATTRIBUTES
6498 simde__m512
6499 simde_mm512_mask_cdfnorminv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
6500   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
6501     return _mm512_mask_cdfnorminv_ps(src, k, a);
6502   #else
6503     return simde_mm512_mask_mov_ps(src, k, simde_mm512_cdfnorminv_ps(a));
6504   #endif
6505 }
6506 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6507   #undef _mm512_mask_cdfnorminv_ps
6508   #define _mm512_mask_cdfnorminv_ps(src, k, a) simde_mm512_mask_cdfnorminv_ps(src, k, a)
6509 #endif
6510 
6511 SIMDE_FUNCTION_ATTRIBUTES
6512 simde__m512d
6513 simde_mm512_mask_cdfnorminv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
6514   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
6515     return _mm512_mask_cdfnorminv_pd(src, k, a);
6516   #else
6517     return simde_mm512_mask_mov_pd(src, k, simde_mm512_cdfnorminv_pd(a));
6518   #endif
6519 }
6520 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6521   #undef _mm512_mask_cdfnorminv_pd
6522   #define _mm512_mask_cdfnorminv_pd(src, k, a) simde_mm512_mask_cdfnorminv_pd(src, k, a)
6523 #endif
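
/* The mask variants update only the lanes whose bit is set in k and
 * copy src through everywhere else.  A hypothetical usage sketch (the
 * variable names are illustrative, not part of the API):
 *
 *   simde__m512d src = simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0));
 *   simde__mmask8 k = UINT8_C(0x0F);
 *   simde__m512d r = simde_mm512_mask_cdfnorminv_pd(src, k, a);
 *
 * computes cdfnorminv in the low four lanes and passes src through in
 * the high four. */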
6524 
6525 SIMDE_FUNCTION_ATTRIBUTES
6526 simde__m128
6527 simde_mm_erfinv_ps (simde__m128 a) {
6528   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
6529     return _mm_erfinv_ps(a);
6530   #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
6531     /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c */
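    /* This is the Winitzki-style approximation described at the link
     * above, with k = 0.147:
     *
     *   erfinv(x) ~= sgn(x) * sqrt(sqrt(tt1 * tt1 - tt2) - tt1)
     *
     * where tt1 = 2 / (pi * k) + log(1 - x * x) / 2 and
     * tt2 = log(1 - x * x) / k. */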
6532     simde__m128 one = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0));
6533 
6534     simde__m128 lnx = simde_mm_log_ps(simde_mm_mul_ps(simde_mm_sub_ps(one, a), simde_mm_add_ps(one, a)));
6535 
6536     simde__m128 tt1 = simde_mm_mul_ps(simde_mm_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.147)));
6537     tt1 = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1);
6538     tt1 = simde_mm_add_ps(tt1, simde_mm_mul_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx));
6539 
6540     simde__m128 tt2 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147));
6541     tt2 = simde_mm_mul_ps(tt2, lnx);
6542 
6543     simde__m128 r = simde_mm_mul_ps(tt1, tt1);
6544     r = simde_mm_sub_ps(r, tt2);
6545     r = simde_mm_sqrt_ps(r);
6546     r = simde_mm_add_ps(simde_x_mm_negate_ps(tt1), r);
6547     r = simde_mm_sqrt_ps(r);
6548 
6549     return simde_x_mm_xorsign_ps(r, a);
6550   #else
6551     simde__m128_private
6552       a_ = simde__m128_to_private(a),
6553       r_;
6554 
6555     SIMDE_VECTORIZE
6556     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
6557       r_.f32[i] = simde_math_erfinvf(a_.f32[i]);
6558     }
6559 
6560     return simde__m128_from_private(r_);
6561   #endif
6562 }
6563 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6564   #undef _mm_erfinv_ps
6565   #define _mm_erfinv_ps(a) simde_mm_erfinv_ps(a)
6566 #endif
6567 
6568 SIMDE_FUNCTION_ATTRIBUTES
6569 simde__m128d
6570 simde_mm_erfinv_pd (simde__m128d a) {
6571   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
6572     return _mm_erfinv_pd(a);
6573   #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
6574     simde__m128d one = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0));
6575 
6576     simde__m128d lnx = simde_mm_log_pd(simde_mm_mul_pd(simde_mm_sub_pd(one, a), simde_mm_add_pd(one, a)));
6577 
6578     simde__m128d tt1 = simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_MATH_PI), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.147)));
6579     tt1 = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1);
6580     tt1 = simde_mm_add_pd(tt1, simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx));
6581 
6582     simde__m128d tt2 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147));
6583     tt2 = simde_mm_mul_pd(tt2, lnx);
6584 
6585     simde__m128d r = simde_mm_mul_pd(tt1, tt1);
6586     r = simde_mm_sub_pd(r, tt2);
6587     r = simde_mm_sqrt_pd(r);
6588     r = simde_mm_add_pd(simde_x_mm_negate_pd(tt1), r);
6589     r = simde_mm_sqrt_pd(r);
6590 
6591     return simde_x_mm_xorsign_pd(r, a);
6592   #else
6593     simde__m128d_private
6594       a_ = simde__m128d_to_private(a),
6595       r_;
6596 
6597     SIMDE_VECTORIZE
6598     for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
6599       r_.f64[i] = simde_math_erfinv(a_.f64[i]);
6600     }
6601 
6602     return simde__m128d_from_private(r_);
6603   #endif
6604 }
6605 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6606   #undef _mm_erfinv_pd
6607   #define _mm_erfinv_pd(a) simde_mm_erfinv_pd(a)
6608 #endif
6609 
6610 SIMDE_FUNCTION_ATTRIBUTES
6611 simde__m256
6612 simde_mm256_erfinv_ps (simde__m256 a) {
6613   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
6614     return _mm256_erfinv_ps(a);
6615   #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
6616     simde__m256 one = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0));
6617     simde__m256 sgn = simde_x_mm256_copysign_ps(one, a);
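    /* Capture the sign of the input up front; a is overwritten with
     * (1 - a) * (1 + a) == 1 - a * a below. */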
6618 
6619     a = simde_mm256_mul_ps(simde_mm256_sub_ps(one, a), simde_mm256_add_ps(one, a));
6620     simde__m256 lnx = simde_mm256_log_ps(a);
6621 
6622     simde__m256 tt1 = simde_mm256_mul_ps(simde_mm256_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.147)));
6623     tt1 = simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1);
6624     tt1 = simde_mm256_add_ps(tt1, simde_mm256_mul_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx));
6625 
6626     simde__m256 tt2 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147));
6627     tt2 = simde_mm256_mul_ps(tt2, lnx);
6628 
6629     simde__m256 r = simde_mm256_mul_ps(tt1, tt1);
6630     r = simde_mm256_sub_ps(r, tt2);
6631     r = simde_mm256_sqrt_ps(r);
6632     r = simde_mm256_add_ps(simde_x_mm256_negate_ps(tt1), r);
6633     r = simde_mm256_sqrt_ps(r);
6634 
6635     return simde_mm256_mul_ps(sgn, r);
6636   #else
6637     simde__m256_private
6638       a_ = simde__m256_to_private(a),
6639       r_;
6640 
6641     SIMDE_VECTORIZE
6642     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
6643       r_.f32[i] = simde_math_erfinvf(a_.f32[i]);
6644     }
6645 
6646     return simde__m256_from_private(r_);
6647   #endif
6648 }
6649 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6650   #undef _mm256_erfinv_ps
6651   #define _mm256_erfinv_ps(a) simde_mm256_erfinv_ps(a)
6652 #endif
6653 
6654 SIMDE_FUNCTION_ATTRIBUTES
6655 simde__m256d
6656 simde_mm256_erfinv_pd (simde__m256d a) {
6657   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
6658     return _mm256_erfinv_pd(a);
6659   #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
6660     simde__m256d one = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0));
6661     simde__m256d sgn = simde_x_mm256_copysign_pd(one, a);
6662 
6663     a = simde_mm256_mul_pd(simde_mm256_sub_pd(one, a), simde_mm256_add_pd(one, a));
6664     simde__m256d lnx = simde_mm256_log_pd(a);
6665 
6666     simde__m256d tt1 = simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_MATH_PI), simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.147)));
6667     tt1 = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1);
6668     tt1 = simde_mm256_add_pd(tt1, simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx));
6669 
6670     simde__m256d tt2 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147));
6671     tt2 = simde_mm256_mul_pd(tt2, lnx);
6672 
6673     simde__m256d r = simde_mm256_mul_pd(tt1, tt1);
6674     r = simde_mm256_sub_pd(r, tt2);
6675     r = simde_mm256_sqrt_pd(r);
6676     r = simde_mm256_add_pd(simde_x_mm256_negate_pd(tt1), r);
6677     r = simde_mm256_sqrt_pd(r);
6678 
6679     return simde_mm256_mul_pd(sgn, r);
6680   #else
6681     simde__m256d_private
6682       a_ = simde__m256d_to_private(a),
6683       r_;
6684 
6685     SIMDE_VECTORIZE
6686     for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
6687       r_.f64[i] = simde_math_erfinv(a_.f64[i]);
6688     }
6689 
6690     return simde__m256d_from_private(r_);
6691   #endif
6692 }
6693 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6694   #undef _mm256_erfinv_pd
6695   #define _mm256_erfinv_pd(a) simde_mm256_erfinv_pd(a)
6696 #endif
6697 
6698 SIMDE_FUNCTION_ATTRIBUTES
6699 simde__m512
6700 simde_mm512_erfinv_ps (simde__m512 a) {
6701   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
6702     return _mm512_erfinv_ps(a);
6703   #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
6704     simde__m512 one = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0));
6705     simde__m512 sgn = simde_x_mm512_copysign_ps(one, a);
6706 
6707     a = simde_mm512_mul_ps(simde_mm512_sub_ps(one, a), simde_mm512_add_ps(one, a));
6708     simde__m512 lnx = simde_mm512_log_ps(a);
6709 
6710     simde__m512 tt1 = simde_mm512_mul_ps(simde_mm512_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.147)));
6711     tt1 = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1);
6712     tt1 = simde_mm512_add_ps(tt1, simde_mm512_mul_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx));
6713 
6714     simde__m512 tt2 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147));
6715     tt2 = simde_mm512_mul_ps(tt2, lnx);
6716 
6717     simde__m512 r = simde_mm512_mul_ps(tt1, tt1);
6718     r = simde_mm512_sub_ps(r, tt2);
6719     r = simde_mm512_sqrt_ps(r);
6720     r = simde_mm512_add_ps(simde_x_mm512_negate_ps(tt1), r);
6721     r = simde_mm512_sqrt_ps(r);
6722 
6723     return simde_mm512_mul_ps(sgn, r);
6724   #else
6725     simde__m512_private
6726       a_ = simde__m512_to_private(a),
6727       r_;
6728 
6729     SIMDE_VECTORIZE
6730     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
6731       r_.f32[i] = simde_math_erfinvf(a_.f32[i]);
6732     }
6733 
6734     return simde__m512_from_private(r_);
6735   #endif
6736 }
6737 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6738   #undef _mm512_erfinv_ps
6739   #define _mm512_erfinv_ps(a) simde_mm512_erfinv_ps(a)
6740 #endif
6741 
6742 SIMDE_FUNCTION_ATTRIBUTES
6743 simde__m512d
6744 simde_mm512_erfinv_pd (simde__m512d a) {
6745   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
6746     return _mm512_erfinv_pd(a);
6747   #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
6748     simde__m512d one = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0));
6749     simde__m512d sgn = simde_x_mm512_copysign_pd(one, a);
6750 
6751     a = simde_mm512_mul_pd(simde_mm512_sub_pd(one, a), simde_mm512_add_pd(one, a));
6752     simde__m512d lnx = simde_mm512_log_pd(a);
6753 
6754     simde__m512d tt1 = simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_MATH_PI), simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.147)));
6755     tt1 = simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1);
6756     tt1 = simde_mm512_add_pd(tt1, simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx));
6757 
6758     simde__m512d tt2 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147));
6759     tt2 = simde_mm512_mul_pd(tt2, lnx);
6760 
6761     simde__m512d r = simde_mm512_mul_pd(tt1, tt1);
6762     r = simde_mm512_sub_pd(r, tt2);
6763     r = simde_mm512_sqrt_pd(r);
6764     r = simde_mm512_add_pd(simde_x_mm512_negate_pd(tt1), r);
6765     r = simde_mm512_sqrt_pd(r);
6766 
6767     return simde_mm512_mul_pd(sgn, r);
6768   #else
6769     simde__m512d_private
6770       a_ = simde__m512d_to_private(a),
6771       r_;
6772 
6773     SIMDE_VECTORIZE
6774     for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
6775       r_.f64[i] = simde_math_erfinv(a_.f64[i]);
6776     }
6777 
6778     return simde__m512d_from_private(r_);
6779   #endif
6780 }
6781 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6782   #undef _mm512_erfinv_pd
6783   #define _mm512_erfinv_pd(a) simde_mm512_erfinv_pd(a)
6784 #endif
6785 
6786 SIMDE_FUNCTION_ATTRIBUTES
6787 simde__m512
6788 simde_mm512_mask_erfinv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
6789   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
6790     return _mm512_mask_erfinv_ps(src, k, a);
6791   #else
6792     return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfinv_ps(a));
6793   #endif
6794 }
6795 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6796   #undef _mm512_mask_erfinv_ps
6797   #define _mm512_mask_erfinv_ps(src, k, a) simde_mm512_mask_erfinv_ps(src, k, a)
6798 #endif
6799 
6800 SIMDE_FUNCTION_ATTRIBUTES
6801 simde__m512d
6802 simde_mm512_mask_erfinv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
6803   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
6804     return _mm512_mask_erfinv_pd(src, k, a);
6805   #else
6806     return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfinv_pd(a));
6807   #endif
6808 }
6809 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6810   #undef _mm512_mask_erfinv_pd
6811   #define _mm512_mask_erfinv_pd(src, k, a) simde_mm512_mask_erfinv_pd(src, k, a)
6812 #endif
6813 
6814 SIMDE_FUNCTION_ATTRIBUTES
6815 simde__m128
6816 simde_mm_erfcinv_ps (simde__m128 a) {
6817   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
6818     return _mm_erfcinv_ps(a);
6819   #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
6820     simde__m128 matched, retval = simde_mm_setzero_ps();
6821 
6822     { /* if (a < 2.0f && a > 0.0625f) */
6823       matched = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)));
6824       matched = simde_mm_and_ps(matched, simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0625))));
6825 
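      /* For mid-range inputs we can use the identity
       * erfcinv(a) == erfinv(1 - a) and defer to the vectorized erfinv
       * above. */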
6826       if (!simde_mm_test_all_zeros(simde_mm_castps_si128(matched), simde_x_mm_setone_si128())) {
6827         retval = simde_mm_erfinv_ps(simde_mm_sub_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), a));
6828       }
6829 
6830       if (simde_mm_test_all_ones(simde_mm_castps_si128(matched))) {
6831         return retval;
6832       }
6833     }
6834 
6835     { /* else if (a < 0.0625f && a > 0.0f) */
6836       simde__m128 mask = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0625)));
6837       mask = simde_mm_and_ps(mask, simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))));
6838       mask = simde_mm_andnot_ps(matched, mask);
6839 
6840       if (!simde_mm_test_all_zeros(simde_mm_castps_si128(mask), simde_x_mm_setone_si128())) {
6841         matched = simde_mm_or_ps(matched, mask);
6842 
6843         /* t =  1/(sqrt(-log(a))) */
6844         simde__m128 t = simde_x_mm_negate_ps(simde_mm_log_ps(a));
6845         t = simde_mm_sqrt_ps(t);
6846         t = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), t);
6847 
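        /* p[] and q[] define a rational approximation in t for this
         * tail region; the numerator and denominator are evaluated
         * below with Horner's scheme via fmadd. */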
6848         const simde__m128 p[] = {
6849           simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)),
6850           simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)),
6851           simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)),
6852           simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)),
6853           simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.680544246825)),
6854           simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.164441567910))
6855         };
6856 
6857         const simde__m128 q[] = {
6858           simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)),
6859           simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)),
6860           simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.000000000000))
6861         };
6862 
6863         /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */
6864         simde__m128 numerator = simde_mm_fmadd_ps(p[5], t, p[4]);
6865         numerator = simde_mm_fmadd_ps(numerator, t, p[3]);
6866         numerator = simde_mm_fmadd_ps(numerator, t, p[2]);
6867         numerator = simde_mm_fmadd_ps(numerator, t, p[1]);
6868         numerator = simde_mm_add_ps(numerator, simde_mm_div_ps(p[0], t));
6869 
6870         /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */
6871         simde__m128 denominator = simde_mm_fmadd_ps(q[2], t, q[1]);
6872         denominator = simde_mm_fmadd_ps(denominator, t, q[0]);
6873 
6874         simde__m128 res = simde_mm_div_ps(numerator, denominator);
6875 
6876         retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res));
6877       }
6878     }
6879 
6880     { /* else if (a < 0.0f) */
6881       simde__m128 mask = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0)));
6882       mask = simde_mm_andnot_ps(matched, mask);
6883 
6884       if (!simde_mm_test_all_zeros(simde_mm_castps_si128(mask), simde_x_mm_setone_si128())) {
6885         matched = simde_mm_or_ps(matched, mask);
6886 
6887         /* t =  1/(sqrt(-log(a))) */
6888         simde__m128 t = simde_x_mm_negate_ps(simde_mm_log_ps(a));
6889         t = simde_mm_sqrt_ps(t);
6890         t = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), t);
6891 
6892         const simde__m128 p[] = {
6893           simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)),
6894           simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)),
6895           simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.97302949837000)),
6896           simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.5374947401000))
6897         };
6898 
6899         const simde__m128 q[] = {
6900           simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)),
6901           simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)),
6902           simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000))
6903         };
6904 
6905         /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */
6906         simde__m128 numerator = simde_mm_fmadd_ps(p[3], t, p[2]);
6907         numerator = simde_mm_fmadd_ps(numerator, t, p[1]);
6908         numerator = simde_mm_add_ps(numerator, simde_mm_div_ps(p[0], t));
6909 
6910         /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */
6911         simde__m128 denominator = simde_mm_fmadd_ps(q[2], t, q[1]);
6912         denominator = simde_mm_fmadd_ps(denominator, t, q[0]);
6913 
6914         simde__m128 res = simde_mm_div_ps(numerator, denominator);
6915 
6916         retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res));
6917 
6918         if (simde_mm_test_all_ones(simde_mm_castps_si128(matched))) {
6919           return retval;
6920         }
6921       }
6922     }
6923 
6924     { /* else if (a == 0.0f) */
6925       simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0)));
6926       mask = simde_mm_andnot_ps(matched, mask);
6927       matched = simde_mm_or_ps(matched, mask);
6928 
6929       simde__m128 res = simde_mm_set1_ps(SIMDE_MATH_INFINITYF);
6930 
6931       retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res));
6932     }
6933 
6934     { /* else */
6935       /* (a >= 2.0f) */
6936       retval = simde_mm_or_ps(retval, simde_mm_andnot_ps(matched, simde_mm_set1_ps(-SIMDE_MATH_INFINITYF)));
6937     }
6938 
6939     return retval;
6940   #else
6941     simde__m128_private
6942       r_,
6943       a_ = simde__m128_to_private(a);
6944 
6945     SIMDE_VECTORIZE
6946     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
6947       r_.f32[i] = simde_math_erfcinvf(a_.f32[i]);
6948     }
6949 
6950     return simde__m128_from_private(r_);
6951   #endif
6952 }
6953 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
6954   #undef _mm_erfcinv_ps
6955   #define _mm_erfcinv_ps(a) simde_mm_erfcinv_ps(a)
6956 #endif
6957 
6958 SIMDE_FUNCTION_ATTRIBUTES
6959 simde__m128d
6960 simde_mm_erfcinv_pd (simde__m128d a) {
6961   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
6962     return _mm_erfcinv_pd(a);
6963   #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
6964     simde__m128d matched, retval = simde_mm_setzero_pd();
6965 
6966     { /* if (a < 2.0 && a > 0.0625) */
6967       matched = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0)));
6968       matched = simde_mm_and_pd(matched, simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0625))));
6969 
6970       if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(matched), simde_x_mm_setone_si128())) {
6971         retval = simde_mm_erfinv_pd(simde_mm_sub_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), a));
6972       }
6973 
6974       if (simde_mm_test_all_ones(simde_mm_castpd_si128(matched))) {
6975         return retval;
6976       }
6977     }
6978 
6979     { /* else if (a < 0.0625 && a > 0.0) */
6980       simde__m128d mask = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0625)));
6981       mask = simde_mm_and_pd(mask, simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))));
6982       mask = simde_mm_andnot_pd(matched, mask);
6983 
6984       if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(mask), simde_x_mm_setone_si128())) {
6985         matched = simde_mm_or_pd(matched, mask);
6986 
6987         /* t =  1/(sqrt(-log(a))) */
6988         simde__m128d t = simde_x_mm_negate_pd(simde_mm_log_pd(a));
6989         t = simde_mm_sqrt_pd(t);
6990         t = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), t);
6991 
6992         const simde__m128d p[] = {
6993           simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.1550470003116)),
6994           simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)),
6995           simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)),
6996           simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)),
6997           simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)),
6998           simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.164441567910))
6999         };
7000 
7001         const simde__m128d q[] = {
7002           simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)),
7003           simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)),
7004           simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.000000000000))
7005         };
7006 
7007         /* double numerator = (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */
7008         simde__m128d numerator = simde_mm_fmadd_pd(p[5], t, p[4]);
7009         numerator = simde_mm_fmadd_pd(numerator, t, p[3]);
7010         numerator = simde_mm_fmadd_pd(numerator, t, p[2]);
7011         numerator = simde_mm_fmadd_pd(numerator, t, p[1]);
7012         numerator = simde_mm_add_pd(numerator, simde_mm_div_pd(p[0], t));
7013 
7014         /* double denominator = (q[0] + t * (q[1] + t * (q[2]))) */
7015         simde__m128d denominator = simde_mm_fmadd_pd(q[2], t, q[1]);
7016         denominator = simde_mm_fmadd_pd(denominator, t, q[0]);
7017 
7018         simde__m128d res = simde_mm_div_pd(numerator, denominator);
7019 
7020         retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res));
7021       }
7022     }
7023 
7024     { /* else if (a < 0.0) */
7025       simde__m128d mask = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0)));
7026       mask = simde_mm_andnot_pd(matched, mask);
7027 
7028       if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(mask), simde_x_mm_setone_si128())) {
7029         matched = simde_mm_or_pd(matched, mask);
7030 
7031         /* t =  1/(sqrt(-log(a))) */
7032         simde__m128d t = simde_x_mm_negate_pd(simde_mm_log_pd(a));
7033         t = simde_mm_sqrt_pd(t);
7034         t = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), t);
7035 
7036         const simde__m128d p[] = {
7037           simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)),
7038           simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)),
7039           simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.97302949837000)),
7040           simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.5374947401000))
7041         };
7042 
7043         const simde__m128d q[] = {
7044           simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.00980451277802)),
7045           simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.36369997154400)),
7046           simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.00000000000000))
7047         };
7048 
7049         /* double numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */
7050         simde__m128d numerator = simde_mm_fmadd_pd(p[3], t, p[2]);
7051         numerator = simde_mm_fmadd_pd(numerator, t, p[1]);
7052         numerator = simde_mm_add_pd(numerator, simde_mm_div_pd(p[0], t));
7053 
7054         /* double denominator = (q[0] + t * (q[1] + t * (q[2]))) */
7055         simde__m128d denominator = simde_mm_fmadd_pd(q[2], t, q[1]);
7056         denominator = simde_mm_fmadd_pd(denominator, t, q[0]);
7057 
7058         simde__m128d res = simde_mm_div_pd(numerator, denominator);
7059 
7060         retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res));
7061 
7062         if (simde_mm_test_all_ones(simde_mm_castpd_si128(matched))) {
7063           return retval;
7064         }
7065       }
7066     }
7067 
7068     { /* else if (a == 0.0) */
7069       simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0)));
7070       mask = simde_mm_andnot_pd(matched, mask);
7071       matched = simde_mm_or_pd(matched, mask);
7072 
7073       simde__m128d res = simde_mm_set1_pd(SIMDE_MATH_INFINITY);
7074 
7075       retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res));
7076     }
7077 
7078     { /* else */
7079       /* (a >= 2.0) */
7080       retval = simde_mm_or_pd(retval, simde_mm_andnot_pd(matched, simde_mm_set1_pd(-SIMDE_MATH_INFINITY)));
7081     }
7082 
7083     return retval;
7084   #else
7085     simde__m128d_private
7086       r_,
7087       a_ = simde__m128d_to_private(a);
7088 
7089     SIMDE_VECTORIZE
7090     for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
7091       r_.f64[i] = simde_math_erfcinv(a_.f64[i]);
7092     }
7093 
7094     return simde__m128d_from_private(r_);
7095   #endif
7096 }
7097 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7098   #undef _mm_erfcinv_pd
7099   #define _mm_erfcinv_pd(a) simde_mm_erfcinv_pd(a)
7100 #endif
7101 
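/* Editor's note: the erfcinv implementations in this family (128- through
 * 512-bit) all share one masked-branch scheme, differing only in vector
 * width and in how the lane predicate is represented:
 *
 *   - 0.0625 < a < 2:  erfcinv(a) = erfinv(1 - a);
 *   - 0 < a < 0.0625:  substitute t = 1/sqrt(-log(a)) and evaluate the
 *                      rational approximation numerator(t)/denominator(t);
 *   - a < 0:           the second coefficient set runs through the same
 *                      formula, where -log(a) is NaN, so out-of-domain
 *                      inputs propagate NaN;
 *   - a == 0:          +Inf;
 *   - remaining lanes (a >= 2): -Inf, since erfc's range is [0, 2].
 *
 * A hypothetical caller, for illustration only (values approximate):
 *
 *   simde__m128d x = simde_mm_set_pd(1.5, 0.5);
 *   simde__m128d y = simde_mm_erfcinv_pd(x);
 *   // erfcinv(0.5) ~= 0.476936, erfcinv(1.5) ~= -0.476936
 */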
7102 SIMDE_FUNCTION_ATTRIBUTES
7103 simde__m256
7104 simde_mm256_erfcinv_ps (simde__m256 a) {
7105   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
7106     return _mm256_erfcinv_ps(a);
7107   #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256)
7108     simde__m256 matched, retval = simde_mm256_setzero_ps();
7109 
7110     { /* if (a < 2.0f && a > 0.0625f) */
7111       matched = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)), SIMDE_CMP_LT_OQ);
7112       matched = simde_mm256_and_ps(matched, simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_GT_OQ));
7113 
7114       if (!simde_mm256_testz_ps(matched, matched)) {
7115         retval = simde_mm256_erfinv_ps(simde_mm256_sub_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), a));
7116       }
7117 
7118       if (simde_x_mm256_test_all_ones(simde_mm256_castps_si256(matched))) {
7119         return retval;
7120       }
7121     }
7122 
7123     { /* else if (a < 0.0625f && a > 0.0f) */
7124       simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_LT_OQ);
7125       mask = simde_mm256_and_ps(mask, simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_GT_OQ));
7126       mask = simde_mm256_andnot_ps(matched, mask);
7127 
7128       if (!simde_mm256_testz_ps(mask, mask)) {
7129         matched = simde_mm256_or_ps(matched, mask);
7130 
7131         /* t =  1/(sqrt(-log(a))) */
7132         simde__m256 t = simde_x_mm256_negate_ps(simde_mm256_log_ps(a));
7133         t = simde_mm256_sqrt_ps(t);
7134         t = simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), t);
7135 
7136         const simde__m256 p[] = {
7137           simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)),
7138           simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)),
7139           simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)),
7140           simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)),
7141           simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.680544246825)),
7142           simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.16444156791))
7143         };
7144 
7145         const simde__m256 q[] = {
7146           simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)),
7147           simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)),
7148           simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.000000000000))
7149         };
7150 
7151         /* numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5]))) */
7152         simde__m256 numerator = simde_mm256_fmadd_ps(p[5], t, p[4]);
7153         numerator = simde_mm256_fmadd_ps(numerator, t, p[3]);
7154         numerator = simde_mm256_fmadd_ps(numerator, t, p[2]);
7155         numerator = simde_mm256_fmadd_ps(numerator, t, p[1]);
7156         numerator = simde_mm256_add_ps(numerator, simde_mm256_div_ps(p[0], t));
7157 
7158         /* denominator = (q[0] + t * (q[1] + t * (q[2]))) */
7159         simde__m256 denominator = simde_mm256_fmadd_ps(q[2], t, q[1]);
7160         denominator = simde_mm256_fmadd_ps(denominator, t, q[0]);
7161 
7162         simde__m256 res = simde_mm256_div_ps(numerator, denominator);
7163 
7164         retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res));
7165       }
7166     }
7167 
7168     { /* else if (a < 0.0f) */
7169       simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ);
7170       mask = simde_mm256_andnot_ps(matched, mask);
7171 
7172       if (!simde_mm256_testz_ps(mask, mask)) {
7173         matched = simde_mm256_or_ps(matched, mask);
7174 
7175         /* t =  1/(sqrt(-log(a))) */
7176         simde__m256 t = simde_x_mm256_negate_ps(simde_mm256_log_ps(a));
7177         t = simde_mm256_sqrt_ps(t);
7178         t = simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), t);
7179 
7180         const simde__m256 p[] = {
7181           simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)),
7182           simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)),
7183           simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.97302949837000)),
7184           simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.5374947401000))
7185         };
7186 
7187         const simde__m256 q[] = {
7188           simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)),
7189           simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)),
7190           simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000))
7191         };
7192 
7193         /* numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */
7194         simde__m256 numerator = simde_mm256_fmadd_ps(p[3], t, p[2]);
7195         numerator = simde_mm256_fmadd_ps(numerator, t, p[1]);
7196         numerator = simde_mm256_add_ps(numerator, simde_mm256_div_ps(p[0], t));
7197 
7198         /* denominator = (q[0] + t * (q[1] + t * (q[2]))) */
7199         simde__m256 denominator = simde_mm256_fmadd_ps(q[2], t, q[1]);
7200         denominator = simde_mm256_fmadd_ps(denominator, t, q[0]);
7201 
7202         simde__m256 res = simde_mm256_div_ps(numerator, denominator);
7203 
7204         retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res));
7205 
7206         if (simde_x_mm256_test_all_ones(simde_mm256_castps_si256(matched))) {
7207           return retval;
7208         }
7209       }
7210     }
7211 
7212     { /* else if (a == 0.0f) */
7213       simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ);
7214       mask = simde_mm256_andnot_ps(matched, mask);
7215       matched = simde_mm256_or_ps(matched, mask);
7216 
7217       simde__m256 res = simde_mm256_set1_ps(SIMDE_MATH_INFINITYF);
7218 
7219       retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res));
7220     }
7221 
7222     { /* else */
7223       /* (a >= 2.0f) */
7224       retval = simde_mm256_or_ps(retval, simde_mm256_andnot_ps(matched, simde_mm256_set1_ps(-SIMDE_MATH_INFINITYF)));
7225     }
7226 
7227     return retval;
7228   #else
7229     simde__m256_private
7230       r_,
7231       a_ = simde__m256_to_private(a);
7232 
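    /* When the target's natural vector size is 128 bits or narrower, reuse
     * the 128-bit implementation on each half; otherwise fall back to a
     * scalar loop that SIMDE_VECTORIZE asks the compiler to vectorize. */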
7233     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
7234       for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
7235         r_.m128[i] = simde_mm_erfcinv_ps(a_.m128[i]);
7236       }
7237     #else
7238       SIMDE_VECTORIZE
7239       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
7240         r_.f32[i] = simde_math_erfcinvf(a_.f32[i]);
7241       }
7242     #endif
7243 
7244     return simde__m256_from_private(r_);
7245   #endif
7246 }
7247 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7248   #undef _mm256_erfcinv_ps
7249   #define _mm256_erfcinv_ps(a) simde_mm256_erfcinv_ps(a)
7250 #endif
7251 
7252 SIMDE_FUNCTION_ATTRIBUTES
7253 simde__m256d
7254 simde_mm256_erfcinv_pd (simde__m256d a) {
7255   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
7256     return _mm256_erfcinv_pd(a);
7257   #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256)
7258     simde__m256d matched, retval = simde_mm256_setzero_pd();
7259 
7260     { /* if (a < 2.0 && a > 0.0625) */
7261       matched = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0)), SIMDE_CMP_LT_OQ);
7262       matched = simde_mm256_and_pd(matched, simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_GT_OQ));
7263 
7264       if (!simde_mm256_testz_pd(matched, matched)) {
7265         retval = simde_mm256_erfinv_pd(simde_mm256_sub_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), a));
7266       }
7267 
7268       if (simde_x_mm256_test_all_ones(simde_mm256_castpd_si256(matched))) {
7269         return retval;
7270       }
7271     }
7272 
7273     { /* else if (a < 0.0625 && a > 0.0) */
7274       simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_LT_OQ);
7275       mask = simde_mm256_and_pd(mask, simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_GT_OQ));
7276       mask = simde_mm256_andnot_pd(matched, mask);
7277 
7278       if (!simde_mm256_testz_pd(mask, mask)) {
7279         matched = simde_mm256_or_pd(matched, mask);
7280 
7281         /* t =  1/(sqrt(-log(a))) */
7282         simde__m256d t = simde_x_mm256_negate_pd(simde_mm256_log_pd(a));
7283         t = simde_mm256_sqrt_pd(t);
7284         t = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), t);
7285 
7286         const simde__m256d p[] = {
7287           simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.1550470003116)),
7288           simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)),
7289           simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)),
7290           simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)),
7291           simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)),
7292           simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.16444156791))
7293         };
7294 
7295         const simde__m256d q[] = {
7296           simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)),
7297           simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)),
7298           simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.000000000000))
7299         };
7300 
7301         /* numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5]))) */
7302         simde__m256d numerator = simde_mm256_fmadd_pd(p[5], t, p[4]);
7303         numerator = simde_mm256_fmadd_pd(numerator, t, p[3]);
7304         numerator = simde_mm256_fmadd_pd(numerator, t, p[2]);
7305         numerator = simde_mm256_fmadd_pd(numerator, t, p[1]);
7306         numerator = simde_mm256_add_pd(numerator, simde_mm256_div_pd(p[0], t));
7307 
7308         /* denominator = (q[0] + t * (q[1] + t * (q[2]))) */
7309         simde__m256d denominator = simde_mm256_fmadd_pd(q[2], t, q[1]);
7310         denominator = simde_mm256_fmadd_pd(denominator, t, q[0]);
7311 
7312         simde__m256d res = simde_mm256_div_pd(numerator, denominator);
7313 
7314         retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res));
7315       }
7316     }
7317 
7318     { /* else if (a < 0.0) */
7319       simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ);
7320       mask = simde_mm256_andnot_pd(matched, mask);
7321 
7322       if (!simde_mm256_testz_pd(mask, mask)) {
7323         matched = simde_mm256_or_pd(matched, mask);
7324 
7325         /* t =  1/(sqrt(-log(a))) */
7326         simde__m256d t = simde_x_mm256_negate_pd(simde_mm256_log_pd(a));
7327         t = simde_mm256_sqrt_pd(t);
7328         t = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), t);
7329 
7330         const simde__m256d p[] = {
7331           simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)),
7332           simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)),
7333           simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.97302949837000)),
7334           simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.5374947401000))
7335         };
7336 
7337         const simde__m256d q[] = {
7338           simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.00980451277802)),
7339           simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.36369997154400)),
7340           simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.00000000000000))
7341         };
7342 
7343         /* numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */
7344         simde__m256d numerator = simde_mm256_fmadd_pd(p[3], t, p[2]);
7345         numerator = simde_mm256_fmadd_pd(numerator, t, p[1]);
7346         numerator = simde_mm256_add_pd(numerator, simde_mm256_div_pd(p[0], t));
7347 
7348         /* denominator = (q[0] + t * (q[1] + t * (q[2]))) */
7349         simde__m256d denominator = simde_mm256_fmadd_pd(q[2], t, q[1]);
7350         denominator = simde_mm256_fmadd_pd(denominator, t, q[0]);
7351 
7352         simde__m256d res = simde_mm256_div_pd(numerator, denominator);
7353 
7354         retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res));
7355 
7356         if (simde_x_mm256_test_all_ones(simde_mm256_castpd_si256(matched))) {
7357           return retval;
7358         }
7359       }
7360     }
7361 
7362     { /* else if (a == 0.0) */
7363       simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ);
7364       mask = simde_mm256_andnot_pd(matched, mask);
7365       matched = simde_mm256_or_pd(matched, mask);
7366 
7367       simde__m256d res = simde_mm256_set1_pd(SIMDE_MATH_INFINITY);
7368 
7369       retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res));
7370     }
7371 
7372     { /* else */
7373       /* (a >= 2.0) */
7374       retval = simde_mm256_or_pd(retval, simde_mm256_andnot_pd(matched, simde_mm256_set1_pd(-SIMDE_MATH_INFINITY)));
7375     }
7376 
7377     return retval;
7378   #else
7379     simde__m256d_private
7380       r_,
7381       a_ = simde__m256d_to_private(a);
7382 
7383     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
7384       for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
7385         r_.m128d[i] = simde_mm_erfcinv_pd(a_.m128d[i]);
7386       }
7387     #else
7388       SIMDE_VECTORIZE
7389       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
7390         r_.f64[i] = simde_math_erfcinv(a_.f64[i]);
7391       }
7392     #endif
7393 
7394     return simde__m256d_from_private(r_);
7395   #endif
7396 }
7397 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7398   #undef _mm256_erfcinv_pd
7399   #define _mm256_erfcinv_pd(a) simde_mm256_erfcinv_pd(a)
7400 #endif
7401 
7402 SIMDE_FUNCTION_ATTRIBUTES
7403 simde__m512
7404 simde_mm512_erfcinv_ps (simde__m512 a) {
7405   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
7406     return _mm512_erfcinv_ps(a);
7407   #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && (!defined(SIMDE_ARCH_ARM) || defined(SIMDE_ARCH_AARCH64))
7408     /* The results on Arm are *slightly* off, which causes problems for
7409      * the edge cases; for example, if you pass 2.0 sqrt will be called
7410      * with a value of -0.0 instead of 0.0, resulting in a NaN. */
7411     simde__m512_private
7412       r_,
7413       a_ = simde__m512_to_private(a);
7414 
7415     for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
7416       r_.m256[i] = simde_mm256_erfcinv_ps(a_.m256[i]);
7417     }
7418     return simde__m512_from_private(r_);
7419   #else
7420     simde__m512 retval = simde_mm512_setzero_ps();
7421     simde__mmask16 matched;
7422 
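    /* Same branch scheme as the narrower versions, but lane predicates live
     * in a simde__mmask16 bit-mask, so the and/andnot blending becomes plain
     * integer arithmetic and simde_mm512_maskz_mov_ps does the merging. */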
7423     { /* if (a < 2.0f && a > 0.0625f) */
7424       matched =  simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0)), SIMDE_CMP_LT_OQ);
7425       matched &= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_GT_OQ);
7426 
7427       if (matched != 0) {
7428         retval = simde_mm512_erfinv_ps(simde_mm512_sub_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a));
7429       }
7430 
7431       if (matched == UINT16_C(0xFFFF)) { /* all lanes matched */
7432         return retval;
7433       }
7434     }
7435 
7436     { /* else if (a < 0.0625f && a > 0.0f) */
7437       simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_LT_OQ);
7438       mask &= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_GT_OQ);
7439       mask = ~matched & mask;
7440 
7441       if (mask != 0) {
7442         matched = matched | mask;
7443 
7444         /* t =  1/(sqrt(-log(a))) */
7445         simde__m512 t = simde_x_mm512_negate_ps(simde_mm512_log_ps(a));
7446         t = simde_mm512_sqrt_ps(t);
7447         t = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), t);
7448 
7449         const simde__m512 p[] = {
7450           simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)),
7451           simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)),
7452           simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)),
7453           simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)),
7454           simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.680544246825)),
7455           simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.16444156791))
7456         };
7457 
7458         const simde__m512 q[] = {
7459           simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)),
7460           simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)),
7461           simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.000000000000))
7462         };
7463 
7464         /* numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5]))) */
7465         simde__m512 numerator = simde_mm512_fmadd_ps(p[5], t, p[4]);
7466         numerator = simde_mm512_fmadd_ps(numerator, t, p[3]);
7467         numerator = simde_mm512_fmadd_ps(numerator, t, p[2]);
7468         numerator = simde_mm512_fmadd_ps(numerator, t, p[1]);
7469         numerator = simde_mm512_add_ps(numerator, simde_mm512_div_ps(p[0], t));
7470 
7471         /* denominator = (q[0] + t * (q[1] + t * (q[2]))) */
7472         simde__m512 denominator = simde_mm512_fmadd_ps(q[2], t, q[1]);
7473         denominator = simde_mm512_fmadd_ps(denominator, t, q[0]);
7474 
7475         simde__m512 res = simde_mm512_div_ps(numerator, denominator);
7476 
7477         retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(mask, res));
7478       }
7479     }
7480 
7481     { /* else if (a < 0.0f) */
7482       simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ);
7483       mask = ~matched & mask;
7484 
7485       if (mask != 0) {
7486         matched = matched | mask;
7487 
7488         /* t =  1/(sqrt(-log(a))) */
7489         simde__m512 t = simde_x_mm512_negate_ps(simde_mm512_log_ps(a));
7490         t = simde_mm512_sqrt_ps(t);
7491         t = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), t);
7492 
7493         const simde__m512 p[] = {
7494           simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)),
7495           simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)),
7496           simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.97302949837000)),
7497           simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.5374947401000))
7498         };
7499 
7500         const simde__m512 q[] = {
7501           simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)),
7502           simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)),
7503           simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000))
7504         };
7505 
7506         /* numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */
7507         simde__m512 numerator = simde_mm512_fmadd_ps(p[3], t, p[2]);
7508         numerator = simde_mm512_fmadd_ps(numerator, t, p[1]);
7509         numerator = simde_mm512_add_ps(numerator, simde_mm512_div_ps(p[0], t));
7510 
7511         /* denominator = (q[0] + t * (q[1] + t * (q[2]))) */
7512         simde__m512 denominator = simde_mm512_fmadd_ps(q[2], t, q[1]);
7513         denominator = simde_mm512_fmadd_ps(denominator, t, q[0]);
7514 
7515         simde__m512 res = simde_mm512_div_ps(numerator, denominator);
7516 
7517         retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(mask, res));
7518 
7519         if (matched == UINT16_C(0xFFFF)) { /* all lanes matched */
7520           return retval;
7521         }
7522       }
7523     }
7524 
7525     { /* else if (a == 0.0f) */
7526       simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ);
7527       mask = ~matched & mask;
7528       matched = matched | mask;
7529 
7530       simde__m512 res = simde_mm512_set1_ps(SIMDE_MATH_INFINITYF);
7531 
7532       retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(mask, res));
7533     }
7534 
7535     { /* else */
7536       /* (a >= 2.0f) */
7537       retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(~matched, simde_mm512_set1_ps(-SIMDE_MATH_INFINITYF)));
7538     }
7539 
7540     return retval;
7541   #endif
7542 }
7543 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7544   #undef _mm512_erfcinv_ps
7545   #define _mm512_erfcinv_ps(a) simde_mm512_erfcinv_ps(a)
7546 #endif
7547 
7548 SIMDE_FUNCTION_ATTRIBUTES
7549 simde__m512d
7550 simde_mm512_erfcinv_pd (simde__m512d a) {
7551   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
7552     return _mm512_erfcinv_pd(a);
7553   #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256)
7554     simde__m512d_private
7555       r_,
7556       a_ = simde__m512d_to_private(a);
7557 
7558     for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
7559       r_.m256d[i] = simde_mm256_erfcinv_pd(a_.m256d[i]);
7560     }
7561     return simde__m512d_from_private(r_);
7562   #else
7563     simde__m512d retval = simde_mm512_setzero_pd();
7564     simde__mmask8 matched;
7565 
7566     { /* if (a < 2.0 && a > 0.0625) */
7567       matched =  simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0)), SIMDE_CMP_LT_OQ);
7568       matched &= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_GT_OQ);
7569 
7570       if (matched != 0) {
7571         retval = simde_mm512_erfinv_pd(simde_mm512_sub_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a));
7572       }
7573 
7574       if (matched == UINT8_C(0xFF)) { /* all lanes matched */
7575         return retval;
7576       }
7577     }
7578 
7579     { /* else if (a < 0.0625 && a > 0.0) */
7580       simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_LT_OQ);
7581       mask &= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_GT_OQ);
7582       mask = ~matched & mask;
7583 
7584       if (mask != 0) {
7585         matched = matched | mask;
7586 
7587         /* t =  1/(sqrt(-log(a))) */
7588         simde__m512d t = simde_x_mm512_negate_pd(simde_mm512_log_pd(a));
7589         t = simde_mm512_sqrt_pd(t);
7590         t = simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), t);
7591 
7592         const simde__m512d p[] = {
7593           simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.1550470003116)),
7594           simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)),
7595           simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)),
7596           simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)),
7597           simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)),
7598           simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.16444156791))
7599         };
7600 
7601         const simde__m512d q[] = {
7602           simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)),
7603           simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)),
7604           simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.000000000000))
7605         };
7606 
7607         /* numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5]))) */
7608         simde__m512d numerator = simde_mm512_fmadd_pd(p[5], t, p[4]);
7609         numerator = simde_mm512_fmadd_pd(numerator, t, p[3]);
7610         numerator = simde_mm512_fmadd_pd(numerator, t, p[2]);
7611         numerator = simde_mm512_fmadd_pd(numerator, t, p[1]);
7612         numerator = simde_mm512_add_pd(numerator, simde_mm512_div_pd(p[0], t));
7613 
7614         /* denominator = (q[0] + t * (q[1] + t * (q[2]))) */
7615         simde__m512d denominator = simde_mm512_fmadd_pd(q[2], t, q[1]);
7616         denominator = simde_mm512_fmadd_pd(denominator, t, q[0]);
7617 
7618         simde__m512d res = simde_mm512_div_pd(numerator, denominator);
7619 
7620         retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(mask, res));
7621       }
7622     }
7623 
7624     { /* else if (a < 0.0) */
7625       simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ);
7626       mask = ~matched & mask;
7627 
7628       if (mask != 0) {
7629         matched = matched | mask;
7630 
7631         /* t =  1/(sqrt(-log(a))) */
7632         simde__m512d t = simde_x_mm512_negate_pd(simde_mm512_log_pd(a));
7633         t = simde_mm512_sqrt_pd(t);
7634         t = simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), t);
7635 
7636         const simde__m512d p[] = {
7637           simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)),
7638           simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)),
7639           simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.97302949837000)),
7640           simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.5374947401000))
7641         };
7642 
7643         const simde__m512d q[] = {
7644           simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.00980451277802)),
7645           simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.36369997154400)),
7646           simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.00000000000000))
7647         };
7648 
7649         /* numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */
7650         simde__m512d numerator = simde_mm512_fmadd_pd(p[3], t, p[2]);
7651         numerator = simde_mm512_fmadd_pd(numerator, t, p[1]);
7652         numerator = simde_mm512_add_pd(numerator, simde_mm512_div_pd(p[0], t));
7653 
7654         /* denominator = (q[0] + t * (q[1] + t * (q[2]))) */
7655         simde__m512d denominator = simde_mm512_fmadd_pd(q[2], t, q[1]);
7656         denominator = simde_mm512_fmadd_pd(denominator, t, q[0]);
7657 
7658         simde__m512d res = simde_mm512_div_pd(numerator, denominator);
7659 
7660         retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(mask, res));
7661 
7662         if (matched == UINT8_C(0xFF)) { /* all lanes matched */
7663           return retval;
7664         }
7665       }
7666     }
7667 
7668     { /* else if (a == 0.0) */
7669       simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ);
7670       mask = ~matched & mask;
7671       matched = matched | mask;
7672 
7673       simde__m512d res = simde_mm512_set1_pd(SIMDE_MATH_INFINITY);
7674 
7675       retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(mask, res));
7676     }
7677 
7678     { /* else */
7679       /* (a >= 2.0) */
7680       retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(~matched, simde_mm512_set1_pd(-SIMDE_MATH_INFINITY)));
7681     }
7682 
7683     return retval;
7684   #endif
7685 }
7686 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7687   #undef _mm512_erfcinv_pd
7688   #define _mm512_erfcinv_pd(a) simde_mm512_erfcinv_pd(a)
7689 #endif
7690 
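/* The masked variants are emulated generically: compute the result for the
 * whole vector, then use mask_mov to keep it only in the lanes selected by
 * k, taking the corresponding lane of src everywhere else. A hypothetical
 * caller, for illustration only:
 *
 *   simde__m512 r = simde_mm512_mask_erfcinv_ps(src, UINT16_C(0x00FF), a);
 *   // lanes 0-7 receive erfcinv(a); lanes 8-15 pass src through
 */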
7691 SIMDE_FUNCTION_ATTRIBUTES
7692 simde__m512
7693 simde_mm512_mask_erfcinv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
7694   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
7695     return _mm512_mask_erfcinv_ps(src, k, a);
7696   #else
7697     return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfcinv_ps(a));
7698   #endif
7699 }
7700 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7701   #undef _mm512_mask_erfcinv_ps
7702   #define _mm512_mask_erfcinv_ps(src, k, a) simde_mm512_mask_erfcinv_ps(src, k, a)
7703 #endif
7704 
7705 SIMDE_FUNCTION_ATTRIBUTES
7706 simde__m512d
7707 simde_mm512_mask_erfcinv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
7708   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
7709     return _mm512_mask_erfcinv_pd(src, k, a);
7710   #else
7711     return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfcinv_pd(a));
7712   #endif
7713 }
7714 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7715   #undef _mm512_mask_erfcinv_pd
7716   #define _mm512_mask_erfcinv_pd(src, k, a) simde_mm512_mask_erfcinv_pd(src, k, a)
7717 #endif
7718 
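/* logb returns the unbiased exponent of its argument as a floating-point
 * value (e.g. logb(8.0) == 3.0, logb(0.5) == -1.0); it is an exponent
 * extraction, not a general-purpose logarithm. */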
7719 SIMDE_FUNCTION_ATTRIBUTES
7720 simde__m128
7721 simde_mm_logb_ps (simde__m128 a) {
7722   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
7723     return _mm_logb_ps(a);
7724   #else
7725     simde__m128_private
7726       r_,
7727       a_ = simde__m128_to_private(a);
7728 
7729     SIMDE_VECTORIZE
7730     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
7731       r_.f32[i] = simde_math_logbf(a_.f32[i]);
7732     }
7733 
7734     return simde__m128_from_private(r_);
7735   #endif
7736 }
7737 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7738   #undef _mm_logb_ps
7739   #define _mm_logb_ps(a) simde_mm_logb_ps(a)
7740 #endif
7741 
7742 SIMDE_FUNCTION_ATTRIBUTES
7743 simde__m128d
7744 simde_mm_logb_pd (simde__m128d a) {
7745   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
7746     return _mm_logb_pd(a);
7747   #else
7748     simde__m128d_private
7749       r_,
7750       a_ = simde__m128d_to_private(a);
7751 
7752     SIMDE_VECTORIZE
7753     for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
7754       r_.f64[i] = simde_math_logb(a_.f64[i]);
7755     }
7756 
7757     return simde__m128d_from_private(r_);
7758   #endif
7759 }
7760 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7761   #undef _mm_logb_pd
7762   #define _mm_logb_pd(a) simde_mm_logb_pd(a)
7763 #endif
7764 
7765 SIMDE_FUNCTION_ATTRIBUTES
7766 simde__m256
7767 simde_mm256_logb_ps (simde__m256 a) {
7768   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
7769     return _mm256_logb_ps(a);
7770   #else
7771     simde__m256_private
7772       r_,
7773       a_ = simde__m256_to_private(a);
7774 
7775     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
7776       for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
7777         r_.m128[i] = simde_mm_logb_ps(a_.m128[i]);
7778       }
7779     #else
7780       SIMDE_VECTORIZE
7781       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
7782         r_.f32[i] = simde_math_logbf(a_.f32[i]);
7783       }
7784     #endif
7785 
7786     return simde__m256_from_private(r_);
7787   #endif
7788 }
7789 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7790   #undef _mm256_logb_ps
7791   #define _mm256_logb_ps(a) simde_mm256_logb_ps(a)
7792 #endif
7793 
7795 SIMDE_FUNCTION_ATTRIBUTES
7796 simde__m256d
7797 simde_mm256_logb_pd (simde__m256d a) {
7798   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
7799     return _mm256_logb_pd(a);
7800   #else
7801     simde__m256d_private
7802       r_,
7803       a_ = simde__m256d_to_private(a);
7804 
7805     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
7806       for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
7807         r_.m128d[i] = simde_mm_logb_pd(a_.m128d[i]);
7808       }
7809     #else
7810       SIMDE_VECTORIZE
7811       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
7812         r_.f64[i] = simde_math_logb(a_.f64[i]);
7813       }
7814     #endif
7815 
7816     return simde__m256d_from_private(r_);
7817   #endif
7818 }
7819 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7820   #undef _mm256_logb_pd
7821   #define _mm256_logb_pd(a) simde_mm256_logb_pd(a)
7822 #endif
7823 
7824 SIMDE_FUNCTION_ATTRIBUTES
7825 simde__m512
7826 simde_mm512_logb_ps (simde__m512 a) {
7827   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
7828     return _mm512_logb_ps(a);
7829   #else
7830     simde__m512_private
7831       r_,
7832       a_ = simde__m512_to_private(a);
7833 
7834     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
7835       for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
7836         r_.m256[i] = simde_mm256_logb_ps(a_.m256[i]);
7837       }
7838     #else
7839       SIMDE_VECTORIZE
7840       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
7841         r_.f32[i] = simde_math_logbf(a_.f32[i]);
7842       }
7843     #endif
7844 
7845     return simde__m512_from_private(r_);
7846   #endif
7847 }
7848 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7849   #undef _mm512_logb_ps
7850   #define _mm512_logb_ps(a) simde_mm512_logb_ps(a)
7851 #endif
7852 
7853 SIMDE_FUNCTION_ATTRIBUTES
7854 simde__m512d
7855 simde_mm512_logb_pd (simde__m512d a) {
7856   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
7857     return _mm512_logb_pd(a);
7858   #else
7859     simde__m512d_private
7860       r_,
7861       a_ = simde__m512d_to_private(a);
7862 
7863     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
7864       for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
7865         r_.m256d[i] = simde_mm256_logb_pd(a_.m256d[i]);
7866       }
7867     #else
7868       SIMDE_VECTORIZE
7869       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
7870         r_.f64[i] = simde_math_logb(a_.f64[i]);
7871       }
7872     #endif
7873 
7874     return simde__m512d_from_private(r_);
7875   #endif
7876 }
7877 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7878   #undef _mm512_logb_pd
7879   #define _mm512_logb_pd(a) simde_mm512_logb_pd(a)
7880 #endif
7881 
7882 SIMDE_FUNCTION_ATTRIBUTES
7883 simde__m512
7884 simde_mm512_mask_logb_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
7885   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
7886     return _mm512_mask_logb_ps(src, k, a);
7887   #else
7888     return simde_mm512_mask_mov_ps(src, k, simde_mm512_logb_ps(a));
7889   #endif
7890 }
7891 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7892   #undef _mm512_mask_logb_ps
7893   #define _mm512_mask_logb_ps(src, k, a) simde_mm512_mask_logb_ps(src, k, a)
7894 #endif
7895 
7896 SIMDE_FUNCTION_ATTRIBUTES
7897 simde__m512d
7898 simde_mm512_mask_logb_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
7899   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
7900     return _mm512_mask_logb_pd(src, k, a);
7901   #else
7902     return simde_mm512_mask_mov_pd(src, k, simde_mm512_logb_pd(a));
7903   #endif
7904 }
7905 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7906   #undef _mm512_mask_logb_pd
7907   #define _mm512_mask_logb_pd(src, k, a) simde_mm512_mask_logb_pd(src, k, a)
7908 #endif
7909 
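/* For log2, Sleef ships both 1.0-ULP (_u10) and 3.5-ULP (_u35) routines;
 * the version check below guards the _u35 entry points, which are only
 * used when SIMDE_ACCURACY_PREFERENCE allows trading accuracy for speed. */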
7910 SIMDE_FUNCTION_ATTRIBUTES
7911 simde__m128
7912 simde_mm_log2_ps (simde__m128 a) {
7913   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
7914     return _mm_log2_ps(a);
7915   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
7916     #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1)
7917       return Sleef_log2f4_u35(a);
7918     #else
7919       return Sleef_log2f4_u10(a);
7920     #endif
7921   #else
7922     simde__m128_private
7923       r_,
7924       a_ = simde__m128_to_private(a);
7925 
7926     SIMDE_VECTORIZE
7927     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
7928       r_.f32[i] = simde_math_log2f(a_.f32[i]);
7929     }
7930 
7931     return simde__m128_from_private(r_);
7932   #endif
7933 }
7934 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7935   #undef _mm_log2_ps
7936   #define _mm_log2_ps(a) simde_mm_log2_ps(a)
7937 #endif
7938 
7939 SIMDE_FUNCTION_ATTRIBUTES
7940 simde__m128d
7941 simde_mm_log2_pd (simde__m128d a) {
7942   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
7943     return _mm_log2_pd(a);
7944   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
7945     #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1)
7946       return Sleef_log2d2_u35(a);
7947     #else
7948       return Sleef_log2d2_u10(a);
7949     #endif
7950   #else
7951     simde__m128d_private
7952       r_,
7953       a_ = simde__m128d_to_private(a);
7954 
7955     SIMDE_VECTORIZE
7956     for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
7957       r_.f64[i] = simde_math_log2(a_.f64[i]);
7958     }
7959 
7960     return simde__m128d_from_private(r_);
7961   #endif
7962 }
7963 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7964   #undef _mm_log2_pd
7965   #define _mm_log2_pd(a) simde_mm_log2_pd(a)
7966 #endif
7967 
7968 SIMDE_FUNCTION_ATTRIBUTES
7969 simde__m256
7970 simde_mm256_log2_ps (simde__m256 a) {
7971   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
7972     return _mm256_log2_ps(a);
7973   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
7974     #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1)
7975       return Sleef_log2f8_u35(a);
7976     #else
7977       return Sleef_log2f8_u10(a);
7978     #endif
7979   #else
7980     simde__m256_private
7981       r_,
7982       a_ = simde__m256_to_private(a);
7983 
7984     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
7985       for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
7986         r_.m128[i] = simde_mm_log2_ps(a_.m128[i]);
7987       }
7988     #else
7989       SIMDE_VECTORIZE
7990       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
7991         r_.f32[i] = simde_math_log2f(a_.f32[i]);
7992       }
7993     #endif
7994 
7995     return simde__m256_from_private(r_);
7996   #endif
7997 }
7998 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
7999   #undef _mm256_log2_ps
8000   #define _mm256_log2_ps(a) simde_mm256_log2_ps(a)
8001 #endif
8002 
8004 SIMDE_FUNCTION_ATTRIBUTES
8005 simde__m256d
8006 simde_mm256_log2_pd (simde__m256d a) {
8007   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
8008     return _mm256_log2_pd(a);
8009   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
8010     #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1)
8011       return Sleef_log2d4_u35(a);
8012     #else
8013       return Sleef_log2d4_u10(a);
8014     #endif
8015   #else
8016     simde__m256d_private
8017       r_,
8018       a_ = simde__m256d_to_private(a);
8019 
8020     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
8021       for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
8022         r_.m128d[i] = simde_mm_log2_pd(a_.m128d[i]);
8023       }
8024     #else
8025       SIMDE_VECTORIZE
8026       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8027         r_.f64[i] = simde_math_log2(a_.f64[i]);
8028       }
8029     #endif
8030 
8031     return simde__m256d_from_private(r_);
8032   #endif
8033 }
8034 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8035   #undef _mm256_log2_pd
8036   #define _mm256_log2_pd(a) simde_mm256_log2_pd(a)
8037 #endif
8038 
8039 SIMDE_FUNCTION_ATTRIBUTES
8040 simde__m512
8041 simde_mm512_log2_ps (simde__m512 a) {
8042   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8043     return _mm512_log2_ps(a);
8044   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
8045     #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1)
8046       return Sleef_log2f16_u35(a);
8047     #else
8048       return Sleef_log2f16_u10(a);
8049     #endif
8050   #else
8051     simde__m512_private
8052       r_,
8053       a_ = simde__m512_to_private(a);
8054 
8055     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
8056       for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
8057         r_.m256[i] = simde_mm256_log2_ps(a_.m256[i]);
8058       }
8059     #else
8060       SIMDE_VECTORIZE
8061       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
8062         r_.f32[i] = simde_math_log2f(a_.f32[i]);
8063       }
8064     #endif
8065 
8066     return simde__m512_from_private(r_);
8067   #endif
8068 }
8069 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8070   #undef _mm512_log2_ps
8071   #define _mm512_log2_ps(a) simde_mm512_log2_ps(a)
8072 #endif
8073 
8074 SIMDE_FUNCTION_ATTRIBUTES
8075 simde__m512d
8076 simde_mm512_log2_pd (simde__m512d a) {
8077   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8078     return _mm512_log2_pd(a);
8079   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
8080     #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1)
8081       return Sleef_log2d8_u35(a);
8082     #else
8083       return Sleef_log2d8_u10(a);
8084     #endif
8085   #else
8086     simde__m512d_private
8087       r_,
8088       a_ = simde__m512d_to_private(a);
8089 
8090     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
8091       for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
8092         r_.m256d[i] = simde_mm256_log2_pd(a_.m256d[i]);
8093       }
8094     #else
8095       SIMDE_VECTORIZE
8096       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8097         r_.f64[i] = simde_math_log2(a_.f64[i]);
8098       }
8099     #endif
8100 
8101     return simde__m512d_from_private(r_);
8102   #endif
8103 }
8104 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8105   #undef _mm512_log2_pd
8106   #define _mm512_log2_pd(a) simde_mm512_log2_pd(a)
8107 #endif
8108 
8109 SIMDE_FUNCTION_ATTRIBUTES
8110 simde__m512
8111 simde_mm512_mask_log2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
8112   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8113     return _mm512_mask_log2_ps(src, k, a);
8114   #else
8115     return simde_mm512_mask_mov_ps(src, k, simde_mm512_log2_ps(a));
8116   #endif
8117 }
8118 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8119   #undef _mm512_mask_log2_ps
8120   #define _mm512_mask_log2_ps(src, k, a) simde_mm512_mask_log2_ps(src, k, a)
8121 #endif
8122 
8123 SIMDE_FUNCTION_ATTRIBUTES
8124 simde__m512d
8125 simde_mm512_mask_log2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
8126   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8127     return _mm512_mask_log2_pd(src, k, a);
8128   #else
8129     return simde_mm512_mask_mov_pd(src, k, simde_mm512_log2_pd(a));
8130   #endif
8131 }
8132 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8133   #undef _mm512_mask_log2_pd
8134   #define _mm512_mask_log2_pd(src, k, a) simde_mm512_mask_log2_pd(src, k, a)
8135 #endif
8136 
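/* log1p(x) computes log(1 + x) without the rounding loss the naive
 * expression suffers for tiny x: with x = 1e-17, 1.0 + x rounds to exactly
 * 1.0, so log(1.0 + x) yields 0.0, while log1p(x) returns ~1e-17. */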
8137 SIMDE_FUNCTION_ATTRIBUTES
8138 simde__m128
8139 simde_mm_log1p_ps (simde__m128 a) {
8140   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
8141     return _mm_log1p_ps(a);
8142   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
8143     return Sleef_log1pf4_u10(a);
8144   #else
8145     simde__m128_private
8146       r_,
8147       a_ = simde__m128_to_private(a);
8148 
8149     SIMDE_VECTORIZE
8150     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
8151       r_.f32[i] = simde_math_log1pf(a_.f32[i]);
8152     }
8153 
8154     return simde__m128_from_private(r_);
8155   #endif
8156 }
8157 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8158   #undef _mm_log1p_ps
8159   #define _mm_log1p_ps(a) simde_mm_log1p_ps(a)
8160 #endif
8161 
8162 SIMDE_FUNCTION_ATTRIBUTES
8163 simde__m128d
8164 simde_mm_log1p_pd (simde__m128d a) {
8165   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
8166     return _mm_log1p_pd(a);
8167   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
8168     return Sleef_log1pd2_u10(a);
8169   #else
8170     simde__m128d_private
8171       r_,
8172       a_ = simde__m128d_to_private(a);
8173 
8174     SIMDE_VECTORIZE
8175     for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8176       r_.f64[i] = simde_math_log1p(a_.f64[i]);
8177     }
8178 
8179     return simde__m128d_from_private(r_);
8180   #endif
8181 }
8182 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8183   #undef _mm_log1p_pd
8184   #define _mm_log1p_pd(a) simde_mm_log1p_pd(a)
8185 #endif
8186 
8187 SIMDE_FUNCTION_ATTRIBUTES
8188 simde__m256
8189 simde_mm256_log1p_ps (simde__m256 a) {
8190   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
8191     return _mm256_log1p_ps(a);
8192   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
8193     return Sleef_log1pf8_u10(a);
8194   #else
8195     simde__m256_private
8196       r_,
8197       a_ = simde__m256_to_private(a);
8198 
8199     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
8200       for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
8201         r_.m128[i] = simde_mm_log1p_ps(a_.m128[i]);
8202       }
8203     #else
8204       SIMDE_VECTORIZE
8205       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
8206         r_.f32[i] = simde_math_log1pf(a_.f32[i]);
8207       }
8208     #endif
8209 
8210     return simde__m256_from_private(r_);
8211   #endif
8212 }
8213 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8214   #undef _mm256_log1p_ps
8215   #define _mm256_log1p_ps(a) simde_mm256_log1p_ps(a)
8216 #endif
8217 
8219 SIMDE_FUNCTION_ATTRIBUTES
8220 simde__m256d
8221 simde_mm256_log1p_pd (simde__m256d a) {
8222   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
8223     return _mm256_log1p_pd(a);
8224   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
8225     return Sleef_log1pd4_u10(a);
8226   #else
8227     simde__m256d_private
8228       r_,
8229       a_ = simde__m256d_to_private(a);
8230 
8231     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
8232       for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
8233         r_.m128d[i] = simde_mm_log1p_pd(a_.m128d[i]);
8234       }
8235     #else
8236       SIMDE_VECTORIZE
8237       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8238         r_.f64[i] = simde_math_log1p(a_.f64[i]);
8239       }
8240     #endif
8241 
8242     return simde__m256d_from_private(r_);
8243   #endif
8244 }
8245 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8246   #undef _mm256_log1p_pd
8247   #define _mm256_log1p_pd(a) simde_mm256_log1p_pd(a)
8248 #endif
8249 
8250 SIMDE_FUNCTION_ATTRIBUTES
8251 simde__m512
8252 simde_mm512_log1p_ps (simde__m512 a) {
8253   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8254     return _mm512_log1p_ps(a);
8255   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
8256     return Sleef_log1pf16_u10(a);
8257   #else
8258     simde__m512_private
8259       r_,
8260       a_ = simde__m512_to_private(a);
8261 
8262     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
8263       for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
8264         r_.m256[i] = simde_mm256_log1p_ps(a_.m256[i]);
8265       }
8266     #else
8267       SIMDE_VECTORIZE
8268       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
8269         r_.f32[i] = simde_math_log1pf(a_.f32[i]);
8270       }
8271     #endif
8272 
8273     return simde__m512_from_private(r_);
8274   #endif
8275 }
8276 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8277   #undef _mm512_log1p_ps
8278   #define _mm512_log1p_ps(a) simde_mm512_log1p_ps(a)
8279 #endif
8280 
8281 SIMDE_FUNCTION_ATTRIBUTES
8282 simde__m512d
8283 simde_mm512_log1p_pd (simde__m512d a) {
8284   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8285     return _mm512_log1p_pd(a);
8286   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
8287     return Sleef_log1pd8_u10(a);
8288   #else
8289     simde__m512d_private
8290       r_,
8291       a_ = simde__m512d_to_private(a);
8292 
8293     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
8294       for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
8295         r_.m256d[i] = simde_mm256_log1p_pd(a_.m256d[i]);
8296       }
8297     #else
8298       SIMDE_VECTORIZE
8299       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8300         r_.f64[i] = simde_math_log1p(a_.f64[i]);
8301       }
8302     #endif
8303 
8304     return simde__m512d_from_private(r_);
8305   #endif
8306 }
8307 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8308   #undef _mm512_log1p_pd
8309   #define _mm512_log1p_pd(a) simde_mm512_log1p_pd(a)
8310 #endif
8311 
8312 SIMDE_FUNCTION_ATTRIBUTES
8313 simde__m512
8314 simde_mm512_mask_log1p_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
8315   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8316     return _mm512_mask_log1p_ps(src, k, a);
8317   #else
8318     return simde_mm512_mask_mov_ps(src, k, simde_mm512_log1p_ps(a));
8319   #endif
8320 }
8321 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8322   #undef _mm512_mask_log1p_ps
8323   #define _mm512_mask_log1p_ps(src, k, a) simde_mm512_mask_log1p_ps(src, k, a)
8324 #endif
8325 
8326 SIMDE_FUNCTION_ATTRIBUTES
8327 simde__m512d
8328 simde_mm512_mask_log1p_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
8329   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8330     return _mm512_mask_log1p_pd(src, k, a);
8331   #else
8332     return simde_mm512_mask_mov_pd(src, k, simde_mm512_log1p_pd(a));
8333   #endif
8334 }
8335 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8336   #undef _mm512_mask_log1p_pd
8337   #define _mm512_mask_log1p_pd(src, k, a) simde_mm512_mask_log1p_pd(src, k, a)
8338 #endif
8339 
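/* Unlike log2, the log10 family is routed only to Sleef's 1.0-ULP (_u10)
 * entry points; SIMDE_ACCURACY_PREFERENCE plays no role here. */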
8340 SIMDE_FUNCTION_ATTRIBUTES
8341 simde__m128
8342 simde_mm_log10_ps (simde__m128 a) {
8343   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
8344     return _mm_log10_ps(a);
8345   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
8346     return Sleef_log10f4_u10(a);
8347   #else
8348     simde__m128_private
8349       r_,
8350       a_ = simde__m128_to_private(a);
8351 
8352     SIMDE_VECTORIZE
8353     for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
8354       r_.f32[i] = simde_math_log10f(a_.f32[i]);
8355     }
8356 
8357     return simde__m128_from_private(r_);
8358   #endif
8359 }
8360 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8361   #undef _mm_log10_ps
8362   #define _mm_log10_ps(a) simde_mm_log10_ps(a)
8363 #endif
8364 
8365 SIMDE_FUNCTION_ATTRIBUTES
8366 simde__m128d
8367 simde_mm_log10_pd (simde__m128d a) {
8368   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
8369     return _mm_log10_pd(a);
8370   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
8371     return Sleef_log10d2_u10(a);
8372   #else
8373     simde__m128d_private
8374       r_,
8375       a_ = simde__m128d_to_private(a);
8376 
8377     SIMDE_VECTORIZE
8378     for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8379       r_.f64[i] = simde_math_log10(a_.f64[i]);
8380     }
8381 
8382     return simde__m128d_from_private(r_);
8383   #endif
8384 }
8385 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8386   #undef _mm_log10_pd
8387   #define _mm_log10_pd(a) simde_mm_log10_pd(a)
8388 #endif
8389 
8390 SIMDE_FUNCTION_ATTRIBUTES
8391 simde__m256
8392 simde_mm256_log10_ps (simde__m256 a) {
8393   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
8394     return _mm256_log10_ps(a);
8395   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
8396     return Sleef_log10f8_u10(a);
8397   #else
8398     simde__m256_private
8399       r_,
8400       a_ = simde__m256_to_private(a);
8401 
8402     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
8403       for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
8404         r_.m128[i] = simde_mm_log10_ps(a_.m128[i]);
8405       }
8406     #else
8407       SIMDE_VECTORIZE
8408       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
8409         r_.f32[i] = simde_math_log10f(a_.f32[i]);
8410       }
8411     #endif
8412 
8413     return simde__m256_from_private(r_);
8414   #endif
8415 }
8416 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8417   #undef _mm256_log10_ps
8418   #define _mm256_log10_ps(a) simde_mm256_log10_ps(a)
8419 #endif
8420 
8422 SIMDE_FUNCTION_ATTRIBUTES
8423 simde__m256d
8424 simde_mm256_log10_pd (simde__m256d a) {
8425   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
8426     return _mm256_log10_pd(a);
8427   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
8428     return Sleef_log10d4_u10(a);
8429   #else
8430     simde__m256d_private
8431       r_,
8432       a_ = simde__m256d_to_private(a);
8433 
8434     #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
8435       for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
8436         r_.m128d[i] = simde_mm_log10_pd(a_.m128d[i]);
8437       }
8438     #else
8439       SIMDE_VECTORIZE
8440       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8441         r_.f64[i] = simde_math_log10(a_.f64[i]);
8442       }
8443     #endif
8444 
8445     return simde__m256d_from_private(r_);
8446   #endif
8447 }
8448 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8449   #undef _mm256_log10_pd
8450   #define _mm256_log10_pd(a) simde_mm256_log10_pd(a)
8451 #endif
8452 
8453 SIMDE_FUNCTION_ATTRIBUTES
8454 simde__m512
8455 simde_mm512_log10_ps (simde__m512 a) {
8456   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8457     return _mm512_log10_ps(a);
8458   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
8459     return Sleef_log10f16_u10(a);
8460   #else
8461     simde__m512_private
8462       r_,
8463       a_ = simde__m512_to_private(a);
8464 
8465     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
8466       for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
8467         r_.m256[i] = simde_mm256_log10_ps(a_.m256[i]);
8468       }
8469     #else
8470       SIMDE_VECTORIZE
8471       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
8472         r_.f32[i] = simde_math_log10f(a_.f32[i]);
8473       }
8474     #endif
8475 
8476     return simde__m512_from_private(r_);
8477   #endif
8478 }
8479 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8480   #undef _mm512_log10_ps
8481   #define _mm512_log10_ps(a) simde_mm512_log10_ps(a)
8482 #endif
8483 
8484 SIMDE_FUNCTION_ATTRIBUTES
8485 simde__m512d
8486 simde_mm512_log10_pd (simde__m512d a) {
8487   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
8488     return _mm512_log10_pd(a);
8489   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
8490     return Sleef_log10d8_u10(a);
8491   #else
8492     simde__m512d_private
8493       r_,
8494       a_ = simde__m512d_to_private(a);
8495 
8496     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
8497       for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
8498         r_.m256d[i] = simde_mm256_log10_pd(a_.m256d[i]);
8499       }
8500     #else
8501       SIMDE_VECTORIZE
8502       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
8503         r_.f64[i] = simde_math_log10(a_.f64[i]);
8504       }
8505     #endif
8506 
8507     return simde__m512d_from_private(r_);
8508   #endif
8509 }
8510 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
8511   #undef _mm512_log10_pd
8512   #define _mm512_log10_pd(a) simde_mm512_log10_pd(a)
8513 #endif
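
/* Illustrative usage (a minimal sketch, not part of the API surface):
 *
 *   simde__m512 x = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1000.0));
 *   simde__m512 y = simde_mm512_log10_ps(x);  // every lane is ~3.0f
 */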

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_log10_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_log10_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_log10_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_log10_ps
  #define _mm512_mask_log10_ps(src, k, a) simde_mm512_mask_log10_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_log10_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_log10_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_log10_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_log10_pd
  #define _mm512_mask_log10_pd(src, k, a) simde_mm512_mask_log10_pd(src, k, a)
#endif
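
/* All of the _mask_ variants in this file follow the same merge pattern:
 * the unmasked operation is computed for every lane, then
 * simde_mm512_mask_mov_ps/pd keeps the new value only where the
 * corresponding bit of k is set and takes the lane from src where it is
 * clear. */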

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_nearbyint_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_nearbyint_ps(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_nearbyintf(a_.f32[i]);
    }

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_nearbyint_ps
  #define _mm512_nearbyint_ps(a) simde_mm512_nearbyint_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_nearbyint_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_nearbyint_pd(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_nearbyint(a_.f64[i]);
    }

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_nearbyint_pd
  #define _mm512_nearbyint_pd(a) simde_mm512_nearbyint_pd(a)
#endif
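
/* nearbyint rounds to an integral value using the current floating-point
 * rounding mode (round-to-nearest-even by default) and, unlike rint, is
 * specified not to raise the FE_INEXACT exception; with the default mode,
 * both 2.5 and 1.5 round to 2.0. */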

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_nearbyint_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_nearbyint_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_nearbyint_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_nearbyint_ps
  #define _mm512_mask_nearbyint_ps(src, k, a) simde_mm512_mask_nearbyint_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_nearbyint_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_nearbyint_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_nearbyint_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_nearbyint_pd
  #define _mm512_mask_nearbyint_pd(src, k, a) simde_mm512_mask_nearbyint_pd(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_pow_ps (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_pow_ps(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_powf4_u10(a, b);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_pow_ps
  #define _mm_pow_ps(a, b) simde_mm_pow_ps(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_pow_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_pow_pd(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_powd2_u10(a, b);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_pow_pd
  #define _mm_pow_pd(a, b) simde_mm_pow_pd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_pow_ps (simde__m256 a, simde__m256 b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_pow_ps(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_powf8_u10(a, b);
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a),
      b_ = simde__m256_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]);
    }

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_pow_ps
  #define _mm256_pow_ps(a, b) simde_mm256_pow_ps(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_pow_pd (simde__m256d a, simde__m256d b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_pow_pd(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_powd4_u10(a, b);
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a),
      b_ = simde__m256d_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]);
    }

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_pow_pd
  #define _mm256_pow_pd(a, b) simde_mm256_pow_pd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_pow_ps (simde__m512 a, simde__m512 b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_pow_ps(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_powf16_u10(a, b);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a),
      b_ = simde__m512_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]);
    }

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_pow_ps
  #define _mm512_pow_ps(a, b) simde_mm512_pow_ps(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_pow_pd (simde__m512d a, simde__m512d b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_pow_pd(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_powd8_u10(a, b);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a),
      b_ = simde__m512d_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]);
    }

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_pow_pd
  #define _mm512_pow_pd(a, b) simde_mm512_pow_pd(a, b)
#endif
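
/* Illustrative usage (a minimal sketch, not part of the API surface):
 *
 *   simde__m128 base = simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0));
 *   simde__m128 expo = simde_mm_set1_ps(SIMDE_FLOAT32_C(10.0));
 *   simde__m128 r    = simde_mm_pow_ps(base, expo);  // every lane is ~1024.0f
 */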

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_pow_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_pow_ps(src, k, a, b);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_pow_ps(a, b));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_pow_ps
  #define _mm512_mask_pow_ps(src, k, a, b) simde_mm512_mask_pow_ps(src, k, a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_pow_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_pow_pd(src, k, a, b);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_pow_pd(a, b));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_pow_pd
  #define _mm512_mask_pow_pd(src, k, a, b) simde_mm512_mask_pow_pd(src, k, a, b)
#endif

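/* The c* functions below operate on packed single-precision complex
 * numbers stored as interleaved (real, imaginary) pairs, so a __m128 holds
 * two complex values.  The complex logarithm is computed from the identity
 * log(z) = log(|z|) + i*atan2(Im(z), Re(z)), with |z| recovered from the
 * squared components produced by simde_mm_pow_ps(a, 2). */
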
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_clog_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_clog_ps(a);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    simde__m128_private pow_res_ = simde__m128_to_private(simde_mm_pow_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0))));
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {
      r_.f32[  i  ] = simde_math_logf(simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i + 1]));
      r_.f32[i + 1] = simde_math_atan2f(a_.f32[i + 1], a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_clog_ps
  #define _mm_clog_ps(a) simde_mm_clog_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_clog_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_clog_ps(a);
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    simde__m256_private pow_res_ = simde__m256_to_private(simde_mm256_pow_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0))));
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {
      r_.f32[  i  ] = simde_math_logf(simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i + 1]));
      r_.f32[i + 1] = simde_math_atan2f(a_.f32[i + 1], a_.f32[i]);
    }

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_clog_ps
  #define _mm256_clog_ps(a) simde_mm256_clog_ps(a)
#endif

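/* The complex square root below uses the decomposition
 * sqrt(z) = sqrt((|z| + Re(z)) / 2) + i*sign(Im(z))*sqrt((|z| - Re(z)) / 2),
 * which yields the principal root: the real part is non-negative and the
 * imaginary part carries the sign of Im(z). */
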
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_csqrt_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_csqrt_ps(a);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    simde__m128 pow_res = simde_mm_pow_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)));
    simde__m128_private pow_res_ = simde__m128_to_private(pow_res);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {
      simde_float32 sign = simde_math_copysignf(SIMDE_FLOAT32_C(1.0), a_.f32[i + 1]);
      simde_float32 temp = simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i + 1]);

      r_.f32[  i  ] =        simde_math_sqrtf(( a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0));
      r_.f32[i + 1] = sign * simde_math_sqrtf((-a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0));
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_csqrt_ps
  #define _mm_csqrt_ps(a) simde_mm_csqrt_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_csqrt_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_csqrt_ps(a);
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    simde__m256 pow_res = simde_mm256_pow_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)));
    simde__m256_private pow_res_ = simde__m256_to_private(pow_res);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {
      simde_float32 sign = simde_math_copysignf(SIMDE_FLOAT32_C(1.0), a_.f32[i + 1]);
      simde_float32 temp = simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i + 1]);

      r_.f32[  i  ] =        simde_math_sqrtf(( a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0));
      r_.f32[i + 1] = sign * simde_math_sqrtf((-a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0));
    }

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_csqrt_ps
  #define _mm256_csqrt_ps(a) simde_mm256_csqrt_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_rem_epi8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_rem_epi8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i8 = a_.i8 % b_.i8;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = a_.i8[i] % b_.i8[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_rem_epi8
  #define _mm_rem_epi8(a, b) simde_mm_rem_epi8((a), (b))
#endif
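
/* The integer rem_* family applies C's % operator lane by lane, so the
 * result of a % b has the sign of the dividend a (truncated division).
 * As in scalar C, a zero divisor (or INT_MIN % -1 for the signed
 * variants) is undefined behavior, and callers must avoid such inputs. */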

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_rem_epi16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_rem_epi16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i16 = a_.i16 % b_.i16;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = a_.i16[i] % b_.i16[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_rem_epi16
  #define _mm_rem_epi16(a, b) simde_mm_rem_epi16((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_rem_epi32 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_rem_epi32(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32 = a_.i32 % b_.i32;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.i32[i] = a_.i32[i] % b_.i32[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#define simde_mm_irem_epi32(a, b) simde_mm_rem_epi32(a, b)
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_rem_epi32
  #define _mm_rem_epi32(a, b) simde_mm_rem_epi32(a, b)
  #undef _mm_irem_epi32
  #define _mm_irem_epi32(a, b) simde_mm_rem_epi32(a, b)
#endif
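
/* SVML exposes the 32-bit remainder under two spellings: _mm_rem_epi32 and
 * _mm_irem_epi32 (and, for the unsigned variant further down, _mm_rem_epu32
 * and _mm_urem_epi32).  The extra #defines keep both spellings pointing at
 * the same implementation. */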

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_rem_epi64 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_rem_epi64(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i64 = a_.i64 % b_.i64;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.i64[i] = a_.i64[i] % b_.i64[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_rem_epi64
  #define _mm_rem_epi64(a, b) simde_mm_rem_epi64((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_rem_epu8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_rem_epu8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u8 = a_.u8 % b_.u8;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
        r_.u8[i] = a_.u8[i] % b_.u8[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_rem_epu8
  #define _mm_rem_epu8(a, b) simde_mm_rem_epu8((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_rem_epu16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_rem_epu16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u16 = a_.u16 % b_.u16;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        r_.u16[i] = a_.u16[i] % b_.u16[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_rem_epu16
  #define _mm_rem_epu16(a, b) simde_mm_rem_epu16((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_rem_epu32 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_rem_epu32(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u32 = a_.u32 % b_.u32;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        r_.u32[i] = a_.u32[i] % b_.u32[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#define simde_mm_urem_epi32(a, b) simde_mm_rem_epu32(a, b)
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_rem_epu32
  #define _mm_rem_epu32(a, b) simde_mm_rem_epu32(a, b)
  #undef _mm_urem_epi32
  #define _mm_urem_epi32(a, b) simde_mm_rem_epu32(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_rem_epu64 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_rem_epu64(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u64 = a_.u64 % b_.u64;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
        r_.u64[i] = a_.u64[i] % b_.u64[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_rem_epu64
  #define _mm_rem_epu64(a, b) simde_mm_rem_epu64((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_rem_epi8 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_rem_epi8(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i8 = a_.i8 % b_.i8;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
          r_.m128i[i] = simde_mm_rem_epi8(a_.m128i[i], b_.m128i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
          r_.i8[i] = a_.i8[i] % b_.i8[i];
        }
      #endif
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_rem_epi8
  #define _mm256_rem_epi8(a, b) simde_mm256_rem_epi8((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_rem_epi16 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_rem_epi16(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i16 = a_.i16 % b_.i16;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
          r_.m128i[i] = simde_mm_rem_epi16(a_.m128i[i], b_.m128i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
          r_.i16[i] = a_.i16[i] % b_.i16[i];
        }
      #endif
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_rem_epi16
  #define _mm256_rem_epi16(a, b) simde_mm256_rem_epi16((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_rem_epi32 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_rem_epi32(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32 = a_.i32 % b_.i32;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
          r_.m128i[i] = simde_mm_rem_epi32(a_.m128i[i], b_.m128i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
          r_.i32[i] = a_.i32[i] % b_.i32[i];
        }
      #endif
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#define simde_mm256_irem_epi32(a, b) simde_mm256_rem_epi32(a, b)
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_rem_epi32
  #define _mm256_rem_epi32(a, b) simde_mm256_rem_epi32(a, b)
  #undef _mm256_irem_epi32
  #define _mm256_irem_epi32(a, b) simde_mm256_rem_epi32(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_rem_epi64 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_rem_epi64(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i64 = a_.i64 % b_.i64;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
          r_.m128i[i] = simde_mm_rem_epi64(a_.m128i[i], b_.m128i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
          r_.i64[i] = a_.i64[i] % b_.i64[i];
        }
      #endif
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_rem_epi64
  #define _mm256_rem_epi64(a, b) simde_mm256_rem_epi64((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_rem_epu8 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_rem_epu8(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u8 = a_.u8 % b_.u8;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
          r_.m128i[i] = simde_mm_rem_epu8(a_.m128i[i], b_.m128i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
          r_.u8[i] = a_.u8[i] % b_.u8[i];
        }
      #endif
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_rem_epu8
  #define _mm256_rem_epu8(a, b) simde_mm256_rem_epu8((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_rem_epu16 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_rem_epu16(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u16 = a_.u16 % b_.u16;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
          r_.m128i[i] = simde_mm_rem_epu16(a_.m128i[i], b_.m128i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
          r_.u16[i] = a_.u16[i] % b_.u16[i];
        }
      #endif
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_rem_epu16
  #define _mm256_rem_epu16(a, b) simde_mm256_rem_epu16((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_rem_epu32 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_rem_epu32(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u32 = a_.u32 % b_.u32;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
          r_.m128i[i] = simde_mm_rem_epu32(a_.m128i[i], b_.m128i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
          r_.u32[i] = a_.u32[i] % b_.u32[i];
        }
      #endif
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#define simde_mm256_urem_epi32(a, b) simde_mm256_rem_epu32(a, b)
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_rem_epu32
  #define _mm256_rem_epu32(a, b) simde_mm256_rem_epu32(a, b)
  #undef _mm256_urem_epi32
  #define _mm256_urem_epi32(a, b) simde_mm256_rem_epu32(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_rem_epu64 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_rem_epu64(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u64 = a_.u64 % b_.u64;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
          r_.m128i[i] = simde_mm_rem_epu64(a_.m128i[i], b_.m128i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
          r_.u64[i] = a_.u64[i] % b_.u64[i];
        }
      #endif
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_rem_epu64
  #define _mm256_rem_epu64(a, b) simde_mm256_rem_epu64((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_rem_epi8 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rem_epi8(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i8 = a_.i8 % b_.i8;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
          r_.m256i[i] = simde_mm256_rem_epi8(a_.m256i[i], b_.m256i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
          r_.i8[i] = a_.i8[i] % b_.i8[i];
        }
      #endif
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rem_epi8
  #define _mm512_rem_epi8(a, b) simde_mm512_rem_epi8((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_rem_epi16 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rem_epi16(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i16 = a_.i16 % b_.i16;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
          r_.m256i[i] = simde_mm256_rem_epi16(a_.m256i[i], b_.m256i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
          r_.i16[i] = a_.i16[i] % b_.i16[i];
        }
      #endif
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rem_epi16
  #define _mm512_rem_epi16(a, b) simde_mm512_rem_epi16((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_rem_epi32 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rem_epi32(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32 = a_.i32 % b_.i32;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
          r_.m256i[i] = simde_mm256_rem_epi32(a_.m256i[i], b_.m256i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
          r_.i32[i] = a_.i32[i] % b_.i32[i];
        }
      #endif
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rem_epi32
  #define _mm512_rem_epi32(a, b) simde_mm512_rem_epi32((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_rem_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_rem_epi32(src, k, a, b);
  #else
    return simde_mm512_mask_mov_epi32(src, k, simde_mm512_rem_epi32(a, b));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_rem_epi32
  #define _mm512_mask_rem_epi32(src, k, a, b) simde_mm512_mask_rem_epi32(src, k, a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_rem_epi64 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rem_epi64(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i64 = a_.i64 % b_.i64;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
          r_.m256i[i] = simde_mm256_rem_epi64(a_.m256i[i], b_.m256i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
          r_.i64[i] = a_.i64[i] % b_.i64[i];
        }
      #endif
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rem_epi64
  #define _mm512_rem_epi64(a, b) simde_mm512_rem_epi64((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_rem_epu8 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rem_epu8(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u8 = a_.u8 % b_.u8;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
          r_.m256i[i] = simde_mm256_rem_epu8(a_.m256i[i], b_.m256i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
          r_.u8[i] = a_.u8[i] % b_.u8[i];
        }
      #endif
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rem_epu8
  #define _mm512_rem_epu8(a, b) simde_mm512_rem_epu8((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_rem_epu16 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rem_epu16(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u16 = a_.u16 % b_.u16;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
          r_.m256i[i] = simde_mm256_rem_epu16(a_.m256i[i], b_.m256i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
          r_.u16[i] = a_.u16[i] % b_.u16[i];
        }
      #endif
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rem_epu16
  #define _mm512_rem_epu16(a, b) simde_mm512_rem_epu16((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_rem_epu32 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rem_epu32(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u32 = a_.u32 % b_.u32;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
          r_.m256i[i] = simde_mm256_rem_epu32(a_.m256i[i], b_.m256i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
          r_.u32[i] = a_.u32[i] % b_.u32[i];
        }
      #endif
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rem_epu32
  #define _mm512_rem_epu32(a, b) simde_mm512_rem_epu32((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_rem_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_rem_epu32(src, k, a, b);
  #else
    return simde_mm512_mask_mov_epi32(src, k, simde_mm512_rem_epu32(a, b));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_rem_epu32
  #define _mm512_mask_rem_epu32(src, k, a, b) simde_mm512_mask_rem_epu32(src, k, a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_rem_epu64 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rem_epu64(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u64 = a_.u64 % b_.u64;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
          r_.m256i[i] = simde_mm256_rem_epu64(a_.m256i[i], b_.m256i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
          r_.u64[i] = a_.u64[i] % b_.u64[i];
        }
      #endif
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rem_epu64
  #define _mm512_rem_epu64(a, b) simde_mm512_rem_epu64((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_recip_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_recip_ps(a);
  #else
    return simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_recip_ps
  #define _mm512_recip_ps(a) simde_mm512_recip_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_recip_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_recip_pd(a);
  #else
    return simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_recip_pd
  #define _mm512_recip_pd(a) simde_mm512_recip_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_recip_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_recip_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_recip_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_recip_ps
  #define _mm512_mask_recip_ps(src, k, a) simde_mm512_mask_recip_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_recip_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_recip_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_recip_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_recip_pd
  #define _mm512_mask_recip_pd(src, k, a) simde_mm512_mask_recip_pd(src, k, a)
#endif
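
/* The portable fallback for the recip functions is an exact 1.0 / a
 * computed with simde_mm512_div_ps/pd, so it may be slightly more accurate
 * (and slower) than a native SVML reciprocal approximation. */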

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_rint_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rint_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_rintf16(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_rintf(a_.f32[i]);
    }

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rint_ps
  #define _mm512_rint_ps(a) simde_mm512_rint_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_rint_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rint_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_rintd8(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_rint(a_.f64[i]);
    }

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rint_pd
  #define _mm512_rint_pd(a) simde_mm512_rint_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_rint_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_rint_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_rint_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_rint_ps
  #define _mm512_mask_rint_ps(src, k, a) simde_mm512_mask_rint_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_rint_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_rint_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_rint_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_rint_pd
  #define _mm512_mask_rint_pd(src, k, a) simde_mm512_mask_rint_pd(src, k, a)
#endif
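
/* rint behaves like nearbyint (it also honors the current rounding mode)
 * except that rint is permitted to raise the FE_INEXACT exception when the
 * result differs from its argument. */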

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_sin_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_sin_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sinf4_u10(a);
    #else
      return Sleef_sinf4_u35(a);
    #endif
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_sinf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_sin_ps
  #define _mm_sin_ps(a) simde_mm_sin_ps(a)
#endif
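
/* In the Sleef-backed paths, the _u10 functions are accurate to within
 * 1.0 ULP and the _u35 functions to within 3.5 ULP; SIMDE_ACCURACY_PREFERENCE
 * selects between them, trading speed against precision. */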

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_sin_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_sin_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sind2_u10(a);
    #else
      return Sleef_sind2_u35(a);
    #endif
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_sin(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_sin_pd
  #define _mm_sin_pd(a) simde_mm_sin_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_sin_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_sin_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sinf8_u10(a);
    #else
      return Sleef_sinf8_u35(a);
    #endif
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_sin_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_sinf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_sin_ps
  #define _mm256_sin_ps(a) simde_mm256_sin_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_sin_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_sin_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sind4_u10(a);
    #else
      return Sleef_sind4_u35(a);
    #endif
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_sin_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_sin(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_sin_pd
  #define _mm256_sin_pd(a) simde_mm256_sin_pd(a)
#endif
9982 
9983 SIMDE_FUNCTION_ATTRIBUTES
9984 simde__m512
simde_mm512_sin_ps(simde__m512 a)9985 simde_mm512_sin_ps (simde__m512 a) {
9986   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
9987     return _mm512_sin_ps(a);
9988   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
9989     #if SIMDE_ACCURACY_PREFERENCE > 1
9990       return Sleef_sinf16_u10(a);
9991     #else
9992       return Sleef_sinf16_u35(a);
9993     #endif
9994   #else
9995     simde__m512_private
9996       r_,
9997       a_ = simde__m512_to_private(a);
9998 
9999     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
10000       for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
10001         r_.m256[i] = simde_mm256_sin_ps(a_.m256[i]);
10002       }
10003     #else
10004       SIMDE_VECTORIZE
10005       for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
10006         r_.f32[i] = simde_math_sinf(a_.f32[i]);
10007       }
10008     #endif
10009 
10010     return simde__m512_from_private(r_);
10011   #endif
10012 }
10013 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
10014   #undef _mm512_sin_ps
10015   #define _mm512_sin_ps(a) simde_mm512_sin_ps(a)
10016 #endif
10017 
10018 SIMDE_FUNCTION_ATTRIBUTES
10019 simde__m512d
simde_mm512_sin_pd(simde__m512d a)10020 simde_mm512_sin_pd (simde__m512d a) {
10021   #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
10022     return _mm512_sin_pd(a);
10023   #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
10024     #if SIMDE_ACCURACY_PREFERENCE > 1
10025       return Sleef_sind8_u10(a);
10026     #else
10027       return Sleef_sind8_u35(a);
10028     #endif
10029   #else
10030     simde__m512d_private
10031       r_,
10032       a_ = simde__m512d_to_private(a);
10033 
10034     #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
10035       for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
10036         r_.m256d[i] = simde_mm256_sin_pd(a_.m256d[i]);
10037       }
10038     #else
10039       SIMDE_VECTORIZE
10040       for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
10041         r_.f64[i] = simde_math_sin(a_.f64[i]);
10042       }
10043     #endif
10044 
10045     return simde__m512d_from_private(r_);
10046   #endif
10047 }
10048 #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
10049   #undef _mm512_sin_pd
10050   #define _mm512_sin_pd(a) simde_mm512_sin_pd(a)
10051 #endif
10052 
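/* Masked variants: no separate masked kernels exist in the fallback
 * paths, so each simply computes the full-width result and then blends
 * it with `src` under the writemask `k` via simde_mm512_mask_mov_*(). */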
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_sin_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_sin_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_sin_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_sin_ps
  #define _mm512_mask_sin_ps(src, k, a) simde_mm512_mask_sin_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_sin_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_sin_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_sin_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_sin_pd
  #define _mm512_mask_sin_pd(src, k, a) simde_mm512_mask_sin_pd(src, k, a)
#endif

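/* sincos: returns the sine and writes the cosine through `mem_addr`.
 * Sleef exposes combined kernels that produce both results from one
 * call; the portable fallback simply evaluates sin and cos separately,
 * which is correct but does roughly twice the work.
 *
 * Hypothetical usage sketch (the identifiers `s`, `c`, and `x` are ours,
 * not part of the API):
 *
 *   simde__m128 c;
 *   simde__m128 s = simde_mm_sincos_ps(&c, x);  // s = sin(x), c = cos(x)
 */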
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_sincos_ps (simde__m128* mem_addr, simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_sincos_ps(HEDLEY_REINTERPRET_CAST(__m128*, mem_addr), a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    Sleef___m128_2 temp;

    #if SIMDE_ACCURACY_PREFERENCE > 1
      temp = Sleef_sincosf4_u10(a);
    #else
      temp = Sleef_sincosf4_u35(a);
    #endif

    *mem_addr = temp.y;
    return temp.x;
  #else
    simde__m128 r;

    r = simde_mm_sin_ps(a);
    *mem_addr = simde_mm_cos_ps(a);

    return r;
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_sincos_ps
  #define _mm_sincos_ps(mem_addr, a) simde_mm_sincos_ps((mem_addr),(a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_sincos_pd (simde__m128d* mem_addr, simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_sincos_pd(HEDLEY_REINTERPRET_CAST(__m128d*, mem_addr), a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    Sleef___m128d_2 temp;

    #if SIMDE_ACCURACY_PREFERENCE > 1
      temp = Sleef_sincosd2_u10(a);
    #else
      temp = Sleef_sincosd2_u35(a);
    #endif

    *mem_addr = temp.y;
    return temp.x;
  #else
    simde__m128d r;

    r = simde_mm_sin_pd(a);
    *mem_addr = simde_mm_cos_pd(a);

    return r;
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_sincos_pd
  #define _mm_sincos_pd(mem_addr, a) simde_mm_sincos_pd((mem_addr),(a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_sincos_ps (simde__m256* mem_addr, simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_sincos_ps(HEDLEY_REINTERPRET_CAST(__m256*, mem_addr), a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    Sleef___m256_2 temp;

    #if SIMDE_ACCURACY_PREFERENCE > 1
      temp = Sleef_sincosf8_u10(a);
    #else
      temp = Sleef_sincosf8_u35(a);
    #endif

    *mem_addr = temp.y;
    return temp.x;
  #else
    simde__m256 r;

    r = simde_mm256_sin_ps(a);
    *mem_addr = simde_mm256_cos_ps(a);

    return r;
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_sincos_ps
  #define _mm256_sincos_ps(mem_addr, a) simde_mm256_sincos_ps((mem_addr),(a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_sincos_pd (simde__m256d* mem_addr, simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_sincos_pd(HEDLEY_REINTERPRET_CAST(__m256d*, mem_addr), a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    Sleef___m256d_2 temp;

    #if SIMDE_ACCURACY_PREFERENCE > 1
      temp = Sleef_sincosd4_u10(a);
    #else
      temp = Sleef_sincosd4_u35(a);
    #endif

    *mem_addr = temp.y;
    return temp.x;
  #else
    simde__m256d r;

    r = simde_mm256_sin_pd(a);
    *mem_addr = simde_mm256_cos_pd(a);

    return r;
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_sincos_pd
  #define _mm256_sincos_pd(mem_addr, a) simde_mm256_sincos_pd((mem_addr),(a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_sincos_ps (simde__m512* mem_addr, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_sincos_ps(HEDLEY_REINTERPRET_CAST(__m512*, mem_addr), a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    Sleef___m512_2 temp;

    #if SIMDE_ACCURACY_PREFERENCE > 1
      temp = Sleef_sincosf16_u10(a);
    #else
      temp = Sleef_sincosf16_u35(a);
    #endif

    *mem_addr = temp.y;
    return temp.x;
  #else
    simde__m512 r;

    r = simde_mm512_sin_ps(a);
    *mem_addr = simde_mm512_cos_ps(a);

    return r;
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_sincos_ps
  #define _mm512_sincos_ps(mem_addr, a) simde_mm512_sincos_ps((mem_addr),(a))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_sincos_pd (simde__m512d* mem_addr, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_sincos_pd(HEDLEY_REINTERPRET_CAST(__m512d*, mem_addr), a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    Sleef___m512d_2 temp;

    #if SIMDE_ACCURACY_PREFERENCE > 1
      temp = Sleef_sincosd8_u10(a);
    #else
      temp = Sleef_sincosd8_u35(a);
    #endif

    *mem_addr = temp.y;
    return temp.x;
  #else
    simde__m512d r;

    r = simde_mm512_sin_pd(a);
    *mem_addr = simde_mm512_cos_pd(a);

    return r;
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_sincos_pd
  #define _mm512_sincos_pd(mem_addr, a) simde_mm512_sincos_pd((mem_addr),(a))
#endif

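/* Masked sincos: both results are computed at full width, then the
 * writemask is applied to each independently -- `sin_src` and `cos_src`
 * supply the lanes where the corresponding bit of `k` is clear. */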
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_sincos_ps(simde__m512* mem_addr, simde__m512 sin_src, simde__m512 cos_src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_sincos_ps(HEDLEY_REINTERPRET_CAST(__m512*, mem_addr), sin_src, cos_src, k, a);
  #else
    simde__m512 cos_res, sin_res;
    sin_res = simde_mm512_sincos_ps(&cos_res, a);
    *mem_addr = simde_mm512_mask_mov_ps(cos_src, k, cos_res);
    return simde_mm512_mask_mov_ps(sin_src, k, sin_res);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_sincos_ps
  #define _mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a) simde_mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_sincos_pd(simde__m512d* mem_addr, simde__m512d sin_src, simde__m512d cos_src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_sincos_pd(HEDLEY_REINTERPRET_CAST(__m512d*, mem_addr), sin_src, cos_src, k, a);
  #else
    simde__m512d cos_res, sin_res;
    sin_res = simde_mm512_sincos_pd(&cos_res, a);
    *mem_addr = simde_mm512_mask_mov_pd(cos_src, k, cos_res);
    return simde_mm512_mask_mov_pd(sin_src, k, sin_res);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_sincos_pd
  #define _mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a) simde_mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a)
#endif

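/* sind: sine of an angle expressed in degrees. The Sleef paths convert
 * the argument to radians with the simde_x_*_deg2rad_* helpers before
 * calling the radian kernels; the scalar fallback does the same per
 * element with simde_math_deg2rad(f). */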
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_sind_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_sind_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sinf4_u10(simde_x_mm_deg2rad_ps(a));
    #else
      return Sleef_sinf4_u35(simde_x_mm_deg2rad_ps(a));
    #endif
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i]));
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_sind_ps
  #define _mm_sind_ps(a) simde_mm_sind_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_sind_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_sind_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sind2_u10(simde_x_mm_deg2rad_pd(a));
    #else
      return Sleef_sind2_u35(simde_x_mm_deg2rad_pd(a));
    #endif
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i]));
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_sind_pd
  #define _mm_sind_pd(a) simde_mm_sind_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_sind_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_sind_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sinf8_u10(simde_x_mm256_deg2rad_ps(a));
    #else
      return Sleef_sinf8_u35(simde_x_mm256_deg2rad_ps(a));
    #endif
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_sind_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i]));
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_sind_ps
  #define _mm256_sind_ps(a) simde_mm256_sind_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_sind_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_sind_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sind4_u10(simde_x_mm256_deg2rad_pd(a));
    #else
      return Sleef_sind4_u35(simde_x_mm256_deg2rad_pd(a));
    #endif
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_sind_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i]));
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_sind_pd
  #define _mm256_sind_pd(a) simde_mm256_sind_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_sind_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_sind_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sinf16_u10(simde_x_mm512_deg2rad_ps(a));
    #else
      return Sleef_sinf16_u35(simde_x_mm512_deg2rad_ps(a));
    #endif
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_sind_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i]));
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_sind_ps
  #define _mm512_sind_ps(a) simde_mm512_sind_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_sind_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_sind_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_sind8_u10(simde_x_mm512_deg2rad_pd(a));
    #else
      return Sleef_sind8_u35(simde_x_mm512_deg2rad_pd(a));
    #endif
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_sind_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i]));
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_sind_pd
  #define _mm512_sind_pd(a) simde_mm512_sind_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_sind_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_sind_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_sind_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_sind_ps
  #define _mm512_mask_sind_ps(src, k, a) simde_mm512_mask_sind_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_sind_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_sind_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_sind_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_sind_pd
  #define _mm512_mask_sind_pd(src, k, a) simde_mm512_mask_sind_pd(src, k, a)
#endif

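/* sinh: hyperbolic sine. Only the 1.0-ULP (u10) Sleef kernels are used
 * here, so there is no SIMDE_ACCURACY_PREFERENCE branch, unlike the
 * trigonometric functions above. */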
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_sinh_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_sinh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_sinhf4_u10(a);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_sinhf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_sinh_ps
  #define _mm_sinh_ps(a) simde_mm_sinh_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_sinh_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_sinh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_sinhd2_u10(a);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_sinh(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_sinh_pd
  #define _mm_sinh_pd(a) simde_mm_sinh_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_sinh_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_sinh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_sinhf8_u10(a);
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_sinh_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_sinhf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_sinh_ps
  #define _mm256_sinh_ps(a) simde_mm256_sinh_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_sinh_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_sinh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_sinhd4_u10(a);
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_sinh_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_sinh(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_sinh_pd
  #define _mm256_sinh_pd(a) simde_mm256_sinh_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_sinh_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_sinh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_sinhf16_u10(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_sinh_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_sinhf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_sinh_ps
  #define _mm512_sinh_ps(a) simde_mm512_sinh_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_sinh_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_sinh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_sinhd8_u10(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_sinh_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_sinh(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_sinh_pd
  #define _mm512_sinh_pd(a) simde_mm512_sinh_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_sinh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_sinh_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_sinh_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_sinh_ps
  #define _mm512_mask_sinh_ps(src, k, a) simde_mm512_mask_sinh_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_sinh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_sinh_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_sinh_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_sinh_pd
  #define _mm512_mask_sinh_pd(src, k, a) simde_mm512_mask_sinh_pd(src, k, a)
#endif

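/* svml_ceil: ceiling is just rounding toward positive infinity, so the
 * 128- and 256-bit fallbacks reuse simde_*_round_* with
 * SIMDE_MM_FROUND_TO_POS_INF instead of looping per element. */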
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_svml_ceil_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_ceil_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_ceilf4(a);
  #else
    return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_ceil_ps
  #define _mm_svml_ceil_ps(a) simde_mm_svml_ceil_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_svml_ceil_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_ceil_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_ceild2(a);
  #else
    return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_ceil_pd
  #define _mm_svml_ceil_pd(a) simde_mm_svml_ceil_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_svml_ceil_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_svml_ceil_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_ceilf8(a);
  #else
    return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_ceil_ps
  #define _mm256_svml_ceil_ps(a) simde_mm256_svml_ceil_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_svml_ceil_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_svml_ceil_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_ceild4(a);
  #else
    return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_ceil_pd
  #define _mm256_svml_ceil_pd(a) simde_mm256_svml_ceil_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_ceil_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_ceil_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_ceilf16(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_ceil_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_ceilf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_ceil_ps
  #define _mm512_ceil_ps(a) simde_mm512_ceil_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_ceil_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_ceil_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_ceild8(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_ceil_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_ceil(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_ceil_pd
  #define _mm512_ceil_pd(a) simde_mm512_ceil_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_ceil_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_ceil_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_ceil_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_ceil_ps
  #define _mm512_mask_ceil_ps(src, k, a) simde_mm512_mask_ceil_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_ceil_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_ceil_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_ceil_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_ceil_pd
  #define _mm512_mask_ceil_pd(src, k, a) simde_mm512_mask_ceil_pd(src, k, a)
#endif

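/* svml_floor: identical structure to svml_ceil above, but rounding
 * toward negative infinity (SIMDE_MM_FROUND_TO_NEG_INF). */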
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_svml_floor_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_floor_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_floorf4(a);
  #else
    return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_floor_ps
  #define _mm_svml_floor_ps(a) simde_mm_svml_floor_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_svml_floor_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_floor_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_floord2(a);
  #else
    return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_floor_pd
  #define _mm_svml_floor_pd(a) simde_mm_svml_floor_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_svml_floor_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_svml_floor_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_floorf8(a);
  #else
    return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_floor_ps
  #define _mm256_svml_floor_ps(a) simde_mm256_svml_floor_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_svml_floor_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_svml_floor_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_floord4(a);
  #else
    return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_floor_pd
  #define _mm256_svml_floor_pd(a) simde_mm256_svml_floor_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_floor_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_floor_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_floorf16(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_floor_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_floorf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_floor_ps
  #define _mm512_floor_ps(a) simde_mm512_floor_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_floor_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_floor_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_floord8(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_floor_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_floor(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_floor_pd
  #define _mm512_floor_pd(a) simde_mm512_floor_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_floor_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_floor_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_floor_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_floor_ps
  #define _mm512_mask_floor_ps(src, k, a) simde_mm512_mask_floor_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_floor_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_floor_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_floor_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_floor_pd
  #define _mm512_mask_floor_pd(src, k, a) simde_mm512_mask_floor_pd(src, k, a)
#endif

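/* svml_round: the fallbacks defer to C's round()/roundf(), which round
 * halfway cases away from zero. Note that this is not the same as
 * _mm_round_*() with SIMDE_MM_FROUND_TO_NEAREST_INT, which rounds ties
 * to even, so the per-element loop cannot simply be replaced by a
 * single rounding intrinsic here. */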
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_svml_round_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_round_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_roundf4(a);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_roundf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_round_ps
  #define _mm_svml_round_ps(a) simde_mm_svml_round_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_svml_round_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_round_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_roundd2(a);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_round(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_round_pd
  #define _mm_svml_round_pd(a) simde_mm_svml_round_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_svml_round_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_svml_round_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_roundf8(a);
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_svml_round_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_roundf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_round_ps
  #define _mm256_svml_round_ps(a) simde_mm256_svml_round_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_svml_round_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_svml_round_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_roundd4(a);
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_svml_round_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_round(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_round_pd
  #define _mm256_svml_round_pd(a) simde_mm256_svml_round_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_svml_round_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_svml_round_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_roundd8(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_svml_round_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_round(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_svml_round_pd
  #define _mm512_svml_round_pd(a) simde_mm512_svml_round_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_svml_round_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_svml_round_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_svml_round_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_svml_round_pd
  #define _mm512_mask_svml_round_pd(src, k, a) simde_mm512_mask_svml_round_pd(src, k, a)
#endif

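/* svml_sqrt: square root already has full SIMDe implementations, so
 * these wrappers simply forward to the existing simde_*_sqrt_*
 * functions. */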
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_svml_sqrt_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_sqrt_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_sqrtf4(a);
  #else
    return simde_mm_sqrt_ps(a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_sqrt_ps
  #define _mm_svml_sqrt_ps(a) simde_mm_svml_sqrt_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_svml_sqrt_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_sqrt_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_sqrtd2(a);
  #else
    return simde_mm_sqrt_pd(a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_sqrt_pd
  #define _mm_svml_sqrt_pd(a) simde_mm_svml_sqrt_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_svml_sqrt_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_svml_sqrt_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_sqrtf8(a);
  #else
    return simde_mm256_sqrt_ps(a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_sqrt_ps
  #define _mm256_svml_sqrt_ps(a) simde_mm256_svml_sqrt_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_svml_sqrt_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_svml_sqrt_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_sqrtd4(a);
  #else
    return simde_mm256_sqrt_pd(a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_sqrt_pd
  #define _mm256_svml_sqrt_pd(a) simde_mm256_svml_sqrt_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_svml_sqrt_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_svml_sqrt_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_sqrtf16(a);
  #else
    return simde_mm512_sqrt_ps(a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_svml_sqrt_ps
  #define _mm512_svml_sqrt_ps(a) simde_mm512_svml_sqrt_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_svml_sqrt_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_svml_sqrt_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_sqrtd8(a);
  #else
    return simde_mm512_sqrt_pd(a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_svml_sqrt_pd
  #define _mm512_svml_sqrt_pd(a) simde_mm512_svml_sqrt_pd(a)
#endif

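/* tan: same three-tier structure as sin above -- native SVML first,
 * then Sleef (u10 or u35 depending on SIMDE_ACCURACY_PREFERENCE), then
 * a scalar loop over simde_math_tan(f). */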
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_tan_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_tan_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tanf4_u10(a);
    #else
      return Sleef_tanf4_u35(a);
    #endif
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_tanf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_tan_ps
  #define _mm_tan_ps(a) simde_mm_tan_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_tan_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_tan_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tand2_u10(a);
    #else
      return Sleef_tand2_u35(a);
    #endif
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_tan(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_tan_pd
  #define _mm_tan_pd(a) simde_mm_tan_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_tan_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_tan_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tanf8_u10(a);
    #else
      return Sleef_tanf8_u35(a);
    #endif
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_tan_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_tanf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_tan_ps
  #define _mm256_tan_ps(a) simde_mm256_tan_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_tan_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_tan_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tand4_u10(a);
    #else
      return Sleef_tand4_u35(a);
    #endif
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_tan_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_tan(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_tan_pd
  #define _mm256_tan_pd(a) simde_mm256_tan_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_tan_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_tan_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tanf16_u10(a);
    #else
      return Sleef_tanf16_u35(a);
    #endif
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_tan_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_tanf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_tan_ps
  #define _mm512_tan_ps(a) simde_mm512_tan_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_tan_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_tan_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tand8_u10(a);
    #else
      return Sleef_tand8_u35(a);
    #endif
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_tan_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_tan(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_tan_pd
  #define _mm512_tan_pd(a) simde_mm512_tan_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_tan_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_tan_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_tan_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_tan_ps
  #define _mm512_mask_tan_ps(src, k, a) simde_mm512_mask_tan_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_tan_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_tan_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_tan_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_tan_pd
  #define _mm512_mask_tan_pd(src, k, a) simde_mm512_mask_tan_pd(src, k, a)
#endif

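/* tand: tangent of an angle in degrees; as with sind, the argument is
 * converted to radians (deg2rad) before the radian kernels are used. */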
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_tand_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_tand_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tanf4_u10(simde_x_mm_deg2rad_ps(a));
    #else
      return Sleef_tanf4_u35(simde_x_mm_deg2rad_ps(a));
    #endif
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_tanf(simde_math_deg2radf(a_.f32[i]));
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_tand_ps
  #define _mm_tand_ps(a) simde_mm_tand_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_tand_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_tand_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tand2_u10(simde_x_mm_deg2rad_pd(a));
    #else
      return Sleef_tand2_u35(simde_x_mm_deg2rad_pd(a));
    #endif
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_tan(simde_math_deg2rad(a_.f64[i]));
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_tand_pd
  #define _mm_tand_pd(a) simde_mm_tand_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_tand_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_tand_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tanf8_u10(simde_x_mm256_deg2rad_ps(a));
    #else
      return Sleef_tanf8_u35(simde_x_mm256_deg2rad_ps(a));
    #endif
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_tand_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_tanf(simde_math_deg2radf(a_.f32[i]));
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_tand_ps
  #define _mm256_tand_ps(a) simde_mm256_tand_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_tand_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_tand_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tand4_u10(simde_x_mm256_deg2rad_pd(a));
    #else
      return Sleef_tand4_u35(simde_x_mm256_deg2rad_pd(a));
    #endif
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_tand_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_tan(simde_math_deg2rad(a_.f64[i]));
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_tand_pd
  #define _mm256_tand_pd(a) simde_mm256_tand_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_tand_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_tand_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tanf16_u10(simde_x_mm512_deg2rad_ps(a));
    #else
      return Sleef_tanf16_u35(simde_x_mm512_deg2rad_ps(a));
    #endif
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_tand_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_tanf(simde_math_deg2radf(a_.f32[i]));
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_tand_ps
  #define _mm512_tand_ps(a) simde_mm512_tand_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_tand_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_tand_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tand8_u10(simde_x_mm512_deg2rad_pd(a));
    #else
      return Sleef_tand8_u35(simde_x_mm512_deg2rad_pd(a));
    #endif
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_tand_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_tan(simde_math_deg2rad(a_.f64[i]));
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_tand_pd
  #define _mm512_tand_pd(a) simde_mm512_tand_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_tand_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_tand_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_tand_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_tand_ps
  #define _mm512_mask_tand_ps(src, k, a) simde_mm512_mask_tand_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_tand_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_tand_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_tand_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_tand_pd
  #define _mm512_mask_tand_pd(src, k, a) simde_mm512_mask_tand_pd(src, k, a)
#endif

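/* Hyperbolic tangent.  Unlike tan/tand above, the Sleef paths here have no
 * SIMDE_ACCURACY_PREFERENCE branch: only the 1.0 ULP (_u10) tanh variants
 * are used. */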
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_tanh_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_tanh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_tanhf4_u10(a);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_tanhf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_tanh_ps
  #define _mm_tanh_ps(a) simde_mm_tanh_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_tanh_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_tanh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_tanhd2_u10(a);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_tanh(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_tanh_pd
  #define _mm_tanh_pd(a) simde_mm_tanh_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_tanh_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_tanh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_tanhf8_u10(a);
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_tanh_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_tanhf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_tanh_ps
  #define _mm256_tanh_ps(a) simde_mm256_tanh_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_tanh_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_tanh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_tanhd4_u10(a);
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_tanh_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_tanh(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_tanh_pd
  #define _mm256_tanh_pd(a) simde_mm256_tanh_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_tanh_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_tanh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_tanhf16_u10(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_tanh_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_tanhf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_tanh_ps
  #define _mm512_tanh_ps(a) simde_mm512_tanh_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_tanh_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_tanh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_tanhd8_u10(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_tanh_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_tanh(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_tanh_pd
  #define _mm512_tanh_pd(a) simde_mm512_tanh_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_tanh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_tanh_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_tanh_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_tanh_ps
  #define _mm512_mask_tanh_ps(src, k, a) simde_mm512_mask_tanh_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_tanh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_tanh_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_tanh_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_tanh_pd
  #define _mm512_mask_tanh_pd(src, k, a) simde_mm512_mask_tanh_pd(src, k, a)
#endif

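/* trunc rounds toward zero, i.e. trunc(1.9) == 1.0 and trunc(-1.9) == -1.0.
 * The 128- and 256-bit fallbacks delegate to the corresponding
 * simde_mm*_round_* with SIMDE_MM_FROUND_TO_ZERO rather than looping over
 * lanes. */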
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_trunc_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_trunc_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_truncf4(a);
  #else
    return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_ZERO);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_trunc_ps
  #define _mm_trunc_ps(a) simde_mm_trunc_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_trunc_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_trunc_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_truncd2(a);
  #else
    return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_ZERO);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_trunc_pd
  #define _mm_trunc_pd(a) simde_mm_trunc_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_trunc_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_trunc_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_truncf8(a);
  #else
    return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_ZERO);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_trunc_ps
  #define _mm256_trunc_ps(a) simde_mm256_trunc_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_trunc_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_trunc_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_truncd4(a);
  #else
    return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_ZERO);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_trunc_pd
  #define _mm256_trunc_pd(a) simde_mm256_trunc_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_trunc_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_trunc_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_truncf16(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_trunc_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_truncf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_trunc_ps
  #define _mm512_trunc_ps(a) simde_mm512_trunc_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_trunc_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_trunc_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_truncd8(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_trunc_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_trunc(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_trunc_pd
  #define _mm512_trunc_pd(a) simde_mm512_trunc_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_trunc_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_trunc_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_trunc_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_trunc_ps
  #define _mm512_mask_trunc_ps(src, k, a) simde_mm512_mask_trunc_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_trunc_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_trunc_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_trunc_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_trunc_pd
  #define _mm512_mask_trunc_pd(src, k, a) simde_mm512_mask_trunc_pd(src, k, a)
#endif

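/* udivrem performs a per-lane unsigned 32-bit division, returning the
 * quotient and storing the remainder through mem_addr.  The fallback
 * computes the remainder as a - (a / b) * b using SIMDe's internal
 * unsigned helpers. */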
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_udivrem_epi32 (simde__m128i * mem_addr, simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_udivrem_epi32(mem_addr, a, b);
  #else
    simde__m128i r;

    r = simde_mm_div_epu32(a, b);
    *mem_addr = simde_x_mm_sub_epu32(a, simde_x_mm_mullo_epu32(r, b));

    return r;
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_udivrem_epi32
  #define _mm_udivrem_epi32(mem_addr, a, b) simde_mm_udivrem_epi32((mem_addr), (a), (b))
#endif

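/* A minimal usage sketch (hypothetical values; assumes simde_mm_set1_epi32
 * from sse2.h, which is pulled in via the includes above):
 *
 *   simde__m128i rem;
 *   simde__m128i quot = simde_mm_udivrem_epi32(&rem,
 *                                              simde_mm_set1_epi32(7),
 *                                              simde_mm_set1_epi32(3));
 *   // every lane of quot is 2 and every lane of rem is 1, since 7 = 2*3 + 1
 */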
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_udivrem_epi32 (simde__m256i* mem_addr, simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_udivrem_epi32(HEDLEY_REINTERPRET_CAST(__m256i*, mem_addr), a, b);
  #else
    simde__m256i r;

    r = simde_mm256_div_epu32(a, b);
    *mem_addr = simde_x_mm256_sub_epu32(a, simde_x_mm256_mullo_epu32(r, b));

    return r;
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_udivrem_epi32
  #define _mm256_udivrem_epi32(mem_addr, a, b) simde_mm256_udivrem_epi32((mem_addr), (a), (b))
#endif

SIMDE_END_DECLS_

HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_X86_SVML_H) */