/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 *   2020      Evan Nemerson <evan@nemerson.com>
 *   2020      Himanshi Mathur <himanshi18037@iiitd.ac.in>
 */

#if !defined(SIMDE_X86_AVX512_CMP_H)
#define SIMDE_X86_AVX512_CMP_H

#include "types.h"
#include "../avx2.h"
#include "mov.h"
#include "mov_mask.h"
#include "setzero.h"
#include "setone.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_
/* Software fallback for _mm512_cmp_ps_mask: compares each of the 16
 * single-precision lanes of `a` against the corresponding lane of `b`
 * using the comparison predicate `imm8` (a SIMDE_CMP_* constant in
 * [0, 31]) and returns a 16-bit mask with bit i set when lane i
 * satisfies the predicate.
 *
 * NOTE(review): the ordered/unordered and signaling/quiet predicate
 * pairs are collapsed onto the same plain C comparison here (e.g.
 * SIMDE_CMP_EQ_UQ uses `==` exactly like SIMDE_CMP_EQ_OQ), so lanes
 * containing NaN do not get the "true when unordered" semantics the
 * unordered variants specify -- confirm against the Intel SDM before
 * relying on exact NaN behavior.  Signaling variants also raise no FP
 * exceptions in this fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde__mmask16
simde_mm512_cmp_ps_mask (simde__m512 a, simde__m512 b, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    simde__mmask16 r;
    /* The native intrinsic needs an immediate operand but imm8 is a
     * runtime value here; SIMDE_CONSTIFY_32_ expands to a switch over
     * all 32 possible values. */
    SIMDE_CONSTIFY_32_(_mm512_cmp_ps_mask, r, (HEDLEY_UNREACHABLE(), 0), imm8, a, b);
    return r;
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a),
      b_ = simde__m512_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Vector-extension path: each comparison yields an all-ones /
       * all-zeros value per lane directly. */
      switch (imm8) {
        case SIMDE_CMP_EQ_OQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32));
          break;
        case SIMDE_CMP_LT_OS:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32));
          break;
        case SIMDE_CMP_LE_OS:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32));
          break;
        case SIMDE_CMP_UNORD_Q:
          /* Unordered: lane is true when either operand is NaN. */
          #if defined(simde_math_isnanf)
            for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
              r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0);
            }
          #else
            HEDLEY_UNREACHABLE();
          #endif
          break;
        case SIMDE_CMP_NEQ_UQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32));
          break;
        case SIMDE_CMP_NLT_US:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32));
          break;
        case SIMDE_CMP_NLE_US:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32));
          break;
        case SIMDE_CMP_ORD_Q:
          /* Ordered: lane is true when neither operand is NaN. */
          #if defined(simde_math_isnanf)
            for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
              r_.u32[i] = (!simde_math_isnanf(a_.f32[i]) && !simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0);
            }
          #else
            HEDLEY_UNREACHABLE();
          #endif
          break;
        case SIMDE_CMP_EQ_UQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32));
          break;
        case SIMDE_CMP_NGE_US:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32));
          break;
        case SIMDE_CMP_NGT_US:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32));
          break;
        case SIMDE_CMP_FALSE_OQ:
          /* Constant-false predicate: all mask bits clear. */
          r_ = simde__m512_to_private(simde_mm512_setzero_ps());
          break;
        case SIMDE_CMP_NEQ_OQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32));
          break;
        case SIMDE_CMP_GE_OS:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32));
          break;
        case SIMDE_CMP_GT_OS:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32));
          break;
        case SIMDE_CMP_TRUE_UQ:
          /* Constant-true predicate: all mask bits set. */
          r_ = simde__m512_to_private(simde_x_mm512_setone_ps());
          break;
        case SIMDE_CMP_EQ_OS:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32));
          break;
        case SIMDE_CMP_LT_OQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32));
          break;
        case SIMDE_CMP_LE_OQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32));
          break;
        case SIMDE_CMP_UNORD_S:
          #if defined(simde_math_isnanf)
            for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
              r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0);
            }
          #else
            HEDLEY_UNREACHABLE();
          #endif
          break;
        case SIMDE_CMP_NEQ_US:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32));
          break;
        case SIMDE_CMP_NLT_UQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32));
          break;
        case SIMDE_CMP_NLE_UQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32));
          break;
        case SIMDE_CMP_ORD_S:
          /* Same as ORD_Q via De Morgan: ternary is inverted instead of
           * negating each isnan test. */
          #if defined(simde_math_isnanf)
            for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
              r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0);
            }
          #else
            HEDLEY_UNREACHABLE();
          #endif
          break;
        case SIMDE_CMP_EQ_US:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32));
          break;
        case SIMDE_CMP_NGE_UQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32));
          break;
        case SIMDE_CMP_NGT_UQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32));
          break;
        case SIMDE_CMP_FALSE_OS:
          r_ = simde__m512_to_private(simde_mm512_setzero_ps());
          break;
        case SIMDE_CMP_NEQ_OS:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32));
          break;
        case SIMDE_CMP_GE_OQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32));
          break;
        case SIMDE_CMP_GT_OQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32));
          break;
        case SIMDE_CMP_TRUE_US:
          r_ = simde__m512_to_private(simde_x_mm512_setone_ps());
          break;
        default:
          /* imm8 is contract-limited to [0, 31]; every value is handled
           * above. */
          HEDLEY_UNREACHABLE();
          break;
      }
    #else /* defined(SIMDE_VECTOR_SUBSCRIPT_OPS) */
      /* Portable scalar path: build the all-ones / all-zeros lane
       * values one element at a time. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        switch (imm8) {
          case SIMDE_CMP_EQ_OQ:
            r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_LT_OS:
            r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_LE_OS:
            r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_UNORD_Q:
            #if defined(simde_math_isnanf)
              r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0);
            #else
              HEDLEY_UNREACHABLE();
            #endif
            break;
          case SIMDE_CMP_NEQ_UQ:
            r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_NLT_US:
            r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_NLE_US:
            r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_ORD_Q:
            #if defined(simde_math_isnanf)
              r_.u32[i] = (!simde_math_isnanf(a_.f32[i]) && !simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0);
            #else
              HEDLEY_UNREACHABLE();
            #endif
            break;
          case SIMDE_CMP_EQ_UQ:
            r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_NGE_US:
            r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_NGT_US:
            r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_FALSE_OQ:
            r_.u32[i] = UINT32_C(0);
            break;
          case SIMDE_CMP_NEQ_OQ:
            r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_GE_OS:
            r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_GT_OS:
            r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_TRUE_UQ:
            r_.u32[i] = ~UINT32_C(0);
            break;
          case SIMDE_CMP_EQ_OS:
            r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_LT_OQ:
            r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_LE_OQ:
            r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_UNORD_S:
            #if defined(simde_math_isnanf)
              r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0);
            #else
              HEDLEY_UNREACHABLE();
            #endif
            break;
          case SIMDE_CMP_NEQ_US:
            r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_NLT_UQ:
            r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_NLE_UQ:
            r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_ORD_S:
            #if defined(simde_math_isnanf)
              r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0);
            #else
              HEDLEY_UNREACHABLE();
            #endif
            break;
          case SIMDE_CMP_EQ_US:
            r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_NGE_UQ:
            r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_NGT_UQ:
            r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_FALSE_OS:
            r_.u32[i] = UINT32_C(0);
            break;
          case SIMDE_CMP_NEQ_OS:
            r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_GE_OQ:
            r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_GT_OQ:
            r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_TRUE_US:
            r_.u32[i] = ~UINT32_C(0);
            break;
          default:
            HEDLEY_UNREACHABLE();
            break;
        }
      }
    #endif

    /* Collapse the per-lane all-ones/all-zeros vector into a bitmask,
     * one bit per 32-bit lane. */
    return simde_mm512_movepi32_mask(simde_mm512_castps_si512(simde__m512_from_private(r_)));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_cmp_ps_mask
  #define _mm512_cmp_ps_mask(a, b, imm8) simde_mm512_cmp_ps_mask((a), (b), (imm8))
#endif
312
313 SIMDE_FUNCTION_ATTRIBUTES
314 simde__mmask8
simde_mm512_cmp_pd_mask(simde__m512d a,simde__m512d b,const int imm8)315 simde_mm512_cmp_pd_mask (simde__m512d a, simde__m512d b, const int imm8)
316 SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) {
317 #if defined(SIMDE_X86_AVX512F_NATIVE)
318 simde__mmask8 r;
319 SIMDE_CONSTIFY_32_(_mm512_cmp_pd_mask, r, (HEDLEY_UNREACHABLE(), 0), imm8, a, b);
320 return r;
321 #else
322 simde__m512d_private
323 r_,
324 a_ = simde__m512d_to_private(a),
325 b_ = simde__m512d_to_private(b);
326
327 #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
328 switch (imm8) {
329 case SIMDE_CMP_EQ_OQ:
330 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64));
331 break;
332 case SIMDE_CMP_LT_OS:
333 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64));
334 break;
335 case SIMDE_CMP_LE_OS:
336 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64));
337 break;
338 case SIMDE_CMP_UNORD_Q:
339 #if defined(simde_math_isnanf)
340 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
341 r_.u64[i] = (simde_math_isnanf(a_.f64[i]) || simde_math_isnanf(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0);
342 }
343 #else
344 HEDLEY_UNREACHABLE();
345 #endif
346 break;
347 case SIMDE_CMP_NEQ_UQ:
348 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64));
349 break;
350 case SIMDE_CMP_NLT_US:
351 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64));
352 break;
353 case SIMDE_CMP_NLE_US:
354 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64));
355 break;
356 case SIMDE_CMP_ORD_Q:
357 #if defined(simde_math_isnanf)
358 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
359 r_.u64[i] = (!simde_math_isnanf(a_.f64[i]) && !simde_math_isnanf(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0);
360 }
361 #else
362 HEDLEY_UNREACHABLE();
363 #endif
364 break;
365 case SIMDE_CMP_EQ_UQ:
366 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64));
367 break;
368 case SIMDE_CMP_NGE_US:
369 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64));
370 break;
371 case SIMDE_CMP_NGT_US:
372 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64));
373 break;
374 case SIMDE_CMP_FALSE_OQ:
375 r_ = simde__m512d_to_private(simde_mm512_setzero_pd());
376 break;
377 case SIMDE_CMP_NEQ_OQ:
378 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64));
379 break;
380 case SIMDE_CMP_GE_OS:
381 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64));
382 break;
383 case SIMDE_CMP_GT_OS:
384 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64));
385 break;
386 case SIMDE_CMP_TRUE_UQ:
387 r_ = simde__m512d_to_private(simde_x_mm512_setone_pd());
388 break;
389 case SIMDE_CMP_EQ_OS:
390 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64));
391 break;
392 case SIMDE_CMP_LT_OQ:
393 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64));
394 break;
395 case SIMDE_CMP_LE_OQ:
396 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64));
397 break;
398 case SIMDE_CMP_UNORD_S:
399 #if defined(simde_math_isnanf)
400 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
401 r_.u64[i] = (simde_math_isnanf(a_.f64[i]) || simde_math_isnanf(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0);
402 }
403 #else
404 HEDLEY_UNREACHABLE();
405 #endif
406 break;
407 case SIMDE_CMP_NEQ_US:
408 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64));
409 break;
410 case SIMDE_CMP_NLT_UQ:
411 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64));
412 break;
413 case SIMDE_CMP_NLE_UQ:
414 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64));
415 break;
416 case SIMDE_CMP_ORD_S:
417 #if defined(simde_math_isnanf)
418 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
419 r_.u64[i] = (simde_math_isnanf(a_.f64[i]) || simde_math_isnanf(b_.f64[i])) ? UINT64_C(0) : ~UINT64_C(0);
420 }
421 #else
422 HEDLEY_UNREACHABLE();
423 #endif
424 break;
425 case SIMDE_CMP_EQ_US:
426 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64));
427 break;
428 case SIMDE_CMP_NGE_UQ:
429 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64));
430 break;
431 case SIMDE_CMP_NGT_UQ:
432 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64));
433 break;
434 case SIMDE_CMP_FALSE_OS:
435 r_ = simde__m512d_to_private(simde_mm512_setzero_pd());
436 break;
437 case SIMDE_CMP_NEQ_OS:
438 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64));
439 break;
440 case SIMDE_CMP_GE_OQ:
441 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64));
442 break;
443 case SIMDE_CMP_GT_OQ:
444 r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64));
445 break;
446 case SIMDE_CMP_TRUE_US:
447 r_ = simde__m512d_to_private(simde_x_mm512_setone_pd());
448 break;
449 default:
450 HEDLEY_UNREACHABLE();
451 break;
452 }
453 #else /* defined(SIMDE_VECTOR_SUBSCRIPT_OPS) */
454 SIMDE_VECTORIZE
455 for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
456 switch (imm8) {
457 case SIMDE_CMP_EQ_OQ:
458 r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
459 break;
460 case SIMDE_CMP_LT_OS:
461 r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
462 break;
463 case SIMDE_CMP_LE_OS:
464 r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
465 break;
466 case SIMDE_CMP_UNORD_Q:
467 #if defined(simde_math_isnanf)
468 r_.u64[i] = (simde_math_isnanf(a_.f64[i]) || simde_math_isnanf(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0);
469 #else
470 HEDLEY_UNREACHABLE();
471 #endif
472 break;
473 case SIMDE_CMP_NEQ_UQ:
474 r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
475 break;
476 case SIMDE_CMP_NLT_US:
477 r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
478 break;
479 case SIMDE_CMP_NLE_US:
480 r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
481 break;
482 case SIMDE_CMP_ORD_Q:
483 #if defined(simde_math_isnanf)
484 r_.u64[i] = (!simde_math_isnanf(a_.f64[i]) && !simde_math_isnanf(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0);
485 #else
486 HEDLEY_UNREACHABLE();
487 #endif
488 break;
489 case SIMDE_CMP_EQ_UQ:
490 r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
491 break;
492 case SIMDE_CMP_NGE_US:
493 r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
494 break;
495 case SIMDE_CMP_NGT_US:
496 r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
497 break;
498 case SIMDE_CMP_FALSE_OQ:
499 r_.u64[i] = UINT64_C(0);
500 break;
501 case SIMDE_CMP_NEQ_OQ:
502 r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
503 break;
504 case SIMDE_CMP_GE_OS:
505 r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
506 break;
507 case SIMDE_CMP_GT_OS:
508 r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
509 break;
510 case SIMDE_CMP_TRUE_UQ:
511 r_.u64[i] = ~UINT64_C(0);
512 break;
513 case SIMDE_CMP_EQ_OS:
514 r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
515 break;
516 case SIMDE_CMP_LT_OQ:
517 r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
518 break;
519 case SIMDE_CMP_LE_OQ:
520 r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
521 break;
522 case SIMDE_CMP_UNORD_S:
523 #if defined(simde_math_isnanf)
524 r_.u64[i] = (simde_math_isnanf(a_.f64[i]) || simde_math_isnanf(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0);
525 #else
526 HEDLEY_UNREACHABLE();
527 #endif
528 break;
529 case SIMDE_CMP_NEQ_US:
530 r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
531 break;
532 case SIMDE_CMP_NLT_UQ:
533 r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
534 break;
535 case SIMDE_CMP_NLE_UQ:
536 r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
537 break;
538 case SIMDE_CMP_ORD_S:
539 #if defined(simde_math_isnanf)
540 r_.u64[i] = (simde_math_isnanf(a_.f64[i]) || simde_math_isnanf(b_.f64[i])) ? UINT64_C(0) : ~UINT64_C(0);
541 #else
542 HEDLEY_UNREACHABLE();
543 #endif
544 break;
545 case SIMDE_CMP_EQ_US:
546 r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
547 break;
548 case SIMDE_CMP_NGE_UQ:
549 r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
550 break;
551 case SIMDE_CMP_NGT_UQ:
552 r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
553 break;
554 case SIMDE_CMP_FALSE_OS:
555 r_.u64[i] = UINT64_C(0);
556 break;
557 case SIMDE_CMP_NEQ_OS:
558 r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
559 break;
560 case SIMDE_CMP_GE_OQ:
561 r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
562 break;
563 case SIMDE_CMP_GT_OQ:
564 r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
565 break;
566 case SIMDE_CMP_TRUE_US:
567 r_.u64[i] = ~UINT64_C(0);
568 break;
569 default:
570 HEDLEY_UNREACHABLE();
571 break;
572 }
573 }
574 #endif
575
576 return simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(r_)));
577 #endif
578 }
579 #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
580 #undef _mm512_cmp_pd_mask
581 #define _mm512_cmp_pd_mask(a, b, imm8) simde_mm512_cmp_pd_mask((a), (b), (imm8))
582 #endif

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_X86_AVX512_CMP_H) */