1 /* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6 */
7
8 #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_GE_H
9 #define LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_GE_H
10
11 #ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13 #endif
14
15 #include <simdpp/types.h>
16 #include <simdpp/core/make_shuffle_bytes_mask.h>
17 #include <simdpp/detail/null/compare.h>
18 #include <simdpp/detail/insn/bit_not.h>
19 #include <simdpp/detail/insn/cmp_lt.h>
20 #include <simdpp/detail/vector_array_macros.h>
21
22 namespace simdpp {
23 namespace SIMDPP_ARCH_NAMESPACE {
24 namespace detail {
25 namespace insn {
26
// Per-lane signed 8-bit compare: sets each mask lane where a >= b.
// Targets without a native signed >= synthesize it as NOT(a < b).
SIMDPP_INL mask_int8<16> i_cmp_ge(const int8<16>& a, const int8<16>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epi8_mask(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_s8(a.native(), b.native());
#elif SIMDPP_USE_MSA
    // MSA only provides <=, so swap operands: a >= b  <=>  b <= a
    return (v16u8) __msa_cle_s_b(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
41
#if SIMDPP_USE_AVX2
// 256-bit signed 8-bit >=; native mask compare on AVX512VL, else NOT(a < b).
SIMDPP_INL mask_int8<32> i_cmp_ge(const int8<32>& a, const int8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epi8_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif
52
#if SIMDPP_USE_AVX512BW
// 512-bit signed 8-bit >= as a native k-register mask compare.
SIMDPP_INL mask_int8<64> i_cmp_ge(const int8<64>& a, const int8<64>& b)
{
    return _mm512_cmpge_epi8_mask(a.native(), b.native());
}
#endif
59
60 // -----------------------------------------------------------------------------
61
i_cmp_ge(const uint8<16> & ca,const uint8<16> & cb)62 SIMDPP_INL mask_int8<16> i_cmp_ge(const uint8<16>& ca, const uint8<16>& cb)
63 {
64 uint8<16> a = ca, b = cb;
65 #if SIMDPP_USE_NULL
66 return detail::null::cmp_ge(a, b);
67 #elif SIMDPP_USE_AVX512VL
68 return _mm_cmpge_epu8_mask(a.native(), b.native());
69 #elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
70 return _mm_comge_epu8(a.native(), b.native());
71 #elif SIMDPP_USE_NEON
72 return vcgeq_u8(a.native(), b.native());
73 #elif SIMDPP_USE_MSA
74 return (v16u8) __msa_cle_u_b(b.native(), a.native());
75 #else
76 return i_bit_not(i_cmp_lt(a, b));
77 #endif
78 }
79
#if SIMDPP_USE_AVX2
// 256-bit unsigned 8-bit >=; native mask compare on AVX512VL, else NOT(a < b).
SIMDPP_INL mask_int8<32> i_cmp_ge(const uint8<32>& a, const uint8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epu8_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif
90
#if SIMDPP_USE_AVX512BW
// 512-bit unsigned 8-bit >= as a native k-register mask compare.
SIMDPP_INL mask_int8<64> i_cmp_ge(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_cmpge_epu8_mask(a.native(), b.native());
}
#endif
97
98 // -----------------------------------------------------------------------------
99
// Per-lane signed 16-bit compare: sets each mask lane where a >= b.
// Targets without a native signed >= synthesize it as NOT(a < b).
SIMDPP_INL mask_int16<8> i_cmp_ge(const int16<8>& a, const int16<8>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epi16_mask(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_s16(a.native(), b.native());
#elif SIMDPP_USE_MSA
    // MSA only provides <=, so swap operands: a >= b  <=>  b <= a
    return (v8u16) __msa_cle_s_h(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
114
#if SIMDPP_USE_AVX2
// 256-bit signed 16-bit >=; native mask compare on AVX512VL, else NOT(a < b).
SIMDPP_INL mask_int16<16> i_cmp_ge(const int16<16>& a, const int16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epi16_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif
125
#if SIMDPP_USE_AVX512BW
// 512-bit signed 16-bit >= as a native k-register mask compare.
SIMDPP_INL mask_int16<32> i_cmp_ge(const int16<32>& a, const int16<32>& b)
{
    return _mm512_cmpge_epi16_mask(a.native(), b.native());
}
#endif
132
133 // -----------------------------------------------------------------------------
134
i_cmp_ge(const uint16<8> & ca,const uint16<8> & cb)135 SIMDPP_INL mask_int16<8> i_cmp_ge(const uint16<8>& ca, const uint16<8>& cb)
136 {
137 uint16<8> a = ca, b = cb;
138 #if SIMDPP_USE_NULL
139 return detail::null::cmp_ge(a, b);
140 #elif SIMDPP_USE_AVX512VL
141 return _mm_cmpge_epu16_mask(a.native(), b.native());
142 #elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
143 return _mm_comge_epu16(a.native(), b.native());
144 #elif SIMDPP_USE_NEON
145 return vcgeq_u16(a.native(), b.native());
146 #elif SIMDPP_USE_MSA
147 return (v8u16) __msa_cle_u_h(b.native(), a.native());
148 #else
149 return i_bit_not(i_cmp_lt(a, b));
150 #endif
151 }
152
#if SIMDPP_USE_AVX2
// 256-bit unsigned 16-bit >=; native mask compare on AVX512VL, else NOT(a < b).
SIMDPP_INL mask_int16<16> i_cmp_ge(const uint16<16>& a, const uint16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epu16_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif
163
#if SIMDPP_USE_AVX512BW
// 512-bit unsigned 16-bit >= as a native k-register mask compare.
SIMDPP_INL mask_int16<32> i_cmp_ge(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_cmpge_epu16_mask(a.native(), b.native());
}
#endif
170
171 // -----------------------------------------------------------------------------
172
// Per-lane signed 32-bit compare: sets each mask lane where a >= b.
// Targets without a native signed >= synthesize it as NOT(a < b).
SIMDPP_INL mask_int32<4> i_cmp_ge(const int32<4>& a, const int32<4>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epi32_mask(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_s32(a.native(), b.native());
#elif SIMDPP_USE_MSA
    // MSA only provides <=, so swap operands: a >= b  <=>  b <= a
    return (v4u32) __msa_cle_s_w(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
187
#if SIMDPP_USE_AVX2
// 256-bit signed 32-bit >=; native mask compare on AVX512VL, else NOT(a < b).
SIMDPP_INL mask_int32<8> i_cmp_ge(const int32<8>& a, const int32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epi32_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif
198
#if SIMDPP_USE_AVX512F
// 512-bit signed 32-bit >= as a native k-register mask compare.
SIMDPP_INL mask_int32<16> i_cmp_ge(const int32<16>& a, const int32<16>& b)
{
    return _mm512_cmpge_epi32_mask(a.native(), b.native());
}
#endif
205
206 // -----------------------------------------------------------------------------
207
i_cmp_ge(const uint32<4> & ca,const uint32<4> & cb)208 SIMDPP_INL mask_int32<4> i_cmp_ge(const uint32<4>& ca, const uint32<4>& cb)
209 {
210 uint32<4> a = ca, b = cb;
211 #if SIMDPP_USE_NULL
212 return detail::null::cmp_ge(a, b);
213 #elif SIMDPP_USE_AVX512VL
214 return _mm_cmpge_epu32_mask(a.native(), b.native());
215 #elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
216 return _mm_comge_epu32(a.native(), b.native());
217 #elif SIMDPP_USE_NEON
218 return vcgeq_u32(a.native(), b.native());
219 #elif SIMDPP_USE_MSA
220 return (v4u32) __msa_cle_u_w(b.native(), a.native());
221 #else
222 return i_bit_not(i_cmp_lt(a, b));
223 #endif
224 }
225
#if SIMDPP_USE_AVX2
// 256-bit unsigned 32-bit >=; native mask compare on AVX512VL, else NOT(a < b).
SIMDPP_INL mask_int32<8> i_cmp_ge(const uint32<8>& a, const uint32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epu32_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif
236
#if SIMDPP_USE_AVX512F
// 512-bit unsigned 32-bit >=.
SIMDPP_INL mask_int32<16> i_cmp_ge(const uint32<16>& a, const uint32<16>& b)
{
    // Workaround: GCC does not provide _mm512_cmpge_epu32_mask, so use the
    // generic compare with _MM_CMPINT_NLT (not-less-than, i.e. >=).
    return _mm512_cmp_epu32_mask(a.native(), b.native(), _MM_CMPINT_NLT);
}
#endif
244
245 // -----------------------------------------------------------------------------
246
// Per-lane signed 64-bit compare: sets each mask lane where a >= b.
// 64-bit >= needs NEON64 (AArch64); other targets without a native form
// synthesize it as NOT(a < b).
SIMDPP_INL mask_int64<2> i_cmp_ge(const int64<2>& a, const int64<2>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmpge_epi64_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comge_epi64(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vcgeq_s64(a.native(), b.native());
#elif SIMDPP_USE_MSA
    // MSA only provides <=, so swap operands: a >= b  <=>  b <= a
    return (v2u64) __msa_cle_s_d(b.native(), a.native());
#elif SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
263
#if SIMDPP_USE_AVX2
// 256-bit signed 64-bit >=; native mask compare on AVX512VL, else NOT(a < b).
SIMDPP_INL mask_int64<4> i_cmp_ge(const int64<4>& a, const int64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epi64_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif
274
#if SIMDPP_USE_AVX512F
// 512-bit signed 64-bit >=.
SIMDPP_INL mask_int64<8> i_cmp_ge(const int64<8>& a, const int64<8>& b)
{
    // Workaround: GCC does not provide _mm512_cmpge_epi64_mask, so use the
    // generic compare with _MM_CMPINT_NLT (not-less-than, i.e. >=).
    return _mm512_cmp_epi64_mask(a.native(), b.native(), _MM_CMPINT_NLT);
}
#endif
282
283 // -----------------------------------------------------------------------------
284
// Per-lane unsigned 64-bit compare: sets each mask lane where a >= b.
// 64-bit >= needs NEON64 (AArch64); other targets without a native form
// synthesize it as NOT(a < b).
SIMDPP_INL mask_int64<2> i_cmp_ge(const uint64<2>& a, const uint64<2>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmpge_epu64_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comge_epu64(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vcgeq_u64(a.native(), b.native());
#elif SIMDPP_USE_MSA
    // MSA only provides <=, so swap operands: a >= b  <=>  b <= a
    return (v2u64) __msa_cle_u_d(b.native(), a.native());
#elif SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
301
#if SIMDPP_USE_AVX2
// 256-bit unsigned 64-bit >=; native mask compare on AVX512VL, else NOT(a < b).
SIMDPP_INL mask_int64<4> i_cmp_ge(const uint64<4>& a, const uint64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epu64_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif
312
#if SIMDPP_USE_AVX512F
// 512-bit unsigned 64-bit >=.
SIMDPP_INL mask_int64<8> i_cmp_ge(const uint64<8>& a, const uint64<8>& b)
{
    // Same workaround as the epi64/epu32 variants above: the generic compare
    // with _MM_CMPINT_NLT (not-less-than) expresses >= portably across
    // compilers that lack the dedicated cmpge intrinsic.
    return _mm512_cmp_epu64_mask(a.native(), b.native(), _MM_CMPINT_NLT);
}
#endif
319
320 // -----------------------------------------------------------------------------
321
// Per-lane single-precision compare: sets each mask lane where a >= b.
// On x86 AVX paths _CMP_GE_OQ is used: ordered, quiet (non-signalling on
// quiet NaNs) — NaN lanes compare false.
static SIMDPP_INL
mask_float32<4> i_cmp_ge(const float32<4>& a, const float32<4>& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmp_ps_mask(a.native(), b.native(), _CMP_GE_OQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_ps(a.native(), b.native(), _CMP_GE_OQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmpge_ps(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vreinterpretq_f32_u32(vcgeq_f32(a.native(), b.native()));
#elif SIMDPP_USE_ALTIVEC
    return vec_cmpge(a.native(), b.native());
#elif SIMDPP_USE_MSA
    // MSA only provides <=, so swap operands: a >= b  <=>  b <= a
    return (v4f32) __msa_fcle_w(b.native(), a.native());
#endif
}
341
#if SIMDPP_USE_AVX
// 256-bit single-precision >= (ordered, quiet); k-register mask on AVX512VL.
static SIMDPP_INL
mask_float32<8> i_cmp_ge(const float32<8>& a, const float32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_ps_mask(a.native(), b.native(), _CMP_GE_OQ);
#else
    return _mm256_cmp_ps(a.native(), b.native(), _CMP_GE_OQ);
#endif
}
#endif
353
#if SIMDPP_USE_AVX512F
// 512-bit single-precision >= (ordered, quiet) as a k-register mask.
static SIMDPP_INL
mask_float32<16> i_cmp_ge(const float32<16>& a, const float32<16>& b)
{
    return _mm512_cmp_ps_mask(a.native(), b.native(), _CMP_GE_OQ);
}
#endif
361
362 // -----------------------------------------------------------------------------
363
// Per-lane double-precision compare: sets each mask lane where a >= b.
// On x86 AVX paths _CMP_GE_OQ is used: ordered, quiet (non-signalling on
// quiet NaNs) — NaN lanes compare false. 32-bit NEON and pre-VSX Altivec
// have no f64 vectors, so those fall back to the scalar null implementation.
static SIMDPP_INL
mask_float64<2> i_cmp_ge(const float64<2>& a, const float64<2>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmp_pd_mask(a.native(), b.native(), _CMP_GE_OQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_pd(a.native(), b.native(), _CMP_GE_OQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmpge_pd(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vreinterpretq_f64_u64(vcgeq_f64(a.native(), b.native()));
#elif SIMDPP_USE_VSX_206
    return (__vector double) vec_cmpge(a.native(), b.native());
#elif SIMDPP_USE_MSA
    // MSA only provides <=, so swap operands: a >= b  <=>  b <= a
    return (v2f64) __msa_fcle_d(b.native(), a.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
    return detail::null::cmp_ge(a, b);
#endif
}
383
#if SIMDPP_USE_AVX
// 256-bit double-precision >= (ordered, quiet); k-register mask on AVX512VL.
static SIMDPP_INL
mask_float64<4> i_cmp_ge(const float64<4>& a, const float64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_pd_mask(a.native(), b.native(), _CMP_GE_OQ);
#else
    return _mm256_cmp_pd(a.native(), b.native(), _CMP_GE_OQ);
#endif
}
#endif
395
#if SIMDPP_USE_AVX512F
// 512-bit double-precision >= (ordered, quiet) as a k-register mask.
static SIMDPP_INL
mask_float64<8> i_cmp_ge(const float64<8>& a, const float64<8>& b)
{
    return _mm512_cmp_pd_mask(a.native(), b.native(), _CMP_GE_OQ);
}
#endif
403
404 // -----------------------------------------------------------------------------
405
// Generic fallback for wider-than-native vectors: applies i_cmp_ge to each
// native-width sub-vector of a and b and collects the results into the
// corresponding mask vector type.
template<class V> SIMDPP_INL
typename V::mask_vector_type i_cmp_ge(const V& a, const V& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(typename V::mask_vector_type, i_cmp_ge, a, b);
}
411
412 } // namespace insn
413 } // namespace detail
414 } // namespace SIMDPP_ARCH_NAMESPACE
415 } // namespace simdpp
416
417 #endif
418
419