/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_GE_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_GE_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/make_shuffle_bytes_mask.h>
#include <simdpp/detail/null/compare.h>
#include <simdpp/detail/insn/bit_not.h>
#include <simdpp/detail/insn/cmp_lt.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {

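// Compile-time dispatch: each overload below selects the best native
// comparison for the enabled instruction set (SIMDPP_USE_NULL is the
// portable scalar reference backend). Where the target has no direct
// "greater-or-equal" instruction, a >= b is computed via the identity
// a >= b == !(a < b), i.e. i_bit_not(i_cmp_lt(a, b)).
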
SIMDPP_INL mask_int8<16> i_cmp_ge(const int8<16>& a, const int8<16>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epi8_mask(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_s8(a.native(), b.native());
#elif SIMDPP_USE_MSA
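    // MSA only has "less-or-equal" compares (cle/fcle), so the operands
    // are swapped throughout this file: b <= a is equivalent to a >= b.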
    return (v16u8) __msa_cle_s_b(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int8<32> i_cmp_ge(const int8<32>& a, const int8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epi8_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_cmp_ge(const int8<64>& a, const int8<64>& b)
{
    return _mm512_cmpge_epi8_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int8<16> i_cmp_ge(const uint8<16>& ca, const uint8<16>& cb)
{
    uint8<16> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epu8_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
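    // AMD XOP provides direct unsigned compares; SIMDPP_WORKAROUND_XOP_COM
    // disables them on compilers whose com* intrinsics misbehave.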
    return _mm_comge_epu8(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_u8(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v16u8) __msa_cle_u_b(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int8<32> i_cmp_ge(const uint8<32>& a, const uint8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epu8_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_cmp_ge(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_cmpge_epu8_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int16<8> i_cmp_ge(const int16<8>& a, const int16<8>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epi16_mask(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_s16(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v8u16) __msa_cle_s_h(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int16<16> i_cmp_ge(const int16<16>& a, const int16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epi16_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_cmp_ge(const int16<32>& a, const int16<32>& b)
{
    return _mm512_cmpge_epi16_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int16<8> i_cmp_ge(const uint16<8>& ca, const uint16<8>& cb)
{
    uint16<8> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epu16_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comge_epu16(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_u16(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v8u16) __msa_cle_u_h(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int16<16> i_cmp_ge(const uint16<16>& a, const uint16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epu16_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_cmp_ge(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_cmpge_epu16_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int32<4> i_cmp_ge(const int32<4>& a, const int32<4>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epi32_mask(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_s32(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4u32) __msa_cle_s_w(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int32<8> i_cmp_ge(const int32<8>& a, const int32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epi32_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int32<16> i_cmp_ge(const int32<16>& a, const int32<16>& b)
{
    return _mm512_cmpge_epi32_mask(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int32<4> i_cmp_ge(const uint32<4>& ca, const uint32<4>& cb)
{
    uint32<4> a = ca, b = cb;
#if SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_cmpge_epu32_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comge_epu32(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vcgeq_u32(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4u32) __msa_cle_u_w(b.native(), a.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int32<8> i_cmp_ge(const uint32<8>& a, const uint32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epu32_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int32<16> i_cmp_ge(const uint32<16>& a, const uint32<16>& b)
{
    // GCC does not have _mm512_cmpge_epu32_mask
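    // _MM_CMPINT_NLT selects the "not less-than" predicate, i.e. exactly >=.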
    return _mm512_cmp_epu32_mask(a.native(), b.native(), _MM_CMPINT_NLT);
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int64<2> i_cmp_ge(const int64<2>& a, const int64<2>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmpge_epi64_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comge_epi64(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vcgeq_s64(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2u64) __msa_cle_s_d(b.native(), a.native());
#elif SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int64<4> i_cmp_ge(const int64<4>& a, const int64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epi64_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int64<8> i_cmp_ge(const int64<8>& a, const int64<8>& b)
{
    // GCC does not have _mm512_cmpge_epi64_mask
    return _mm512_cmp_epi64_mask(a.native(), b.native(), _MM_CMPINT_NLT);
}
#endif

// -----------------------------------------------------------------------------

SIMDPP_INL mask_int64<2> i_cmp_ge(const uint64<2>& a, const uint64<2>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmpge_epu64_mask(a.native(), b.native());
#elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
    return _mm_comge_epu64(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vcgeq_u64(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2u64) __msa_cle_u_d(b.native(), a.native());
#elif SIMDPP_USE_NULL
    return detail::null::cmp_ge(a, b);
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}

#if SIMDPP_USE_AVX2
SIMDPP_INL mask_int64<4> i_cmp_ge(const uint64<4>& a, const uint64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpge_epu64_mask(a.native(), b.native());
#else
    return i_bit_not(i_cmp_lt(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
SIMDPP_INL mask_int64<8> i_cmp_ge(const uint64<8>& a, const uint64<8>& b)
{
    return _mm512_cmp_epu64_mask(a.native(), b.native(), _MM_CMPINT_NLT);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_float32<4> i_cmp_ge(const float32<4>& a, const float32<4>& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::cmp_ge(a, b);
#elif SIMDPP_USE_AVX512VL
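    // _CMP_GE_OQ selects >= as ordered and quiet: comparisons involving
    // NaN yield false and do not raise floating-point exceptions.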
    return _mm_cmp_ps_mask(a.native(), b.native(), _CMP_GE_OQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_ps(a.native(), b.native(), _CMP_GE_OQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmpge_ps(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vreinterpretq_f32_u32(vcgeq_f32(a.native(), b.native()));
#elif SIMDPP_USE_ALTIVEC
    return vec_cmpge(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4f32) __msa_fcle_w(b.native(), a.native());
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float32<8> i_cmp_ge(const float32<8>& a, const float32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_ps_mask(a.native(), b.native(), _CMP_GE_OQ);
#else
    return _mm256_cmp_ps(a.native(), b.native(), _CMP_GE_OQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float32<16> i_cmp_ge(const float32<16>& a, const float32<16>& b)
{
    return _mm512_cmp_ps_mask(a.native(), b.native(), _CMP_GE_OQ);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_float64<2> i_cmp_ge(const float64<2>& a, const float64<2>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_cmp_pd_mask(a.native(), b.native(), _CMP_GE_OQ);
#elif SIMDPP_USE_AVX
    return _mm_cmp_pd(a.native(), b.native(), _CMP_GE_OQ);
#elif SIMDPP_USE_SSE2
    return _mm_cmpge_pd(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vreinterpretq_f64_u64(vcgeq_f64(a.native(), b.native()));
#elif SIMDPP_USE_VSX_206
    return (__vector double) vec_cmpge(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2f64) __msa_fcle_d(b.native(), a.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
    return detail::null::cmp_ge(a, b);
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float64<4> i_cmp_ge(const float64<4>& a, const float64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_pd_mask(a.native(), b.native(), _CMP_GE_OQ);
#else
    return _mm256_cmp_pd(a.native(), b.native(), _CMP_GE_OQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float64<8> i_cmp_ge(const float64<8>& a, const float64<8>& b)
{
    return _mm512_cmp_pd_mask(a.native(), b.native(), _CMP_GE_OQ);
}
#endif

// -----------------------------------------------------------------------------
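// Generic fallback for vectors wider than the native width:
// SIMDPP_VEC_ARRAY_IMPL2 applies i_cmp_ge to each pair of native-width
// sub-vectors and collects the resulting masks.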
template<class V> SIMDPP_INL
typename V::mask_vector_type i_cmp_ge(const V& a, const V& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(typename V::mask_vector_type, i_cmp_ge, a, b);
}

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif