/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_NEQ_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CMP_NEQ_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/bit_not.h>
#include <simdpp/core/cmp_eq.h>
#include <simdpp/detail/not_implemented.h>
#include <simdpp/detail/null/compare.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {
27 static SIMDPP_INL
i_cmp_neq(const uint8x16 & a,const uint8x16 & b)28 mask_int8x16 i_cmp_neq(const uint8x16& a, const uint8x16& b)
29 {
30 #if SIMDPP_USE_NULL
31 return detail::null::cmp_neq(a, b);
32 #elif SIMDPP_USE_AVX512VL
33 return _mm_cmpneq_epi8_mask(a.native(), b.native());
34 #elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
35 return _mm_comneq_epi8(a.native(), b.native());
36 #else
37 return bit_not(cmp_eq(a, b));
38 #endif
39 }
40
#if SIMDPP_USE_AVX512VL
// Mask-register overload: two mask bits differ exactly when their XOR is 1.
static SIMDPP_INL
mask_int8<16> i_cmp_neq(const mask_int8<16>& a, const mask_int8<16>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX2
// Element-wise "not equal" for 32x 8-bit unsigned elements.
static SIMDPP_INL
mask_int8x32 i_cmp_neq(const uint8x32& a, const uint8x32& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmpneq_epi8_mask(a.native(), b.native());
#else
    return bit_not(cmp_eq(a, b));
#endif
}
#endif

#if SIMDPP_USE_AVX512VL
// Mask-register overload: two mask bits differ exactly when their XOR is 1.
static SIMDPP_INL
mask_int8<32> i_cmp_neq(const mask_int8<32>& a, const mask_int8<32>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
// Element-wise "not equal" for 64x 8-bit unsigned elements.
SIMDPP_INL mask_int8<64> i_cmp_neq(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_cmpneq_epi8_mask(a.native(), b.native());
}

// Mask-register overload: two mask bits differ exactly when their XOR is 1.
SIMDPP_INL mask_int8<64> i_cmp_neq(const mask_int8<64>& a, const mask_int8<64>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

83 static SIMDPP_INL
i_cmp_neq(const uint16x8 & a,const uint16x8 & b)84 mask_int16x8 i_cmp_neq(const uint16x8& a, const uint16x8& b)
85 {
86 #if SIMDPP_USE_NULL
87 return detail::null::cmp_neq(a, b);
88 #elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
89 return _mm_comneq_epi16(a.native(), b.native());
90 #else
91 return bit_not(cmp_eq(a, b));
92 #endif
93 }
94
#if SIMDPP_USE_AVX512VL
// Mask-register overload: two mask bits differ exactly when their XOR is 1.
static SIMDPP_INL
mask_int16<8> i_cmp_neq(const mask_int16<8>& a, const mask_int16<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX2
// Element-wise "not equal" for 16x 16-bit unsigned elements.
static SIMDPP_INL
mask_int16x16 i_cmp_neq(const uint16x16& a, const uint16x16& b)
{
    return bit_not(cmp_eq(a, b));
}
#endif

#if SIMDPP_USE_AVX512VL
// Mask-register overload: two mask bits differ exactly when their XOR is 1.
static SIMDPP_INL
mask_int16<16> i_cmp_neq(const mask_int16<16>& a, const mask_int16<16>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
// Element-wise "not equal" for 32x 16-bit unsigned elements.
SIMDPP_INL mask_int16<32> i_cmp_neq(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_cmpneq_epi16_mask(a.native(), b.native());
}

// Mask-register overload: two mask bits differ exactly when their XOR is 1.
SIMDPP_INL mask_int16<32> i_cmp_neq(const mask_int16<32>& a, const mask_int16<32>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

134 static SIMDPP_INL
i_cmp_neq(const uint32x4 & a,const uint32x4 & b)135 mask_int32x4 i_cmp_neq(const uint32x4& a, const uint32x4& b)
136 {
137 #if SIMDPP_USE_NULL
138 return detail::null::cmp_neq(a, b);
139 #elif SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
140 return _mm_comneq_epi32(a.native(), b.native());
141 #else
142 return bit_not(cmp_eq(a, b));
143 #endif
144 }
145
#if SIMDPP_USE_AVX512VL
// Mask-register overload: two mask bits differ exactly when their XOR is 1.
static SIMDPP_INL
mask_int32<4> i_cmp_neq(const mask_int32<4>& a, const mask_int32<4>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX2
// Element-wise "not equal" for 8x 32-bit unsigned elements.
static SIMDPP_INL
mask_int32x8 i_cmp_neq(const uint32x8& a, const uint32x8& b)
{
    return bit_not(cmp_eq(a, b));
}
#endif

#if SIMDPP_USE_AVX512VL
// Mask-register overload: two mask bits differ exactly when their XOR is 1.
static SIMDPP_INL
mask_int32<8> i_cmp_neq(const mask_int32<8>& a, const mask_int32<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
// Element-wise "not equal" for 16x 32-bit unsigned elements.
static SIMDPP_INL
mask_int32<16> i_cmp_neq(const uint32<16>& a, const uint32<16>& b)
{
    return _mm512_cmpneq_epu32_mask(a.native(), b.native());
}

// Mask-register overload: two mask bits differ exactly when their XOR is 1.
static SIMDPP_INL
mask_int32<16> i_cmp_neq(const mask_int32<16>& a, const mask_int32<16>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

186 static SIMDPP_INL
i_cmp_neq(const uint64x2 & a,const uint64x2 & b)187 mask_int64x2 i_cmp_neq(const uint64x2& a, const uint64x2& b)
188 {
189 #if SIMDPP_USE_XOP && !SIMDPP_WORKAROUND_XOP_COM
190 return _mm_comneq_epi64(a.native(), b.native());
191 #elif SIMDPP_USE_SSE4_1 || SIMDPP_USE_NEON || SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA
192 return bit_not(cmp_eq(a, b));
193 #elif SIMDPP_USE_SSE2
194 uint64x2 r32, r32s;
195 r32 = (uint32x4)cmp_eq(uint32x4(a), uint32x4(b));
196 // swap the 32-bit halves
197 r32s = bit_or(shift_l<32>(r32), shift_r<32>(r32));
198 // combine the results. Each 32-bit half is ORed with the neighbouring pair
199 r32 = bit_or(r32, r32s);
200 return r32;
201 #elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
202 return detail::null::cmp_neq(a, b);
203 #endif
204 }
205
#if SIMDPP_USE_AVX512VL
// Mask-register overload: two mask bits differ exactly when their XOR is 1.
static SIMDPP_INL
mask_int64<2> i_cmp_neq(const mask_int64<2>& a, const mask_int64<2>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX2
// Element-wise "not equal" for 4x 64-bit unsigned elements.
static SIMDPP_INL
mask_int64x4 i_cmp_neq(const uint64x4& a, const uint64x4& b)
{
    return bit_not(cmp_eq(a, b));
}
#endif

#if SIMDPP_USE_AVX512VL
// Mask-register overload: two mask bits differ exactly when their XOR is 1.
static SIMDPP_INL
mask_int64<4> i_cmp_neq(const mask_int64<4>& a, const mask_int64<4>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
// Element-wise "not equal" for 8x 64-bit unsigned elements.
static SIMDPP_INL
mask_int64<8> i_cmp_neq(const uint64<8>& a, const uint64<8>& b)
{
    return _mm512_cmpneq_epi64_mask(a.native(), b.native());
}

// Mask-register overload: two mask bits differ exactly when their XOR is 1.
static SIMDPP_INL
mask_int64<8> i_cmp_neq(const mask_int64<8>& a, const mask_int64<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

246 static SIMDPP_INL
i_cmp_neq(const float32x4 & a,const float32x4 & b)247 mask_float32x4 i_cmp_neq(const float32x4& a, const float32x4& b)
248 {
249 #if SIMDPP_USE_NULL
250 return detail::null::cmp_neq(a, b);
251 #elif SIMDPP_USE_AVX512VL
252 return _mm_cmp_ps_mask(a.native(), b.native(), _CMP_NEQ_UQ);
253 #elif SIMDPP_USE_AVX
254 return _mm_cmp_ps(a.native(), b.native(), _CMP_NEQ_UQ);
255 #elif SIMDPP_USE_SSE2
256 return _mm_cmpneq_ps(a.native(), b.native());
257 #elif SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
258 return bit_not(cmp_eq(a, b));
259 #elif SIMDPP_USE_MSA
260 return (v4f32) __msa_fcune_w(a.native(), b.native());
261 #endif
262 }
263
#if SIMDPP_USE_AVX512VL
// Mask-register overload: two mask bits differ exactly when their XOR is 1.
static SIMDPP_INL
mask_float32<4> i_cmp_neq(const mask_float32<4>& a, const mask_float32<4>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX
// Element-wise "not equal" for 8x 32-bit float elements (unordered predicate).
static SIMDPP_INL
mask_float32x8 i_cmp_neq(const float32x8& a, const float32x8& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_ps_mask(a.native(), b.native(), _CMP_NEQ_UQ);
#else
    return _mm256_cmp_ps(a.native(), b.native(), _CMP_NEQ_UQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512VL
// Mask-register overload: two mask bits differ exactly when their XOR is 1.
static SIMDPP_INL
mask_float32<8> i_cmp_neq(const mask_float32<8>& a, const mask_float32<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
// Element-wise "not equal" for 16x 32-bit float elements (unordered predicate).
static SIMDPP_INL
mask_float32<16> i_cmp_neq(const float32<16>& a, const float32<16>& b)
{
    return _mm512_cmp_ps_mask(a.native(), b.native(), _CMP_NEQ_UQ);
}

// Mask-register overload: two mask bits differ exactly when their XOR is 1.
static SIMDPP_INL
mask_float32<16> i_cmp_neq(const mask_float32<16>& a, const mask_float32<16>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

308 static SIMDPP_INL
i_cmp_neq(const float64x2 & a,const float64x2 & b)309 mask_float64x2 i_cmp_neq(const float64x2& a, const float64x2& b)
310 {
311 #if SIMDPP_USE_AVX512VL
312 return _mm_cmp_pd_mask(a.native(), b.native(), _CMP_NEQ_UQ);
313 #elif SIMDPP_USE_AVX
314 return _mm_cmp_pd(a.native(), b.native(), _CMP_NEQ_UQ);
315 #elif SIMDPP_USE_SSE2
316 return _mm_cmpneq_pd(a.native(), b.native());
317 #elif SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_206
318 return bit_not(cmp_eq(a, b));
319 #elif SIMDPP_USE_MSA
320 return (v2f64) __msa_fcune_d(a.native(), b.native());
321 #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
322 return detail::null::cmp_neq(a, b);
323 #else
324 return SIMDPP_NOT_IMPLEMENTED2(a, b);
325 #endif
326 }
327
#if SIMDPP_USE_AVX512VL
// Mask-register overload: two mask bits differ exactly when their XOR is 1.
static SIMDPP_INL
mask_float64<2> i_cmp_neq(const mask_float64<2>& a, const mask_float64<2>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX
// Element-wise "not equal" for 4x 64-bit float elements (unordered predicate).
static SIMDPP_INL
mask_float64x4 i_cmp_neq(const float64x4& a, const float64x4& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_cmp_pd_mask(a.native(), b.native(), _CMP_NEQ_UQ);
#else
    return _mm256_cmp_pd(a.native(), b.native(), _CMP_NEQ_UQ);
#endif
}
#endif

#if SIMDPP_USE_AVX512VL
// Mask-register overload: two mask bits differ exactly when their XOR is 1.
static SIMDPP_INL
mask_float64<4> i_cmp_neq(const mask_float64<4>& a, const mask_float64<4>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
// Element-wise "not equal" for 8x 64-bit float elements (unordered predicate).
static SIMDPP_INL
mask_float64<8> i_cmp_neq(const float64<8>& a, const float64<8>& b)
{
    return _mm512_cmp_pd_mask(a.native(), b.native(), _CMP_NEQ_UQ);
}

// Mask-register overload: two mask bits differ exactly when their XOR is 1.
static SIMDPP_INL
mask_float64<8> i_cmp_neq(const mask_float64<8>& a, const mask_float64<8>& b)
{
    return _mm512_kxor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

372 template<class V> SIMDPP_INL
i_cmp_neq(const V & a,const V & b)373 typename V::mask_vector_type i_cmp_neq(const V& a, const V& b)
374 {
375 SIMDPP_VEC_ARRAY_IMPL2(typename V::mask_vector_type, i_cmp_neq, a, b);
376 }
377
} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif