/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_NOT_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_NOT_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/bit_xor.h>
#include <simdpp/core/to_mask.h>
#include <simdpp/detail/null/bitwise.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {

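// Bitwise NOT of the full register. x86 has no vector NOT instruction, so it
// is expressed as XOR with an all-ones constant; NEON provides vmvnq directly,
// while Altivec and MSA use NOR of the operand with itself (~(a | a) == ~a).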
static SIMDPP_INL
uint8x16 i_bit_not(const uint8x16& a)
{
#if SIMDPP_USE_NULL
    uint8x16 r;
    for (unsigned i = 0; i < a.length; i++) {
        r.el(i) = ~a.el(i);
    }
    return r;
#elif SIMDPP_USE_SSE2
    return bit_xor(a, 0xff);
#elif SIMDPP_USE_NEON
    return vmvnq_u8(a.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_nor(a.native(), a.native());
#elif SIMDPP_USE_MSA
    return __msa_nor_v(a.native(), a.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint8x32 i_bit_not(const uint8x32& a)
{
    return bit_xor(a, 0xff);
}
#endif

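// On AVX-512, NOT is a single vpternlog instruction: the immediate 0x01 is a
// three-input truth table that yields 1 only when all inputs are 0, so with
// all three operands equal to n the result is ~n.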
#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint8<64> i_bit_not(const uint8<64>& a)
{
    __m512i n = a.native();
    return _mm512_ternarylogic_epi32(n, n, n, 0x1);
}
#endif

// -----------------------------------------------------------------------------

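// Bitwise NOT does not depend on element width, so the wider integer element
// types simply reinterpret to 8-bit vectors and forward to the implementations
// above (except where a native 512-bit instruction is used directly).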
SIMDPP_INL uint16<8> i_bit_not(const uint16<8>& a)
{
    return uint16<8>(i_bit_not(uint8<16>(a)));
}

#if SIMDPP_USE_AVX2
SIMDPP_INL uint16<16> i_bit_not(const uint16<16>& a)
{
    return uint16<16>(i_bit_not(uint8<32>(a)));
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint16<32> i_bit_not(const uint16<32>& a)
{
    __m512i n = a.native();
    return _mm512_ternarylogic_epi32(n, n, n, 0x1);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
uint32<4> i_bit_not(const uint32<4>& a)
{
    return uint32<4>(i_bit_not(uint8<16>(a)));
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint32<8> i_bit_not(const uint32<8>& a)
{
    return uint32<8>(i_bit_not(uint8<32>(a)));
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint32<16> i_bit_not(const uint32<16>& a)
{
    __m512i n = a.native();
    return _mm512_ternarylogic_epi32(n, n, n, 0x1);
}
#endif

// -----------------------------------------------------------------------------

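// 64-bit elements: the NULL backend and Altivec without VSX 2.07 lack native
// 64-bit integer vector support, so those configurations fall back to a
// per-element scalar loop.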
static SIMDPP_INL
uint64<2> i_bit_not(const uint64<2>& a)
{
#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
    uint64x2 r;
    for (unsigned i = 0; i < a.length; i++) {
        r.el(i) = ~a.el(i);
    }
    return r;
#else
    return uint64<2>(i_bit_not(uint8<16>(a)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint64<4> i_bit_not(const uint64<4>& a)
{
    return uint64<4>(i_bit_not(uint8<32>(a)));
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64<8> i_bit_not(const uint64<8>& a)
{
    __m512i n = a.native();
    return _mm512_ternarylogic_epi64(n, n, n, 0x1);
}
#endif

// -----------------------------------------------------------------------------

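// For the mask types: on backends where a mask is an ordinary vector, NOT the
// underlying register and convert back with to_mask; the NULL backend stores
// masks per element and uses the null helper instead.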
static SIMDPP_INL
mask_int8x16 i_bit_not(const mask_int8x16& a)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_not_mm(a);
#else
    return to_mask(i_bit_not(uint8x16(a)));
#endif
}

static SIMDPP_INL
mask_int16x8 i_bit_not(const mask_int16x8& a)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_not_mm(a);
#else
    return to_mask(i_bit_not(uint16x8(a)));
#endif
}

static SIMDPP_INL
mask_int32x4 i_bit_not(const mask_int32x4& a)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_not_mm(a);
#else
    return to_mask(i_bit_not(uint32x4(a)));
#endif
}

static SIMDPP_INL
mask_int64x2 i_bit_not(const mask_int64x2& a)
{
#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
    return detail::null::bit_not_mm(a);
#else
    return to_mask(i_bit_not(uint64x2(a)));
#endif
}

#if SIMDPP_USE_AVX2 && !SIMDPP_USE_AVX512VL
static SIMDPP_INL mask_int8x32  i_bit_not(const mask_int8x32& a)  { return i_bit_not(uint8x32(a)); }
static SIMDPP_INL mask_int16x16 i_bit_not(const mask_int16x16& a) { return i_bit_not(uint16x16(a)); }
static SIMDPP_INL mask_int32x8  i_bit_not(const mask_int32x8& a)  { return i_bit_not(uint32x8(a)); }
static SIMDPP_INL mask_int64x4  i_bit_not(const mask_int64x4& a)  { return i_bit_not(uint64x4(a)); }
#endif

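// With AVX-512VL, 256-bit masks are held in k-registers, so the complement is
// taken directly on the native mask value.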
#if SIMDPP_USE_AVX512VL
static SIMDPP_INL mask_int8x32  i_bit_not(const mask_int8x32& a)  { return ~a.native(); }
static SIMDPP_INL mask_int16x16 i_bit_not(const mask_int16x16& a) { return ~a.native(); }
static SIMDPP_INL mask_int32x8  i_bit_not(const mask_int32x8& a)  { return ~a.native(); }
static SIMDPP_INL mask_int64x4  i_bit_not(const mask_int64x4& a)  { return ~a.native(); }
#endif

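// 512-bit masks are always k-registers; _mm512_knot computes their complement.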
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int32<16> i_bit_not(const mask_int32<16>& a)
{
    return _mm512_knot(a.native());
}

static SIMDPP_INL
mask_int64<8> i_bit_not(const mask_int64<8>& a)
{
    return _mm512_knot(a.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_bit_not(const mask_int8<64>& a)
{
    return ~a.native();
}

SIMDPP_INL mask_int16<32> i_bit_not(const mask_int16<32>& a)
{
    return ~a.native();
}
#endif

// -----------------------------------------------------------------------------

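// Floating-point NOT operates on the raw bit pattern: XOR with an all-ones
// mask on SSE/AVX, or reinterpret to an integer vector, invert and reinterpret
// back on NEON.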
static SIMDPP_INL
float32x4 i_bit_not(const float32x4& a)
{
#if SIMDPP_USE_SSE2
    return bit_xor(a, 0xffffffff);
#elif SIMDPP_USE_NEON_FLT_SP
    return vreinterpretq_f32_u32(vmvnq_u32(vreinterpretq_u32_f32(a.native())));
#elif SIMDPP_USE_ALTIVEC
    return vec_nor(a.native(), a.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_MSA || SIMDPP_USE_NEON_NO_FLT_SP
    return float32x4(i_bit_not(uint32x4(a)));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float32x8 i_bit_not(const float32x8& a)
{
    return bit_xor(a, 0xffffffff);
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float32<16> i_bit_not(const float32<16>& a)
{
    __m512i n = _mm512_castps_si512(a.native());
    n = _mm512_ternarylogic_epi32(n, n, n, 0x1);
    return _mm512_castsi512_ps(n);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
float64x2 i_bit_not(const float64x2& a)
{
#if SIMDPP_USE_SSE2
    return bit_xor(a, 0xffffffffffffffff);
#elif SIMDPP_USE_NEON64
    return vreinterpretq_f64_u32(vmvnq_u32(vreinterpretq_u32_f64(a.native())));
#elif SIMDPP_USE_VSX_206
    return vec_nor(a.native(), a.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
    return float64x2(i_bit_not(uint64x2(a)));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float64x4 i_bit_not(const float64x4& a)
{
    return bit_xor(a, 0xffffffffffffffff);
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float64<8> i_bit_not(const float64<8>& a)
{
    __m512i n = _mm512_castpd_si512(a.native());
    n = _mm512_ternarylogic_epi64(n, n, n, 0x1);
    return _mm512_castsi512_pd(n);
}
#endif

// -----------------------------------------------------------------------------

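// Floating-point masks follow the same pattern as the integer masks: NOT the
// underlying vector and convert back, or complement the k-register on AVX-512.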
static SIMDPP_INL
mask_float32x4 i_bit_not(const mask_float32x4& a)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::bit_not_mm(a);
#else
    return to_mask(i_bit_not(float32<4>(a)));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float32x8 i_bit_not(const mask_float32x8& a)
{
    return to_mask(i_bit_not(float32x8(a)));
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float32<16> i_bit_not(const mask_float32<16>& a)
{
    return _mm512_knot(a.native());
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
mask_float64x2 i_bit_not(const mask_float64x2& a)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_206)
    return detail::null::bit_not_mm(a);
#else
    return to_mask(i_bit_not(float64x2(a)));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float64x4 i_bit_not(const mask_float64x4& a)
{
    return to_mask(i_bit_not(float64x4(a)));
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float64<8> i_bit_not(const mask_float64<8>& a)
{
    return _mm512_knot(a.native());
}
#endif

// -----------------------------------------------------------------------------

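// Vectors wider than the native width are stored as arrays of native vectors;
// SIMDPP_VEC_ARRAY_IMPL1 applies i_bit_not to each sub-vector in turn.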
template<class V> SIMDPP_INL
V i_bit_not(const V& a)
{
    SIMDPP_VEC_ARRAY_IMPL1(V, i_bit_not, a)
}

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif