1 /* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6 */
7
8 #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_NOT_H
9 #define LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_NOT_H
10
11 #ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13 #endif
14
15 #include <simdpp/types.h>
16 #include <simdpp/core/bit_xor.h>
17 #include <simdpp/core/to_mask.h>
18 #include <simdpp/detail/null/bitwise.h>
19 #include <simdpp/detail/vector_array_macros.h>
20
21 namespace simdpp {
22 namespace SIMDPP_ARCH_NAMESPACE {
23 namespace detail {
24 namespace insn {
25
26
// Computes the bitwise NOT of each 8-bit element of a 128-bit vector.
static SIMDPP_INL
uint8x16 i_bit_not(const uint8x16& a)
{
#if SIMDPP_USE_NULL
    // Scalar reference backend: invert element by element.
    uint8x16 r;
    for (unsigned i = 0; i < a.length; i++) {
        r.el(i) = ~a.el(i);
    }
    return r;
#elif SIMDPP_USE_SSE2
    // SSE has no vector NOT instruction; XOR with all-ones is the idiom.
    return bit_xor(a, 0xff);
#elif SIMDPP_USE_NEON
    return vmvnq_u8(a.native());
#elif SIMDPP_USE_ALTIVEC
    // NOR of a value with itself yields its bitwise complement.
    return vec_nor(a.native(), a.native());
#elif SIMDPP_USE_MSA
    return __msa_nor_v(a.native(), a.native());
#endif
}
46
#if SIMDPP_USE_AVX2
// 256-bit byte vector: complement by XOR-ing every byte with 0xff.
static SIMDPP_INL
uint8x32 i_bit_not(const uint8x32& a)
{
    return bit_xor(a, 0xff);
}
#endif
54
#if SIMDPP_USE_AVX512BW
// 512-bit byte vector. AVX-512 has no vector NOT; ternary logic with
// truth table 0x1 (output bit set only when all three inputs are 0,
// and all three inputs are n) computes ~n in one instruction.
// 'static' added for consistency with the other i_bit_not overloads
// in this header (e.g. the uint8x16 and uint32<16> variants).
static SIMDPP_INL uint8<64> i_bit_not(const uint8<64>& a)
{
    __m512i n = a.native();
    return _mm512_ternarylogic_epi32(n, n, n, 0x1);
}
#endif
62
63 // -----------------------------------------------------------------------------
64
i_bit_not(const uint16<8> & a)65 SIMDPP_INL uint16<8> i_bit_not(const uint16<8>& a)
66 {
67 return uint16<8>(i_bit_not(uint8<16>(a)));
68 }
69
#if SIMDPP_USE_AVX2
// 256-bit 16-bit NOT: width-agnostic, so reuse the byte implementation.
// 'static' added for consistency with the other overloads in this header.
static SIMDPP_INL uint16<16> i_bit_not(const uint16<16>& a)
{
    return uint16<16>(i_bit_not(uint8<32>(a)));
}
#endif
76
#if SIMDPP_USE_AVX512BW
// 512-bit 16-bit NOT via ternary logic (imm 0x1 == NOR of identical
// inputs == bitwise complement).
// 'static' added for consistency with the other overloads in this header.
static SIMDPP_INL uint16<32> i_bit_not(const uint16<32>& a)
{
    __m512i n = a.native();
    return _mm512_ternarylogic_epi32(n, n, n, 0x1);
}
#endif
84
85 // -----------------------------------------------------------------------------
86
87 static SIMDPP_INL
i_bit_not(const uint32<4> & a)88 uint32<4> i_bit_not(const uint32<4>& a)
89 {
90 return uint32<4>(i_bit_not(uint8<16>(a)));
91 }
92
#if SIMDPP_USE_AVX2
// 256-bit 32-bit NOT: reinterpret as bytes, complement, convert back.
static SIMDPP_INL
uint32<8> i_bit_not(const uint32<8>& a)
{
    const uint8<32> as_bytes(a);
    return uint32<8>(i_bit_not(as_bytes));
}
#endif
100
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint32<16> i_bit_not(const uint32<16>& a)
{
    // AVX-512 has no vector NOT; ternary logic with truth table 0x1
    // (NOR of three identical inputs) computes ~n in one instruction.
    __m512i n = a.native();
    return _mm512_ternarylogic_epi32(n, n, n, 0x1);
}
#endif
109
110 // -----------------------------------------------------------------------------
111
// 64-bit NOT. Most backends reuse the byte implementation, but backends
// without native 64-bit integer vectors fall back to scalars.
static SIMDPP_INL
uint64<2> i_bit_not(const uint64<2>& a)
{
#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
    // Pre-VSX 2.07 Altivec has no 64-bit element support; invert per element.
    uint64x2 r;
    for (unsigned i = 0; i < a.length; i++) {
        r.el(i) = ~a.el(i);
    }
    return r;
#else
    // Bitwise complement is element-width agnostic.
    return uint64<2>(i_bit_not(uint8<16>(a)));
#endif
}
125
#if SIMDPP_USE_AVX2
// 256-bit 64-bit NOT: reinterpret as bytes, complement, convert back.
static SIMDPP_INL
uint64<4> i_bit_not(const uint64<4>& a)
{
    const uint8<32> as_bytes(a);
    return uint64<4>(i_bit_not(as_bytes));
}
#endif
133
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64<8> i_bit_not(const uint64<8>& a)
{
    // NOT via ternary logic, imm 0x1 == NOR of three identical inputs.
    __m512i n = a.native();
    return _mm512_ternarylogic_epi64(n, n, n, 0x1);
}
#endif
142
143 // -----------------------------------------------------------------------------
144
// Mask NOT for 8-bit element masks.
static SIMDPP_INL
mask_int8x16 i_bit_not(const mask_int8x16& a)
{
#if SIMDPP_USE_NULL
    // Null backend stores masks in a special format; use the mask helper.
    return detail::null::bit_not_mm(a);
#else
    // Full-width masks: complement the underlying register and re-mask.
    return to_mask(i_bit_not(uint8x16(a)));
#endif
}
154
155 static SIMDPP_INL
i_bit_not(const mask_int16x8 & a)156 mask_int16x8 i_bit_not(const mask_int16x8& a)
157 {
158 #if SIMDPP_USE_NULL
159 return detail::null::bit_not_mm(a);
160 #else
161 return to_mask(i_bit_not(uint16x8(a)));
162 #endif
163 }
164
165 static SIMDPP_INL
i_bit_not(const mask_int32x4 & a)166 mask_int32x4 i_bit_not(const mask_int32x4& a)
167 {
168 #if SIMDPP_USE_NULL
169 return detail::null::bit_not_mm(a);
170 #else
171 return to_mask(i_bit_not(uint32x4(a)));
172 #endif
173 }
174
// Mask NOT for 64-bit element masks.
static SIMDPP_INL
mask_int64x2 i_bit_not(const mask_int64x2& a)
{
#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
    // No native 64-bit elements on these backends; use the scalar mask helper.
    return detail::null::bit_not_mm(a);
#else
    return to_mask(i_bit_not(uint64x2(a)));
#endif
}
184
#if SIMDPP_USE_AVX2 && !SIMDPP_USE_AVX512VL
// Without AVX-512VL, 256-bit masks are full-width vectors of all-ones /
// all-zeros lanes; complementing such a vector is still a valid mask,
// so the uint overloads are reused via the converting mask constructor.
static SIMDPP_INL mask_int8x32 i_bit_not(const mask_int8x32& a) { return i_bit_not(uint8x32(a)); }
static SIMDPP_INL mask_int16x16 i_bit_not(const mask_int16x16& a) { return i_bit_not(uint16x16(a)); }
static SIMDPP_INL mask_int32x8 i_bit_not(const mask_int32x8& a) { return i_bit_not(uint32x8(a)); }
static SIMDPP_INL mask_int64x4 i_bit_not(const mask_int64x4& a) { return i_bit_not(uint64x4(a)); }
#endif
191
#if SIMDPP_USE_AVX512VL
// With AVX-512VL, 256-bit masks are native k-register bitmasks;
// complement them directly with the integer ~ operator.
static SIMDPP_INL mask_int8x32 i_bit_not(const mask_int8x32& a) { return ~a.native(); }
static SIMDPP_INL mask_int16x16 i_bit_not(const mask_int16x16& a) { return ~a.native(); }
static SIMDPP_INL mask_int32x8 i_bit_not(const mask_int32x8& a) { return ~a.native(); }
static SIMDPP_INL mask_int64x4 i_bit_not(const mask_int64x4& a) { return ~a.native(); }
#endif
198
#if SIMDPP_USE_AVX512F
// 512-bit masks of 32/64-bit elements live in k-registers; complement
// them with the dedicated knot instruction.
static SIMDPP_INL
mask_int32<16> i_bit_not(const mask_int32<16>& a)
{
    __mmask16 k = a.native();
    return _mm512_knot(k);
}

static SIMDPP_INL
mask_int64<8> i_bit_not(const mask_int64<8>& a)
{
    // __mmask8 widens implicitly; the result is truncated back on return,
    // matching the element count of the mask.
    __mmask8 k = a.native();
    return _mm512_knot(k);
}
#endif
212
#if SIMDPP_USE_AVX512BW
// AVX-512BW masks of 8/16-bit elements are native k-register bitmasks;
// complement them directly with the integer ~ operator.
// 'static' added for consistency with the other overloads in this header.
static SIMDPP_INL mask_int8<64> i_bit_not(const mask_int8<64>& a)
{
    return ~a.native();
}

static SIMDPP_INL mask_int16<32> i_bit_not(const mask_int16<32>& a)
{
    return ~a.native();
}
#endif
224
225 // -----------------------------------------------------------------------------
226
// Bitwise NOT of the raw bit pattern of a float32 vector.
static SIMDPP_INL
float32x4 i_bit_not(const float32x4& a)
{
#if SIMDPP_USE_SSE2
    // Complement via XOR with an all-ones pattern.
    return bit_xor(a, 0xffffffff);
#elif SIMDPP_USE_NEON_FLT_SP
    // NEON has no float bitwise ops; round-trip through the u32 view.
    return vreinterpretq_f32_u32(vmvnq_u32(vreinterpretq_u32_f32(a.native())));
#elif SIMDPP_USE_ALTIVEC
    // NOR of a value with itself yields its bitwise complement.
    return vec_nor(a.native(), a.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_MSA || SIMDPP_USE_NEON_NO_FLT_SP
    // Backends without float bitwise ops: operate on the integer view.
    return float32x4(i_bit_not(uint32x4(a)));
#endif
}
240
#if SIMDPP_USE_AVX
static SIMDPP_INL
float32x8 i_bit_not(const float32x8& a)
{
    // Complement the raw bit pattern of each 32-bit lane.
    return bit_xor(a, 0xffffffff);
}
#endif
248
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float32<16> i_bit_not(const float32<16>& a)
{
    // AVX-512F has no float bitwise ops; cast to the integer view,
    // complement via ternary logic (imm 0x1 == NOT), and cast back.
    __m512i n = _mm512_castps_si512(a.native());
    n = _mm512_ternarylogic_epi32(n, n, n, 0x1);
    return _mm512_castsi512_ps(n);
}
#endif
258
259 // -----------------------------------------------------------------------------
260
// Bitwise NOT of the raw bit pattern of a float64 vector.
static SIMDPP_INL
float64x2 i_bit_not(const float64x2& a)
{
#if SIMDPP_USE_SSE2
    // Complement via XOR with an all-ones pattern.
    return bit_xor(a, 0xffffffffffffffff);
#elif SIMDPP_USE_NEON64
    // Round-trip through the u32 view; NEON has no float bitwise ops.
    return vreinterpretq_f64_u32(vmvnq_u32(vreinterpretq_u32_f64(a.native())));
#elif SIMDPP_USE_VSX_206
    return vec_nor(a.native(), a.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
    // No native float64 bitwise ops: operate on the integer view.
    return float64x2(i_bit_not(uint64x2(a)));
#endif
}
274
#if SIMDPP_USE_AVX
static SIMDPP_INL
float64x4 i_bit_not(const float64x4& a)
{
    // Complement the raw bit pattern of each 64-bit lane.
    return bit_xor(a, 0xffffffffffffffff);
}
#endif
282
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float64<8> i_bit_not(const float64<8>& a)
{
    // Cast to the integer view, complement via ternary logic
    // (imm 0x1 == NOT), and cast back.
    __m512i n = _mm512_castpd_si512(a.native());
    n = _mm512_ternarylogic_epi64(n, n, n, 0x1);
    return _mm512_castsi512_pd(n);
}
#endif
292
293 // -----------------------------------------------------------------------------
294
// Mask NOT for float32 masks.
static SIMDPP_INL
mask_float32x4 i_bit_not(const mask_float32x4& a)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    // Backends storing masks specially use the scalar mask helper.
    return detail::null::bit_not_mm(a);
#else
    // Complement the underlying full-width register, then re-mask.
    return to_mask(i_bit_not(float32<4>(a)));
#endif
}
304
#if SIMDPP_USE_AVX
// Mask NOT for 256-bit float32 masks: complement the full-width
// register, then reinterpret the result as a mask.
static SIMDPP_INL
mask_float32x8 i_bit_not(const mask_float32x8& a)
{
    const float32x8 bits(a);
    return to_mask(i_bit_not(bits));
}
#endif
312
#if SIMDPP_USE_AVX512F
// Float masks on AVX-512 are k-registers, same as integer masks.
static SIMDPP_INL
mask_float32<16> i_bit_not(const mask_float32<16>& a)
{
    __mmask16 k = a.native();
    return _mm512_knot(k);
}
#endif
320
321 // -----------------------------------------------------------------------------
322
// Mask NOT for float64 masks.
static SIMDPP_INL
mask_float64x2 i_bit_not(const mask_float64x2& a)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_206)
    // No native float64 bitwise support here; use the scalar mask helper.
    return detail::null::bit_not_mm(a);
#else
    return to_mask(i_bit_not(float64x2(a)));
#endif
}
332
#if SIMDPP_USE_AVX
// Mask NOT for 256-bit float64 masks: complement the full-width
// register, then reinterpret the result as a mask.
static SIMDPP_INL
mask_float64x4 i_bit_not(const mask_float64x4& a)
{
    const float64x4 bits(a);
    return to_mask(i_bit_not(bits));
}
#endif
340
#if SIMDPP_USE_AVX512F
// Float64 masks on AVX-512 are k-registers; complement with knot.
// __mmask8 widens implicitly and the result truncates back on return.
static SIMDPP_INL
mask_float64<8> i_bit_not(const mask_float64<8>& a)
{
    __mmask8 k = a.native();
    return _mm512_knot(k);
}
#endif
348
349 // -----------------------------------------------------------------------------
350
// Generic fallback for wider vectors: applies i_bit_not to each
// native-width sub-vector of V via the library's array-expansion macro.
template<class V> SIMDPP_INL
V i_bit_not(const V& a)
{
    SIMDPP_VEC_ARRAY_IMPL1(V, i_bit_not, a)
}
356
357 } // namespace insn
358 } // namespace detail
359 } // namespace SIMDPP_ARCH_NAMESPACE
360 } // namespace simdpp
361
362 #endif
363
364