1 /* Copyright (C) 2011-2012 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6 */
7
8 #ifndef LIBSIMDPP_DETAIL_NULL_BITWISE_H
9 #define LIBSIMDPP_DETAIL_NULL_BITWISE_H
10 #if SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
11
12 #ifndef LIBSIMDPP_SIMD_H
13 #error "This file must be included through simd.h"
14 #endif
15
16 #include <simdpp/types.h>
17 #include <simdpp/core/cast.h>
18 #include <simdpp/detail/null/mask.h>
19
20 namespace simdpp {
21 namespace SIMDPP_ARCH_NAMESPACE {
22 namespace detail {
23 namespace null {
24
/*  Naming convention used below:
    *_vm variants accept a vector as the first argument and a mask as the
    second argument.
    *_mm variants accept masks as both arguments.
*/
28
29 template<class V> SIMDPP_INL
bit_and(const V & a,const V & b)30 V bit_and(const V& a, const V& b)
31 {
32 V r;
33 using E = typename V::element_type;
34 using U = typename V::uint_element_type;
35 for (unsigned i = 0; i < V::length; i++) {
36 U a1 = bit_cast<U, E>(a.el(i));
37 U b1 = bit_cast<U, E>(b.el(i));
38 r.el(i) = bit_cast<E, U>(a1 & b1);
39 }
40 return r;
41 }
42
/** Applies a mask to a vector: elements whose mask entry is set are kept,
    all other elements become zero.

    @param a vector operand
    @param m mask operand; element i of the mask controls element i of @a a
    @return vector with a.el(i) where m.el(i) is true and 0 elsewhere
*/
template<class V, class M> SIMDPP_INL
V bit_and_vm(const V& a, const M& m)
{
    V result;
    for (unsigned lane = 0; lane < V::length; ++lane) {
        if (m.el(lane)) {
            result.el(lane) = a.el(lane);
        } else {
            result.el(lane) = 0;
        }
    }
    return result;
}
52
/** Element-wise AND of two masks.

    @param a first mask
    @param b second mask
    @return mask whose element i is a.el(i) & b.el(i)
*/
template<class M> SIMDPP_INL
M bit_and_mm(const M& a, const M& b)
{
    M combined;
    unsigned lane = 0;
    while (lane < M::length) {
        combined.el(lane) = a.el(lane) & b.el(lane);
        ++lane;
    }
    return combined;
}
62
63
64 template<class V> SIMDPP_INL
bit_andnot(const V & a,const V & b)65 V bit_andnot(const V& a, const V& b)
66 {
67 V r;
68 using E = typename V::element_type;
69 using U = typename V::uint_element_type;
70 for (unsigned i = 0; i < V::length; i++) {
71 U a1 = bit_cast<U, E>(a.el(i));
72 U b1 = bit_cast<U, E>(b.el(i));
73 r.el(i) = bit_cast<E, U>(a1 & ~b1);
74 }
75 return r;
76 }
77
/** Applies an inverted mask to a vector: elements whose mask entry is set are
    zeroed, all other elements are kept.

    @param a vector operand
    @param m mask operand; element i of the mask controls element i of @a a
    @return vector with 0 where m.el(i) is true and a.el(i) elsewhere
*/
template<class V, class M> SIMDPP_INL
V bit_andnot_vm(const V& a, const M& m)
{
    V result;
    for (unsigned lane = 0; lane < V::length; ++lane) {
        if (m.el(lane)) {
            result.el(lane) = 0;
        } else {
            result.el(lane) = a.el(lane);
        }
    }
    return result;
}
87
/** Element-wise "a AND NOT b" of two masks.

    The entry of @a b is XORed with 1 before being ANDed with the entry of
    @a a (this presumably relies on mask entries being 0/1 flags -- confirm
    against the mask types).

    @param a first mask
    @param b mask whose entries are inverted before combining
    @return mask whose element i is a.el(i) & (b.el(i) ^ 1)
*/
template<class M> SIMDPP_INL
M bit_andnot_mm(const M& a, const M& b)
{
    M combined;
    unsigned lane = 0;
    while (lane < M::length) {
        combined.el(lane) = a.el(lane) & (b.el(lane) ^ 1);
        ++lane;
    }
    return combined;
}
97
98
99 template<class V> SIMDPP_INL
bit_or(const V & a,const V & b)100 V bit_or(const V& a, const V& b)
101 {
102 V r;
103 using E = typename V::element_type;
104 using U = typename V::uint_element_type;
105 for (unsigned i = 0; i < V::length; i++) {
106 U a1 = bit_cast<U, E>(a.el(i));
107 U b1 = bit_cast<U, E>(b.el(i));
108 r.el(i) = bit_cast<E, U>(a1 | b1);
109 }
110 return r;
111 }
112
/** Element-wise OR of two masks.

    @param a first mask
    @param b second mask
    @return mask whose element i is a.el(i) | b.el(i)
*/
template<class M> SIMDPP_INL
M bit_or_mm(const M& a, const M& b)
{
    M combined;
    unsigned lane = 0;
    while (lane < M::length) {
        combined.el(lane) = a.el(lane) | b.el(lane);
        ++lane;
    }
    return combined;
}
122
123 template<class V> SIMDPP_INL
bit_xor(const V & a,const V & b)124 V bit_xor(const V& a, const V& b)
125 {
126 V r;
127 using E = typename V::element_type;
128 using U = typename V::uint_element_type;
129 for (unsigned i = 0; i < V::length; i++) {
130 U a1 = bit_cast<U, E>(a.el(i));
131 U b1 = bit_cast<U, E>(b.el(i));
132 r.el(i) = bit_cast<E, U>(a1 ^ b1);
133 }
134 return r;
135 }
136
/** Element-wise XOR of two masks.

    @param a first mask
    @param b second mask
    @return mask whose element i is a.el(i) ^ b.el(i)
*/
template<class M> SIMDPP_INL
M bit_xor_mm(const M& a, const M& b)
{
    M combined;
    unsigned lane = 0;
    while (lane < M::length) {
        combined.el(lane) = a.el(lane) ^ b.el(lane);
        ++lane;
    }
    return combined;
}
146
/** Element-wise inversion of a mask.

    Each entry is XORed with 1 (this presumably relies on mask entries being
    0/1 flags -- confirm against the mask types).

    @param a mask to invert
    @return mask whose element i is a.el(i) ^ 1
*/
template<class M> SIMDPP_INL
M bit_not_mm(const M& a)
{
    M inverted;
    unsigned lane = 0;
    while (lane < M::length) {
        inverted.el(lane) = a.el(lane) ^ 1;
        ++lane;
    }
    return inverted;
}
156
157 template<class V> SIMDPP_INL
test_bits_any(const V & a)158 bool test_bits_any(const V& a)
159 {
160 using U = typename V::uint_element_type;
161 U r = 0;
162 for (unsigned i = 0; i < a.length; ++i) {
163 r |= bit_cast<U>(a.el(i));
164 }
165 return r != 0;
166 }
167
/** Counts the set bits of an 8-bit value.

    The count is built up in parallel: 1-bit fields are summed into 2-bit
    fields, those into 4-bit fields, and finally the two 4-bit counts are added.

    @param v value whose set bits are counted
    @return number of set bits in @a v (0..8)
*/
static SIMDPP_INL
uint8_t el_popcnt8(uint8_t v)
{
    const uint8_t mask_pairs = 0x55;
    const uint8_t mask_quads = 0x33;
    const uint8_t mask_low_nibble = 0x0f;

    // each 2-bit field now holds the number of set bits in that field
    const uint8_t pair_counts =
        static_cast<uint8_t>(v - ((v >> 1) & mask_pairs));
    // each 4-bit field now holds the number of set bits in that field
    const uint8_t quad_counts =
        static_cast<uint8_t>((pair_counts & mask_quads) +
                             ((pair_counts >> 2) & mask_quads));
    // add the high nibble count to the low nibble count
    return static_cast<uint8_t>((quad_counts + (quad_counts >> 4)) &
                                mask_low_nibble);
}
179
/** Counts the set bits of a 16-bit value.

    The count is built up in parallel: 1-bit fields are summed into 2-bit
    fields, those into 4-bit fields, those into per-byte counts, and finally
    the two byte counts are added.

    @param v value whose set bits are counted
    @return number of set bits in @a v (0..16)
*/
static SIMDPP_INL
uint16_t el_popcnt16(uint16_t v)
{
    const uint16_t mask_pairs = 0x5555;
    const uint16_t mask_quads = 0x3333;
    const uint16_t mask_low_nibbles = 0x0f0f;

    // each 2-bit field now holds the number of set bits in that field
    const uint16_t pair_counts =
        static_cast<uint16_t>(v - ((v >> 1) & mask_pairs));
    // each 4-bit field now holds the number of set bits in that field
    const uint16_t quad_counts =
        static_cast<uint16_t>((pair_counts & mask_quads) +
                              ((pair_counts >> 2) & mask_quads));
    // each byte now holds the number of set bits in that byte
    const uint16_t byte_counts =
        static_cast<uint16_t>((quad_counts + (quad_counts >> 4)) &
                              mask_low_nibbles);
    // add the high byte count to the low byte count and drop the high byte
    const uint16_t folded =
        static_cast<uint16_t>(byte_counts + (byte_counts >> 8));
    return static_cast<uint16_t>(folded & 0x00ff);
}
193
/** Counts the set bits of a 32-bit value.

    The count is built up in parallel: 1-bit fields are summed into 2-bit
    fields, those into 4-bit fields, those into per-byte counts. The byte
    counts are then summed with a single multiplication.

    @param v value whose set bits are counted
    @return number of set bits in @a v (0..32)
*/
static SIMDPP_INL
uint32_t el_popcnt32(uint32_t v)
{
    const uint32_t mask_pairs = 0x55555555;
    const uint32_t mask_quads = 0x33333333;
    const uint32_t mask_low_nibbles = 0x0f0f0f0f;

    // each 2-bit field now holds the number of set bits in that field
    const uint32_t pair_counts = v - ((v >> 1) & mask_pairs);
    // each 4-bit field now holds the number of set bits in that field
    const uint32_t quad_counts =
        (pair_counts & mask_quads) + ((pair_counts >> 2) & mask_quads);
    // each byte now holds the number of set bits in that byte
    const uint32_t byte_counts =
        (quad_counts + (quad_counts >> 4)) & mask_low_nibbles;
    // Multiplying by 0x01010101 accumulates the sum of all bytes in the top
    // byte: 1 mul + 1 shift instead of 2 adds + 2 shifts.
    return (byte_counts * 0x01010101) >> 24;
}
207
/** Counts the set bits of a 64-bit value.

    The count is built up in parallel: 1-bit fields are summed into 2-bit
    fields, those into 4-bit fields, those into per-byte counts. The byte
    counts are then summed with a single multiplication.

    @param v value whose set bits are counted
    @return number of set bits in @a v (0..64)
*/
static SIMDPP_INL
uint64_t el_popcnt64(uint64_t v)
{
    const uint64_t mask_pairs = 0x5555555555555555;
    const uint64_t mask_quads = 0x3333333333333333;
    const uint64_t mask_low_nibbles = 0x0f0f0f0f0f0f0f0f;

    // each 2-bit field now holds the number of set bits in that field
    const uint64_t pair_counts = v - ((v >> 1) & mask_pairs);
    // each 4-bit field now holds the number of set bits in that field
    const uint64_t quad_counts =
        (pair_counts & mask_quads) + ((pair_counts >> 2) & mask_quads);
    // each byte now holds the number of set bits in that byte
    const uint64_t byte_counts =
        (quad_counts + (quad_counts >> 4)) & mask_low_nibbles;
    // Multiplying by 0x0101010101010101 accumulates the sum of all bytes in
    // the top byte: 1 mul + 1 shift instead of 3 adds + 3 shifts.
    return (byte_counts * 0x0101010101010101) >> 56;
}
221
222 } // namespace null
223 } // namespace detail
224 } // namespace SIMDPP_ARCH_NAMESPACE
225 } // namespace simdpp
226
227 #endif
228 #endif
229