1 /*  Copyright (C) 2011-2012  Povilas Kanapickas <povilas@radix.lt>
2 
3     Distributed under the Boost Software License, Version 1.0.
4         (See accompanying file LICENSE_1_0.txt or copy at
5             http://www.boost.org/LICENSE_1_0.txt)
6 */
7 
8 #ifndef LIBSIMDPP_DETAIL_NULL_BITWISE_H
9 #define LIBSIMDPP_DETAIL_NULL_BITWISE_H
10 #if SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
11 
12 #ifndef LIBSIMDPP_SIMD_H
13     #error "This file must be included through simd.h"
14 #endif
15 
16 #include <simdpp/types.h>
17 #include <simdpp/core/cast.h>
18 #include <simdpp/detail/null/mask.h>
19 
20 namespace simdpp {
21 namespace SIMDPP_ARCH_NAMESPACE {
22 namespace detail {
23 namespace null {
24 
/* Naming convention used below:
   *_vm variants accept a vector as the first argument and a mask as the second;
   *_mm variants accept masks as both arguments.
*/
28 
29 template<class V> SIMDPP_INL
bit_and(const V & a,const V & b)30 V bit_and(const V& a, const V& b)
31 {
32     V r;
33     using E = typename V::element_type;
34     using U = typename V::uint_element_type;
35     for (unsigned i = 0; i < V::length; i++) {
36         U a1 = bit_cast<U, E>(a.el(i));
37         U b1 = bit_cast<U, E>(b.el(i));
38         r.el(i) = bit_cast<E, U>(a1 & b1);
39     }
40     return r;
41 }
42 
/** Masks a vector: keeps the elements selected by @a m and zeroes the rest.

    @param a source vector
    @param m mask; m.el(i) is evaluated as a boolean condition
    @return vector whose i-th element is a.el(i) where m.el(i) is true and 0
            otherwise
*/
template<class V, class M> SIMDPP_INL
V bit_and_vm(const V& a, const M& m)
{
    V r;
    for (unsigned i = 0; i < V::length; i++) {
        r.el(i) = m.el(i) ? a.el(i) : 0;
    }
    return r;
}
52 
/** Element-wise AND of two masks.

    @param a first mask
    @param b second mask
    @return mask whose i-th element is a.el(i) & b.el(i)
*/
template<class M> SIMDPP_INL
M bit_and_mm(const M& a, const M& b)
{
    M r;
    for (unsigned i = 0; i < M::length; i++) {
        r.el(i) = a.el(i) & b.el(i);
    }
    return r;
}
62 
63 
64 template<class V> SIMDPP_INL
bit_andnot(const V & a,const V & b)65 V bit_andnot(const V& a, const V& b)
66 {
67     V r;
68     using E = typename V::element_type;
69     using U = typename V::uint_element_type;
70     for (unsigned i = 0; i < V::length; i++) {
71         U a1 = bit_cast<U, E>(a.el(i));
72         U b1 = bit_cast<U, E>(b.el(i));
73         r.el(i) = bit_cast<E, U>(a1 & ~b1);
74     }
75     return r;
76 }
77 
/** Masks a vector with an inverted mask: zeroes the elements selected by
    @a m and keeps the rest.

    @param a source vector
    @param m mask; m.el(i) is evaluated as a boolean condition
    @return vector whose i-th element is 0 where m.el(i) is true and a.el(i)
            otherwise
*/
template<class V, class M> SIMDPP_INL
V bit_andnot_vm(const V& a, const M& m)
{
    V r;
    for (unsigned i = 0; i < V::length; i++) {
        r.el(i) = m.el(i) ? 0 : a.el(i);
    }
    return r;
}
87 
/** Element-wise AND of mask @a a with the inverse of mask @a b.

    The inverse of a mask element is obtained as b.el(i) ^ 1, which flips only
    the lowest bit; this presumes mask elements hold 0 or 1 (e.g. bool).

    @param a first mask
    @param b mask to invert before combining
    @return mask whose i-th element is a.el(i) & (b.el(i) ^ 1)
*/
template<class M> SIMDPP_INL
M bit_andnot_mm(const M& a, const M& b)
{
    M r;
    for (unsigned i = 0; i < M::length; i++) {
        r.el(i) = a.el(i) & (b.el(i) ^ 1);
    }
    return r;
}
97 
98 
99 template<class V> SIMDPP_INL
bit_or(const V & a,const V & b)100 V bit_or(const V& a, const V& b)
101 {
102     V r;
103     using E = typename V::element_type;
104     using U = typename V::uint_element_type;
105     for (unsigned i = 0; i < V::length; i++) {
106         U a1 = bit_cast<U, E>(a.el(i));
107         U b1 = bit_cast<U, E>(b.el(i));
108         r.el(i) = bit_cast<E, U>(a1 | b1);
109     }
110     return r;
111 }
112 
/** Element-wise OR of two masks.

    @param a first mask
    @param b second mask
    @return mask whose i-th element is a.el(i) | b.el(i)
*/
template<class M> SIMDPP_INL
M bit_or_mm(const M& a, const M& b)
{
    M r;
    for (unsigned i = 0; i < M::length; i++) {
        r.el(i) = a.el(i) | b.el(i);
    }
    return r;
}
122 
123 template<class V> SIMDPP_INL
bit_xor(const V & a,const V & b)124 V bit_xor(const V& a, const V& b)
125 {
126     V r;
127     using E = typename V::element_type;
128     using U = typename V::uint_element_type;
129     for (unsigned i = 0; i < V::length; i++) {
130         U a1 = bit_cast<U, E>(a.el(i));
131         U b1 = bit_cast<U, E>(b.el(i));
132         r.el(i) = bit_cast<E, U>(a1 ^ b1);
133     }
134     return r;
135 }
136 
/** Element-wise XOR of two masks.

    @param a first mask
    @param b second mask
    @return mask whose i-th element is a.el(i) ^ b.el(i)
*/
template<class M> SIMDPP_INL
M bit_xor_mm(const M& a, const M& b)
{
    M r;
    for (unsigned i = 0; i < M::length; i++) {
        r.el(i) = a.el(i) ^ b.el(i);
    }
    return r;
}
146 
/** Inverts every element of a mask.

    The inversion is computed as a.el(i) ^ 1, which flips only the lowest bit;
    this presumes mask elements hold 0 or 1 (e.g. bool).

    @param a mask to invert
    @return mask whose i-th element is a.el(i) ^ 1
*/
template<class M> SIMDPP_INL
M bit_not_mm(const M& a)
{
    M r;
    for (unsigned i = 0; i < M::length; i++) {
        r.el(i) = a.el(i) ^ 1;
    }
    return r;
}
156 
157 template<class V> SIMDPP_INL
test_bits_any(const V & a)158 bool test_bits_any(const V& a)
159 {
160     using U = typename V::uint_element_type;
161     U r = 0;
162     for (unsigned i = 0; i < a.length; ++i) {
163         r |= bit_cast<U>(a.el(i));
164     }
165     return r != 0;
166 }
167 
/** Counts the set bits of an 8-bit value using parallel bit summation.

    The arithmetic below is carried out on values promoted to int; every
    intermediate result fits in 8 bits, so the implicit narrowing on
    assignment back to @a v loses nothing.

    @param v value to inspect
    @return number of set bits in @a v (0..8)
*/
static SIMDPP_INL
uint8_t el_popcnt8(uint8_t v)
{
    uint8_t m55 = 0x55;
    uint8_t m33 = 0x33;
    uint8_t m0f = 0x0f;
    // each 2-bit field now holds the number of set bits it contained
    v = v - ((v >> 1) & m55);
    // sum neighbouring 2-bit counts into 4-bit fields
    v = (v & m33) + ((v >> 2) & m33);
    // sum the two 4-bit counts into the low nibble
    v = (v + (v >> 4)) & m0f;
    return v;
}
179 
/** Counts the set bits of a 16-bit value using parallel bit summation.

    The arithmetic below is carried out on values promoted to int; every
    intermediate result fits in 16 bits, so the implicit narrowing on
    assignment back to @a v loses nothing.

    @param v value to inspect
    @return number of set bits in @a v (0..16)
*/
static SIMDPP_INL
uint16_t el_popcnt16(uint16_t v)
{
    uint16_t m55 = 0x5555;
    uint16_t m33 = 0x3333;
    uint16_t m0f = 0x0f0f;
    // each 2-bit field now holds the number of set bits it contained
    v = v - ((v >> 1) & m55);
    // sum neighbouring 2-bit counts into 4-bit fields
    v = (v & m33) + ((v >> 2) & m33);
    // sum neighbouring 4-bit counts into per-byte counts
    v = (v + (v >> 4)) & m0f;
    // add the high byte count onto the low byte, then drop the high byte
    v = (v + (v >> 8));
    v = v & 0x00ff;
    return v;
}
193 
/** Counts the set bits of a 32-bit value using parallel bit summation.

    @param v value to inspect
    @return number of set bits in @a v (0..32)
*/
static SIMDPP_INL
uint32_t el_popcnt32(uint32_t v)
{
    uint32_t m55 = 0x55555555;
    uint32_t m33 = 0x33333333;
    uint32_t m0f = 0x0f0f0f0f;
    // each 2-bit field now holds the number of set bits it contained
    v = v - ((v >> 1) & m55);
    // sum neighbouring 2-bit counts into 4-bit fields
    v = (v & m33) + ((v >> 2) & m33);
    // sum neighbouring 4-bit counts into per-byte counts
    v = (v + (v >> 4)) & m0f;
    // rather than doing 2 adds + 2 shifts we can do 1 mul + 1 shift:
    // the multiplication accumulates all four byte counts in the top byte
    v = (v * 0x01010101) >> 24;
    return v;
}
207 
/** Counts the set bits of a 64-bit value using parallel bit summation.

    @param v value to inspect
    @return number of set bits in @a v (0..64)
*/
static SIMDPP_INL
uint64_t el_popcnt64(uint64_t v)
{
    uint64_t m55 = 0x5555555555555555;
    uint64_t m33 = 0x3333333333333333;
    uint64_t m0f = 0x0f0f0f0f0f0f0f0f;
    // each 2-bit field now holds the number of set bits it contained
    v = v - ((v >> 1) & m55);
    // sum neighbouring 2-bit counts into 4-bit fields
    v = (v & m33) + ((v >> 2) & m33);
    // sum neighbouring 4-bit counts into per-byte counts
    v = (v + (v >> 4)) & m0f;
    // rather than doing 3 adds + 3 shifts we can do 1 mul + 1 shift:
    // the multiplication accumulates all eight byte counts in the top byte
    v = (v * 0x0101010101010101) >> 56;
    return v;
}
221 
222 } // namespace null
223 } // namespace detail
224 } // namespace SIMDPP_ARCH_NAMESPACE
225 } // namespace simdpp
226 
227 #endif
228 #endif
229