1 /* Copyright (C) 2016 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6 */
7
8 #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_REDUCE_AND_H
9 #define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_REDUCE_AND_H
10
11 #ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13 #endif
14
15 #include <simdpp/types.h>
16 #include <simdpp/core/bit_and.h>
17 #include <simdpp/core/extract.h>
18 #include <simdpp/core/move_l.h>
19 #include <simdpp/core/make_uint.h>
20
21 namespace simdpp {
22 namespace SIMDPP_ARCH_NAMESPACE {
23 namespace detail {
24 namespace insn {
25
26 static SIMDPP_INL
i_reduce_and(const uint8x16 & a)27 uint8_t i_reduce_and(const uint8x16& a)
28 {
29 #if SIMDPP_USE_NULL
30 uint8_t r = a.el(0);
31 for (unsigned i = 0; i < a.length; i++) {
32 r &= a.el(i);
33 }
34 return r;
35 #elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
36 uint8x16 r = bit_and(a, move16_l<8>(a));
37 r = bit_and(r, move16_l<4>(r));
38 r = bit_and(r, move16_l<2>(r));
39 r = bit_and(r, move16_l<1>(r));
40 return extract<0>(r);
41 #endif
42 }
43
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint8_t i_reduce_and(const uint8x32& a)
{
    // Fold the two 128-bit halves together, then reduce within 128 bits.
    uint8x16 lo = detail::extract128<0>(a);
    uint8x16 hi = detail::extract128<1>(a);
    uint8x16 folded = bit_and(lo, hi);
    return i_reduce_and(folded);
}
#endif
53
#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint8_t i_reduce_and(const uint8<64>& a)
{
    // Fold the two 256-bit halves together, then reduce within 256 bits.
    uint8<32> lo = detail::extract256<0>(a);
    uint8<32> hi = detail::extract256<1>(a);
    uint8<32> folded = bit_and(lo, hi);
    return i_reduce_and(folded);
}
#endif
62
63 template<unsigned N>
i_reduce_and(const uint8<N> & a)64 SIMDPP_INL uint8_t i_reduce_and(const uint8<N>& a)
65 {
66 #if SIMDPP_USE_NULL
67 uint8_t r = 0xff;
68 for (unsigned j = 0; j < a.vec_length; ++j) {
69 for (unsigned i = 0; i < a.base_length; i++) {
70 r &= a.vec(j).el(i);
71 }
72 }
73 return r;
74 #else
75 uint8v r = a.vec(0);
76 for (unsigned j = 1; j < a.vec_length; ++j) {
77 r = bit_and(r, a.vec(j));
78 }
79 return i_reduce_and(r);
80 #endif
81 }
82
83 // -----------------------------------------------------------------------------
84
85 static SIMDPP_INL
i_reduce_and(const uint16x8 & a)86 uint16_t i_reduce_and(const uint16x8& a)
87 {
88 #if SIMDPP_USE_NULL
89 uint16_t r = a.el(0);
90 for (unsigned i = 0; i < a.length; i++) {
91 r &= a.el(i);
92 }
93 return r;
94 #elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
95 uint16x8 r = bit_and(a, move8_l<4>(a));
96 r = bit_and(r, move8_l<2>(r));
97 r = bit_and(r, move8_l<1>(r));
98 return extract<0>(r);
99 #endif
100 }
101
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint16_t i_reduce_and(const uint16x16& a)
{
    // Fold the two 128-bit halves together, then reduce within 128 bits.
    uint16x8 lo = detail::extract128<0>(a);
    uint16x8 hi = detail::extract128<1>(a);
    uint16x8 folded = bit_and(lo, hi);
    return i_reduce_and(folded);
}
#endif
111
#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint16_t i_reduce_and(const uint16<32>& a)
{
    // Fold the two 256-bit halves together, then reduce within 256 bits.
    uint16<16> lo = detail::extract256<0>(a);
    uint16<16> hi = detail::extract256<1>(a);
    uint16<16> folded = bit_and(lo, hi);
    return i_reduce_and(folded);
}
#endif
120
121 template<unsigned N>
i_reduce_and(const uint16<N> & a)122 SIMDPP_INL uint16_t i_reduce_and(const uint16<N>& a)
123 {
124 #if SIMDPP_USE_NULL
125 uint16_t r = 0xffff;
126 for (unsigned j = 0; j < a.vec_length; ++j) {
127 for (unsigned i = 0; i < a.base_length; i++) {
128 r &= a.vec(j).el(i);
129 }
130 }
131 return r;
132 #else
133 uint16v r = a.vec(0);
134 for (unsigned j = 1; j < a.vec_length; ++j) {
135 r = bit_and(r, a.vec(j));
136 }
137 return i_reduce_and(r);
138 #endif
139 }
140
141 // -----------------------------------------------------------------------------
142
143 static SIMDPP_INL
i_reduce_and(const uint32x4 & a)144 uint32_t i_reduce_and(const uint32x4& a)
145 {
146 #if SIMDPP_USE_NULL
147 uint32_t r = a.el(0);
148 for (unsigned i = 0; i < a.length; i++) {
149 r &= a.el(i);
150 }
151 return r;
152 #elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
153 uint32x4 r = bit_and(a, move4_l<2>(a));
154 r = bit_and(r, move4_l<1>(r));
155 return extract<0>(r);
156 #endif
157 }
158
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint32_t i_reduce_and(const uint32x8& a)
{
    // Fold the two 128-bit halves, then finish with an in-register
    // log-step reduction over the remaining 4 lanes.
    uint32x4 lo = detail::extract128<0>(a);
    uint32x4 hi = detail::extract128<1>(a);
    uint32x4 acc = bit_and(lo, hi);
    acc = bit_and(acc, move4_l<2>(acc));
    acc = bit_and(acc, move4_l<1>(acc));
    return extract<0>(acc);
}
#endif
170
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint32_t i_reduce_and(const uint32<16>& a)
{
    // Fold the two 256-bit halves together, then reduce within 256 bits.
    uint32<8> folded = bit_and(extract256<0>(a), extract256<1>(a));
    return i_reduce_and(folded);
}
#endif
178
179 template<unsigned N>
i_reduce_and(const uint32<N> & a)180 SIMDPP_INL uint32_t i_reduce_and(const uint32<N>& a)
181 {
182 #if SIMDPP_USE_NULL
183 uint32_t r = 0xffffffff;
184 for (unsigned j = 0; j < a.vec_length; ++j) {
185 for (unsigned i = 0; i < a.base_length; i++) {
186 r &= a.vec(j).el(i);
187 }
188 }
189 return r;
190 #else
191 uint32v r = a.vec(0);
192 for (unsigned j = 1; j < a.vec_length; ++j) {
193 r = bit_and(r, a.vec(j));
194 }
195 return i_reduce_and(r);
196 #endif
197 }
198
199 // -----------------------------------------------------------------------------
200
201 static SIMDPP_INL
i_reduce_and(const uint64x2 & a)202 uint64_t i_reduce_and(const uint64x2& a)
203 {
204 #if SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA
205 uint64x2 r = bit_and(a, move2_l<1>(a));
206 return extract<0>(r);
207 #elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
208 uint64_t r = a.el(0);
209 for (unsigned i = 0; i < a.length; i++) {
210 r &= a.el(i);
211 }
212 return r;
213 #endif
214 }
215
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint64_t i_reduce_and(const uint64x4& a)
{
    // Fold the two 128-bit halves, then AND the high lane into the low one.
    uint64x2 lo = detail::extract128<0>(a);
    uint64x2 hi = detail::extract128<1>(a);
    uint64x2 acc = bit_and(lo, hi);
    acc = bit_and(acc, move2_l<1>(acc));
    return extract<0>(acc);
}
#endif
226
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64_t i_reduce_and(const uint64<8>& a)
{
    // Fold the two 256-bit halves together, then reduce within 256 bits.
    uint64<4> folded = bit_and(extract256<0>(a), extract256<1>(a));
    return i_reduce_and(folded);
}
#endif
234
235 template<unsigned N>
i_reduce_and(const uint64<N> & a)236 SIMDPP_INL uint64_t i_reduce_and(const uint64<N>& a)
237 {
238 #if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
239 uint64_t r = 0xffffffffffffffff;
240 for (unsigned j = 0; j < a.vec_length; ++j) {
241 for (unsigned i = 0; i < a.base_length; i++) {
242 r &= a.vec(j).el(i);
243 }
244 }
245 return r;
246 #else
247 uint64v r = a.vec(0);
248 for (unsigned j = 1; j < a.vec_length; ++j) {
249 r = bit_and(r, a.vec(j));
250 }
251 return i_reduce_and(r);
252 #endif
253 }
254
255 // -----------------------------------------------------------------------------
256
257 } // namespace insn
258 } // namespace detail
259 } // namespace SIMDPP_ARCH_NAMESPACE
260 } // namespace simdpp
261
262 #endif
263
264