1 /*  Copyright (C) 2016  Povilas Kanapickas <povilas@radix.lt>
2 
3     Distributed under the Boost Software License, Version 1.0.
4         (See accompanying file LICENSE_1_0.txt or copy at
5             http://www.boost.org/LICENSE_1_0.txt)
6 */
7 
8 #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_REDUCE_AND_H
9 #define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_REDUCE_AND_H
10 
11 #ifndef LIBSIMDPP_SIMD_H
12     #error "This file must be included through simd.h"
13 #endif
14 
15 #include <simdpp/types.h>
16 #include <simdpp/core/bit_and.h>
17 #include <simdpp/core/extract.h>
18 #include <simdpp/core/move_l.h>
19 #include <simdpp/core/make_uint.h>
20 
21 namespace simdpp {
22 namespace SIMDPP_ARCH_NAMESPACE {
23 namespace detail {
24 namespace insn {
25 
26 static SIMDPP_INL
i_reduce_and(const uint8x16 & a)27 uint8_t i_reduce_and(const uint8x16& a)
28 {
29 #if SIMDPP_USE_NULL
30     uint8_t r = a.el(0);
31     for (unsigned i = 0; i < a.length; i++) {
32         r &= a.el(i);
33     }
34     return r;
35 #elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
36     uint8x16 r = bit_and(a, move16_l<8>(a));
37     r = bit_and(r, move16_l<4>(r));
38     r = bit_and(r, move16_l<2>(r));
39     r = bit_and(r, move16_l<1>(r));
40     return extract<0>(r);
41 #endif
42 }
43 
#if SIMDPP_USE_AVX2
// Bitwise-AND reduction of a 32 x 8-bit vector: the two parts obtained through
// detail::extract128 are ANDed together and the 16-element overload reduces
// the combined vector.
static SIMDPP_INL
uint8_t i_reduce_and(const uint8x32& a)
{
    uint8x16 merged = detail::extract128<0>(a);
    merged = bit_and(merged, detail::extract128<1>(a));
    return i_reduce_and(merged);
}
#endif
53 
#if SIMDPP_USE_AVX512BW
// Bitwise-AND reduction of a 64 x 8-bit vector: the two parts obtained through
// detail::extract256 are ANDed together and the 32-element overload reduces
// the combined vector.
SIMDPP_INL uint8_t i_reduce_and(const uint8<64>& a)
{
    uint8<32> merged = detail::extract256<0>(a);
    merged = bit_and(merged, detail::extract256<1>(a));
    return i_reduce_and(merged);
}
#endif
62 
63 template<unsigned N>
i_reduce_and(const uint8<N> & a)64 SIMDPP_INL uint8_t i_reduce_and(const uint8<N>& a)
65 {
66 #if SIMDPP_USE_NULL
67     uint8_t r = 0xff;
68     for (unsigned j = 0; j < a.vec_length; ++j) {
69         for (unsigned i = 0; i < a.base_length; i++) {
70             r &= a.vec(j).el(i);
71         }
72     }
73     return r;
74 #else
75     uint8v r = a.vec(0);
76     for (unsigned j = 1; j < a.vec_length; ++j) {
77         r = bit_and(r, a.vec(j));
78     }
79     return i_reduce_and(r);
80 #endif
81 }
82 
83 // -----------------------------------------------------------------------------
84 
85 static SIMDPP_INL
i_reduce_and(const uint16x8 & a)86 uint16_t i_reduce_and(const uint16x8& a)
87 {
88 #if SIMDPP_USE_NULL
89     uint16_t r = a.el(0);
90     for (unsigned i = 0; i < a.length; i++) {
91         r &= a.el(i);
92     }
93     return r;
94 #elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
95     uint16x8 r = bit_and(a, move8_l<4>(a));
96     r = bit_and(r, move8_l<2>(r));
97     r = bit_and(r, move8_l<1>(r));
98     return extract<0>(r);
99 #endif
100 }
101 
#if SIMDPP_USE_AVX2
// Bitwise-AND reduction of a 16 x 16-bit vector: the two parts obtained
// through detail::extract128 are ANDed together and the 8-element overload
// reduces the combined vector.
static SIMDPP_INL
uint16_t i_reduce_and(const uint16x16& a)
{
    uint16x8 merged = detail::extract128<0>(a);
    merged = bit_and(merged, detail::extract128<1>(a));
    return i_reduce_and(merged);
}
#endif
111 
#if SIMDPP_USE_AVX512BW
// Bitwise-AND reduction of a 32 x 16-bit vector: the two parts obtained
// through detail::extract256 are ANDed together and the 16-element overload
// reduces the combined vector.
SIMDPP_INL uint16_t i_reduce_and(const uint16<32>& a)
{
    uint16<16> merged = detail::extract256<0>(a);
    merged = bit_and(merged, detail::extract256<1>(a));
    return i_reduce_and(merged);
}
#endif
120 
121 template<unsigned N>
i_reduce_and(const uint16<N> & a)122 SIMDPP_INL uint16_t i_reduce_and(const uint16<N>& a)
123 {
124 #if SIMDPP_USE_NULL
125     uint16_t r = 0xffff;
126     for (unsigned j = 0; j < a.vec_length; ++j) {
127         for (unsigned i = 0; i < a.base_length; i++) {
128             r &= a.vec(j).el(i);
129         }
130     }
131     return r;
132 #else
133     uint16v r = a.vec(0);
134     for (unsigned j = 1; j < a.vec_length; ++j) {
135         r = bit_and(r, a.vec(j));
136     }
137     return i_reduce_and(r);
138 #endif
139 }
140 
141 // -----------------------------------------------------------------------------
142 
143 static SIMDPP_INL
i_reduce_and(const uint32x4 & a)144 uint32_t i_reduce_and(const uint32x4& a)
145 {
146 #if SIMDPP_USE_NULL
147     uint32_t r = a.el(0);
148     for (unsigned i = 0; i < a.length; i++) {
149         r &= a.el(i);
150     }
151     return r;
152 #elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
153     uint32x4 r = bit_and(a, move4_l<2>(a));
154     r = bit_and(r, move4_l<1>(r));
155     return extract<0>(r);
156 #endif
157 }
158 
#if SIMDPP_USE_AVX2
// Bitwise-AND reduction of an 8 x 32-bit vector. The two parts obtained
// through detail::extract128 are ANDed together and the 4-element overload
// finishes the reduction, the same way the 8- and 16-bit AVX2 overloads
// delegate, rather than repeating the move/AND sequence in this function.
static SIMDPP_INL
uint32_t i_reduce_and(const uint32x8& a)
{
    uint32x4 merged = detail::extract128<0>(a);
    merged = bit_and(merged, detail::extract128<1>(a));
    return i_reduce_and(merged);
}
#endif
170 
#if SIMDPP_USE_AVX512F
// Bitwise-AND reduction of a 16 x 32-bit vector: the two parts obtained
// through extract256 are ANDed together and the 8-element overload reduces
// the combined vector.
static SIMDPP_INL
uint32_t i_reduce_and(const uint32<16>& a)
{
    uint32<8> merged = extract256<0>(a);
    merged = bit_and(merged, extract256<1>(a));
    return i_reduce_and(merged);
}
#endif
178 
179 template<unsigned N>
i_reduce_and(const uint32<N> & a)180 SIMDPP_INL uint32_t i_reduce_and(const uint32<N>& a)
181 {
182 #if SIMDPP_USE_NULL
183     uint32_t r = 0xffffffff;
184     for (unsigned j = 0; j < a.vec_length; ++j) {
185         for (unsigned i = 0; i < a.base_length; i++) {
186             r &= a.vec(j).el(i);
187         }
188     }
189     return r;
190 #else
191     uint32v r = a.vec(0);
192     for (unsigned j = 1; j < a.vec_length; ++j) {
193         r = bit_and(r, a.vec(j));
194     }
195     return i_reduce_and(r);
196 #endif
197 }
198 
199 // -----------------------------------------------------------------------------
200 
201 static SIMDPP_INL
i_reduce_and(const uint64x2 & a)202 uint64_t i_reduce_and(const uint64x2& a)
203 {
204 #if SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA
205     uint64x2 r = bit_and(a, move2_l<1>(a));
206     return extract<0>(r);
207 #elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
208     uint64_t r = a.el(0);
209     for (unsigned i = 0; i < a.length; i++) {
210         r &= a.el(i);
211     }
212     return r;
213 #endif
214 }
215 
#if SIMDPP_USE_AVX2
// Bitwise-AND reduction of a 4 x 64-bit vector. The two parts obtained
// through detail::extract128 are ANDed together and the 2-element overload
// finishes the reduction, the same way the 8- and 16-bit AVX2 overloads
// delegate, rather than repeating the move/AND step in this function.
static SIMDPP_INL
uint64_t i_reduce_and(const uint64x4& a)
{
    uint64x2 merged = detail::extract128<0>(a);
    merged = bit_and(merged, detail::extract128<1>(a));
    return i_reduce_and(merged);
}
#endif
226 
#if SIMDPP_USE_AVX512F
// Bitwise-AND reduction of an 8 x 64-bit vector: the two parts obtained
// through extract256 are ANDed together and the 4-element overload reduces
// the combined vector.
static SIMDPP_INL
uint64_t i_reduce_and(const uint64<8>& a)
{
    uint64<4> merged = extract256<0>(a);
    merged = bit_and(merged, extract256<1>(a));
    return i_reduce_and(merged);
}
#endif
234 
235 template<unsigned N>
i_reduce_and(const uint64<N> & a)236 SIMDPP_INL uint64_t i_reduce_and(const uint64<N>& a)
237 {
238 #if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
239     uint64_t r = 0xffffffffffffffff;
240     for (unsigned j = 0; j < a.vec_length; ++j) {
241         for (unsigned i = 0; i < a.base_length; i++) {
242             r &= a.vec(j).el(i);
243         }
244     }
245     return r;
246 #else
247     uint64v r = a.vec(0);
248     for (unsigned j = 1; j < a.vec_length; ++j) {
249         r = bit_and(r, a.vec(j));
250     }
251     return i_reduce_and(r);
252 #endif
253 }
254 
255 // -----------------------------------------------------------------------------
256 
257 } // namespace insn
258 } // namespace detail
259 } // namespace SIMDPP_ARCH_NAMESPACE
260 } // namespace simdpp
261 
262 #endif
263 
264