1 /* Copyright (C) 2017 Povilas Kanapickas <povilas@radix.lt> 2 3 Distributed under the Boost Software License, Version 1.0. 4 (See accompanying file LICENSE_1_0.txt or copy at 5 http://www.boost.org/LICENSE_1_0.txt) 6 */ 7 8 #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_REDUCE_POPCNT_H 9 #define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_REDUCE_POPCNT_H 10 11 #ifndef LIBSIMDPP_SIMD_H 12 #error "This file must be included through simd.h" 13 #endif 14 15 #include <simdpp/types.h> 16 #include <simdpp/detail/null/bitwise.h> 17 #include <simdpp/core/i_popcnt.h> 18 #include <simdpp/core/i_reduce_add.h> 19 20 namespace simdpp { 21 namespace SIMDPP_ARCH_NAMESPACE { 22 namespace detail { 23 namespace insn { 24 25 static SIMDPP_INL i_reduce_popcnt(const uint32<4> & a)26uint32_t i_reduce_popcnt(const uint32<4>& a) 27 { 28 #if SIMDPP_USE_NULL 29 uint32_t r = 0; 30 for (unsigned i = 0; i < a.length; i++) { 31 r += detail::null::el_popcnt32(a.el(i)); 32 } 33 return r; 34 #elif SIMDPP_USE_X86_POPCNT_INSN 35 uint32_t r = 0; 36 #if SIMDPP_64_BITS 37 uint64<2> a64; a64 = a; 38 r += _mm_popcnt_u64(extract<0>(a64)); 39 r += _mm_popcnt_u64(extract<1>(a64)); 40 #else 41 r += _mm_popcnt_u32(extract<0>(a)); 42 r += _mm_popcnt_u32(extract<1>(a)); 43 r += _mm_popcnt_u32(extract<2>(a)); 44 r += _mm_popcnt_u32(extract<3>(a)); 45 #endif 46 return r; 47 #elif SIMDPP_USE_NEON 48 uint8<16> r = vcntq_u8(vreinterpretq_u8_u32(a.native())); 49 return reduce_add(r); 50 #elif SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA 51 uint64<2> a64; a64 = a; 52 a64 = popcnt(a64); 53 return reduce_add(a64); 54 #elif SIMDPP_USE_SSE2 55 uint64<2> r = popcnt((uint64<2>)a); 56 return (uint32_t) reduce_add(r); 57 #else 58 uint32<4> r = popcnt(a); 59 return reduce_add(r); 60 #endif 61 } 62 63 #if SIMDPP_USE_AVX2 64 static SIMDPP_INL i_reduce_popcnt(const uint32<8> & a)65uint32_t i_reduce_popcnt(const uint32<8>& a) 66 { 67 #if SIMDPP_USE_X86_POPCNT_INSN && SIMDPP_64_BITS 68 uint32<4> a0, a1; 69 split(a, a0, a1); 70 return i_reduce_popcnt(a0) + i_reduce_popcnt(a1); 71 #else 72 uint64<4> r = popcnt((uint64<4>)a); 73 return (uint32_t) reduce_add(r); 74 #endif 75 } 76 #endif 77 78 #if SIMDPP_USE_AVX512F 79 static SIMDPP_INL i_reduce_popcnt(const uint32<16> & a)80uint32_t i_reduce_popcnt(const uint32<16>& a) 81 { 82 #if SIMDPP_USE_X86_POPCNT_INSN && SIMDPP_64_BITS 83 uint32<8> a0, a1; 84 split(a, a0, a1); 85 return i_reduce_popcnt(a0) + i_reduce_popcnt(a1); 86 #else 87 // TODO: support AVX512VPOPCNTDQ 88 uint64<8> r = popcnt((uint64<8>)a); 89 return reduce_add(r); 90 #endif 91 } 92 #endif 93 94 template<unsigned N> SIMDPP_INL i_reduce_popcnt(const uint32<N> & a)95uint32_t i_reduce_popcnt(const uint32<N>& a) 96 { 97 uint32_t r = 0; 98 for (unsigned j = 0; j < a.vec_length; ++j) { 99 r += i_reduce_popcnt(a.vec(j)); 100 } 101 return r; 102 } 103 104 // ----------------------------------------------------------------------------- 105 106 } // namespace insn 107 } // namespace detail 108 } // namespace SIMDPP_ARCH_NAMESPACE 109 } // namespace simdpp 110 111 #endif 112 113