1 /* Copyright (C) 2013-2017 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6 */
7
8 #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_SHRINK_TO_INT32_H
9 #define LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_SHRINK_TO_INT32_H
10
11 #ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13 #endif
14
15 #include <simdpp/types.h>
16 #include <simdpp/core/permute4.h>
17 #include <simdpp/core/unzip_lo.h>
18 #include <simdpp/core/zip_lo.h>
19
20 namespace simdpp {
21 namespace SIMDPP_ARCH_NAMESPACE {
22 namespace detail {
23 namespace insn {
24
25
i_to_uint32(const uint64<4> & a)26 SIMDPP_INL uint32<4> i_to_uint32(const uint64<4>& a)
27 {
28 #if SIMDPP_USE_NULL
29 uint32<4> r;
30 for (unsigned i = 0; i < 4; i++) {
31 r.el(i) = uint32_t(a.vec(i/2).el(i%2));
32 }
33 return r;
34 #elif SIMDPP_USE_AVX512VL
35 return _mm256_cvtepi64_epi32(a.native());
36 #elif SIMDPP_USE_AVX2
37 uint64<4> a64;
38 a64 = permute4<0,2,0,2>((uint32<8>) a);
39 a64 = permute4<0,2,0,2>(a64);
40 return _mm256_castsi256_si128(a64.native());
41 #elif SIMDPP_USE_NEON64
42 uint32x2_t low = vmovn_u64(a.vec(0).native());
43 return vmovn_high_u64(low, a.vec(1).native());
44 #elif SIMDPP_USE_NEON
45 uint32x2_t low = vmovn_u64(a.vec(0).native());
46 uint32x2_t high = vmovn_u64(a.vec(1).native());
47 return vcombine_u32(low, high);
48 #elif SIMDPP_USE_VSX_207
49 return vec_pack(a.vec(0).native(), a.vec(1).native());
50 #elif SIMDPP_USE_ALTIVEC && SIMDPP_BIG_ENDIAN
51 uint32<4> r1, r2;
52 r1 = a.vec(0);
53 r2 = a.vec(1);
54 return unzip4_hi(r1, r2);
55 #elif SIMDPP_USE_SSE2 || SIMDPP_USE_MSA || (SIMDPP_USE_ALTIVEC && SIMDPP_LITTLE_ENDIAN)
56 uint32<4> r1, r2;
57 r1 = a.vec(0);
58 r2 = a.vec(1);
59 return unzip4_lo(r1, r2);
60 #endif
61 }
62
#if SIMDPP_USE_AVX2
/** Converts a vector of eight 64-bit integers to a vector of eight 32-bit
    integers. Only compiled when AVX2 is enabled.

    @param a source vector of eight 64-bit elements
    @return vector of eight 32-bit elements
*/
SIMDPP_INL uint32<8> i_to_uint32(const uint64<8>& a)
{
#if SIMDPP_USE_AVX512F
    // A single instruction performs the narrowing.
    return _mm512_cvtepi64_epi32(a.native());
#else
    // Shuffle the 32-bit view of each source sub-vector.
    uint64<4> front, back;
    front = permute4<0,2,0,2>(static_cast<uint32<8>>(a.vec(0)));
    back = permute4<0,2,0,2>(static_cast<uint32<8>>(a.vec(1)));

    // Interleave the two shuffled sub-vectors, then reorder the 64-bit
    // elements of the result before viewing it as 32-bit elements again.
    uint64<4> merged;
    merged = zip2_lo(front, back);
    merged = permute4<0,2,1,3>(merged);
    return static_cast<uint32<8>>(merged);
#endif
}
#endif
78
#if SIMDPP_USE_AVX512F
/** Converts a vector of sixteen 64-bit integers to a vector of sixteen
    32-bit integers. Only compiled when AVX512F is enabled.

    @param a source vector of sixteen 64-bit elements
    @return vector of sixteen 32-bit elements
*/
SIMDPP_INL uint32<16> i_to_uint32(const uint64<16>& a)
{
    // Narrow each of the two sub-vectors on its own, then join the results.
    const uint32<8> lower_half = _mm512_cvtepi64_epi32(a.vec(0).native());
    const uint32<8> upper_half = _mm512_cvtepi64_epi32(a.vec(1).native());
    return combine(lower_half, upper_half);
}
#endif
87
88 template<unsigned N> SIMDPP_INL
i_to_uint32(const uint64<N> & a)89 uint32<N> i_to_uint32(const uint64<N>& a)
90 {
91 SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT(uint32<N>, i_to_uint32, a)
92 }
93
94
95 } // namespace insn
96 } // namespace detail
97 } // namespace SIMDPP_ARCH_NAMESPACE
98 } // namespace simdpp
99
100 #endif
101
102
103