1 /*  Copyright (C) 2013-2017  Povilas Kanapickas <povilas@radix.lt>
2 
3     Distributed under the Boost Software License, Version 1.0.
4         (See accompanying file LICENSE_1_0.txt or copy at
5             http://www.boost.org/LICENSE_1_0.txt)
6 */
7 
8 #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_SHRINK_TO_INT32_H
9 #define LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_SHRINK_TO_INT32_H
10 
11 #ifndef LIBSIMDPP_SIMD_H
12     #error "This file must be included through simd.h"
13 #endif
14 
15 #include <simdpp/types.h>
16 #include <simdpp/core/permute4.h>
17 #include <simdpp/core/unzip_lo.h>
18 #include <simdpp/core/zip_lo.h>
19 
20 namespace simdpp {
21 namespace SIMDPP_ARCH_NAMESPACE {
22 namespace detail {
23 namespace insn {
24 
25 
i_to_uint32(const uint64<4> & a)26 SIMDPP_INL uint32<4> i_to_uint32(const uint64<4>& a)
27 {
28 #if SIMDPP_USE_NULL
29     uint32<4> r;
30     for (unsigned i = 0; i < 4; i++) {
31         r.el(i) = uint32_t(a.vec(i/2).el(i%2));
32     }
33     return r;
34 #elif SIMDPP_USE_AVX512VL
35     return _mm256_cvtepi64_epi32(a.native());
36 #elif SIMDPP_USE_AVX2
37     uint64<4> a64;
38     a64 = permute4<0,2,0,2>((uint32<8>) a);
39     a64 = permute4<0,2,0,2>(a64);
40     return _mm256_castsi256_si128(a64.native());
41 #elif SIMDPP_USE_NEON64
42     uint32x2_t low = vmovn_u64(a.vec(0).native());
43     return vmovn_high_u64(low, a.vec(1).native());
44 #elif SIMDPP_USE_NEON
45     uint32x2_t low = vmovn_u64(a.vec(0).native());
46     uint32x2_t high = vmovn_u64(a.vec(1).native());
47     return vcombine_u32(low, high);
48 #elif SIMDPP_USE_VSX_207
49     return vec_pack(a.vec(0).native(), a.vec(1).native());
50 #elif SIMDPP_USE_ALTIVEC && SIMDPP_BIG_ENDIAN
51     uint32<4> r1, r2;
52     r1 = a.vec(0);
53     r2 = a.vec(1);
54     return unzip4_hi(r1, r2);
55 #elif SIMDPP_USE_SSE2 || SIMDPP_USE_MSA || (SIMDPP_USE_ALTIVEC && SIMDPP_LITTLE_ENDIAN)
56     uint32<4> r1, r2;
57     r1 = a.vec(0);
58     r2 = a.vec(1);
59     return unzip4_lo(r1, r2);
60 #endif
61 }
62 
#if SIMDPP_USE_AVX2
/** Narrows eight 64-bit unsigned elements to 32 bits each.

    Only compiled when AVX2 is enabled. With AVX512F a single conversion
    instruction is used; otherwise the two sub-vectors of @a a are shuffled
    and merged into one result vector.

    @param a vector of eight 64-bit unsigned integers
    @return vector of eight 32-bit unsigned integers
*/
SIMDPP_INL uint32<8> i_to_uint32(const uint64<8>& a)
{
#if SIMDPP_USE_AVX512F
    return _mm512_cvtepi64_epi32(a.native());
#else
    // Shuffle each sub-vector viewed as eight 32-bit elements.
    // NOTE(review): relies on permute4 acting independently on each group of
    // four elements -- see the simdpp permute4 documentation.
    uint64<4> from_lo, from_hi;
    from_lo = permute4<0,2,0,2>(static_cast<uint32<8>>(a.vec(0)));
    from_hi = permute4<0,2,0,2>(static_cast<uint32<8>>(a.vec(1)));

    // Interleave the two shuffled vectors as 64-bit elements, then reorder
    // the 64-bit elements of the interleaved result.
    uint64<4> merged;
    merged = zip2_lo(from_lo, from_hi);
    merged = permute4<0,2,1,3>(merged);
    return static_cast<uint32<8>>(merged);
#endif
}
#endif
78 
#if SIMDPP_USE_AVX512F
/** Narrows sixteen 64-bit unsigned elements to 32 bits each.

    Only compiled when AVX512F is enabled. Each of the two sub-vectors of
    @a a is converted separately and the two results are joined with
    combine().

    @param a vector of sixteen 64-bit unsigned integers
    @return vector of sixteen 32-bit unsigned integers
*/
SIMDPP_INL uint32<16> i_to_uint32(const uint64<16>& a)
{
    const uint32<8> lower = _mm512_cvtepi64_epi32(a.vec(0).native());
    const uint32<8> upper = _mm512_cvtepi64_epi32(a.vec(1).native());
    return combine(lower, upper);
}
#endif
87 
88 template<unsigned N> SIMDPP_INL
i_to_uint32(const uint64<N> & a)89 uint32<N> i_to_uint32(const uint64<N>& a)
90 {
91     SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT(uint32<N>, i_to_uint32, a)
92 }
93 
94 
95 } // namespace insn
96 } // namespace detail
97 } // namespace SIMDPP_ARCH_NAMESPACE
98 } // namespace simdpp
99 
100 #endif
101 
102 
103