1 /*  Copyright (C) 2013-2014  Povilas Kanapickas <povilas@radix.lt>
2 
3     Distributed under the Boost Software License, Version 1.0.
4         (See accompanying file LICENSE_1_0.txt or copy at
5             http://www.boost.org/LICENSE_1_0.txt)
6 */
7 
8 #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_PERMUTE2_H
9 #define LIBSIMDPP_SIMDPP_DETAIL_INSN_PERMUTE2_H
10 
11 #ifndef LIBSIMDPP_SIMD_H
12     #error "This file must be included through simd.h"
13 #endif
14 
15 #include <simdpp/types.h>
16 #include <simdpp/core/make_shuffle_bytes_mask.h>
17 #include <simdpp/core/permute4.h>
18 #include <simdpp/detail/null/shuffle.h>
19 #include <simdpp/detail/shuffle/neon_int16x8.h>
20 #include <simdpp/detail/shuffle/neon_int32x4.h>
21 #include <simdpp/detail/shuffle/neon_int64x2.h>
22 #include <simdpp/detail/shuffle/shuffle_mask.h>
23 #include <simdpp/detail/vector_array_macros.h>
24 
25 namespace simdpp {
26 namespace SIMDPP_ARCH_NAMESPACE {
27 namespace detail {
28 namespace insn {
29 
30 
31 template<unsigned s0, unsigned s1, unsigned N> SIMDPP_INL
i_permute2(const uint16<N> & a)32 uint16<N> i_permute2(const uint16<N>& a)
33 {
34     static_assert(s0 < 2 && s1 < 2, "Selector out of range");
35     return i_permute4<s0,s1,s0+2,s1+2>(a);
36 }
37 
38 
39 template<unsigned s0, unsigned s1, unsigned N> SIMDPP_INL
i_permute2(const uint32<N> & a)40 uint32<N> i_permute2(const uint32<N>& a)
41 {
42     static_assert(s0 < 2 && s1 < 2, "Selector out of range");
43     return i_permute4<s0,s1,s0+2,s1+2>(a);
44 }
45 
46 
47 template<unsigned s0, unsigned s1, unsigned N> SIMDPP_INL
i_permute2(const float32<N> & a)48 float32<N> i_permute2(const float32<N>& a)
49 {
50     static_assert(s0 < 2 && s1 < 2, "Selector out of range");
51     return i_permute4<s0,s1,s0+2,s1+2>(a);
52 }
53 
54 
55 template<unsigned s0, unsigned s1> SIMDPP_INL
i_permute2(const uint64x2 & a)56 uint64x2 i_permute2(const uint64x2& a)
57 {
58     static_assert(s0 < 2 && s1 < 2, "Selector out of range");
59 #if SIMDPP_USE_SSE2 || SIMDPP_USE_MSA
60     return (uint64x2) i_permute4<s0*2, s0*2+1, s1*2, s1*2+1>(int32x4(a));
61 #elif SIMDPP_USE_NEON
62     return detail::neon_shuffle_int64x2::permute2<s0,s1>(a);
63 #elif SIMDPP_USE_VSX_207
64     return vec_xxpermdi(a.native(), a.native(),
65                         SIMDPP_VSX_SHUFFLE_MASK_2x2(s0, s1));
66 #elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
67     return detail::null::permute<s0,s1>(a);
68 #endif
69 }
70 
#if SIMDPP_USE_AVX2
/**
 * Two-selector permute of a uint64x4 vector (only compiled with AVX2).
 *
 * Forwards to i_permute4 with the selector list <s0, s1, s0+2, s1+2>.
 *
 * @tparam s0  Selector for the first position; must be 0 or 1.
 * @tparam s1  Selector for the second position; must be 0 or 1.
 * @param  a   Vector to permute.
 */
template<unsigned s0, unsigned s1>
SIMDPP_INL uint64x4 i_permute2(const uint64x4& a)
{
    static_assert(s0 < 2 && s1 < 2, "Selector out of range");
    // Third/fourth selectors repeat the pair, shifted by two.
    return i_permute4<s0, s1, 2 + s0, 2 + s1>(a);
}
#endif
79 
#if SIMDPP_USE_AVX512F
/**
 * Two-selector permute of a uint64<8> vector (only compiled with AVX512F).
 *
 * Forwards to i_permute4 with the selector list <s0, s1, s0+2, s1+2>.
 *
 * @tparam s0  Selector for the first position; must be 0 or 1.
 * @tparam s1  Selector for the second position; must be 0 or 1.
 * @param  a   Vector to permute.
 */
template<unsigned s0, unsigned s1>
SIMDPP_INL uint64<8> i_permute2(const uint64<8>& a)
{
    static_assert(s0 < 2 && s1 < 2, "Selector out of range");
    // Third/fourth selectors repeat the pair, shifted by two.
    return i_permute4<s0, s1, 2 + s0, 2 + s1>(a);
}
#endif
88 
89 template<unsigned s0, unsigned s1, unsigned N> SIMDPP_INL
i_permute2(const uint64<N> & a)90 uint64<N> i_permute2(const uint64<N>& a)
91 {
92     static_assert(s0 < 2 && s1 < 2, "Selector out of range");
93     SIMDPP_VEC_ARRAY_IMPL1(uint64<N>, (i_permute2<s0,s1>), a);
94 }
95 
96 // -----------------------------------------------------------------------------
97 
98 template<unsigned s0, unsigned s1> SIMDPP_INL
i_permute2(const float64x2 & a)99 float64x2 i_permute2(const float64x2& a)
100 {
101     static_assert(s0 < 2 && s1 < 2, "Selector out of range");
102 #if SIMDPP_USE_SSE2
103     return _mm_shuffle_pd(a.native(), a.native(), SIMDPP_SHUFFLE_MASK_2x2(s0, s1));
104 #elif SIMDPP_USE_VSX_206
105     return vec_xxpermdi(a.native(), a.native(),
106                         SIMDPP_VSX_SHUFFLE_MASK_2x2(s0, s1));
107 #elif SIMDPP_USE_NEON64 || SIMDPP_USE_MSA
108     return float64x2(i_permute2<s0,s1>(int64x2(a)));
109 #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
110     return detail::null::permute<s0,s1>(a);
111 #endif
112 }
113 
#if SIMDPP_USE_AVX
/**
 * Two-selector permute of a float64x4 vector (only compiled with AVX).
 *
 * @tparam s0  Selector for the first position; must be 0 or 1.
 * @tparam s1  Selector for the second position; must be 0 or 1.
 * @param  a   Vector to permute.
 */
template<unsigned s0, unsigned s1>
SIMDPP_INL float64x4 i_permute2(const float64x4& a)
{
    static_assert(s0 < 2 && s1 < 2, "Selector out of range");
#if SIMDPP_USE_AVX2
    // Immediate packs four 2-bit fields, lowest first: s0, s1, s0+2, s1+2.
    return _mm256_permute4x64_pd(a.native(),
                                 (s0 << 0) | (s1 << 2) |
                                 ((s0 + 2) << 4) | ((s1 + 2) << 6));
#else // SIMDPP_USE_AVX
    // Immediate packs four 1-bit fields, lowest first: s0, s1, s0, s1.
    return _mm256_permute_pd(a.native(),
                             (s0 << 0) | (s1 << 1) | (s0 << 2) | (s1 << 3));
#endif
}
#endif
126 
#if SIMDPP_USE_AVX512F
/**
 * Two-selector permute of a float64<8> vector (only compiled with AVX512F).
 *
 * @tparam s0  Selector for the first position; must be 0 or 1.
 * @tparam s1  Selector for the second position; must be 0 or 1.
 * @param  a   Vector to permute.
 */
template<unsigned s0, unsigned s1>
SIMDPP_INL float64<8> i_permute2(const float64<8>& a)
{
    static_assert(s0 < 2 && s1 < 2, "Selector out of range");
    // Immediate packs eight 1-bit fields, lowest first: the (s0, s1) pair
    // repeated four times.
    return _mm512_permute_pd(a.native(),
                             (s0 << 0) | (s1 << 1) | (s0 << 2) | (s1 << 3) |
                             (s0 << 4) | (s1 << 5) | (s0 << 6) | (s1 << 7));
}
#endif
135 
136 template<unsigned s0, unsigned s1, unsigned N> SIMDPP_INL
i_permute2(const float64<N> & a)137 float64<N> i_permute2(const float64<N>& a)
138 {
139     static_assert(s0 < 2 && s1 < 2, "Selector out of range");
140     SIMDPP_VEC_ARRAY_IMPL1(float64<N>, (i_permute2<s0,s1>), a);
141 }
142 
143 } // namespace insn
144 } // namespace detail
145 } // namespace SIMDPP_ARCH_NAMESPACE
146 } // namespace simdpp
147 
148 #endif
149 
150