1 /*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>
2 
3     Distributed under the Boost Software License, Version 1.0.
4         (See accompanying file LICENSE_1_0.txt or copy at
5             http://www.boost.org/LICENSE_1_0.txt)
6 */
7 
8 #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_BROADCAST_W_H
9 #define LIBSIMDPP_SIMDPP_DETAIL_INSN_BROADCAST_W_H
10 
11 #ifndef LIBSIMDPP_SIMD_H
12     #error "This file must be included through simd.h"
13 #endif
14 
15 #include <simdpp/types.h>
16 #include <simdpp/core/splat_n.h>
17 #include <simdpp/detail/extract128.h>
18 #include <simdpp/detail/insn/shuffle128.h>
19 #include <simdpp/detail/shuffle/shuffle_mask.h>
20 
21 namespace simdpp {
22 namespace SIMDPP_ARCH_NAMESPACE {
23 namespace detail {
24 namespace insn {
25 
26 // -----------------------------------------------------------------------------
27 
28 template<unsigned s> SIMDPP_INL
i_splat(const uint8x16 & a)29 uint8x16 i_splat(const uint8x16& a)
30 {
31     return i_splat16<s>(a);
32 }
33 
34 #if SIMDPP_USE_AVX2
35 template<unsigned s> SIMDPP_INL
i_splat(const uint8x32 & a)36 uint8x32 i_splat(const uint8x32& a)
37 {
38     static_assert(s < 32, "Access out of bounds");
39     uint8x16 lo;
40     lo = s < 16 ? detail::extract128<0>(a) : detail::extract128<1>(a);
41     lo = move16_l<s % 16>(lo);
42     return _mm256_broadcastb_epi8(lo.native());
43 }
44 #endif
45 
46 #if SIMDPP_USE_AVX512BW
47 template<unsigned s> SIMDPP_INL
i_splat(const uint8<64> & a)48 uint8<64> i_splat(const uint8<64>& a)
49 {
50     static_assert(s < 64, "Access out of bounds");
51     uint8<16> lo;
52     lo = detail::extract128<s / 16>(a);
53     lo = move16_l<s % 16>(lo);
54     return _mm512_broadcastb_epi8(lo.native());
55 }
56 #endif
57 
58 // -----------------------------------------------------------------------------
59 
60 template<unsigned s> SIMDPP_INL
i_splat(const uint16x8 & a)61 uint16x8 i_splat(const uint16x8& a)
62 {
63     return i_splat8<s>(a);
64 }
65 
66 #if SIMDPP_USE_AVX2
67 template<unsigned s> SIMDPP_INL
i_splat(const uint16x16 & a)68 uint16x16 i_splat(const uint16x16& a)
69 {
70     static_assert(s < 16, "Access out of bounds");
71     uint16x8 lo;
72     lo = s < 8 ? detail::extract128<0>(a) : detail::extract128<1>(a);
73     lo = move8_l<s % 8>(lo);
74     return _mm256_broadcastw_epi16(lo.native());
75 }
76 #endif
77 
78 #if SIMDPP_USE_AVX512BW
79 template<unsigned s> SIMDPP_INL
i_splat(const uint16<32> & a)80 uint16<32> i_splat(const uint16<32>& a)
81 {
82     static_assert(s < 32, "Access out of bounds");
83     uint16<8> lo;
84     lo = detail::extract128<s / 8>(a);
85     lo = move8_l<s % 8>(lo);
86     return _mm512_broadcastw_epi16(lo.native());
87 }
88 #endif
89 
90 // -----------------------------------------------------------------------------
91 
92 template<unsigned s> SIMDPP_INL
i_splat(const uint32x4 & a)93 uint32x4 i_splat(const uint32x4& a)
94 {
95     return i_splat4<s>(a);
96 }
97 
98 #if SIMDPP_USE_AVX2
99 template<unsigned s> SIMDPP_INL
i_splat(const uint32x8 & ca)100 uint32x8 i_splat(const uint32x8& ca)
101 {
102     static_assert(s < 8, "Access out of bounds");
103     uint32<8> a = ca;
104     a = permute4<s%4,s%4,s%4,s%4>(a);
105     a = detail::shuffle1_128<s/4, s/4>(a, a);
106     return a;
107 }
108 #endif
109 
110 #if SIMDPP_USE_AVX512F
111 template<unsigned s> SIMDPP_INL
i_splat(const uint32<16> & ca)112 uint32<16> i_splat(const uint32<16>& ca)
113 {
114     static_assert(s < 16, "Access out of bounds");
115     uint32<16> a = ca;
116     a = permute4<s%4,s%4,s%4,s%4>(a);
117     a = detail::shuffle2_128<s/4, s/4, s/4, s/4>(a, a);
118     return a;
119 }
120 #endif
121 
122 // -----------------------------------------------------------------------------
123 
124 template<unsigned s> SIMDPP_INL
i_splat(const uint64x2 & a)125 uint64x2 i_splat(const uint64x2& a)
126 {
127     return i_splat2<s>(a);
128 }
129 
130 #if SIMDPP_USE_AVX2
131 template<unsigned s> SIMDPP_INL
i_splat(const uint64x4 & a)132 uint64x4 i_splat(const uint64x4& a)
133 {
134     static_assert(s < 4, "Access out of bounds");
135     return permute4<s,s,s,s>(a);
136 }
137 #endif
138 
139 #if SIMDPP_USE_AVX512F
140 template<unsigned s> SIMDPP_INL
i_splat(const uint64<8> & ca)141 uint64<8> i_splat(const uint64<8>& ca)
142 {
143     static_assert(s < 8, "Access out of bounds");
144     uint64<8> a = ca;
145     a = permute2<s%2,s%2>(a);
146     a = _mm512_shuffle_i64x2(a.native(), a.native(),
147                              SIMDPP_SHUFFLE_MASK_4x4(s/2, s/2, s/2, s/2)); // TODO extract
148     return a;
149 }
150 #endif
151 
152 // -----------------------------------------------------------------------------
153 
154 template<unsigned s> SIMDPP_INL
i_splat(const float32x4 & a)155 float32x4 i_splat(const float32x4& a)
156 {
157     return i_splat4<s>(a);
158 }
159 
160 #if SIMDPP_USE_AVX
161 template<unsigned s> SIMDPP_INL
i_splat(const float32x8 & ca)162 float32x8 i_splat(const float32x8& ca)
163 {
164     static_assert(s < 8, "Access out of bounds");
165     float32<8> a = ca;
166     a = shuffle1_128<s/4,s/4>(a, a);
167     return permute4<s%4,s%4,s%4,s%4>(a);
168 }
169 #endif
170 
171 #if SIMDPP_USE_AVX512F
172 template<unsigned s> SIMDPP_INL
i_splat(const float32<16> & ca)173 float32<16> i_splat(const float32<16>& ca)
174 {
175     static_assert(s < 16, "Access out of bounds");
176     float32<16> a = ca;
177     a = permute4<s%4,s%4,s%4,s%4>(a);
178     a = _mm512_shuffle_f32x4(a.native(), a.native(),
179                              SIMDPP_SHUFFLE_MASK_4x4(s/4, s/4, s/4, s/4));
180     return a;
181 }
182 #endif
183 
184 // -----------------------------------------------------------------------------
185 
186 template<unsigned s> SIMDPP_INL
i_splat(const float64x2 & a)187 float64x2 i_splat(const float64x2& a)
188 {
189     return i_splat2<s>(a);
190 }
191 
192 #if SIMDPP_USE_AVX
193 template<unsigned s> SIMDPP_INL
i_splat(const float64x4 & a)194 float64x4 i_splat(const float64x4& a)
195 {
196     static_assert(s < 4, "Access out of bounds");
197 #if SIMDPP_USE_AVX2
198     return permute4<s,s,s,s>(a);
199 #else // SIMDPP_USE_AVX
200     float64<4> b;
201     b = detail::shuffle1_128<s/2,s/2>(a, a);
202     b = permute2<s%2,s%2>(b);
203     return b;
204 #endif
205 }
206 #endif
207 
208 #if SIMDPP_USE_AVX512F
209 template<unsigned s> SIMDPP_INL
i_splat(const float64<8> & ca)210 float64<8> i_splat(const float64<8>& ca)
211 {
212     static_assert(s < 8, "Access out of bounds");
213     float64<8> a = ca;
214     a = permute2<s%2,s%2>(a);
215     a = _mm512_shuffle_f64x2(a.native(), a.native(),
216                              SIMDPP_SHUFFLE_MASK_4x4(s/2, s/2, s/2, s/2)); // TODO extract
217     return a;
218 }
219 #endif
220 
221 // -----------------------------------------------------------------------------
222 
223 template<unsigned s, class V> SIMDPP_INL
i_splat(const V & a)224 V i_splat(const V& a)
225 {
226     static_assert(s < V::length, "Access out of bounds");
227 
228     using U = typename V::base_vector_type;
229     U one = a.vec(s / U::length);
230 
231     one = i_splat<s % U::length>(one);
232 
233     V r;
234     for (unsigned i = 0; i < V::vec_length; ++i) {
235         r.vec(i) = one;
236     }
237     return r;
238 }
239 
240 } // namespace insn
241 } // namespace detail
242 } // namespace SIMDPP_ARCH_NAMESPACE
243 } // namespace simdpp
244 
245 #endif
246 
247