1 /*  Copyright (C) 2017  Povilas Kanapickas <povilas@radix.lt>
2 
3     Distributed under the Boost Software License, Version 1.0.
4         (See accompanying file LICENSE_1_0.txt or copy at
5             http://www.boost.org/LICENSE_1_0.txt)
6 */
7 
8 #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_FLOAT_TO_INT64_H
9 #define LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_FLOAT_TO_INT64_H
10 
11 #ifndef LIBSIMDPP_SIMD_H
12     #error "This file must be included through simd.h"
13 #endif
14 
15 #include <simdpp/types.h>
16 #include <simdpp/detail/insn/conv_any_to_float64.h>
17 #include <simdpp/detail/vector_array_conv_macros.h>
18 
19 namespace simdpp {
20 namespace SIMDPP_ARCH_NAMESPACE {
21 namespace detail {
22 namespace insn {
23 
24 
25 static SIMDPP_INL
i_to_uint64(const float64<2> & a)26 uint64<2> i_to_uint64(const float64<2>& a)
27 {
28 #if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
29     return _mm_cvttpd_epu64(a.native());
30 #elif SIMDPP_USE_AVX512DQ
31     __m512d a512 = _mm512_castpd128_pd512(a.native());
32     return _mm512_castsi512_si128(_mm512_cvttpd_epu64(a512));
33 #elif SIMDPP_USE_NEON64
34     return vcvtq_u64_f64(a.native());
35 #elif SIMDPP_USE_VSX_206
36     uint32<4> r;
37     r = (__vector uint32_t) vec_ctu(a.native(), 0);
38     return (uint64<2>) r;
39 #elif SIMDPP_USE_VSX_207
40     return vec_ctu(a.native(), 0);
41 #elif SIMDPP_USE_MSA
42     return __msa_ftrunc_u_d(a.native());
43 #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
44     detail::mem_block<uint64<2>> r;
45     r[0] = uint64_t(a.el(0));
46     r[1] = uint64_t(a.el(1));
47     return r;
48 #else
49     return SIMDPP_NOT_IMPLEMENTED1(a);
50 #endif
51 }
52 
#if SIMDPP_USE_AVX
// Converts four float64 lanes to unsigned 64-bit integers with truncation.
// An implementation exists only when AVX512DQ is enabled; every other
// configuration resolves to SIMDPP_NOT_IMPLEMENTED1.
static SIMDPP_INL
uint64<4> i_to_uint64(const float64<4>& a)
{
#if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
    return _mm256_cvttpd_epu64(a.native());
#elif SIMDPP_USE_AVX512DQ
    // Without AVX512VL only the 512-bit instruction is available: widen the
    // input (upper half undefined), convert, and keep the low 256 bits.
    const __m512d widened = _mm512_castpd256_pd512(a.native());
    const __m512i converted = _mm512_cvttpd_epu64(widened);
    return _mm512_castsi512_si256(converted);
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
67 
#if SIMDPP_USE_AVX512F
// Converts eight float64 lanes to unsigned 64-bit integers with truncation.
// Requires AVX512DQ; otherwise resolves to SIMDPP_NOT_IMPLEMENTED1.
static SIMDPP_INL
uint64<8> i_to_uint64(const float64<8>& a)
{
#if SIMDPP_USE_AVX512DQ
    const __m512d src = a.native();
    return _mm512_cvttpd_epu64(src);
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
79 
80 template<unsigned N> SIMDPP_INL
i_to_uint64(const float64<N> & a)81 uint64<N> i_to_uint64(const float64<N>& a)
82 {
83     SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT(uint64<N>, i_to_uint64, a)
84 }
85 
86 // -----------------------------------------------------------------------------
87 
88 static SIMDPP_INL
i_to_int64(const float64<2> & a)89 int64<2> i_to_int64(const float64<2>& a)
90 {
91 #if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
92     return _mm_cvttpd_epi64(a.native());
93 #elif SIMDPP_USE_AVX512DQ
94     __m512d a512 = _mm512_castpd128_pd512(a.native());
95     return _mm512_castsi512_si128(_mm512_cvttpd_epi64(a512));
96 #elif SIMDPP_USE_NEON64
97     return vcvtq_s64_f64(a.native());
98 #elif SIMDPP_USE_VSX_207
99     return vec_cts(a.native(), 0);
100 #elif SIMDPP_USE_VSX_206
101     int32<4> r;
102     r = (__vector int32_t) vec_cts(a.native(), 0);
103     return (int64<2>) r;
104 #elif SIMDPP_USE_MSA
105     return __msa_ftrunc_s_d(a.native());
106 #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
107     detail::mem_block<int64<2>> r;
108     r[0] = int64_t(a.el(0));
109     r[1] = int64_t(a.el(1));
110     return r;
111 #else
112     return SIMDPP_NOT_IMPLEMENTED1(a);
113 #endif
114 }
115 
#if SIMDPP_USE_AVX
// Converts four float64 lanes to signed 64-bit integers with truncation.
// An implementation exists only when AVX512DQ is enabled; every other
// configuration resolves to SIMDPP_NOT_IMPLEMENTED1.
static SIMDPP_INL
int64<4> i_to_int64(const float64<4>& a)
{
#if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
    return _mm256_cvttpd_epi64(a.native());
#elif SIMDPP_USE_AVX512DQ
    // Without AVX512VL only the 512-bit instruction is available: widen the
    // input (upper half undefined), convert, and keep the low 256 bits.
    const __m512d widened = _mm512_castpd256_pd512(a.native());
    const __m512i converted = _mm512_cvttpd_epi64(widened);
    return _mm512_castsi512_si256(converted);
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
130 
#if SIMDPP_USE_AVX512F
// Converts eight float64 lanes to signed 64-bit integers with truncation.
// Requires AVX512DQ; otherwise resolves to SIMDPP_NOT_IMPLEMENTED1.
static SIMDPP_INL
int64<8> i_to_int64(const float64<8>& a)
{
#if SIMDPP_USE_AVX512DQ
    const __m512d src = a.native();
    return _mm512_cvttpd_epi64(src);
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
142 
143 template<unsigned N> SIMDPP_INL
i_to_int64(const float64<N> & a)144 int64<N> i_to_int64(const float64<N>& a)
145 {
146     SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT(int64<N>, i_to_int64, a)
147 }
148 
149 // ----------------------------------------------------------------------------
150 
151 static SIMDPP_INL
i_to_uint64(const float32<4> & a)152 uint64<4> i_to_uint64(const float32<4>& a)
153 {
154 #if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
155     return _mm256_cvttps_epu64(a.native());
156 #elif SIMDPP_USE_AVX512DQ
157     __m256 a256 = _mm256_castps128_ps256(a.native());
158     return _mm512_castsi512_si256(_mm512_cvttps_epu64(a256));
159 #elif SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA
160     return i_to_uint64(i_to_float64(a));
161 #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
162     detail::mem_block<uint64<4>> r;
163     detail::mem_block<float32<4>> mi(a);
164     r[0] = int64_t(mi[0]);
165     r[1] = int64_t(mi[1]);
166     r[2] = int64_t(mi[2]);
167     r[3] = int64_t(mi[3]);
168     return r;
169 #else
170     return SIMDPP_NOT_IMPLEMENTED1(a);
171 #endif
172 }
173 
#if SIMDPP_USE_AVX
// Converts eight float32 lanes to unsigned 64-bit integers with truncation.
// Requires AVX512DQ; otherwise resolves to SIMDPP_NOT_IMPLEMENTED1.
static SIMDPP_INL
uint64<8> i_to_uint64(const float32<8>& a)
{
#if SIMDPP_USE_AVX512DQ
    // _mm512_cvttps_epu64 is the 512-bit form of the instruction and needs
    // only AVX512DQ. The float32<4> and float32<16> overloads in this file
    // already call it without requiring AVX512VL, so no VL check is made here.
    return _mm512_cvttps_epu64(a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
185 
#if SIMDPP_USE_AVX512F
// Converts sixteen float32 lanes to unsigned 64-bit integers with truncation.
// Requires AVX512DQ; otherwise resolves to SIMDPP_NOT_IMPLEMENTED1.
static SIMDPP_INL
uint64<16> i_to_uint64(const float32<16>& a)
{
#if SIMDPP_USE_AVX512DQ
    // One 512-bit conversion consumes eight floats, so the input is split
    // into two eight-lane parts that fill the two sub-vectors of the result.
    uint64<16> result;
    float32<8> part0, part1;
    split(a, part0, part1);

    result.vec(0) = _mm512_cvttps_epu64(part0.native());
    result.vec(1) = _mm512_cvttps_epu64(part1.native());

    return result;
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
204 
205 template<unsigned N> SIMDPP_INL
i_to_uint64(const float32<N> & a)206 uint64<N> i_to_uint64(const float32<N>& a)
207 {
208     SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(uint64<N>, i_to_uint64, a)
209 }
210 
211 // -----------------------------------------------------------------------------
212 
213 static SIMDPP_INL
i_to_int64(const float32<4> & a)214 int64<4> i_to_int64(const float32<4>& a)
215 {
216 #if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
217     return _mm256_cvttps_epi64(a.native());
218 #elif SIMDPP_USE_AVX512DQ
219     __m256 a256 = _mm256_castps128_ps256(a.native());
220     return _mm512_castsi512_si256(_mm512_cvttps_epi64(a256));
221 #elif SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA
222     return i_to_int64(i_to_float64(a));
223 #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
224     detail::mem_block<int64<4>> r;
225     detail::mem_block<float32<4>> mi(a);
226     r[0] = int64_t(mi[0]);
227     r[1] = int64_t(mi[1]);
228     r[2] = int64_t(mi[2]);
229     r[3] = int64_t(mi[3]);
230     return r;
231 #else
232     return SIMDPP_NOT_IMPLEMENTED1(a);
233 #endif
234 }
235 
#if SIMDPP_USE_AVX
// Converts eight float32 lanes to signed 64-bit integers with truncation.
// Requires AVX512DQ; otherwise resolves to SIMDPP_NOT_IMPLEMENTED1.
static SIMDPP_INL
int64<8> i_to_int64(const float32<8>& a)
{
#if SIMDPP_USE_AVX512DQ
    // _mm512_cvttps_epi64 is the 512-bit form of the instruction and needs
    // only AVX512DQ. The float32<4> and float32<16> overloads in this file
    // already call it without requiring AVX512VL, so no VL check is made here.
    return _mm512_cvttps_epi64(a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
247 
#if SIMDPP_USE_AVX512F
// Converts sixteen float32 lanes to signed 64-bit integers with truncation.
// Requires AVX512DQ; otherwise resolves to SIMDPP_NOT_IMPLEMENTED1.
static SIMDPP_INL
int64<16> i_to_int64(const float32<16>& a)
{
#if SIMDPP_USE_AVX512DQ
    // One 512-bit conversion consumes eight floats, so the input is split
    // into two eight-lane parts that fill the two sub-vectors of the result.
    int64<16> result;
    float32<8> part0, part1;
    split(a, part0, part1);

    result.vec(0) = _mm512_cvttps_epi64(part0.native());
    result.vec(1) = _mm512_cvttps_epi64(part1.native());

    return result;
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
266 
267 template<unsigned N> SIMDPP_INL
i_to_int64(const float32<N> & a)268 int64<N> i_to_int64(const float32<N>& a)
269 {
270     SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(int64<N>, i_to_int64, a)
271 }
272 
273 } // namespace insn
274 } // namespace detail
275 } // namespace SIMDPP_ARCH_NAMESPACE
276 } // namespace simdpp
277 
278 #endif
279 
280 
281