1 /* Copyright (C) 2017 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6 */
7
8 #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_FLOAT_TO_INT64_H
9 #define LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_FLOAT_TO_INT64_H
10
11 #ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13 #endif
14
15 #include <simdpp/types.h>
16 #include <simdpp/detail/insn/conv_any_to_float64.h>
17 #include <simdpp/detail/vector_array_conv_macros.h>
18
19 namespace simdpp {
20 namespace SIMDPP_ARCH_NAMESPACE {
21 namespace detail {
22 namespace insn {
23
24
25 static SIMDPP_INL
i_to_uint64(const float64<2> & a)26 uint64<2> i_to_uint64(const float64<2>& a)
27 {
28 #if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
29 return _mm_cvttpd_epu64(a.native());
30 #elif SIMDPP_USE_AVX512DQ
31 __m512d a512 = _mm512_castpd128_pd512(a.native());
32 return _mm512_castsi512_si128(_mm512_cvttpd_epu64(a512));
33 #elif SIMDPP_USE_NEON64
34 return vcvtq_u64_f64(a.native());
35 #elif SIMDPP_USE_VSX_206
36 uint32<4> r;
37 r = (__vector uint32_t) vec_ctu(a.native(), 0);
38 return (uint64<2>) r;
39 #elif SIMDPP_USE_VSX_207
40 return vec_ctu(a.native(), 0);
41 #elif SIMDPP_USE_MSA
42 return __msa_ftrunc_u_d(a.native());
43 #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
44 detail::mem_block<uint64<2>> r;
45 r[0] = uint64_t(a.el(0));
46 r[1] = uint64_t(a.el(1));
47 return r;
48 #else
49 return SIMDPP_NOT_IMPLEMENTED1(a);
50 #endif
51 }
52
#if SIMDPP_USE_AVX
// Truncating conversion of four doubles to four unsigned 64-bit integers.
static SIMDPP_INL
uint64<4> i_to_uint64(const float64<4>& a)
{
#if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
    return _mm256_cvttpd_epu64(a.native());
#elif SIMDPP_USE_AVX512DQ
    // Without AVX512VL only the 512-bit form of the conversion exists:
    // widen the input, convert, then take the low 256 bits.
    __m512d wide = _mm512_castpd256_pd512(a.native());
    return _mm512_castsi512_si256(_mm512_cvttpd_epu64(wide));
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
67
#if SIMDPP_USE_AVX512F
// Truncating conversion of eight doubles to eight unsigned 64-bit integers.
static SIMDPP_INL
uint64<8> i_to_uint64(const float64<8>& a)
{
#if SIMDPP_USE_AVX512DQ
    // Full-width VCVTTPD2UQQ; only AVX512DQ is needed for the zmm form.
    return _mm512_cvttpd_epu64(a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
79
// Generic wide-vector case: applies i_to_uint64 to each native-width
// sub-vector of the source and concatenates the results into uint64<N>.
template<unsigned N> SIMDPP_INL
uint64<N> i_to_uint64(const float64<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT(uint64<N>, i_to_uint64, a)
}
85
86 // -----------------------------------------------------------------------------
87
88 static SIMDPP_INL
i_to_int64(const float64<2> & a)89 int64<2> i_to_int64(const float64<2>& a)
90 {
91 #if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
92 return _mm_cvttpd_epi64(a.native());
93 #elif SIMDPP_USE_AVX512DQ
94 __m512d a512 = _mm512_castpd128_pd512(a.native());
95 return _mm512_castsi512_si128(_mm512_cvttpd_epi64(a512));
96 #elif SIMDPP_USE_NEON64
97 return vcvtq_s64_f64(a.native());
98 #elif SIMDPP_USE_VSX_207
99 return vec_cts(a.native(), 0);
100 #elif SIMDPP_USE_VSX_206
101 int32<4> r;
102 r = (__vector int32_t) vec_cts(a.native(), 0);
103 return (int64<2>) r;
104 #elif SIMDPP_USE_MSA
105 return __msa_ftrunc_s_d(a.native());
106 #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
107 detail::mem_block<int64<2>> r;
108 r[0] = int64_t(a.el(0));
109 r[1] = int64_t(a.el(1));
110 return r;
111 #else
112 return SIMDPP_NOT_IMPLEMENTED1(a);
113 #endif
114 }
115
#if SIMDPP_USE_AVX
// Truncating conversion of four doubles to four signed 64-bit integers.
static SIMDPP_INL
int64<4> i_to_int64(const float64<4>& a)
{
#if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
    return _mm256_cvttpd_epi64(a.native());
#elif SIMDPP_USE_AVX512DQ
    // Without AVX512VL only the 512-bit form of the conversion exists:
    // widen the input, convert, then take the low 256 bits.
    __m512d wide = _mm512_castpd256_pd512(a.native());
    return _mm512_castsi512_si256(_mm512_cvttpd_epi64(wide));
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
130
#if SIMDPP_USE_AVX512F
// Truncating conversion of eight doubles to eight signed 64-bit integers.
static SIMDPP_INL
int64<8> i_to_int64(const float64<8>& a)
{
#if SIMDPP_USE_AVX512DQ
    // Full-width VCVTTPD2QQ; only AVX512DQ is needed for the zmm form.
    return _mm512_cvttpd_epi64(a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
142
// Generic wide-vector case: applies i_to_int64 to each native-width
// sub-vector of the source and concatenates the results into int64<N>.
template<unsigned N> SIMDPP_INL
int64<N> i_to_int64(const float64<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_EXTRACT(int64<N>, i_to_int64, a)
}
148
149 // ----------------------------------------------------------------------------
150
151 static SIMDPP_INL
i_to_uint64(const float32<4> & a)152 uint64<4> i_to_uint64(const float32<4>& a)
153 {
154 #if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
155 return _mm256_cvttps_epu64(a.native());
156 #elif SIMDPP_USE_AVX512DQ
157 __m256 a256 = _mm256_castps128_ps256(a.native());
158 return _mm512_castsi512_si256(_mm512_cvttps_epu64(a256));
159 #elif SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA
160 return i_to_uint64(i_to_float64(a));
161 #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
162 detail::mem_block<uint64<4>> r;
163 detail::mem_block<float32<4>> mi(a);
164 r[0] = int64_t(mi[0]);
165 r[1] = int64_t(mi[1]);
166 r[2] = int64_t(mi[2]);
167 r[3] = int64_t(mi[3]);
168 return r;
169 #else
170 return SIMDPP_NOT_IMPLEMENTED1(a);
171 #endif
172 }
173
#if SIMDPP_USE_AVX
// Truncating conversion of eight floats to eight unsigned 64-bit integers.
static SIMDPP_INL
uint64<8> i_to_uint64(const float32<8>& a)
{
#if SIMDPP_USE_AVX512DQ
    // The ymm -> zmm form of VCVTTPS2UQQ only needs AVX512DQ; AVX512VL is
    // required solely for the 128/256-bit destination forms, so demanding it
    // here would needlessly disable this implementation.
    return _mm512_cvttps_epu64(a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
185
#if SIMDPP_USE_AVX512F
// Truncating conversion of sixteen floats to sixteen unsigned 64-bit
// integers, processed as two 256-bit halves.
static SIMDPP_INL
uint64<16> i_to_uint64(const float32<16>& a)
{
#if SIMDPP_USE_AVX512DQ
    float32<8> lo, hi;
    split(a, lo, hi);

    uint64<16> r;
    r.vec(0) = _mm512_cvttps_epu64(lo.native());
    r.vec(1) = _mm512_cvttps_epu64(hi.native());
    return r;
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
204
// Generic wide-vector case: converts each native-width sub-vector of the
// float32 source (each produces twice as many result vectors) and inserts
// the pieces into uint64<N>.
template<unsigned N> SIMDPP_INL
uint64<N> i_to_uint64(const float32<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(uint64<N>, i_to_uint64, a)
}
210
211 // -----------------------------------------------------------------------------
212
213 static SIMDPP_INL
i_to_int64(const float32<4> & a)214 int64<4> i_to_int64(const float32<4>& a)
215 {
216 #if SIMDPP_USE_AVX512DQ && SIMDPP_USE_AVX512VL
217 return _mm256_cvttps_epi64(a.native());
218 #elif SIMDPP_USE_AVX512DQ
219 __m256 a256 = _mm256_castps128_ps256(a.native());
220 return _mm512_castsi512_si256(_mm512_cvttps_epi64(a256));
221 #elif SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA
222 return i_to_int64(i_to_float64(a));
223 #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
224 detail::mem_block<int64<4>> r;
225 detail::mem_block<float32<4>> mi(a);
226 r[0] = int64_t(mi[0]);
227 r[1] = int64_t(mi[1]);
228 r[2] = int64_t(mi[2]);
229 r[3] = int64_t(mi[3]);
230 return r;
231 #else
232 return SIMDPP_NOT_IMPLEMENTED1(a);
233 #endif
234 }
235
#if SIMDPP_USE_AVX
// Truncating conversion of eight floats to eight signed 64-bit integers.
static SIMDPP_INL
int64<8> i_to_int64(const float32<8>& a)
{
#if SIMDPP_USE_AVX512DQ
    // The ymm -> zmm form of VCVTTPS2QQ only needs AVX512DQ; AVX512VL is
    // required solely for the 128/256-bit destination forms, so demanding it
    // here would needlessly disable this implementation.
    return _mm512_cvttps_epi64(a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
247
#if SIMDPP_USE_AVX512F
// Truncating conversion of sixteen floats to sixteen signed 64-bit integers,
// processed as two 256-bit halves.
static SIMDPP_INL
int64<16> i_to_int64(const float32<16>& a)
{
#if SIMDPP_USE_AVX512DQ
    float32<8> lo, hi;
    split(a, lo, hi);

    int64<16> r;
    r.vec(0) = _mm512_cvttps_epi64(lo.native());
    r.vec(1) = _mm512_cvttps_epi64(hi.native());
    return r;
#else
    return SIMDPP_NOT_IMPLEMENTED1(a);
#endif
}
#endif
266
// Generic wide-vector case: converts each native-width sub-vector of the
// float32 source (each produces twice as many result vectors) and inserts
// the pieces into int64<N>.
template<unsigned N> SIMDPP_INL
int64<N> i_to_int64(const float32<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(int64<N>, i_to_int64, a)
}
272
273 } // namespace insn
274 } // namespace detail
275 } // namespace SIMDPP_ARCH_NAMESPACE
276 } // namespace simdpp
277
278 #endif
279
280
281