1 /** @addtogroup types
2 * @{
3 */
4 /*
5 Copyright (C) 2016 D Levin (https://www.kfrlib.com)
6 This file is part of KFR
7
8 KFR is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 2 of the License, or
11 (at your option) any later version.
12
13 KFR is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with KFR.
20
21 If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
22 Buying a commercial license is mandatory as soon as you develop commercial activities without
23 disclosing the source code of your own applications.
24 See https://www.kfrlib.com for details.
25 */
26 #pragma once
27
28 #include "../version.hpp"
29 #include "constants.hpp"
30 #include "impl/backend.hpp"
31
32 /**
33 * @brief Internal macro for functions
34 */
35 #define KFR_FN(FN) \
36 namespace fn \
37 { \
38 struct FN \
39 { \
40 template <typename... Args> \
41 CMT_INLINE_MEMBER decltype(::kfr::FN(std::declval<Args>()...)) operator()(Args&&... args) const \
42 { \
43 return ::kfr::FN(std::forward<Args>(args)...); \
44 } \
45 }; \
46 }
47
48 /**
49 * @brief Internal macro for functions
50 */
51 #define KFR_I_FN(FN) \
52 namespace fn \
53 { \
54 struct FN \
55 { \
56 template <typename... Args> \
57 CMT_INLINE_MEMBER decltype(::kfr::intrinsics::FN(std::declval<Args>()...)) operator()( \
58 Args&&... args) const \
59 { \
60 return ::kfr::intrinsics::FN(std::forward<Args>(args)...); \
61 } \
62 }; \
63 }
64
/**
 * @brief Internal helper macro: defines a function object `fn::FN` that
 * forwards to an explicitly-spelled target `FULLFN` (used when the target's
 * qualified name differs from FN).
 */
#define KFR_I_FN_FULL(FN, FULLFN)                                                                            \
    namespace fn                                                                                             \
    {                                                                                                        \
    struct FN                                                                                                \
    {                                                                                                        \
        template <typename... Args>                                                                          \
        CMT_INLINE_MEMBER decltype(FULLFN(std::declval<Args>()...)) operator()(Args&&... args) const         \
        {                                                                                                    \
            return FULLFN(std::forward<Args>(args)...);                                                      \
        }                                                                                                    \
    };                                                                                                       \
    }
77
78 CMT_PRAGMA_GNU(GCC diagnostic push)
79 CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wpragmas")
80 CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wfloat-equal")
81 CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wc++98-compat-local-type-template-args")
82 CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wshadow")
83 CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wpacked")
84
85 CMT_PRAGMA_MSVC(warning(push))
86 CMT_PRAGMA_MSVC(warning(disable : 4814))
87
88 namespace kfr
89 {
90
91 inline namespace CMT_ARCH_NAME
92 {
93
/// Over-aligned POD vector of N elements of T stored as a plain array.
/// Unlike vec<T, N>, its layout does not depend on the target SIMD backend,
/// so it is safe to pass across architecture-specific translation units.
template <typename T, size_t N>
struct alignas(next_poweroftwo(sizeof(T)) * next_poweroftwo(N)) portable_vec
{
    /// Compile-time (element type, width) tag for this vector.
    static constexpr vec_shape<T, N> shape() CMT_NOEXCEPT { return {}; }

    static_assert(N > 0 && N <= 1024, "Invalid vector size");

    static_assert(is_simd_type<T> || !compound_type_traits<T>::is_scalar, "Invalid vector type");

    // type and size
    using value_type = T;

    /// Number of elements.
    constexpr static size_t size() CMT_NOEXCEPT { return N; }

    /// Portable storage: a plain C array of elements.
    T elem[N];
};
110
111 template <typename T, size_t N>
112 struct vec;
113
/// View of a vector as two halves: `low` holds the largest power of two not
/// exceeding N-1 elements, `high` holds the remainder. Used for recursive
/// (divide-and-conquer) algorithms over vectors.
template <typename T, size_t N>
struct vec_halves
{
    vec<T, prev_poweroftwo(N - 1)> low;
    vec<T, N - prev_poweroftwo(N - 1)> high;
};
120
/// Recursion terminator: a one-element vector has a single scalar value
/// instead of two halves.
template <typename T>
struct vec_halves<T, 1>
{
    T val;
};
126
127 namespace internal
128 {
129
// scalar to scalar
/// Primary conversion trait: converts a scalar From to a scalar To via the
/// implicit conversion. Specializations below handle vector conversions.
template <typename To, typename From>
struct conversion
{
    static_assert(is_convertible<From, To>, "");

    static To cast(const From& value) { return value; }
};
138
/// Converts between a (possibly nested) vector type and its flat representation
/// (a single vec of the deepest scalar type). The scalar case wraps/unwraps a
/// width-1 vector; nested-vector cases reinterpret the underlying SIMD value.
template <typename T>
struct compoundcast
{
    static vec<T, 1> to_flat(const T& x) { return vec<T, 1>(x); }

    static T from_flat(const vec<T, 1>& x) { return x.front(); }
};

/// A plain vector is already flat: pass it through unchanged.
template <typename T, size_t N>
struct compoundcast<vec<T, N>>
{
    static const vec<T, N>& to_flat(const vec<T, N>& x) { return x; }

    static const vec<T, N>& from_flat(const vec<T, N>& x) { return x; }
};

/// vec of vec: flatten to N1*N2 scalars via the shared SIMD representation.
template <typename T, size_t N1, size_t N2>
struct compoundcast<vec<vec<T, N1>, N2>>
{
    static vec<T, N1 * N2> to_flat(const vec<vec<T, N1>, N2>& x) { return x.v; }

    static vec<vec<T, N1>, N2> from_flat(const vec<T, N1 * N2>& x) { return x.v; }
};

/// Three-level nesting: flatten to N1*N2*N3 scalars.
template <typename T, size_t N1, size_t N2, size_t N3>
struct compoundcast<vec<vec<vec<T, N1>, N2>, N3>>
{
    static vec<T, N1 * N2 * N3> to_flat(const vec<vec<vec<T, N1>, N2>, N3>& x) { return x.v; }

    static vec<vec<vec<T, N1>, N2>, N3> from_flat(const vec<T, N1 * N2 * N3>& x) { return x.v; }
};
170
/// Alignment used for vec<T, N_>: at least the alignment of the backend SIMD
/// register type, but never more than the platform's native vector alignment
/// (capped at the next power of two of the vector's byte size).
template <typename T, size_t N_>
inline constexpr size_t vec_alignment =
    const_max(alignof(intrinsics::simd<typename compound_type_traits<T>::deep_subtype,
                                       const_max(size_t(1), N_) * compound_type_traits<T>::deep_width>),
              const_min(size_t(platform<>::native_vector_alignment),
                        next_poweroftwo(sizeof(typename compound_type_traits<T>::deep_subtype) *
                                        const_max(size_t(1), N_) * compound_type_traits<T>::deep_width)));
178
179 } // namespace internal
180
181 template <typename T, size_t N_>
182 struct alignas(internal::vec_alignment<T, N_>) vec
183 {
184 static_assert(N_ > 0, "vec<T, N>: vector width cannot be zero");
185
186 constexpr static inline size_t N = const_max(size_t(1), N_);
shapekfr::CMT_ARCH_NAME::vec187 static constexpr vec_shape<T, N> shape() CMT_NOEXCEPT { return {}; }
188
189 // type and size
190 using value_type = T;
191
sizekfr::CMT_ARCH_NAME::vec192 constexpr static size_t size() CMT_NOEXCEPT { return N; }
193
194 using ST = typename compound_type_traits<T>::deep_subtype;
195 using scalar_type = ST;
196
197 constexpr static inline size_t SW = compound_type_traits<T>::deep_width;
198 constexpr static inline size_t SN = N * SW;
199
scalar_sizekfr::CMT_ARCH_NAME::vec200 constexpr static size_t scalar_size() CMT_NOEXCEPT { return SN; }
201
202 static_assert(is_simd_type<scalar_type>, "Invalid vector type");
203
204 static_assert(scalar_size() > 0 && scalar_size() <= 1024, "Invalid vector size");
205
206 using mask_t = mask<T, N>;
207
208 using simd_type = intrinsics::simd<ST, SN>;
209 using uvalue_type = utype<T>;
210 using iuvalue_type = conditional<is_i_class<T>, T, uvalue_type>;
211
212 using uscalar_type = utype<ST>;
213 using iuscalar_type = conditional<is_i_class<ST>, ST, uscalar_type>;
214
215 using usimd_type = intrinsics::simd<uscalar_type, SN>;
216 using iusimd_type = intrinsics::simd<iuscalar_type, SN>;
217
218 // constructors and assignment
219 // from SIMD
veckfr::CMT_ARCH_NAME::vec220 KFR_MEM_INTRINSIC vec(const simd_type& simd) CMT_NOEXCEPT : v(simd) {}
221 // default
veckfr::CMT_ARCH_NAME::vec222 KFR_MEM_INTRINSIC constexpr vec() CMT_NOEXCEPT {}
223 // copy
224 KFR_MEM_INTRINSIC constexpr vec(const vec& value) CMT_NOEXCEPT = default;
225 // move
226 KFR_MEM_INTRINSIC constexpr vec(vec&&) CMT_NOEXCEPT = default;
227 // assignment
228 KFR_MEM_INTRINSIC constexpr vec& operator=(const vec&) CMT_NOEXCEPT = default;
229
230 // from scalar
231 template <typename U, KFR_ENABLE_IF(is_convertible<U, value_type>&& compound_type_traits<T>::is_scalar)>
veckfr::CMT_ARCH_NAME::vec232 KFR_MEM_INTRINSIC vec(const U& s) CMT_NOEXCEPT
233 : v(intrinsics::simd_broadcast(intrinsics::simd_t<unwrap_bit<ST>, SN>{},
234 static_cast<unwrap_bit<ST>>(static_cast<ST>(s))))
235 {
236 }
237
238 template <typename U, KFR_ENABLE_IF(is_convertible<U, value_type> && !compound_type_traits<T>::is_scalar)>
veckfr::CMT_ARCH_NAME::vec239 KFR_MEM_INTRINSIC vec(const U& s) CMT_NOEXCEPT
240 : v(intrinsics::simd_shuffle(intrinsics::simd_t<unwrap_bit<ST>, SW>{},
241 internal::compoundcast<T>::to_flat(static_cast<T>(s)).v,
242 csizeseq<SN> % csize<SW>, overload_auto))
243 {
244 }
245
246 // from list
247 template <typename... Us, KFR_ENABLE_IF(sizeof...(Us) <= 1022 && compound_type_traits<T>::is_scalar)>
veckfr::CMT_ARCH_NAME::vec248 KFR_MEM_INTRINSIC vec(const value_type& s0, const value_type& s1, const Us&... rest) CMT_NOEXCEPT
249 : v(intrinsics::simd_make(cometa::ctype<T>, s0, s1, static_cast<value_type>(rest)...))
250 {
251 }
252
253 template <typename... Us, KFR_ENABLE_IF(sizeof...(Us) <= 1022 && !compound_type_traits<T>::is_scalar)>
veckfr::CMT_ARCH_NAME::vec254 KFR_MEM_INTRINSIC vec(const value_type& s0, const value_type& s1, const Us&... rest) CMT_NOEXCEPT
255 : v(intrinsics::simd_concat<ST, size_t(SW), size_t(SW), just_value<Us, size_t>(SW)...>(
256 internal::compoundcast<T>::to_flat(s0).v, internal::compoundcast<T>::to_flat(s1).v,
257 internal::compoundcast<T>::to_flat(static_cast<T>(rest)).v...))
258 {
259 }
260
261 // from vector of another type
262 template <typename U, KFR_ENABLE_IF(is_convertible<U, value_type> &&
263 (compound_type_traits<T>::is_scalar && !is_bit<U>))>
veckfr::CMT_ARCH_NAME::vec264 KFR_MEM_INTRINSIC vec(const vec<U, N>& x) CMT_NOEXCEPT
265 : v(intrinsics::simd_convert(
266 intrinsics::simd_cvt_t<unwrap_bit<ST>, unwrap_bit<deep_subtype<U>>, SN>{}, x.v))
267 {
268 }
269
270 template <typename U, KFR_ENABLE_IF(is_convertible<U, value_type> &&
271 !(compound_type_traits<T>::is_scalar && !is_bit<U>))>
veckfr::CMT_ARCH_NAME::vec272 KFR_MEM_INTRINSIC vec(const vec<U, N>& x) CMT_NOEXCEPT
273 : v(internal::conversion<vec<T, N>, vec<U, N>>::cast(x).v)
274 {
275 }
276
277 // from list of vectors
278 template <size_t... Ns, typename = enable_if<csum<size_t, Ns...>() == N>>
279 KFR_MEM_INTRINSIC vec(const vec<T, Ns>&... vs) CMT_NOEXCEPT
280 : v(intrinsics::simd_concat<ST, (SW * Ns)...>(vs.v...))
281 {
282 }
283
veckfr::CMT_ARCH_NAME::vec284 KFR_MEM_INTRINSIC vec(const portable_vec<T, N>& p) CMT_NOEXCEPT : vec(bitcast_anything<vec>(p)) {}
285
operator portable_vec<T,N>kfr::CMT_ARCH_NAME::vec286 KFR_MEM_INTRINSIC operator portable_vec<T, N>() const CMT_NOEXCEPT
287 {
288 return bitcast_anything<portable_vec<T, N>>(*this);
289 }
290
veckfr::CMT_ARCH_NAME::vec291 KFR_MEM_INTRINSIC vec(czeros_t) CMT_NOEXCEPT : v(intrinsics::simd_zeros<ST, SN>()) {}
292
veckfr::CMT_ARCH_NAME::vec293 KFR_MEM_INTRINSIC vec(cones_t) CMT_NOEXCEPT : v(intrinsics::simd_allones<ST, SN>()) {}
294
295 template <typename U, size_t M, KFR_ENABLE_IF(sizeof(U) * M == sizeof(T) * N)>
frombitskfr::CMT_ARCH_NAME::vec296 KFR_MEM_INTRINSIC static vec frombits(const vec<U, M>& v) CMT_NOEXCEPT
297 {
298 return intrinsics::simd_bitcast(
299 intrinsics::simd_cvt_t<ST, typename vec<U, M>::scalar_type, vec<U, M>::scalar_size()>{}, v.v);
300 }
301
302 // shuffle
303 template <size_t... indices>
shufflekfr::CMT_ARCH_NAME::vec304 KFR_MEM_INTRINSIC vec<value_type, sizeof...(indices)> shuffle(csizes_t<indices...> i) const CMT_NOEXCEPT
305 {
306 return vec<value_type, sizeof...(indices)>(intrinsics::simd_shuffle(
307 intrinsics::simd_t<unwrap_bit<ST>, SN>{}, v, scale<SW>(i), overload_auto));
308 }
309
310 template <size_t... indices>
shufflekfr::CMT_ARCH_NAME::vec311 KFR_MEM_INTRINSIC vec<value_type, sizeof...(indices)> shuffle(const vec& y,
312 csizes_t<indices...> i) const CMT_NOEXCEPT
313 {
314 return vec<value_type, sizeof...(indices)>(
315 intrinsics::simd_shuffle(intrinsics::simd2_t<ST, SN, SN>{}, v, y.v, scale<SW>(i), overload_auto));
316 }
317
318 // element access
319 struct element;
320
operator []kfr::CMT_ARCH_NAME::vec321 KFR_MEM_INTRINSIC constexpr value_type operator[](size_t index) const& CMT_NOEXCEPT { return get(index); }
322
operator []kfr::CMT_ARCH_NAME::vec323 KFR_MEM_INTRINSIC constexpr value_type operator[](size_t index) && CMT_NOEXCEPT { return get(index); }
324
operator []kfr::CMT_ARCH_NAME::vec325 KFR_MEM_INTRINSIC constexpr element operator[](size_t index) & CMT_NOEXCEPT { return { *this, index }; }
326
frontkfr::CMT_ARCH_NAME::vec327 KFR_MEM_INTRINSIC value_type front() const CMT_NOEXCEPT { return get(csize<0>); }
328
backkfr::CMT_ARCH_NAME::vec329 KFR_MEM_INTRINSIC value_type back() const CMT_NOEXCEPT { return get(csize<N - 1>); }
330
331 template <int dummy = 0, KFR_ENABLE_IF(dummy == 0 && compound_type_traits<T>::is_scalar)>
getkfr::CMT_ARCH_NAME::vec332 KFR_MEM_INTRINSIC constexpr value_type get(size_t index) const CMT_NOEXCEPT
333 {
334 return intrinsics::simd_get_element<T, N>(v, index);
335 }
336
337 template <int dummy = 0, typename = void,
338 KFR_ENABLE_IF(dummy == 0 && !compound_type_traits<T>::is_scalar)>
getkfr::CMT_ARCH_NAME::vec339 KFR_MEM_INTRINSIC constexpr value_type get(size_t index) const CMT_NOEXCEPT
340 {
341 union {
342 simd_type v;
343 T s[N];
344 } u{ this->v };
345 return u.s[index];
346 }
347
348 template <size_t index, KFR_ENABLE_IF(index < 1024 && compound_type_traits<T>::is_scalar)>
getkfr::CMT_ARCH_NAME::vec349 KFR_MEM_INTRINSIC constexpr value_type get(csize_t<index>) const CMT_NOEXCEPT
350 {
351 return intrinsics::simd_get_element<T, N>(v, csize<index>);
352 }
353
354 template <size_t index, typename = void,
355 KFR_ENABLE_IF(index < 1024 && !compound_type_traits<T>::is_scalar)>
getkfr::CMT_ARCH_NAME::vec356 KFR_MEM_INTRINSIC constexpr value_type get(csize_t<index>) const CMT_NOEXCEPT
357 {
358 return internal::compoundcast<T>::from_flat(intrinsics::simd_shuffle(
359 intrinsics::simd_t<unwrap_bit<ST>, SN>{}, v, csizeseq<SW, SW * index>, overload_auto));
360 }
361
362 template <size_t index>
getkfr::CMT_ARCH_NAME::vec363 KFR_MEM_INTRINSIC constexpr value_type get() const CMT_NOEXCEPT
364 {
365 return this->get(csize_t<index>{});
366 }
367
368 template <int dummy = 0, KFR_ENABLE_IF(dummy == 0 && compound_type_traits<T>::is_scalar)>
setkfr::CMT_ARCH_NAME::vec369 KFR_MEM_INTRINSIC constexpr void set(size_t index, const value_type& s) CMT_NOEXCEPT
370 {
371 v = intrinsics::simd_set_element<T, N>(v, index, s);
372 }
373
374 template <int dummy = 0, KFR_ENABLE_IF(dummy == 0 && !compound_type_traits<T>::is_scalar)>
setkfr::CMT_ARCH_NAME::vec375 KFR_MEM_INTRINSIC constexpr void set(size_t index, const value_type& s) CMT_NOEXCEPT
376 {
377 union {
378 simd_type v;
379 T s[N];
380 } u{ this->v };
381 u.s[index] = s;
382 this->v = u.v;
383 }
384
385 template <size_t index, KFR_ENABLE_IF(index < 1024 && compound_type_traits<T>::is_scalar)>
setkfr::CMT_ARCH_NAME::vec386 KFR_MEM_INTRINSIC constexpr void set(csize_t<index>, const value_type& s) CMT_NOEXCEPT
387 {
388 v = intrinsics::simd_set_element<T, N>(v, csize<index>, s);
389 }
390
391 template <size_t index, typename = void,
392 KFR_ENABLE_IF(index < 1024 && !compound_type_traits<T>::is_scalar)>
setkfr::CMT_ARCH_NAME::vec393 KFR_MEM_INTRINSIC constexpr void set(csize_t<index>, const value_type& s) CMT_NOEXCEPT
394 {
395 this->s[index] = s;
396 }
397
398 struct element
399 {
operator value_typekfr::CMT_ARCH_NAME::vec::element400 constexpr operator value_type() const CMT_NOEXCEPT { return v.get(index); }
401
operator =kfr::CMT_ARCH_NAME::vec::element402 KFR_MEM_INTRINSIC element& operator=(const value_type& s) CMT_NOEXCEPT
403 {
404 v.set(index, s);
405 return *this;
406 }
407
operator =kfr::CMT_ARCH_NAME::vec::element408 KFR_MEM_INTRINSIC element& operator=(const element& s) CMT_NOEXCEPT
409 {
410 v.set(index, static_cast<value_type>(s));
411 return *this;
412 }
413
414 template <typename U, size_t M>
operator =kfr::CMT_ARCH_NAME::vec::element415 KFR_MEM_INTRINSIC element& operator=(const typename vec<U, M>::element& s) CMT_NOEXCEPT
416 {
417 v.set(index, static_cast<value_type>(static_cast<U>(s)));
418 return *this;
419 }
420
421 vec& v;
422 size_t index;
423 };
424
425 // read/write
426 template <bool aligned = false>
427 KFR_MEM_INTRINSIC explicit constexpr vec(const value_type* src,
428 cbool_t<aligned> = cbool_t<aligned>()) CMT_NOEXCEPT;
429
430 template <bool aligned = false>
431 KFR_MEM_INTRINSIC const vec& write(value_type* dest,
432 cbool_t<aligned> = cbool_t<aligned>()) const CMT_NOEXCEPT;
433
flattenkfr::CMT_ARCH_NAME::vec434 KFR_MEM_INTRINSIC vec<ST, SN> flatten() const CMT_NOEXCEPT { return v; }
435
from_flattenkfr::CMT_ARCH_NAME::vec436 KFR_MEM_INTRINSIC static vec from_flatten(const vec<ST, SN>& x) { return vec(x.v); }
437
asmaskkfr::CMT_ARCH_NAME::vec438 KFR_MEM_INTRINSIC constexpr mask_t asmask() const CMT_NOEXCEPT { return mask_t(v); }
439
asveckfr::CMT_ARCH_NAME::vec440 KFR_MEM_INTRINSIC constexpr vec<unwrap_bit<T>, N> asvec() const CMT_NOEXCEPT
441 {
442 return vec<unwrap_bit<T>, N>(v);
443 }
444
445 constexpr static size_t simd_element_size = const_min(vector_width<T>, N);
446 constexpr static size_t simd_element_count = N / simd_element_size;
447 using simd_element_type = simd<ST, simd_element_size>;
448
449 public:
450 union {
451 simd_type v;
452 vec_halves<T, N> h;
453 // simd_element_type w[simd_element_count];
454 // T s[N];
455 };
456 };
457
/// True if T (after stripping const and nesting) can be an element of vec.
template <typename T>
constexpr inline bool is_vec_element = is_simd_type<deep_subtype<remove_const<T>>>;
460
/// Returns a vector of the selected elements of x; the result width equals the
/// number of compile-time indices.
template <typename T, size_t N, size_t... indices>
KFR_INTRINSIC vec<T, sizeof...(indices)> shufflevector(const vec<T, N>& x,
                                                       csizes_t<indices...> i) CMT_NOEXCEPT
{
    return intrinsics::simd_shuffle(intrinsics::simd_t<unwrap_bit<T>, N>{}, x.v, i, overload_auto);
}

/// Two-source variant: indices in [0, N) select from x, [N, 2N) from y.
template <typename T, size_t N, size_t... indices>
KFR_INTRINSIC vec<T, sizeof...(indices)> shufflevectors(const vec<T, N>& x, const vec<T, N>& y,
                                                        csizes_t<indices...> i) CMT_NOEXCEPT
{
    return intrinsics::simd_shuffle(intrinsics::simd2_t<T, N, N>{}, x.v, y.v, i, overload_auto);
}
474
475 namespace internal
476 {
477 template <typename T>
478 struct is_vec_impl : std::false_type
479 {
480 };
481
482 template <typename T, size_t N>
483 struct is_vec_impl<vec<T, N>> : std::true_type
484 {
485 };
486 } // namespace internal
487
/// True if T is an instantiation of vec.
template <typename T>
constexpr inline bool is_vec = internal::is_vec_impl<T>::value;
490
491 CMT_PRAGMA_GNU(GCC diagnostic push)
492 CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wold-style-cast")
493
494 template <size_t N, typename T>
broadcast(T x)495 constexpr KFR_INTRINSIC vec<T, N> broadcast(T x)
496 {
497 return x;
498 }
499
500 CMT_PRAGMA_GNU(GCC diagnostic pop)
501
502 namespace internal
503 {
504
/// Element-wise value conversion of a vector; delegates to vec's converting
/// constructor (the name mirrors the compiler builtin it replaces).
template <typename To, typename From, size_t N, typename Tsub = deep_subtype<To>,
          size_t Nout = (N * compound_type_traits<To>::deep_width)>
constexpr KFR_INTRINSIC vec<To, N> builtin_convertvector(const vec<From, N>& value) CMT_NOEXCEPT
{
    return vec<To, N>(value);
}
511
// vector to vector
/// Converts vec<From, N> to vec<To, N> element-wise; widths must match and
/// neither element type may be compound.
template <typename To, typename From, size_t N, size_t N2>
struct conversion<vec<To, N>, vec<From, N2>>
{
    static_assert(N == N2, "");
    static_assert(!is_compound<To>, "");
    static_assert(!is_compound<From>, "");

    static vec<To, N> cast(const vec<From, N>& value) { return vec<To, N>(value); }
};

// scalar to vector
/// Broadcasts a convertible scalar to every element of vec<To, N>.
template <typename To, typename From, size_t N>
struct conversion<vec<To, N>, From>
{
    static_assert(is_convertible<From, To>, "");

    static vec<To, N> cast(const From& value) { return broadcast<N>(static_cast<To>(value)); }
};
531 } // namespace internal
532
533 template <typename T>
size_of()534 constexpr size_t size_of() CMT_NOEXCEPT
535 {
536 return sizeof(deep_subtype<T>) * compound_type_traits<T>::deep_width;
537 }
538
/// Free-function form of vec::flatten: views a (possibly compound) vector as a
/// vector of its deepest scalar subtype.
template <typename From, size_t N, typename Tsub = deep_subtype<From>,
          size_t Nout = N* size_of<From>() / size_of<Tsub>()>
constexpr KFR_INTRINSIC vec<Tsub, Nout> flatten(const vec<From, N>& x) CMT_NOEXCEPT
{
    return x.flatten();
}
545
/// Value conversion to To, applied to the deepest element type of From
/// (scalars convert directly; compounds are rebound element-wise).
template <typename To, typename From,
          typename Tout = typename compound_type_traits<From>::template deep_rebind<To>>
constexpr KFR_INTRINSIC Tout cast(const From& value) CMT_NOEXCEPT
{
    return static_cast<Tout>(value);
}

/// Vector element-type conversion (Tin != Tout).
template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
constexpr KFR_INTRINSIC vec<Tout, N> cast(const vec<Tin, N>& value) CMT_NOEXCEPT
{
    return vec<Tout, N>(value);
}

/// Nested-vector (depth 2) element-type conversion.
template <typename Tout, typename Tin, size_t N1, size_t N2, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
constexpr KFR_INTRINSIC vec<vec<Tout, N1>, N2> cast(const vec<vec<Tin, N1>, N2>& value) CMT_NOEXCEPT
{
    return vec<vec<Tout, N1>, N2>(value);
}

/// Nested-vector (depth 3) element-type conversion.
template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
constexpr KFR_INTRINSIC vec<vec<vec<Tout, N1>, N2>, N3> cast(const vec<vec<vec<Tin, N1>, N2>, N3>& value)
    CMT_NOEXCEPT
{
    return vec<vec<vec<Tout, N1>, N2>, N3>(value);
}

/// Identity overloads: when the element type already matches, return the input
/// by reference and avoid a copy.
template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(is_same<Tin, Tout>)>
constexpr KFR_INTRINSIC const vec<Tin, N>& cast(const vec<Tin, N>& value) CMT_NOEXCEPT
{
    return value;
}

template <typename Tout, typename Tin, size_t N1, size_t N2, KFR_ENABLE_IF(is_same<Tin, Tout>)>
constexpr KFR_INTRINSIC const vec<vec<Tin, N1>, N2>& cast(const vec<vec<Tin, N1>, N2>& value) CMT_NOEXCEPT
{
    return value;
}

template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(is_same<Tin, Tout>)>
constexpr KFR_INTRINSIC const vec<vec<vec<Tin, N1>, N2>, N3>& cast(
    const vec<vec<vec<Tin, N1>, N2>, N3>& value) CMT_NOEXCEPT
{
    return value;
}
590
591 //
592
/// Like cast(): converts the deepest element type to To. Kept as a separate
/// overload set so callers can opt into inner-element conversion explicitly.
template <typename To, typename From,
          typename Tout = typename compound_type_traits<From>::template deep_rebind<To>>
constexpr KFR_INTRINSIC Tout innercast(const From& value) CMT_NOEXCEPT
{
    return static_cast<Tout>(value);
}

/// Vector element-type conversion (Tin != Tout).
template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
constexpr KFR_INTRINSIC vec<Tout, N> innercast(const vec<Tin, N>& value) CMT_NOEXCEPT
{
    return vec<Tout, N>(value);
}

/// Nested-vector (depth 2) element-type conversion.
template <typename Tout, typename Tin, size_t N1, size_t N2, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
constexpr KFR_INTRINSIC vec<vec<Tout, N1>, N2> innercast(const vec<vec<Tin, N1>, N2>& value) CMT_NOEXCEPT
{
    return vec<vec<Tout, N1>, N2>(value);
}

/// Nested-vector (depth 3) element-type conversion.
template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
constexpr KFR_INTRINSIC vec<vec<vec<Tout, N1>, N2>, N3> innercast(const vec<vec<vec<Tin, N1>, N2>, N3>& value)
    CMT_NOEXCEPT
{
    return vec<vec<vec<Tout, N1>, N2>, N3>(value);
}

/// Identity overloads: same element type, return by reference (no copy).
template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(is_same<Tin, Tout>)>
constexpr KFR_INTRINSIC const vec<Tin, N>& innercast(const vec<Tin, N>& value) CMT_NOEXCEPT
{
    return value;
}

template <typename Tout, typename Tin, size_t N1, size_t N2, KFR_ENABLE_IF(is_same<Tin, Tout>)>
constexpr KFR_INTRINSIC const vec<vec<Tin, N1>, N2>& innercast(const vec<vec<Tin, N1>, N2>& value)
    CMT_NOEXCEPT
{
    return value;
}

template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(is_same<Tin, Tout>)>
constexpr KFR_INTRINSIC const vec<vec<vec<Tin, N1>, N2>, N3>& innercast(
    const vec<vec<vec<Tin, N1>, N2>, N3>& value) CMT_NOEXCEPT
{
    return value;
}
638
639 //
640
/// Converts at the outermost element level only: a vec of vecs converts its
/// top-level elements to Tout (unlike innercast, which rebinds the deepest type).
template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
constexpr KFR_INTRINSIC vec<Tout, N> elemcast(const vec<Tin, N>& value) CMT_NOEXCEPT
{
    return vec<Tout, N>(value);
}

/// Identity overload: same element type, return by reference (no copy).
template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(is_same<Tin, Tout>)>
constexpr KFR_INTRINSIC const vec<Tin, N>& elemcast(const vec<Tin, N>& value) CMT_NOEXCEPT
{
    return value;
}

/// Depth-2 nesting: the N2 outer elements (each a vec<Tin, N1>) become Tout.
template <typename Tout, typename Tin, size_t N1, size_t N2, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
constexpr KFR_INTRINSIC vec<Tout, N2> elemcast(const vec<vec<Tin, N1>, N2>& value) CMT_NOEXCEPT
{
    return vec<Tout, N2>(value);
}

/// Depth-3 nesting: the N3 outer elements become Tout.
template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
constexpr KFR_INTRINSIC vec<Tout, N3> elemcast(const vec<vec<vec<Tin, N1>, N2>, N3>& value) CMT_NOEXCEPT
{
    return vec<Tout, N3>(value);
}
664
/// Reinterprets the bits of a scalar value as To (sizes must match exactly).
/// Uses union type punning — well-defined as a GNU extension, hence
/// CMT_GNU_CONSTEXPR rather than plain constexpr.
template <typename To, typename From>
CMT_GNU_CONSTEXPR KFR_INTRINSIC To bitcast(const From& value) CMT_NOEXCEPT
{
    static_assert(sizeof(From) == sizeof(To), "bitcast: Incompatible types");
    union {
        From from;
        To to;
    } u{ value };
    return u.to;
}

/// Vector form: reinterprets vec<From, N> as vec<To, Nout>, where Nout keeps
/// the total byte size unchanged.
template <typename To, typename From, size_t N, size_t Nout = (N * size_of<From>() / size_of<To>())>
CMT_GNU_CONSTEXPR KFR_INTRINSIC vec<To, Nout> bitcast(const vec<From, N>& value) CMT_NOEXCEPT
{
    return vec<To, Nout>::frombits(value);
}
681
/// Bit-reinterprets a scalar as the unsigned integer type of the same size.
template <typename From, typename To = utype<From>, KFR_ENABLE_IF(!is_compound<From>)>
constexpr KFR_INTRINSIC To ubitcast(const From& value) CMT_NOEXCEPT
{
    return bitcast<To>(value);
}

/// Bit-reinterprets a scalar as the signed integer type of the same size.
template <typename From, typename To = itype<From>, KFR_ENABLE_IF(!is_compound<From>)>
constexpr KFR_INTRINSIC To ibitcast(const From& value) CMT_NOEXCEPT
{
    return bitcast<To>(value);
}

/// Bit-reinterprets a scalar as the floating-point type of the same size.
template <typename From, typename To = ftype<From>, KFR_ENABLE_IF(!is_compound<From>)>
constexpr KFR_INTRINSIC To fbitcast(const From& value) CMT_NOEXCEPT
{
    return bitcast<To>(value);
}

/// Bit-reinterprets a scalar as the unsigned-or-integer type of the same size.
template <typename From, typename To = uitype<From>, KFR_ENABLE_IF(!is_compound<From>)>
constexpr KFR_INTRINSIC To uibitcast(const From& value) CMT_NOEXCEPT
{
    return bitcast<To>(value);
}
705
/// Vector forms of the typed bitcasts above; Nout preserves total byte size.
template <typename From, size_t N, typename To = utype<From>,
          size_t Nout = size_of<From>() * N / size_of<To>()>
constexpr KFR_INTRINSIC vec<To, Nout> ubitcast(const vec<From, N>& value) CMT_NOEXCEPT
{
    return vec<To, Nout>::frombits(value);
}

template <typename From, size_t N, typename To = itype<From>,
          size_t Nout = size_of<From>() * N / size_of<To>()>
constexpr KFR_INTRINSIC vec<To, Nout> ibitcast(const vec<From, N>& value) CMT_NOEXCEPT
{
    return vec<To, Nout>::frombits(value);
}

template <typename From, size_t N, typename To = ftype<From>,
          size_t Nout = size_of<From>() * N / size_of<To>()>
constexpr KFR_INTRINSIC vec<To, Nout> fbitcast(const vec<From, N>& value) CMT_NOEXCEPT
{
    return vec<To, Nout>::frombits(value);
}

template <typename From, size_t N, typename To = uitype<From>,
          size_t Nout = size_of<From>() * N / size_of<To>()>
constexpr KFR_INTRINSIC vec<To, Nout> uibitcast(const vec<From, N>& value) CMT_NOEXCEPT
{
    return vec<To, Nout>::frombits(value);
}
733
vector_alignment(size_t size)734 constexpr KFR_INTRINSIC size_t vector_alignment(size_t size) { return next_poweroftwo(size); }
735
/// Packed (tightly laid out, possibly unaligned) vector of N elements of T.
/// Marked __attribute__((packed)) under GNU-compatible compilers; storage is
/// private and accessed only by the friend vec<T, N>.
template <typename T, size_t N>
struct pkd_vec
{
    constexpr pkd_vec() CMT_NOEXCEPT {}

    /// Stores the elements of a vec into the packed array.
    pkd_vec(const vec<T, N>& value) CMT_NOEXCEPT { value.write(v); }

    /// Element-wise initialization; requires at least N initializers.
    template <typename... Ts>
    constexpr pkd_vec(Ts... init) CMT_NOEXCEPT : v{ static_cast<T>(init)... }
    {
        static_assert(N <= sizeof...(Ts), "Too few initializers for pkd_vec");
    }

private:
    T v[N];
    friend struct vec<T, N>;
}
#ifdef CMT_GNU_ATTRIBUTES
__attribute__((packed))
#endif
;
757
758 namespace internal
759 {
760
761 template <size_t, typename T>
make_vector_get_n()762 constexpr KFR_INTRINSIC T make_vector_get_n()
763 {
764 return T();
765 }
766
767 template <size_t index, typename T, typename... Args>
make_vector_get_n(const T & arg,const Args &...args)768 constexpr KFR_INTRINSIC T make_vector_get_n(const T& arg, const Args&... args)
769 {
770 return index == 0 ? arg : make_vector_get_n<index - 1, T>(args...);
771 }
772
/// Builds a vec<T, N> from a pack of convertible values: materializes them
/// into a temporary array, then expands the index sequence into vec's
/// element-wise constructor.
template <typename T, typename... Args, size_t... indices, size_t N = sizeof...(Args)>
CMT_GNU_CONSTEXPR KFR_INTRINSIC vec<T, N> make_vector_impl(csizes_t<indices...>, const Args&... args)
{
    static_assert(sizeof...(indices) == sizeof...(Args), "");
    const T list[] = { static_cast<T>(args)... };
    return vec<T, N>(list[indices]...);
}
780
/// Selects a result type: when the flag is true, the common type of Args...;
/// otherwise the explicitly-supplied fallback type.
template <bool, typename Tfallback, typename... Args>
struct conditional_common;

template <typename Tfallback, typename... Args>
struct conditional_common<true, Tfallback, Args...>
{
    using type = common_type<Args...>;
};

template <typename Tfallback, typename... Args>
struct conditional_common<false, Tfallback, Args...>
{
    using type = Tfallback;
};
795
796 } // namespace internal
797
/// Create vector from scalar values
/// The element type is `Type` if given explicitly, otherwise the common type
/// of the arguments.
/// @code
/// CHECK( make_vector( 1, 2, 3, 4 ) == i32x4{1, 2, 3, 4} );
/// @endcode
template <typename Type = void, typename Arg, typename... Args, size_t N = (sizeof...(Args) + 1),
          typename SubType =
              fix_type<typename internal::conditional_common<is_void<Type>, Type, Arg, Args...>::type>>
constexpr KFR_INTRINSIC vec<SubType, N> make_vector(const Arg& x, const Args&... rest)
{
    return internal::make_vector_impl<SubType>(cvalseq_t<size_t, N>(), static_cast<SubType>(x),
                                               static_cast<SubType>(rest)...);
}
810
/// Identity overload: a vec passed to make_vector is returned unchanged.
template <typename T, size_t N>
constexpr KFR_INTRINSIC vec<T, N> make_vector(const vec<T, N>& x)
{
    return x;
}
816
/// Builds a vector from a compile-time list of values (cvals_t).
template <typename T, T... Values, size_t N = sizeof...(Values)>
constexpr KFR_INTRINSIC vec<T, N> make_vector(cvals_t<T, Values...>)
{
    return make_vector<T>(Values...);
}
822
/// Like make_vector, but constrained to numeric element types; packs the
/// arguments into a vector of their common (or explicitly given) type.
template <typename Type = void, typename Arg, typename... Args, size_t N = (sizeof...(Args) + 1),
          typename SubType = fix_type<conditional<is_void<Type>, common_type<Arg, Args...>, Type>>,
          KFR_ENABLE_IF(is_number<subtype<SubType>>)>
constexpr KFR_INTRINSIC vec<SubType, N> pack(const Arg& x, const Args&... rest)
{
    return internal::make_vector_impl<SubType>(csizeseq<N>, static_cast<SubType>(x),
                                               static_cast<SubType>(rest)...);
}
831
// Short aliases for common vector widths (1..64 lanes) of each element type.

// 32-bit floating point
using f32x1  = vec<f32, 1>;
using f32x2  = vec<f32, 2>;
using f32x3  = vec<f32, 3>;
using f32x4  = vec<f32, 4>;
using f32x8  = vec<f32, 8>;
using f32x16 = vec<f32, 16>;
using f32x32 = vec<f32, 32>;
using f32x64 = vec<f32, 64>;
// 64-bit floating point
using f64x1  = vec<f64, 1>;
using f64x2  = vec<f64, 2>;
using f64x3  = vec<f64, 3>;
using f64x4  = vec<f64, 4>;
using f64x8  = vec<f64, 8>;
using f64x16 = vec<f64, 16>;
using f64x32 = vec<f64, 32>;
using f64x64 = vec<f64, 64>;
// signed 8-bit integer
using i8x1  = vec<i8, 1>;
using i8x2  = vec<i8, 2>;
using i8x3  = vec<i8, 3>;
using i8x4  = vec<i8, 4>;
using i8x8  = vec<i8, 8>;
using i8x16 = vec<i8, 16>;
using i8x32 = vec<i8, 32>;
using i8x64 = vec<i8, 64>;
// signed 16-bit integer
using i16x1  = vec<i16, 1>;
using i16x2  = vec<i16, 2>;
using i16x3  = vec<i16, 3>;
using i16x4  = vec<i16, 4>;
using i16x8  = vec<i16, 8>;
using i16x16 = vec<i16, 16>;
using i16x32 = vec<i16, 32>;
using i16x64 = vec<i16, 64>;
// signed 32-bit integer
using i32x1  = vec<i32, 1>;
using i32x2  = vec<i32, 2>;
using i32x3  = vec<i32, 3>;
using i32x4  = vec<i32, 4>;
using i32x8  = vec<i32, 8>;
using i32x16 = vec<i32, 16>;
using i32x32 = vec<i32, 32>;
using i32x64 = vec<i32, 64>;
// signed 64-bit integer
using i64x1  = vec<i64, 1>;
using i64x2  = vec<i64, 2>;
using i64x3  = vec<i64, 3>;
using i64x4  = vec<i64, 4>;
using i64x8  = vec<i64, 8>;
using i64x16 = vec<i64, 16>;
using i64x32 = vec<i64, 32>;
using i64x64 = vec<i64, 64>;
// unsigned 8-bit integer
using u8x1  = vec<u8, 1>;
using u8x2  = vec<u8, 2>;
using u8x3  = vec<u8, 3>;
using u8x4  = vec<u8, 4>;
using u8x8  = vec<u8, 8>;
using u8x16 = vec<u8, 16>;
using u8x32 = vec<u8, 32>;
using u8x64 = vec<u8, 64>;
// unsigned 16-bit integer
using u16x1  = vec<u16, 1>;
using u16x2  = vec<u16, 2>;
using u16x3  = vec<u16, 3>;
using u16x4  = vec<u16, 4>;
using u16x8  = vec<u16, 8>;
using u16x16 = vec<u16, 16>;
using u16x32 = vec<u16, 32>;
using u16x64 = vec<u16, 64>;
// unsigned 32-bit integer
using u32x1  = vec<u32, 1>;
using u32x2  = vec<u32, 2>;
using u32x3  = vec<u32, 3>;
using u32x4  = vec<u32, 4>;
using u32x8  = vec<u32, 8>;
using u32x16 = vec<u32, 16>;
using u32x32 = vec<u32, 32>;
using u32x64 = vec<u32, 64>;
// unsigned 64-bit integer
using u64x1  = vec<u64, 1>;
using u64x2  = vec<u64, 2>;
using u64x3  = vec<u64, 3>;
using u64x4  = vec<u64, 4>;
using u64x8  = vec<u64, 8>;
using u64x16 = vec<u64, 16>;
using u64x32 = vec<u64, 32>;
using u64x64 = vec<u64, 64>;
912
/// GLSL-style names for the small float/int vectors (vec2..uvec4).
namespace glsl_names
{
using vec2  = f32x2;
using vec3  = f32x3;
using vec4  = f32x4;
using dvec2 = f64x2;
using dvec3 = f64x3;
using dvec4 = f64x4;
using ivec2 = i32x2;
using ivec3 = i32x3;
using ivec4 = i32x4;
using uvec2 = u32x2;
using uvec3 = u32x3;
using uvec4 = u32x4;
} // namespace glsl_names
/// OpenCL-C-style names (char2..double16) for the corresponding KFR vectors.
namespace opencl_names
{
using char2   = i8x2;
using char3   = i8x3;
using char4   = i8x4;
using char8   = i8x8;
using char16  = i8x16;
using uchar2  = u8x2;
using uchar3  = u8x3;
using uchar4  = u8x4;
using uchar8  = u8x8;
using uchar16 = u8x16;

using short2   = i16x2;
using short3   = i16x3;
using short4   = i16x4;
using short8   = i16x8;
using short16  = i16x16;
using ushort2  = u16x2;
using ushort3  = u16x3;
using ushort4  = u16x4;
using ushort8  = u16x8;
using ushort16 = u16x16;

using int2   = i32x2;
using int3   = i32x3;
using int4   = i32x4;
using int8   = i32x8;
using int16  = i32x16;
using uint2  = u32x2;
using uint3  = u32x3;
using uint4  = u32x4;
using uint8  = u32x8;
using uint16 = u32x16;

using long2   = i64x2;
using long3   = i64x3;
using long4   = i64x4;
using long8   = i64x8;
using long16  = i64x16;
using ulong2  = u64x2;
using ulong3  = u64x3;
using ulong4  = u64x4;
using ulong8  = u64x8;
using ulong16 = u64x16;

using float2  = f32x2;
using float3  = f32x3;
using float4  = f32x4;
using float8  = f32x8;
using float16 = f32x16;

using double2  = f64x2;
using double3  = f64x3;
using double4  = f64x4;
using double8  = f64x8;
using double16 = f64x16;
} // namespace opencl_names
986
987 namespace internal
988 {
989
/// Invoke fn with the Index-th element of every argument vector; produces one
/// scalar of the result vector built by apply_helper.
template <size_t Index, typename T, size_t N, typename Fn, typename... Args,
          typename Tout = invoke_result<Fn, subtype<decay<Args>>...>>
constexpr KFR_INTRINSIC Tout applyfn_helper(Fn&& fn, Args&&... args)
{
    return fn(args[Index]...);
}
996
/// Expand the index sequence: call fn element-wise over all argument vectors
/// and collect the results into a new vector via make_vector.
template <typename T, size_t N, typename Fn, typename... Args,
          typename Tout = invoke_result<Fn, subtype<decay<Args>>...>, size_t... Indices>
constexpr KFR_INTRINSIC vec<Tout, N> apply_helper(Fn&& fn, csizes_t<Indices...>, Args&&... args)
{
    return make_vector(applyfn_helper<Indices, T, N>(std::forward<Fn>(fn), std::forward<Args>(args)...)...);
}
1003
/// Call a nullary fn N times and pack the results into a vector. The
/// `((void)Indices, void(), fn())` comma expression exists only to expand the
/// pack; Indices itself is discarded.
template <typename T, size_t N, typename Fn, size_t... Indices>
constexpr KFR_INTRINSIC vec<T, N> apply0_helper(Fn&& fn, csizes_t<Indices...>)
{
    return make_vector(((void)Indices, void(), fn())...);
}
1009 } // namespace internal
1010
/// Apply fn element-wise to a vector (and any extra arguments), returning a
/// vector of the per-element results.
template <typename T, size_t N, typename Fn, typename... Args,
          typename Tout = invoke_result<Fn, T, subtype<decay<Args>>...>>
constexpr KFR_INTRINSIC vec<Tout, N> apply(Fn&& fn, const vec<T, N>& arg, Args&&... args)
{
    return internal::apply_helper<T, N>(std::forward<Fn>(fn), csizeseq<N>, arg, std::forward<Args>(args)...);
}
1017
1018 template <typename T, typename Fn, typename... Args, typename Tout = invoke_result<Fn, T, decay<Args>...>,
1019 KFR_ENABLE_IF(is_same<T, subtype<T>>)>
apply(Fn && fn,const T & arg,Args &&...args)1020 constexpr KFR_INTRINSIC Tout apply(Fn&& fn, const T& arg, Args&&... args)
1021 {
1022 return fn(arg, args...);
1023 }
1024
/// Call a nullary fn N times, packing the N results into a vector.
template <size_t N, typename Fn, typename T = invoke_result<Fn>>
constexpr KFR_INTRINSIC vec<T, N> apply(Fn&& fn)
{
    return internal::apply0_helper<T, N>(std::forward<Fn>(fn), csizeseq<N>);
}
1030
/// Returns a vector with all bits cleared (all elements zero).
template <typename T, size_t N>
CMT_GNU_CONSTEXPR KFR_INTRINSIC vec<T, N> zerovector()
{
    return vec<T, N>(czeros);
}

/// Overload taking a vec_shape tag to deduce T and N.
template <typename T, size_t N>
CMT_GNU_CONSTEXPR KFR_INTRINSIC vec<T, N> zerovector(vec_shape<T, N>)
{
    return vec<T, N>(czeros);
}

/// Overload taking a vector only to deduce T and N; its value is ignored.
template <typename T, size_t N>
CMT_GNU_CONSTEXPR KFR_INTRINSIC vec<T, N> zerovector(vec<T, N>)
{
    return vec<T, N>(czeros);
}
1048
/// Returns a vector with all bits set.
template <typename T, size_t N>
CMT_GNU_CONSTEXPR KFR_INTRINSIC vec<T, N> allonesvector()
{
    return vec<T, N>(cones);
}

/// Overload taking a vec_shape tag to deduce T and N.
template <typename T, size_t N>
CMT_GNU_CONSTEXPR KFR_INTRINSIC vec<T, N> allonesvector(vec_shape<T, N>)
{
    return vec<T, N>(cones);
}

/// Overload taking a vector only to deduce T and N; its value is ignored.
template <typename T, size_t N>
CMT_GNU_CONSTEXPR KFR_INTRINSIC vec<T, N> allonesvector(vec<T, N>)
{
    return vec<T, N>(cones);
}
1066
/// Returns a default-constructed ("don't care") vector; the element values
/// are whatever vec<T, N>{} produces and must not be relied upon.
template <typename T, size_t N>
constexpr KFR_INTRINSIC vec<T, N> undefinedvector()
{
    return vec<T, N>{};
}

/// Overload taking a vec_shape tag to deduce T and N.
template <typename T, size_t N>
constexpr KFR_INTRINSIC vec<T, N> undefinedvector(vec_shape<T, N>)
{
    return undefinedvector<T, N>();
}
1078
/// Metafunction wrapper: binds the lane count N so that `type<T>` yields
/// vec<T, N>. Used by the common_type_impl specializations below.
template <size_t N>
struct vec_template
{
    template <typename T>
    using type = vec<T, N>;
};
1085
1086 #ifdef KFR_TESTING
1087
special_values()1088 inline const std::vector<special_value>& special_values()
1089 {
1090 static const std::vector<special_value> values{ special_constant::infinity,
1091 special_constant::neg_infinity,
1092 special_constant::min,
1093 special_constant::lowest,
1094 special_constant::max,
1095 3.1415926535897932384626433832795,
1096 4.499999,
1097 4.500001,
1098 -4.499999,
1099 -4.500001,
1100 0.1111111111111111111111111111111,
1101 -0.4444444444444444444444444444444,
1102 -1,
1103 0,
1104 +1 };
1105 return values;
1106 }
1107
// Category selectors for the test helpers below: bit 0 = scalar types,
// bit 1 = vector types. NOTE: the namespace name contains a typo
// ("catogories"); it is part of the public name and kept for compatibility.
namespace test_catogories
{
constexpr cint_t<1> scalars{};
constexpr cint_t<2> vectors{};
constexpr cint_t<3> all{};

// Map a category constant to the list of types it covers.
constexpr inline auto types(cint_t<0>) { return ctypes_t<>{}; }

constexpr inline auto types(cint_t<1>) { return cconcat(numeric_types); }

constexpr inline auto types(cint_t<2>) { return cconcat(numeric_vector_types<vec>); }

constexpr inline auto types(cint_t<3>) { return cconcat(numeric_types, numeric_vector_types<vec>); }

} // namespace test_catogories
1123
/// Build a test vector whose element i equals start + step * i.
template <typename T, size_t N, size_t... indices>
vec<T, N> test_enumerate(vec_shape<T, N>, csizes_t<indices...>, double start = 0, double step = 1)
{
    return make_vector<T>(static_cast<T>(start + step * indices)...);
}
1129
/// Test a unary function against a scalar reference implementation.
/// For every (special value, type) pair in the selected category it checks
/// that fn's result type matches the element-wise rebind of reffn's result,
/// and that fn(x) == apply(reffn, x). A second pass runs the vector-only
/// categories over an enumerated (0,1,2,...) input.
/// @param isapplicable predicate to skip (type, value) combinations
template <int Cat, typename Fn, typename RefFn, typename IsApplicable = fn_return_constant<bool, true>>
void test_function1(cint_t<Cat> cat, Fn&& fn, RefFn&& reffn, IsApplicable&& isapplicable = IsApplicable{})
{
    testo::matrix(
        named("value") = special_values(), named("type") = test_catogories::types(cat),
        [&](special_value value, auto type) {
            using T = typename decltype(type)::type;
            if (isapplicable(ctype<T>, value))
            {
                const T x(value);
                // Result type must be T rebound to reffn's scalar result type.
                CHECK(is_same<decltype(fn(x)), typename compound_type_traits<T>::template rebind<decltype(
                                                   reffn(std::declval<subtype<T>>()))>>);
                const auto fn_x  = fn(x);
                const auto ref_x = apply(reffn, x);
                ::testo::active_test()->check(testo::deep_is_equal(ref_x, fn_x),
                                              as_string(fn_x, " == ", ref_x), "fn(x) == apply(reffn, x)");
                // CHECK(fn(x) == apply(reffn, x));
            }
        });

    // Cat & ~1 clears the scalar bit: this pass exercises vector types only.
    testo::matrix(named("type") = test_catogories::types(cint<Cat & ~1>), [&](auto type) {
        using T   = typename decltype(type)::type;
        const T x = test_enumerate(T::shape(), csizeseq<T::size()>, 0);
        CHECK(fn(x) == apply(reffn, x));
    });
}
1156
/// Test a binary function against a scalar reference implementation; binary
/// counterpart of test_function1. Iterates over all pairs of special values,
/// then runs vector-only categories with two enumerated inputs (ascending
/// from 0 and descending from 100).
/// @param isapplicable predicate to skip (type, value1, value2) combinations
template <int Cat, typename Fn, typename RefFn, typename IsApplicable = fn_return_constant<bool, true>>
void test_function2(cint_t<Cat> cat, Fn&& fn, RefFn&& reffn, IsApplicable&& isapplicable = IsApplicable{})
{
    testo::matrix(named("value1") = special_values(), //
                  named("value2") = special_values(), named("type") = test_catogories::types(cat),
                  [&](special_value value1, special_value value2, auto type) {
                      using T = typename decltype(type)::type;
                      const T x1(value1);
                      const T x2(value2);
                      if (isapplicable(ctype<T>, value1, value2))
                      {
                          // Result type must be T rebound to reffn's scalar result type.
                          CHECK(is_same<decltype(fn(x1, x2)),
                                        typename compound_type_traits<T>::template rebind<decltype(
                                            reffn(std::declval<subtype<T>>(), std::declval<subtype<T>>()))>>);
                          CHECK(fn(x1, x2) == apply(reffn, x1, x2));
                      }
                  });

    // Cat & ~1 clears the scalar bit: this pass exercises vector types only.
    testo::matrix(named("type") = test_catogories::types(cint<Cat & ~1>), [&](auto type) {
        using T    = typename decltype(type)::type;
        const T x1 = test_enumerate(T::shape(), csizeseq<T::size()>, 0, 1);
        const T x2 = test_enumerate(T::shape(), csizeseq<T::size()>, 100, -1);
        CHECK(fn(x1, x2) == apply(reffn, x1, x2));
    });
}
1182
1183 #endif
1184
1185 namespace internal
1186 {
// mask (bit) vector to mask (bit) vector with a different element type:
// reinterpret the source bits as integers, convert to the destination's
// integer type, then reinterpret the result as a mask again.
template <typename To, typename From, size_t N>
struct conversion<vec<bit<To>, N>, vec<bit<From>, N>>
{
    static vec<bit<To>, N> cast(const vec<bit<From>, N>& value)
    {
        return vec<To, N>::frombits(innercast<itype<To>>(vec<itype<From>, N>::frombits(value.asvec())))
            .asmask();
    }
};
1197
// vector to vector<vector>: convert the N1 source scalars to To, then build
// the N2 rows via a shuffle of the flattened data.
// NOTE(review): the shuffle index pattern `csizeseq<...> % csize<N2>` cycles
// through the first N2 flat indices — verify intended row layout against
// vec::from_flatten before relying on it.
template <typename To, typename From, size_t N1, size_t N2, size_t Ns1>
struct conversion<vec<vec<To, N1>, N2>, vec<From, Ns1>>
{
    static_assert(N1 == Ns1, "");
    static_assert(!is_compound<To>, "");
    static_assert(!is_compound<From>, "");

    static vec<vec<To, N1>, N2> cast(const vec<From, N1>& value)
    {
        return vec<vec<To, N1>, N2>::from_flatten(
            kfr::innercast<To>(value.flatten())
                .shuffle(csizeseq<N2 * vec<From, N1>::scalar_size()> % csize<N2>));
    }
};
1213
// vector to vector<vector<vector>>: three-level analogue of the conversion
// above; same flatten/convert/shuffle pattern with one more nesting level.
template <typename To, typename From, size_t N1, size_t N2, size_t N3, size_t Ns1>
struct conversion<vec<vec<vec<To, N1>, N2>, N3>, vec<From, Ns1>>
{
    static_assert(N1 == Ns1, "");
    static_assert(!is_compound<To>, "");
    static_assert(!is_compound<From>, "");

    static vec<vec<vec<To, N1>, N2>, N3> cast(const vec<From, N1>& value)
    {
        return vec<vec<vec<To, N1>, N2>, N3>::from_flatten(
            kfr::innercast<To>(value.flatten())
                .shuffle(csizeseq<N2 * vec<From, N1>::scalar_size()> % csize<N2>));
    }
};
1229
// vector<vector> to vector<vector> of identical shape: element-wise scalar
// conversion on the flattened data, no reshuffling needed.
template <typename To, typename From, size_t N1, size_t N2, size_t NN1, size_t NN2>
struct conversion<vec<vec<To, N1>, N2>, vec<vec<From, NN1>, NN2>>
{
    static_assert(N1 == NN1, "");
    static_assert(N2 == NN2, "");
    static_assert(!is_compound<To>, "");
    static_assert(!is_compound<From>, "");

    static vec<vec<To, N1>, N2> cast(const vec<vec<From, N1>, N2>& value)
    {
        return vec<vec<To, N1>, N2>::from_flatten(kfr::innercast<To>(value.flatten()));
    }
};
1244
// vector<vector<vector>> to vector<vector<vector>> of identical shape:
// element-wise scalar conversion on the flattened data.
template <typename To, typename From, size_t N1, size_t N2, size_t N3, size_t NN1, size_t NN2, size_t NN3>
struct conversion<vec<vec<vec<To, N1>, N2>, N3>, vec<vec<vec<From, NN1>, NN2>, NN3>>
{
    static_assert(N1 == NN1, "");
    static_assert(N2 == NN2, "");
    static_assert(N3 == NN3, "");
    static_assert(!is_compound<To>, "");
    static_assert(!is_compound<From>, "");

    static vec<vec<vec<To, N1>, N2>, N3> cast(const vec<vec<vec<From, N1>, N2>, N3>& value)
    {
        return vec<vec<vec<To, N1>, N2>, N3>::from_flatten(kfr::innercast<To>(value.flatten()));
    }
};
1260 } // namespace internal
1261
/// Matrix-like nested vector: N2 rows of vec<T, N1> (square by default).
template <typename T, size_t N1, size_t N2 = N1>
using mat = vec<vec<T, N1>, N2>;

// 2x2 matrix aliases for each element type
using u8x2x2  = vec<vec<u8, 2>, 2>;
using i8x2x2  = vec<vec<i8, 2>, 2>;
using u16x2x2 = vec<vec<u16, 2>, 2>;
using i16x2x2 = vec<vec<i16, 2>, 2>;
using u32x2x2 = vec<vec<u32, 2>, 2>;
using i32x2x2 = vec<vec<i32, 2>, 2>;
using u64x2x2 = vec<vec<u64, 2>, 2>;
using i64x2x2 = vec<vec<i64, 2>, 2>;
using f32x2x2 = vec<vec<f32, 2>, 2>;
using f64x2x2 = vec<vec<f64, 2>, 2>;

// 4x4 matrix aliases for each element type
using u8x4x4  = vec<vec<u8, 4>, 4>;
using i8x4x4  = vec<vec<i8, 4>, 4>;
using u16x4x4 = vec<vec<u16, 4>, 4>;
using i16x4x4 = vec<vec<i16, 4>, 4>;
using u32x4x4 = vec<vec<u32, 4>, 4>;
using i32x4x4 = vec<vec<i32, 4>, 4>;
using u64x4x4 = vec<vec<u64, 4>, 4>;
using i64x4x4 = vec<vec<i64, 4>, 4>;
using f32x4x4 = vec<vec<f32, 4>, 4>;
using f64x4x4 = vec<vec<f64, 4>, 4>;
1286
/// Metafunction wrapper: binds both lane counts so that `type<T>` yields the
/// nested vec<vec<T, N1>, N2>. Used by the common_type_impl specializations.
template <size_t N1, size_t N2>
struct vec_vec_template
{
    template <typename T>
    using type = vec<vec<T, N1>, N2>;
};
1293
1294 namespace internal
1295 {
1296
// Builds a (possibly nested) vector type from a list of dimensions:
// no dimensions -> T itself; one -> vec; two/three -> nested vecs.
template <typename T, size_t... Ns>
struct vecx_t;

template <typename T>
struct vecx_t<T>
{
    using type = T;
};

template <typename T, size_t N1>
struct vecx_t<T, N1>
{
    using type = vec<T, N1>;
};

template <typename T, size_t N1, size_t N2>
struct vecx_t<T, N1, N2>
{
    using type = vec<vec<T, N1>, N2>;
};

template <typename T, size_t N1, size_t N2, size_t N3>
struct vecx_t<T, N1, N2, N3>
{
    using type = vec<vec<vec<T, N1>, N2>, N3>;
};
1323 } // namespace internal
1324
/// Convenience alias: vecx<T, Ns...> is T nested in vecs, one level per Ns
/// dimension (up to three levels; see internal::vecx_t).
template <typename T, size_t... Ns>
using vecx = typename internal::vecx_t<T, Ns...>::type;
1327
1328 } // namespace CMT_ARCH_NAME
// common_type for vec/vec and vec/scalar pairs of equal lane count: delegate
// to the subtypes' common type, rebound into vec<_, N>.
template <typename T1, typename T2, size_t N>
struct common_type_impl<kfr::vec<T1, N>, kfr::vec<T2, N>>
    : common_type_from_subtypes<T1, T2, kfr::vec_template<N>::template type>
{
};
template <typename T1, typename T2, size_t N>
struct common_type_impl<kfr::vec<T1, N>, T2>
    : common_type_from_subtypes<T1, T2, kfr::vec_template<N>::template type>
{
};
template <typename T1, typename T2, size_t N>
struct common_type_impl<T1, kfr::vec<T2, N>>
    : common_type_from_subtypes<T1, T2, kfr::vec_template<N>::template type>
{
};
1344
// common_type for mixed vec / nested-vec pairs: the result is the nested
// vec<vec<common, N1>, N2> shape.
// NOTE(review): the first specialization overrides `type` explicitly while
// the mirrored one below relies solely on the base — presumably equivalent,
// but the asymmetry looks unintentional; confirm before unifying.
template <typename T1, typename T2, size_t N1, size_t N2>
struct common_type_impl<kfr::vec<T1, N1>, kfr::vec<kfr::vec<T2, N1>, N2>>
    : common_type_from_subtypes<T1, T2, kfr::vec_vec_template<N1, N2>::template type>
{
    using type = kfr::vec<kfr::vec<typename common_type_impl<T1, T2>::type, N1>, N2>;
};
template <typename T1, typename T2, size_t N1, size_t N2>
struct common_type_impl<kfr::vec<kfr::vec<T1, N1>, N2>, kfr::vec<T2, N1>>
    : common_type_from_subtypes<T1, T2, kfr::vec_vec_template<N1, N2>::template type>
{
};
1356
1357 } // namespace kfr
1358
1359 namespace cometa
1360 {
1361
/// cometa traits for the vec_shape tag type: reports width/depth and allows
/// rebinding the element type while keeping the lane count.
template <typename T, size_t N>
struct compound_type_traits<kfr::vec_shape<T, N>>
{
    constexpr static size_t width      = N;
    // total scalar count through all nesting levels
    constexpr static size_t deep_width = width * compound_type_traits<T>::width;
    using subtype                      = T;
    using deep_subtype                 = cometa::deep_subtype<T>;
    constexpr static bool is_scalar    = false;
    constexpr static size_t depth      = cometa::compound_type_traits<T>::depth + 1;

    template <typename U>
    using rebind = kfr::vec_shape<U, N>;
    // rebind the innermost scalar type, preserving all nesting
    template <typename U>
    using deep_rebind = kfr::vec_shape<typename compound_type_traits<subtype>::template deep_rebind<U>, N>;
};
1377
/// cometa traits for kfr::vec: width/depth metadata, rebinding, and
/// element access used by generic compound-type algorithms.
template <typename T, size_t N>
struct compound_type_traits<kfr::vec<T, N>>
{
    using subtype                      = T;
    using deep_subtype                 = cometa::deep_subtype<T>;
    constexpr static size_t width      = N;
    // total scalar count through all nesting levels
    constexpr static size_t deep_width = width * compound_type_traits<T>::width;
    constexpr static bool is_scalar    = false;
    constexpr static size_t depth      = cometa::compound_type_traits<T>::depth + 1;
    template <typename U>
    using rebind = kfr::vec<U, N>;
    // rebind the innermost scalar type, preserving all nesting
    template <typename U>
    using deep_rebind = kfr::vec<typename compound_type_traits<subtype>::template deep_rebind<U>, N>;

    /// Element accessor used by generic compound-type code.
    KFR_MEM_INTRINSIC static constexpr subtype at(const kfr::vec<T, N>& value, size_t index)
    {
        return value[index];
    }
};
1397
namespace details
{
// Floating-point promotion for vectors: promote the element type, keep N.
template <typename T, size_t N>
struct flt_type_impl<kfr::vec<T, N>>
{
    using type = kfr::vec<typename flt_type_impl<T>::type, N>;
};
} // namespace details
1406 } // namespace cometa
1407
1408 CMT_PRAGMA_GNU(GCC diagnostic pop)
1409 CMT_PRAGMA_MSVC(warning(pop))
1410
1411 namespace std
1412 {
1413
/// Structured-binding support: a vec<T, N> decomposes into N elements.
template <typename T, size_t N>
struct tuple_size<kfr::vec<T, N>> : public integral_constant<size_t, N>
{
};
1418
/// Structured-binding support: every decomposed element of vec<T, N> has
/// type T, regardless of index I.
template <size_t I, class T, size_t N>
struct tuple_element<I, kfr::vec<T, N>>
{
    using type = T;
};
1424
1425 } // namespace std
1426