1 /** @addtogroup types
2  *  @{
3  */
4 /*
5   Copyright (C) 2016 D Levin (https://www.kfrlib.com)
6   This file is part of KFR
7 
8   KFR is free software: you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation, either version 2 of the License, or
11   (at your option) any later version.
12 
13   KFR is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17 
18   You should have received a copy of the GNU General Public License
19   along with KFR.
20 
21   If GPL is not suitable for your project, you must purchase a commercial license to use KFR.
22   Buying a commercial license is mandatory as soon as you develop commercial activities without
23   disclosing the source code of your own applications.
24   See https://www.kfrlib.com for details.
25  */
26 #pragma once
27 
28 #include "../version.hpp"
29 #include "constants.hpp"
30 #include "impl/backend.hpp"
31 
/**
 *  @brief Internal macro: declares the function object `fn::FN`
 *
 *  The generated call operator perfectly forwards its arguments to the free function `::kfr::FN`
 *  and has exactly the return type of that call.
 */
#define KFR_FN(FN)                                                                                           \
    namespace fn                                                                                             \
    {                                                                                                        \
    struct FN                                                                                                \
    {                                                                                                        \
        template <typename... Args>                                                                          \
        CMT_INLINE_MEMBER auto operator()(Args&&... args) const                                              \
            -> decltype(::kfr::FN(std::declval<Args>()...))                                                  \
        {                                                                                                    \
            return ::kfr::FN(std::forward<Args>(args)...);                                                   \
        }                                                                                                    \
    };                                                                                                       \
    }
47 
/**
 *  @brief Internal macro: declares the function object `fn::FN` for an intrinsic
 *
 *  The generated call operator perfectly forwards its arguments to `::kfr::intrinsics::FN`
 *  and has exactly the return type of that call.
 */
#define KFR_I_FN(FN)                                                                                         \
    namespace fn                                                                                             \
    {                                                                                                        \
    struct FN                                                                                                \
    {                                                                                                        \
        template <typename... Args>                                                                          \
        CMT_INLINE_MEMBER auto operator()(Args&&... args) const                                              \
            -> decltype(::kfr::intrinsics::FN(std::declval<Args>()...))                                      \
        {                                                                                                    \
            return ::kfr::intrinsics::FN(std::forward<Args>(args)...);                                       \
        }                                                                                                    \
    };                                                                                                       \
    }
64 
/**
 *  @brief Internal macro: declares the function object `fn::FN` that calls `FULLFN`
 *
 *  `FULLFN` is used verbatim as the callee, so it may be any (qualified) callable name.
 *  Arguments are perfectly forwarded and the return type is that of the call.
 */
#define KFR_I_FN_FULL(FN, FULLFN)                                                                            \
    namespace fn                                                                                             \
    {                                                                                                        \
    struct FN                                                                                                \
    {                                                                                                        \
        template <typename... Args>                                                                          \
        CMT_INLINE_MEMBER auto operator()(Args&&... args) const                                              \
            -> decltype(FULLFN(std::declval<Args>()...))                                                     \
        {                                                                                                    \
            return FULLFN(std::forward<Args>(args)...);                                                      \
        }                                                                                                    \
    };                                                                                                       \
    }
77 
78 CMT_PRAGMA_GNU(GCC diagnostic push)
79 CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wpragmas")
80 CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wfloat-equal")
81 CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wc++98-compat-local-type-template-args")
82 CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wshadow")
83 CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wpacked")
84 
85 CMT_PRAGMA_MSVC(warning(push))
86 CMT_PRAGMA_MSVC(warning(disable : 4814))
87 
88 namespace kfr
89 {
90 
91 inline namespace CMT_ARCH_NAME
92 {
93 
94 template <typename T, size_t N>
95 struct alignas(next_poweroftwo(sizeof(T)) * next_poweroftwo(N)) portable_vec
96 {
shapekfr::CMT_ARCH_NAME::portable_vec97     static constexpr vec_shape<T, N> shape() CMT_NOEXCEPT { return {}; }
98 
99     static_assert(N > 0 && N <= 1024, "Invalid vector size");
100 
101     static_assert(is_simd_type<T> || !compound_type_traits<T>::is_scalar, "Invalid vector type");
102 
103     // type and size
104     using value_type = T;
105 
sizekfr::CMT_ARCH_NAME::portable_vec106     constexpr static size_t size() CMT_NOEXCEPT { return N; }
107 
108     T elem[N];
109 };
110 
111 template <typename T, size_t N>
112 struct vec;
113 
114 template <typename T, size_t N>
115 struct vec_halves
116 {
117     vec<T, prev_poweroftwo(N - 1)> low;
118     vec<T, N - prev_poweroftwo(N - 1)> high;
119 };
120 
121 template <typename T>
122 struct vec_halves<T, 1>
123 {
124     T val;
125 };
126 
127 namespace internal
128 {
129 
130 // scalar to scalar
131 template <typename To, typename From>
132 struct conversion
133 {
134     static_assert(is_convertible<From, To>, "");
135 
castkfr::CMT_ARCH_NAME::internal::conversion136     static To cast(const From& value) { return value; }
137 };
138 
139 template <typename T>
140 struct compoundcast
141 {
to_flatkfr::CMT_ARCH_NAME::internal::compoundcast142     static vec<T, 1> to_flat(const T& x) { return vec<T, 1>(x); }
143 
from_flatkfr::CMT_ARCH_NAME::internal::compoundcast144     static T from_flat(const vec<T, 1>& x) { return x.front(); }
145 };
146 
147 template <typename T, size_t N>
148 struct compoundcast<vec<T, N>>
149 {
to_flatkfr::CMT_ARCH_NAME::internal::compoundcast150     static const vec<T, N>& to_flat(const vec<T, N>& x) { return x; }
151 
from_flatkfr::CMT_ARCH_NAME::internal::compoundcast152     static const vec<T, N>& from_flat(const vec<T, N>& x) { return x; }
153 };
154 
155 template <typename T, size_t N1, size_t N2>
156 struct compoundcast<vec<vec<T, N1>, N2>>
157 {
to_flatkfr::CMT_ARCH_NAME::internal::compoundcast158     static vec<T, N1 * N2> to_flat(const vec<vec<T, N1>, N2>& x) { return x.v; }
159 
from_flatkfr::CMT_ARCH_NAME::internal::compoundcast160     static vec<vec<T, N1>, N2> from_flat(const vec<T, N1 * N2>& x) { return x.v; }
161 };
162 
163 template <typename T, size_t N1, size_t N2, size_t N3>
164 struct compoundcast<vec<vec<vec<T, N1>, N2>, N3>>
165 {
to_flatkfr::CMT_ARCH_NAME::internal::compoundcast166     static vec<T, N1 * N2 * N3> to_flat(const vec<vec<vec<T, N1>, N2>, N3>& x) { return x.v; }
167 
from_flatkfr::CMT_ARCH_NAME::internal::compoundcast168     static vec<vec<vec<T, N1>, N2>, N3> from_flat(const vec<T, N1 * N2 * N3>& x) { return x.v; }
169 };
170 
171 template <typename T, size_t N_>
172 inline constexpr size_t vec_alignment =
173     const_max(alignof(intrinsics::simd<typename compound_type_traits<T>::deep_subtype,
174                                        const_max(size_t(1), N_) * compound_type_traits<T>::deep_width>),
175               const_min(size_t(platform<>::native_vector_alignment),
176                         next_poweroftwo(sizeof(typename compound_type_traits<T>::deep_subtype) *
177                                         const_max(size_t(1), N_) * compound_type_traits<T>::deep_width)));
178 
179 } // namespace internal
180 
181 template <typename T, size_t N_>
182 struct alignas(internal::vec_alignment<T, N_>) vec
183 {
184     static_assert(N_ > 0, "vec<T, N>: vector width cannot be zero");
185 
186     constexpr static inline size_t N = const_max(size_t(1), N_);
shapekfr::CMT_ARCH_NAME::vec187     static constexpr vec_shape<T, N> shape() CMT_NOEXCEPT { return {}; }
188 
189     // type and size
190     using value_type = T;
191 
sizekfr::CMT_ARCH_NAME::vec192     constexpr static size_t size() CMT_NOEXCEPT { return N; }
193 
194     using ST          = typename compound_type_traits<T>::deep_subtype;
195     using scalar_type = ST;
196 
197     constexpr static inline size_t SW = compound_type_traits<T>::deep_width;
198     constexpr static inline size_t SN = N * SW;
199 
scalar_sizekfr::CMT_ARCH_NAME::vec200     constexpr static size_t scalar_size() CMT_NOEXCEPT { return SN; }
201 
202     static_assert(is_simd_type<scalar_type>, "Invalid vector type");
203 
204     static_assert(scalar_size() > 0 && scalar_size() <= 1024, "Invalid vector size");
205 
206     using mask_t = mask<T, N>;
207 
208     using simd_type    = intrinsics::simd<ST, SN>;
209     using uvalue_type  = utype<T>;
210     using iuvalue_type = conditional<is_i_class<T>, T, uvalue_type>;
211 
212     using uscalar_type  = utype<ST>;
213     using iuscalar_type = conditional<is_i_class<ST>, ST, uscalar_type>;
214 
215     using usimd_type  = intrinsics::simd<uscalar_type, SN>;
216     using iusimd_type = intrinsics::simd<iuscalar_type, SN>;
217 
218     // constructors and assignment
219     // from SIMD
veckfr::CMT_ARCH_NAME::vec220     KFR_MEM_INTRINSIC vec(const simd_type& simd) CMT_NOEXCEPT : v(simd) {}
221     // default
veckfr::CMT_ARCH_NAME::vec222     KFR_MEM_INTRINSIC constexpr vec() CMT_NOEXCEPT {}
223     // copy
224     KFR_MEM_INTRINSIC constexpr vec(const vec& value) CMT_NOEXCEPT = default;
225     // move
226     KFR_MEM_INTRINSIC constexpr vec(vec&&) CMT_NOEXCEPT = default;
227     // assignment
228     KFR_MEM_INTRINSIC constexpr vec& operator=(const vec&) CMT_NOEXCEPT = default;
229 
230     // from scalar
231     template <typename U, KFR_ENABLE_IF(is_convertible<U, value_type>&& compound_type_traits<T>::is_scalar)>
veckfr::CMT_ARCH_NAME::vec232     KFR_MEM_INTRINSIC vec(const U& s) CMT_NOEXCEPT
233         : v(intrinsics::simd_broadcast(intrinsics::simd_t<unwrap_bit<ST>, SN>{},
234                                        static_cast<unwrap_bit<ST>>(static_cast<ST>(s))))
235     {
236     }
237 
238     template <typename U, KFR_ENABLE_IF(is_convertible<U, value_type> && !compound_type_traits<T>::is_scalar)>
veckfr::CMT_ARCH_NAME::vec239     KFR_MEM_INTRINSIC vec(const U& s) CMT_NOEXCEPT
240         : v(intrinsics::simd_shuffle(intrinsics::simd_t<unwrap_bit<ST>, SW>{},
241                                      internal::compoundcast<T>::to_flat(static_cast<T>(s)).v,
242                                      csizeseq<SN> % csize<SW>, overload_auto))
243     {
244     }
245 
246     // from list
247     template <typename... Us, KFR_ENABLE_IF(sizeof...(Us) <= 1022 && compound_type_traits<T>::is_scalar)>
veckfr::CMT_ARCH_NAME::vec248     KFR_MEM_INTRINSIC vec(const value_type& s0, const value_type& s1, const Us&... rest) CMT_NOEXCEPT
249         : v(intrinsics::simd_make(cometa::ctype<T>, s0, s1, static_cast<value_type>(rest)...))
250     {
251     }
252 
253     template <typename... Us, KFR_ENABLE_IF(sizeof...(Us) <= 1022 && !compound_type_traits<T>::is_scalar)>
veckfr::CMT_ARCH_NAME::vec254     KFR_MEM_INTRINSIC vec(const value_type& s0, const value_type& s1, const Us&... rest) CMT_NOEXCEPT
255         : v(intrinsics::simd_concat<ST, size_t(SW), size_t(SW), just_value<Us, size_t>(SW)...>(
256               internal::compoundcast<T>::to_flat(s0).v, internal::compoundcast<T>::to_flat(s1).v,
257               internal::compoundcast<T>::to_flat(static_cast<T>(rest)).v...))
258     {
259     }
260 
261     // from vector of another type
262     template <typename U, KFR_ENABLE_IF(is_convertible<U, value_type> &&
263                                         (compound_type_traits<T>::is_scalar && !is_bit<U>))>
veckfr::CMT_ARCH_NAME::vec264     KFR_MEM_INTRINSIC vec(const vec<U, N>& x) CMT_NOEXCEPT
265         : v(intrinsics::simd_convert(
266               intrinsics::simd_cvt_t<unwrap_bit<ST>, unwrap_bit<deep_subtype<U>>, SN>{}, x.v))
267     {
268     }
269 
270     template <typename U, KFR_ENABLE_IF(is_convertible<U, value_type> &&
271                                         !(compound_type_traits<T>::is_scalar && !is_bit<U>))>
veckfr::CMT_ARCH_NAME::vec272     KFR_MEM_INTRINSIC vec(const vec<U, N>& x) CMT_NOEXCEPT
273         : v(internal::conversion<vec<T, N>, vec<U, N>>::cast(x).v)
274     {
275     }
276 
277     // from list of vectors
278     template <size_t... Ns, typename = enable_if<csum<size_t, Ns...>() == N>>
279     KFR_MEM_INTRINSIC vec(const vec<T, Ns>&... vs) CMT_NOEXCEPT
280         : v(intrinsics::simd_concat<ST, (SW * Ns)...>(vs.v...))
281     {
282     }
283 
veckfr::CMT_ARCH_NAME::vec284     KFR_MEM_INTRINSIC vec(const portable_vec<T, N>& p) CMT_NOEXCEPT : vec(bitcast_anything<vec>(p)) {}
285 
operator portable_vec<T,N>kfr::CMT_ARCH_NAME::vec286     KFR_MEM_INTRINSIC operator portable_vec<T, N>() const CMT_NOEXCEPT
287     {
288         return bitcast_anything<portable_vec<T, N>>(*this);
289     }
290 
veckfr::CMT_ARCH_NAME::vec291     KFR_MEM_INTRINSIC vec(czeros_t) CMT_NOEXCEPT : v(intrinsics::simd_zeros<ST, SN>()) {}
292 
veckfr::CMT_ARCH_NAME::vec293     KFR_MEM_INTRINSIC vec(cones_t) CMT_NOEXCEPT : v(intrinsics::simd_allones<ST, SN>()) {}
294 
295     template <typename U, size_t M, KFR_ENABLE_IF(sizeof(U) * M == sizeof(T) * N)>
frombitskfr::CMT_ARCH_NAME::vec296     KFR_MEM_INTRINSIC static vec frombits(const vec<U, M>& v) CMT_NOEXCEPT
297     {
298         return intrinsics::simd_bitcast(
299             intrinsics::simd_cvt_t<ST, typename vec<U, M>::scalar_type, vec<U, M>::scalar_size()>{}, v.v);
300     }
301 
302     // shuffle
303     template <size_t... indices>
shufflekfr::CMT_ARCH_NAME::vec304     KFR_MEM_INTRINSIC vec<value_type, sizeof...(indices)> shuffle(csizes_t<indices...> i) const CMT_NOEXCEPT
305     {
306         return vec<value_type, sizeof...(indices)>(intrinsics::simd_shuffle(
307             intrinsics::simd_t<unwrap_bit<ST>, SN>{}, v, scale<SW>(i), overload_auto));
308     }
309 
310     template <size_t... indices>
shufflekfr::CMT_ARCH_NAME::vec311     KFR_MEM_INTRINSIC vec<value_type, sizeof...(indices)> shuffle(const vec& y,
312                                                                   csizes_t<indices...> i) const CMT_NOEXCEPT
313     {
314         return vec<value_type, sizeof...(indices)>(
315             intrinsics::simd_shuffle(intrinsics::simd2_t<ST, SN, SN>{}, v, y.v, scale<SW>(i), overload_auto));
316     }
317 
318     // element access
319     struct element;
320 
operator []kfr::CMT_ARCH_NAME::vec321     KFR_MEM_INTRINSIC constexpr value_type operator[](size_t index) const& CMT_NOEXCEPT { return get(index); }
322 
operator []kfr::CMT_ARCH_NAME::vec323     KFR_MEM_INTRINSIC constexpr value_type operator[](size_t index) && CMT_NOEXCEPT { return get(index); }
324 
operator []kfr::CMT_ARCH_NAME::vec325     KFR_MEM_INTRINSIC constexpr element operator[](size_t index) & CMT_NOEXCEPT { return { *this, index }; }
326 
frontkfr::CMT_ARCH_NAME::vec327     KFR_MEM_INTRINSIC value_type front() const CMT_NOEXCEPT { return get(csize<0>); }
328 
backkfr::CMT_ARCH_NAME::vec329     KFR_MEM_INTRINSIC value_type back() const CMT_NOEXCEPT { return get(csize<N - 1>); }
330 
331     template <int dummy = 0, KFR_ENABLE_IF(dummy == 0 && compound_type_traits<T>::is_scalar)>
getkfr::CMT_ARCH_NAME::vec332     KFR_MEM_INTRINSIC constexpr value_type get(size_t index) const CMT_NOEXCEPT
333     {
334         return intrinsics::simd_get_element<T, N>(v, index);
335     }
336 
337     template <int dummy = 0, typename = void,
338               KFR_ENABLE_IF(dummy == 0 && !compound_type_traits<T>::is_scalar)>
getkfr::CMT_ARCH_NAME::vec339     KFR_MEM_INTRINSIC constexpr value_type get(size_t index) const CMT_NOEXCEPT
340     {
341         union {
342             simd_type v;
343             T s[N];
344         } u{ this->v };
345         return u.s[index];
346     }
347 
348     template <size_t index, KFR_ENABLE_IF(index < 1024 && compound_type_traits<T>::is_scalar)>
getkfr::CMT_ARCH_NAME::vec349     KFR_MEM_INTRINSIC constexpr value_type get(csize_t<index>) const CMT_NOEXCEPT
350     {
351         return intrinsics::simd_get_element<T, N>(v, csize<index>);
352     }
353 
354     template <size_t index, typename = void,
355               KFR_ENABLE_IF(index < 1024 && !compound_type_traits<T>::is_scalar)>
getkfr::CMT_ARCH_NAME::vec356     KFR_MEM_INTRINSIC constexpr value_type get(csize_t<index>) const CMT_NOEXCEPT
357     {
358         return internal::compoundcast<T>::from_flat(intrinsics::simd_shuffle(
359             intrinsics::simd_t<unwrap_bit<ST>, SN>{}, v, csizeseq<SW, SW * index>, overload_auto));
360     }
361 
362     template <size_t index>
getkfr::CMT_ARCH_NAME::vec363     KFR_MEM_INTRINSIC constexpr value_type get() const CMT_NOEXCEPT
364     {
365         return this->get(csize_t<index>{});
366     }
367 
368     template <int dummy = 0, KFR_ENABLE_IF(dummy == 0 && compound_type_traits<T>::is_scalar)>
setkfr::CMT_ARCH_NAME::vec369     KFR_MEM_INTRINSIC constexpr void set(size_t index, const value_type& s) CMT_NOEXCEPT
370     {
371         v = intrinsics::simd_set_element<T, N>(v, index, s);
372     }
373 
374     template <int dummy = 0, KFR_ENABLE_IF(dummy == 0 && !compound_type_traits<T>::is_scalar)>
setkfr::CMT_ARCH_NAME::vec375     KFR_MEM_INTRINSIC constexpr void set(size_t index, const value_type& s) CMT_NOEXCEPT
376     {
377         union {
378             simd_type v;
379             T s[N];
380         } u{ this->v };
381         u.s[index] = s;
382         this->v    = u.v;
383     }
384 
385     template <size_t index, KFR_ENABLE_IF(index < 1024 && compound_type_traits<T>::is_scalar)>
setkfr::CMT_ARCH_NAME::vec386     KFR_MEM_INTRINSIC constexpr void set(csize_t<index>, const value_type& s) CMT_NOEXCEPT
387     {
388         v = intrinsics::simd_set_element<T, N>(v, csize<index>, s);
389     }
390 
391     template <size_t index, typename = void,
392               KFR_ENABLE_IF(index < 1024 && !compound_type_traits<T>::is_scalar)>
setkfr::CMT_ARCH_NAME::vec393     KFR_MEM_INTRINSIC constexpr void set(csize_t<index>, const value_type& s) CMT_NOEXCEPT
394     {
395         this->s[index] = s;
396     }
397 
398     struct element
399     {
operator value_typekfr::CMT_ARCH_NAME::vec::element400         constexpr operator value_type() const CMT_NOEXCEPT { return v.get(index); }
401 
operator =kfr::CMT_ARCH_NAME::vec::element402         KFR_MEM_INTRINSIC element& operator=(const value_type& s) CMT_NOEXCEPT
403         {
404             v.set(index, s);
405             return *this;
406         }
407 
operator =kfr::CMT_ARCH_NAME::vec::element408         KFR_MEM_INTRINSIC element& operator=(const element& s) CMT_NOEXCEPT
409         {
410             v.set(index, static_cast<value_type>(s));
411             return *this;
412         }
413 
414         template <typename U, size_t M>
operator =kfr::CMT_ARCH_NAME::vec::element415         KFR_MEM_INTRINSIC element& operator=(const typename vec<U, M>::element& s) CMT_NOEXCEPT
416         {
417             v.set(index, static_cast<value_type>(static_cast<U>(s)));
418             return *this;
419         }
420 
421         vec& v;
422         size_t index;
423     };
424 
425     // read/write
426     template <bool aligned = false>
427     KFR_MEM_INTRINSIC explicit constexpr vec(const value_type* src,
428                                              cbool_t<aligned> = cbool_t<aligned>()) CMT_NOEXCEPT;
429 
430     template <bool aligned = false>
431     KFR_MEM_INTRINSIC const vec& write(value_type* dest,
432                                        cbool_t<aligned> = cbool_t<aligned>()) const CMT_NOEXCEPT;
433 
flattenkfr::CMT_ARCH_NAME::vec434     KFR_MEM_INTRINSIC vec<ST, SN> flatten() const CMT_NOEXCEPT { return v; }
435 
from_flattenkfr::CMT_ARCH_NAME::vec436     KFR_MEM_INTRINSIC static vec from_flatten(const vec<ST, SN>& x) { return vec(x.v); }
437 
asmaskkfr::CMT_ARCH_NAME::vec438     KFR_MEM_INTRINSIC constexpr mask_t asmask() const CMT_NOEXCEPT { return mask_t(v); }
439 
asveckfr::CMT_ARCH_NAME::vec440     KFR_MEM_INTRINSIC constexpr vec<unwrap_bit<T>, N> asvec() const CMT_NOEXCEPT
441     {
442         return vec<unwrap_bit<T>, N>(v);
443     }
444 
445     constexpr static size_t simd_element_size  = const_min(vector_width<T>, N);
446     constexpr static size_t simd_element_count = N / simd_element_size;
447     using simd_element_type                    = simd<ST, simd_element_size>;
448 
449 public:
450     union {
451         simd_type v;
452         vec_halves<T, N> h;
453         // simd_element_type w[simd_element_count];
454         // T s[N];
455     };
456 };
457 
458 template <typename T>
459 constexpr inline bool is_vec_element = is_simd_type<deep_subtype<remove_const<T>>>;
460 
461 template <typename T, size_t N, size_t... indices>
shufflevector(const vec<T,N> & x,csizes_t<indices...> i)462 KFR_INTRINSIC vec<T, sizeof...(indices)> shufflevector(const vec<T, N>& x,
463                                                        csizes_t<indices...> i) CMT_NOEXCEPT
464 {
465     return intrinsics::simd_shuffle(intrinsics::simd_t<unwrap_bit<T>, N>{}, x.v, i, overload_auto);
466 }
467 
468 template <typename T, size_t N, size_t... indices>
shufflevectors(const vec<T,N> & x,const vec<T,N> & y,csizes_t<indices...> i)469 KFR_INTRINSIC vec<T, sizeof...(indices)> shufflevectors(const vec<T, N>& x, const vec<T, N>& y,
470                                                         csizes_t<indices...> i) CMT_NOEXCEPT
471 {
472     return intrinsics::simd_shuffle(intrinsics::simd2_t<T, N, N>{}, x.v, y.v, i, overload_auto);
473 }
474 
475 namespace internal
476 {
477 template <typename T>
478 struct is_vec_impl : std::false_type
479 {
480 };
481 
482 template <typename T, size_t N>
483 struct is_vec_impl<vec<T, N>> : std::true_type
484 {
485 };
486 } // namespace internal
487 
488 template <typename T>
489 constexpr inline bool is_vec = internal::is_vec_impl<T>::value;
490 
491 CMT_PRAGMA_GNU(GCC diagnostic push)
492 CMT_PRAGMA_GNU(GCC diagnostic ignored "-Wold-style-cast")
493 
494 template <size_t N, typename T>
broadcast(T x)495 constexpr KFR_INTRINSIC vec<T, N> broadcast(T x)
496 {
497     return x;
498 }
499 
500 CMT_PRAGMA_GNU(GCC diagnostic pop)
501 
502 namespace internal
503 {
504 
505 template <typename To, typename From, size_t N, typename Tsub = deep_subtype<To>,
506           size_t Nout = (N * compound_type_traits<To>::deep_width)>
builtin_convertvector(const vec<From,N> & value)507 constexpr KFR_INTRINSIC vec<To, N> builtin_convertvector(const vec<From, N>& value) CMT_NOEXCEPT
508 {
509     return vec<To, N>(value);
510 }
511 
512 // vector to vector
513 template <typename To, typename From, size_t N, size_t N2>
514 struct conversion<vec<To, N>, vec<From, N2>>
515 {
516     static_assert(N == N2, "");
517     static_assert(!is_compound<To>, "");
518     static_assert(!is_compound<From>, "");
519 
castkfr::CMT_ARCH_NAME::internal::conversion520     static vec<To, N> cast(const vec<From, N>& value) { return vec<To, N>(value); }
521 };
522 
523 // scalar to vector
524 template <typename To, typename From, size_t N>
525 struct conversion<vec<To, N>, From>
526 {
527     static_assert(is_convertible<From, To>, "");
528 
castkfr::CMT_ARCH_NAME::internal::conversion529     static vec<To, N> cast(const From& value) { return broadcast<N>(static_cast<To>(value)); }
530 };
531 } // namespace internal
532 
533 template <typename T>
size_of()534 constexpr size_t size_of() CMT_NOEXCEPT
535 {
536     return sizeof(deep_subtype<T>) * compound_type_traits<T>::deep_width;
537 }
538 
539 template <typename From, size_t N, typename Tsub = deep_subtype<From>,
540           size_t Nout = N* size_of<From>() / size_of<Tsub>()>
flatten(const vec<From,N> & x)541 constexpr KFR_INTRINSIC vec<Tsub, Nout> flatten(const vec<From, N>& x) CMT_NOEXCEPT
542 {
543     return x.flatten();
544 }
545 
546 template <typename To, typename From,
547           typename Tout = typename compound_type_traits<From>::template deep_rebind<To>>
cast(const From & value)548 constexpr KFR_INTRINSIC Tout cast(const From& value) CMT_NOEXCEPT
549 {
550     return static_cast<Tout>(value);
551 }
552 
553 template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
cast(const vec<Tin,N> & value)554 constexpr KFR_INTRINSIC vec<Tout, N> cast(const vec<Tin, N>& value) CMT_NOEXCEPT
555 {
556     return vec<Tout, N>(value);
557 }
558 
559 template <typename Tout, typename Tin, size_t N1, size_t N2, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
cast(const vec<vec<Tin,N1>,N2> & value)560 constexpr KFR_INTRINSIC vec<vec<Tout, N1>, N2> cast(const vec<vec<Tin, N1>, N2>& value) CMT_NOEXCEPT
561 {
562     return vec<vec<Tout, N1>, N2>(value);
563 }
564 
565 template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
cast(const vec<vec<vec<Tin,N1>,N2>,N3> & value)566 constexpr KFR_INTRINSIC vec<vec<vec<Tout, N1>, N2>, N3> cast(const vec<vec<vec<Tin, N1>, N2>, N3>& value)
567     CMT_NOEXCEPT
568 {
569     return vec<vec<vec<Tout, N1>, N2>, N3>(value);
570 }
571 
572 template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(is_same<Tin, Tout>)>
cast(const vec<Tin,N> & value)573 constexpr KFR_INTRINSIC const vec<Tin, N>& cast(const vec<Tin, N>& value) CMT_NOEXCEPT
574 {
575     return value;
576 }
577 
578 template <typename Tout, typename Tin, size_t N1, size_t N2, KFR_ENABLE_IF(is_same<Tin, Tout>)>
cast(const vec<vec<Tin,N1>,N2> & value)579 constexpr KFR_INTRINSIC const vec<vec<Tin, N1>, N2>& cast(const vec<vec<Tin, N1>, N2>& value) CMT_NOEXCEPT
580 {
581     return value;
582 }
583 
584 template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(is_same<Tin, Tout>)>
cast(const vec<vec<vec<Tin,N1>,N2>,N3> & value)585 constexpr KFR_INTRINSIC const vec<vec<vec<Tin, N1>, N2>, N3>& cast(
586     const vec<vec<vec<Tin, N1>, N2>, N3>& value) CMT_NOEXCEPT
587 {
588     return value;
589 }
590 
591 //
592 
593 template <typename To, typename From,
594           typename Tout = typename compound_type_traits<From>::template deep_rebind<To>>
innercast(const From & value)595 constexpr KFR_INTRINSIC Tout innercast(const From& value) CMT_NOEXCEPT
596 {
597     return static_cast<Tout>(value);
598 }
599 
600 template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
innercast(const vec<Tin,N> & value)601 constexpr KFR_INTRINSIC vec<Tout, N> innercast(const vec<Tin, N>& value) CMT_NOEXCEPT
602 {
603     return vec<Tout, N>(value);
604 }
605 
606 template <typename Tout, typename Tin, size_t N1, size_t N2, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
innercast(const vec<vec<Tin,N1>,N2> & value)607 constexpr KFR_INTRINSIC vec<vec<Tout, N1>, N2> innercast(const vec<vec<Tin, N1>, N2>& value) CMT_NOEXCEPT
608 {
609     return vec<vec<Tout, N1>, N2>(value);
610 }
611 
612 template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
innercast(const vec<vec<vec<Tin,N1>,N2>,N3> & value)613 constexpr KFR_INTRINSIC vec<vec<vec<Tout, N1>, N2>, N3> innercast(const vec<vec<vec<Tin, N1>, N2>, N3>& value)
614     CMT_NOEXCEPT
615 {
616     return vec<vec<vec<Tout, N1>, N2>, N3>(value);
617 }
618 
619 template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(is_same<Tin, Tout>)>
innercast(const vec<Tin,N> & value)620 constexpr KFR_INTRINSIC const vec<Tin, N>& innercast(const vec<Tin, N>& value) CMT_NOEXCEPT
621 {
622     return value;
623 }
624 
625 template <typename Tout, typename Tin, size_t N1, size_t N2, KFR_ENABLE_IF(is_same<Tin, Tout>)>
innercast(const vec<vec<Tin,N1>,N2> & value)626 constexpr KFR_INTRINSIC const vec<vec<Tin, N1>, N2>& innercast(const vec<vec<Tin, N1>, N2>& value)
627     CMT_NOEXCEPT
628 {
629     return value;
630 }
631 
632 template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(is_same<Tin, Tout>)>
innercast(const vec<vec<vec<Tin,N1>,N2>,N3> & value)633 constexpr KFR_INTRINSIC const vec<vec<vec<Tin, N1>, N2>, N3>& innercast(
634     const vec<vec<vec<Tin, N1>, N2>, N3>& value) CMT_NOEXCEPT
635 {
636     return value;
637 }
638 
639 //
640 
641 template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
elemcast(const vec<Tin,N> & value)642 constexpr KFR_INTRINSIC vec<Tout, N> elemcast(const vec<Tin, N>& value) CMT_NOEXCEPT
643 {
644     return vec<Tout, N>(value);
645 }
646 
647 template <typename Tout, typename Tin, size_t N, KFR_ENABLE_IF(is_same<Tin, Tout>)>
elemcast(const vec<Tin,N> & value)648 constexpr KFR_INTRINSIC const vec<Tin, N>& elemcast(const vec<Tin, N>& value) CMT_NOEXCEPT
649 {
650     return value;
651 }
652 
653 template <typename Tout, typename Tin, size_t N1, size_t N2, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
elemcast(const vec<vec<Tin,N1>,N2> & value)654 constexpr KFR_INTRINSIC vec<Tout, N2> elemcast(const vec<vec<Tin, N1>, N2>& value) CMT_NOEXCEPT
655 {
656     return vec<Tout, N2>(value);
657 }
658 
659 template <typename Tout, typename Tin, size_t N1, size_t N2, size_t N3, KFR_ENABLE_IF(!is_same<Tin, Tout>)>
elemcast(const vec<vec<vec<Tin,N1>,N2>,N3> & value)660 constexpr KFR_INTRINSIC vec<Tout, N3> elemcast(const vec<vec<vec<Tin, N1>, N2>, N3>& value) CMT_NOEXCEPT
661 {
662     return vec<Tout, N3>(value);
663 }
664 
665 template <typename To, typename From>
bitcast(const From & value)666 CMT_GNU_CONSTEXPR KFR_INTRINSIC To bitcast(const From& value) CMT_NOEXCEPT
667 {
668     static_assert(sizeof(From) == sizeof(To), "bitcast: Incompatible types");
669     union {
670         From from;
671         To to;
672     } u{ value };
673     return u.to;
674 }
675 
676 template <typename To, typename From, size_t N, size_t Nout = (N * size_of<From>() / size_of<To>())>
bitcast(const vec<From,N> & value)677 CMT_GNU_CONSTEXPR KFR_INTRINSIC vec<To, Nout> bitcast(const vec<From, N>& value) CMT_NOEXCEPT
678 {
679     return vec<To, Nout>::frombits(value);
680 }
681 
682 template <typename From, typename To = utype<From>, KFR_ENABLE_IF(!is_compound<From>)>
ubitcast(const From & value)683 constexpr KFR_INTRINSIC To ubitcast(const From& value) CMT_NOEXCEPT
684 {
685     return bitcast<To>(value);
686 }
687 
688 template <typename From, typename To = itype<From>, KFR_ENABLE_IF(!is_compound<From>)>
ibitcast(const From & value)689 constexpr KFR_INTRINSIC To ibitcast(const From& value) CMT_NOEXCEPT
690 {
691     return bitcast<To>(value);
692 }
693 
/// Bit-casts a non-compound value to To, which defaults to ftype<From>
/// (presumably the floating-point type of the same size -- confirm against ftype).
template <typename From, typename To = ftype<From>, KFR_ENABLE_IF(!is_compound<From>)>
constexpr KFR_INTRINSIC To fbitcast(const From& value) CMT_NOEXCEPT
{
    return bitcast<To>(value);
}
699 
/// Bit-casts a non-compound value to To, which defaults to uitype<From>
/// (NOTE(review): exact meaning of uitype is defined elsewhere -- confirm).
template <typename From, typename To = uitype<From>, KFR_ENABLE_IF(!is_compound<From>)>
constexpr KFR_INTRINSIC To uibitcast(const From& value) CMT_NOEXCEPT
{
    return bitcast<To>(value);
}
705 
706 template <typename From, size_t N, typename To = utype<From>,
707           size_t Nout = size_of<From>() * N / size_of<To>()>
ubitcast(const vec<From,N> & value)708 constexpr KFR_INTRINSIC vec<To, Nout> ubitcast(const vec<From, N>& value) CMT_NOEXCEPT
709 {
710     return vec<To, Nout>::frombits(value);
711 }
712 
713 template <typename From, size_t N, typename To = itype<From>,
714           size_t Nout = size_of<From>() * N / size_of<To>()>
ibitcast(const vec<From,N> & value)715 constexpr KFR_INTRINSIC vec<To, Nout> ibitcast(const vec<From, N>& value) CMT_NOEXCEPT
716 {
717     return vec<To, Nout>::frombits(value);
718 }
719 
720 template <typename From, size_t N, typename To = ftype<From>,
721           size_t Nout = size_of<From>() * N / size_of<To>()>
fbitcast(const vec<From,N> & value)722 constexpr KFR_INTRINSIC vec<To, Nout> fbitcast(const vec<From, N>& value) CMT_NOEXCEPT
723 {
724     return vec<To, Nout>::frombits(value);
725 }
726 
727 template <typename From, size_t N, typename To = uitype<From>,
728           size_t Nout = size_of<From>() * N / size_of<To>()>
uibitcast(const vec<From,N> & value)729 constexpr KFR_INTRINSIC vec<To, Nout> uibitcast(const vec<From, N>& value) CMT_NOEXCEPT
730 {
731     return vec<To, Nout>::frombits(value);
732 }
733 
/// Returns the alignment used for a vector occupying @p size bytes, computed as next_poweroftwo(size).
constexpr KFR_INTRINSIC size_t vector_alignment(size_t size) { return next_poweroftwo(size); }
735 
/// Fixed-size array of N elements of type T that can be filled from a vec<T, N>.
/// The struct carries __attribute__((packed)) when CMT_GNU_ATTRIBUTES is defined.
/// vec<T, N> is a friend and can access the storage directly.
template <typename T, size_t N>
struct pkd_vec
{
    /// Default constructor; does not explicitly initialize the elements.
    constexpr pkd_vec() CMT_NOEXCEPT {}

    /// Writes the elements of @p value into the internal array via vec::write.
    pkd_vec(const vec<T, N>& value) CMT_NOEXCEPT { value.write(v); }

    /// Initializes the elements from a list of values, each converted with static_cast<T>.
    /// Supplying fewer than N initializers is rejected at compile time.
    template <typename... Ts>
    constexpr pkd_vec(Ts... init) CMT_NOEXCEPT : v{ static_cast<T>(init)... }
    {
        static_assert(N <= sizeof...(Ts), "Too few initializers for pkd_vec");
    }

private:
    T v[N]; // element storage
    friend struct vec<T, N>;
}
#ifdef CMT_GNU_ATTRIBUTES
__attribute__((packed))
#endif
;
757 
758 namespace internal
759 {
760 
/// Recursion terminator for make_vector_get_n: with no arguments left,
/// returns a value-initialized T.
template <size_t, typename T>
constexpr KFR_INTRINSIC T make_vector_get_n()
{
    return T();
}
766 
767 template <size_t index, typename T, typename... Args>
make_vector_get_n(const T & arg,const Args &...args)768 constexpr KFR_INTRINSIC T make_vector_get_n(const T& arg, const Args&... args)
769 {
770     return index == 0 ? arg : make_vector_get_n<index - 1, T>(args...);
771 }
772 
773 template <typename T, typename... Args, size_t... indices, size_t N = sizeof...(Args)>
make_vector_impl(csizes_t<indices...>,const Args &...args)774 CMT_GNU_CONSTEXPR KFR_INTRINSIC vec<T, N> make_vector_impl(csizes_t<indices...>, const Args&... args)
775 {
776     static_assert(sizeof...(indices) == sizeof...(Args), "");
777     const T list[] = { static_cast<T>(args)... };
778     return vec<T, N>(list[indices]...);
779 }
780 
/// Selects between the common type of Args... and a fallback type.
/// The bool parameter chooses the specialization below.
template <bool, typename Tfallback, typename... Args>
struct conditional_common;

/// true: `type` is common_type<Args...>; Tfallback is ignored.
template <typename Tfallback, typename... Args>
struct conditional_common<true, Tfallback, Args...>
{
    using type = common_type<Args...>;
};

/// false: `type` is Tfallback; common_type<Args...> is never formed.
template <typename Tfallback, typename... Args>
struct conditional_common<false, Tfallback, Args...>
{
    using type = Tfallback;
};
795 
796 } // namespace internal
797 
/// Create vector from scalar values.
/// The element type is Type when it is given explicitly (non-void); otherwise it is
/// the common type of all arguments. In both cases the type is passed through fix_type.
/// Every argument is converted to the element type with static_cast.
/// @code
/// CHECK( make_vector( 1, 2, 3, 4 ) == i32x4{1, 2, 3, 4} );
/// @endcode
template <typename Type = void, typename Arg, typename... Args, size_t N = (sizeof...(Args) + 1),
          typename SubType =
              fix_type<typename internal::conditional_common<is_void<Type>, Type, Arg, Args...>::type>>
constexpr KFR_INTRINSIC vec<SubType, N> make_vector(const Arg& x, const Args&... rest)
{
    return internal::make_vector_impl<SubType>(cvalseq_t<size_t, N>(), static_cast<SubType>(x),
                                               static_cast<SubType>(rest)...);
}
810 
/// Overload for an argument that is already a vector: returns a copy of @p x.
template <typename T, size_t N>
constexpr KFR_INTRINSIC vec<T, N> make_vector(const vec<T, N>& x)
{
    return x;
}
816 
/// Creates a vector from a compile-time list of values (cvals_t<T, Values...>);
/// the element type is T and the length is the number of values.
template <typename T, T... Values, size_t N = sizeof...(Values)>
constexpr KFR_INTRINSIC vec<T, N> make_vector(cvals_t<T, Values...>)
{
    return make_vector<T>(Values...);
}
822 
/// Packs the arguments into a vector.
/// The element type is Type when given explicitly (non-void), otherwise the common type
/// of all arguments, passed through fix_type. Enabled only when subtype<SubType> is a number.
/// Every argument is converted to the element type with static_cast.
template <typename Type = void, typename Arg, typename... Args, size_t N = (sizeof...(Args) + 1),
          typename SubType = fix_type<conditional<is_void<Type>, common_type<Arg, Args...>, Type>>,
          KFR_ENABLE_IF(is_number<subtype<SubType>>)>
constexpr KFR_INTRINSIC vec<SubType, N> pack(const Arg& x, const Args&... rest)
{
    return internal::make_vector_impl<SubType>(csizeseq<N>, static_cast<SubType>(x),
                                               static_cast<SubType>(rest)...);
}
831 
// Short aliases of the form <element type>x<length> for vec<element type, length>,
// provided for lengths 1, 2, 3, 4, 8, 16, 32 and 64 of every element type listed below.
using f32x1  = vec<f32, 1>;
using f32x2  = vec<f32, 2>;
using f32x3  = vec<f32, 3>;
using f32x4  = vec<f32, 4>;
using f32x8  = vec<f32, 8>;
using f32x16 = vec<f32, 16>;
using f32x32 = vec<f32, 32>;
using f32x64 = vec<f32, 64>;
using f64x1  = vec<f64, 1>;
using f64x2  = vec<f64, 2>;
using f64x3  = vec<f64, 3>;
using f64x4  = vec<f64, 4>;
using f64x8  = vec<f64, 8>;
using f64x16 = vec<f64, 16>;
using f64x32 = vec<f64, 32>;
using f64x64 = vec<f64, 64>;
using i8x1   = vec<i8, 1>;
using i8x2   = vec<i8, 2>;
using i8x3   = vec<i8, 3>;
using i8x4   = vec<i8, 4>;
using i8x8   = vec<i8, 8>;
using i8x16  = vec<i8, 16>;
using i8x32  = vec<i8, 32>;
using i8x64  = vec<i8, 64>;
using i16x1  = vec<i16, 1>;
using i16x2  = vec<i16, 2>;
using i16x3  = vec<i16, 3>;
using i16x4  = vec<i16, 4>;
using i16x8  = vec<i16, 8>;
using i16x16 = vec<i16, 16>;
using i16x32 = vec<i16, 32>;
using i16x64 = vec<i16, 64>;
using i32x1  = vec<i32, 1>;
using i32x2  = vec<i32, 2>;
using i32x3  = vec<i32, 3>;
using i32x4  = vec<i32, 4>;
using i32x8  = vec<i32, 8>;
using i32x16 = vec<i32, 16>;
using i32x32 = vec<i32, 32>;
using i32x64 = vec<i32, 64>;
using i64x1  = vec<i64, 1>;
using i64x2  = vec<i64, 2>;
using i64x3  = vec<i64, 3>;
using i64x4  = vec<i64, 4>;
using i64x8  = vec<i64, 8>;
using i64x16 = vec<i64, 16>;
using i64x32 = vec<i64, 32>;
using i64x64 = vec<i64, 64>;
using u8x1   = vec<u8, 1>;
using u8x2   = vec<u8, 2>;
using u8x3   = vec<u8, 3>;
using u8x4   = vec<u8, 4>;
using u8x8   = vec<u8, 8>;
using u8x16  = vec<u8, 16>;
using u8x32  = vec<u8, 32>;
using u8x64  = vec<u8, 64>;
using u16x1  = vec<u16, 1>;
using u16x2  = vec<u16, 2>;
using u16x3  = vec<u16, 3>;
using u16x4  = vec<u16, 4>;
using u16x8  = vec<u16, 8>;
using u16x16 = vec<u16, 16>;
using u16x32 = vec<u16, 32>;
using u16x64 = vec<u16, 64>;
using u32x1  = vec<u32, 1>;
using u32x2  = vec<u32, 2>;
using u32x3  = vec<u32, 3>;
using u32x4  = vec<u32, 4>;
using u32x8  = vec<u32, 8>;
using u32x16 = vec<u32, 16>;
using u32x32 = vec<u32, 32>;
using u32x64 = vec<u32, 64>;
using u64x1  = vec<u64, 1>;
using u64x2  = vec<u64, 2>;
using u64x3  = vec<u64, 3>;
using u64x4  = vec<u64, 4>;
using u64x8  = vec<u64, 8>;
using u64x16 = vec<u64, 16>;
using u64x32 = vec<u64, 32>;
using u64x64 = vec<u64, 64>;
912 
/// GLSL-style names (vecN, dvecN, ivecN, uvecN) for the 2-, 3- and 4-element
/// f32, f64, i32 and u32 vectors. Kept in a separate namespace so they are opt-in.
namespace glsl_names
{
using vec2  = f32x2;
using vec3  = f32x3;
using vec4  = f32x4;
using dvec2 = f64x2;
using dvec3 = f64x3;
using dvec4 = f64x4;
using ivec2 = i32x2;
using ivec3 = i32x3;
using ivec4 = i32x4;
using uvec2 = u32x2;
using uvec3 = u32x3;
using uvec4 = u32x4;
} // namespace glsl_names
/// OpenCL-style names (charN, shortN, intN, longN, their unsigned variants, floatN, doubleN)
/// for vectors of length 2, 3, 4, 8 and 16. Kept in a separate namespace so they are opt-in.
namespace opencl_names
{
// 8-bit integers
using char2   = i8x2;
using char3   = i8x3;
using char4   = i8x4;
using char8   = i8x8;
using char16  = i8x16;
using uchar2  = u8x2;
using uchar3  = u8x3;
using uchar4  = u8x4;
using uchar8  = u8x8;
using uchar16 = u8x16;

// 16-bit integers
using short2   = i16x2;
using short3   = i16x3;
using short4   = i16x4;
using short8   = i16x8;
using short16  = i16x16;
using ushort2  = u16x2;
using ushort3  = u16x3;
using ushort4  = u16x4;
using ushort8  = u16x8;
using ushort16 = u16x16;

// 32-bit integers
using int2   = i32x2;
using int3   = i32x3;
using int4   = i32x4;
using int8   = i32x8;
using int16  = i32x16;
using uint2  = u32x2;
using uint3  = u32x3;
using uint4  = u32x4;
using uint8  = u32x8;
using uint16 = u32x16;

// 64-bit integers
using long2   = i64x2;
using long3   = i64x3;
using long4   = i64x4;
using long8   = i64x8;
using long16  = i64x16;
using ulong2  = u64x2;
using ulong3  = u64x3;
using ulong4  = u64x4;
using ulong8  = u64x8;
using ulong16 = u64x16;

// single precision
using float2  = f32x2;
using float3  = f32x3;
using float4  = f32x4;
using float8  = f32x8;
using float16 = f32x16;

// double precision
using double2  = f64x2;
using double3  = f64x3;
using double4  = f64x4;
using double8  = f64x8;
using double16 = f64x16;
} // namespace opencl_names
986 
987 namespace internal
988 {
989 
/// Calls @p fn with element number Index of every argument (args[Index]...)
/// and returns the result. T and N are not used in the body.
template <size_t Index, typename T, size_t N, typename Fn, typename... Args,
          typename Tout = invoke_result<Fn, subtype<decay<Args>>...>>
constexpr KFR_INTRINSIC Tout applyfn_helper(Fn&& fn, Args&&... args)
{
    return fn(args[Index]...);
}
996 
/// Applies @p fn element-wise: for every index in Indices, applyfn_helper calls fn with
/// that element of each argument; the results are collected into a vector by make_vector.
template <typename T, size_t N, typename Fn, typename... Args,
          typename Tout = invoke_result<Fn, subtype<decay<Args>>...>, size_t... Indices>
constexpr KFR_INTRINSIC vec<Tout, N> apply_helper(Fn&& fn, csizes_t<Indices...>, Args&&... args)
{
    return make_vector(applyfn_helper<Indices, T, N>(std::forward<Fn>(fn), std::forward<Args>(args)...)...);
}
1003 
/// Calls the nullary @p fn once per index in Indices and collects the results
/// into a vector with make_vector. The index values themselves are discarded.
template <typename T, size_t N, typename Fn, size_t... Indices>
constexpr KFR_INTRINSIC vec<T, N> apply0_helper(Fn&& fn, csizes_t<Indices...>)
{
    // (void)Indices only drives the pack expansion; the void() operand keeps the
    // built-in comma operator in use regardless of the type returned by fn().
    return make_vector(((void)Indices, void(), fn())...);
}
1009 } // namespace internal
1010 
/// Applies @p fn element-wise to a vector and optional further arguments.
/// @param fn   callable invoked with one element of each argument
/// @param arg  first vector argument; its length N is the length of the result
/// @param args additional arguments, indexed element-wise like @p arg
/// @return vec<Tout, N>, where Tout is the result type of invoking fn on the element types
template <typename T, size_t N, typename Fn, typename... Args,
          typename Tout = invoke_result<Fn, T, subtype<decay<Args>>...>>
constexpr KFR_INTRINSIC vec<Tout, N> apply(Fn&& fn, const vec<T, N>& arg, Args&&... args)
{
    return internal::apply_helper<T, N>(std::forward<Fn>(fn), csizeseq<N>, arg, std::forward<Args>(args)...);
}
1017 
/// Scalar overload of apply: enabled when T is its own subtype (i.e. not a compound type).
/// Simply calls fn(arg, args...) and returns the result.
template <typename T, typename Fn, typename... Args, typename Tout = invoke_result<Fn, T, decay<Args>...>,
          KFR_ENABLE_IF(is_same<T, subtype<T>>)>
constexpr KFR_INTRINSIC Tout apply(Fn&& fn, const T& arg, Args&&... args)
{
    return fn(arg, args...);
}
1024 
/// Builds a vector of N elements by calling the nullary @p fn once per element.
/// The element type T defaults to the result type of fn().
template <size_t N, typename Fn, typename T = invoke_result<Fn>>
constexpr KFR_INTRINSIC vec<T, N> apply(Fn&& fn)
{
    return internal::apply0_helper<T, N>(std::forward<Fn>(fn), csizeseq<N>);
}
1030 
/// Returns vec<T, N> constructed from the `czeros` tag
/// (presumably a vector with all elements zero -- see the vec constructor).
template <typename T, size_t N>
CMT_GNU_CONSTEXPR KFR_INTRINSIC vec<T, N> zerovector()
{
    return vec<T, N>(czeros);
}
1036 
1037 template <typename T, size_t N>
zerovector(vec_shape<T,N>)1038 CMT_GNU_CONSTEXPR KFR_INTRINSIC vec<T, N> zerovector(vec_shape<T, N>)
1039 {
1040     return vec<T, N>(czeros);
1041 }
1042 
1043 template <typename T, size_t N>
zerovector(vec<T,N>)1044 CMT_GNU_CONSTEXPR KFR_INTRINSIC vec<T, N> zerovector(vec<T, N>)
1045 {
1046     return vec<T, N>(czeros);
1047 }
1048 
/// Returns vec<T, N> constructed from the `cones` tag
/// (presumably a vector with all bits set -- see the vec constructor).
template <typename T, size_t N>
CMT_GNU_CONSTEXPR KFR_INTRINSIC vec<T, N> allonesvector()
{
    return vec<T, N>(cones);
}
1054 
1055 template <typename T, size_t N>
allonesvector(vec_shape<T,N>)1056 CMT_GNU_CONSTEXPR KFR_INTRINSIC vec<T, N> allonesvector(vec_shape<T, N>)
1057 {
1058     return vec<T, N>(cones);
1059 }
1060 
1061 template <typename T, size_t N>
allonesvector(vec<T,N>)1062 CMT_GNU_CONSTEXPR KFR_INTRINSIC vec<T, N> allonesvector(vec<T, N>)
1063 {
1064     return vec<T, N>(cones);
1065 }
1066 
/// Returns vec<T, N>{}; as the name says, callers should not rely on the element values.
template <typename T, size_t N>
constexpr KFR_INTRINSIC vec<T, N> undefinedvector()
{
    return vec<T, N>{};
}
1072 
1073 template <typename T, size_t N>
undefinedvector(vec_shape<T,N>)1074 constexpr KFR_INTRINSIC vec<T, N> undefinedvector(vec_shape<T, N>)
1075 {
1076     return undefinedvector<T, N>();
1077 }
1078 
/// Binds the length N so that vec can be passed as a one-parameter template:
/// vec_template<N>::type<T> is vec<T, N>.
template <size_t N>
struct vec_template
{
    template <typename T>
    using type = vec<T, N>;
};
1085 
1086 #ifdef KFR_TESTING
1087 
special_values()1088 inline const std::vector<special_value>& special_values()
1089 {
1090     static const std::vector<special_value> values{ special_constant::infinity,
1091                                                     special_constant::neg_infinity,
1092                                                     special_constant::min,
1093                                                     special_constant::lowest,
1094                                                     special_constant::max,
1095                                                     3.1415926535897932384626433832795,
1096                                                     4.499999,
1097                                                     4.500001,
1098                                                     -4.499999,
1099                                                     -4.500001,
1100                                                     0.1111111111111111111111111111111,
1101                                                     -0.4444444444444444444444444444444,
1102                                                     -1,
1103                                                     0,
1104                                                     +1 };
1105     return values;
1106 }
1107 
/// Test categories: a bit mask selecting which types a test runs on
/// (bit 0 = scalars, bit 1 = vectors), plus types() mapping a mask to a type list.
namespace test_catogories
{
constexpr cint_t<1> scalars{}; // scalar types only
constexpr cint_t<2> vectors{}; // vector types only
constexpr cint_t<3> all{}; // scalars and vectors

/// Empty category: no types.
constexpr inline auto types(cint_t<0>) { return ctypes_t<>{}; }

/// Scalars: the list numeric_types.
constexpr inline auto types(cint_t<1>) { return cconcat(numeric_types); }

/// Vectors: the list numeric_vector_types<vec>.
constexpr inline auto types(cint_t<2>) { return cconcat(numeric_vector_types<vec>); }

/// All: numeric_types followed by numeric_vector_types<vec>.
constexpr inline auto types(cint_t<3>) { return cconcat(numeric_types, numeric_vector_types<vec>); }

} // namespace test_catogories
1123 
/// Builds a test vector whose element i equals static_cast<T>(start + step * i)
/// for every i in `indices`.
template <typename T, size_t N, size_t... indices>
vec<T, N> test_enumerate(vec_shape<T, N>, csizes_t<indices...>, double start = 0, double step = 1)
{
    return make_vector<T>(static_cast<T>(start + step * indices)...);
}
1129 
/// Tests a unary function @p fn against a scalar reference implementation @p reffn.
/// @param cat          category selecting the types to test (see test_catogories)
/// @param fn           function under test, called with a value of the tested type
/// @param reffn        reference function, applied element-wise via apply()
/// @param isapplicable predicate (ctype<T>, value) -> bool; combinations returning false are skipped
template <int Cat, typename Fn, typename RefFn, typename IsApplicable = fn_return_constant<bool, true>>
void test_function1(cint_t<Cat> cat, Fn&& fn, RefFn&& reffn, IsApplicable&& isapplicable = IsApplicable{})
{
    // Part 1: every special value combined with every type of the category.
    testo::matrix(
        named("value") = special_values(), named("type") = test_catogories::types(cat),
        [&](special_value value, auto type) {
            using T = typename decltype(type)::type;
            if (isapplicable(ctype<T>, value))
            {
                const T x(value);
                // The result type of fn must be T rebound to the result type of reffn on the subtype.
                CHECK(is_same<decltype(fn(x)), typename compound_type_traits<T>::template rebind<decltype(
                                                   reffn(std::declval<subtype<T>>()))>>);
                const auto fn_x  = fn(x);
                const auto ref_x = apply(reffn, x);
                // Compared with deep_is_equal instead of operator== (see the disabled CHECK below).
                ::testo::active_test()->check(testo::deep_is_equal(ref_x, fn_x),
                                              as_string(fn_x, " == ", ref_x), "fn(x) == apply(reffn, x)");
                //   CHECK(fn(x) == apply(reffn, x));
            }
        });

    // Part 2: vector types only (Cat & ~1 clears the scalars bit); input is 0, 1, 2, ...
    testo::matrix(named("type") = test_catogories::types(cint<Cat & ~1>), [&](auto type) {
        using T   = typename decltype(type)::type;
        const T x = test_enumerate(T::shape(), csizeseq<T::size()>, 0);
        CHECK(fn(x) == apply(reffn, x));
    });
}
1156 
/// Tests a binary function @p fn against a scalar reference implementation @p reffn.
/// @param cat          category selecting the types to test (see test_catogories)
/// @param fn           function under test, called with two values of the tested type
/// @param reffn        reference function, applied element-wise via apply()
/// @param isapplicable predicate (ctype<T>, value1, value2) -> bool; combinations returning false are skipped
template <int Cat, typename Fn, typename RefFn, typename IsApplicable = fn_return_constant<bool, true>>
void test_function2(cint_t<Cat> cat, Fn&& fn, RefFn&& reffn, IsApplicable&& isapplicable = IsApplicable{})
{
    // Part 1: every pair of special values combined with every type of the category.
    testo::matrix(named("value1") = special_values(), //
                  named("value2") = special_values(), named("type") = test_catogories::types(cat),
                  [&](special_value value1, special_value value2, auto type) {
                      using T = typename decltype(type)::type;
                      // Note: the operands are constructed before the applicability check.
                      const T x1(value1);
                      const T x2(value2);
                      if (isapplicable(ctype<T>, value1, value2))
                      {
                          // The result type of fn must be T rebound to the result type of reffn.
                          CHECK(is_same<decltype(fn(x1, x2)),
                                        typename compound_type_traits<T>::template rebind<decltype(
                                            reffn(std::declval<subtype<T>>(), std::declval<subtype<T>>()))>>);
                          CHECK(fn(x1, x2) == apply(reffn, x1, x2));
                      }
                  });

    // Part 2: vector types only (Cat & ~1 clears the scalars bit);
    // inputs are 0, 1, 2, ... and 100, 99, 98, ...
    testo::matrix(named("type") = test_catogories::types(cint<Cat & ~1>), [&](auto type) {
        using T    = typename decltype(type)::type;
        const T x1 = test_enumerate(T::shape(), csizeseq<T::size()>, 0, 1);
        const T x2 = test_enumerate(T::shape(), csizeseq<T::size()>, 100, -1);
        CHECK(fn(x1, x2) == apply(reffn, x1, x2));
    });
}
1182 
1183 #endif
1184 
1185 namespace internal
1186 {
// mask vector to mask vector: vec<bit<From>, N> -> vec<bit<To>, N>
// The source mask is viewed as a vector of itype<From>, converted element-wise to
// itype<To> with innercast, and the resulting bits are turned back into a mask.
template <typename To, typename From, size_t N>
struct conversion<vec<bit<To>, N>, vec<bit<From>, N>>
{
    static vec<bit<To>, N> cast(const vec<bit<From>, N>& value)
    {
        return vec<To, N>::frombits(innercast<itype<To>>(vec<itype<From>, N>::frombits(value.asvec())))
            .asmask();
    }
};
1197 
1198 // vector to vector<vector>
1199 template <typename To, typename From, size_t N1, size_t N2, size_t Ns1>
1200 struct conversion<vec<vec<To, N1>, N2>, vec<From, Ns1>>
1201 {
1202     static_assert(N1 == Ns1, "");
1203     static_assert(!is_compound<To>, "");
1204     static_assert(!is_compound<From>, "");
1205 
castkfr::CMT_ARCH_NAME::internal::conversion1206     static vec<vec<To, N1>, N2> cast(const vec<From, N1>& value)
1207     {
1208         return vec<vec<To, N1>, N2>::from_flatten(
1209             kfr::innercast<To>(value.flatten())
1210                 .shuffle(csizeseq<N2 * vec<From, N1>::scalar_size()> % csize<N2>));
1211     }
1212 };
1213 
1214 // vector to vector<vector<vector>>
1215 template <typename To, typename From, size_t N1, size_t N2, size_t N3, size_t Ns1>
1216 struct conversion<vec<vec<vec<To, N1>, N2>, N3>, vec<From, Ns1>>
1217 {
1218     static_assert(N1 == Ns1, "");
1219     static_assert(!is_compound<To>, "");
1220     static_assert(!is_compound<From>, "");
1221 
castkfr::CMT_ARCH_NAME::internal::conversion1222     static vec<vec<vec<To, N1>, N2>, N3> cast(const vec<From, N1>& value)
1223     {
1224         return vec<vec<vec<To, N1>, N2>, N3>::from_flatten(
1225             kfr::innercast<To>(value.flatten())
1226                 .shuffle(csizeseq<N2 * vec<From, N1>::scalar_size()> % csize<N2>));
1227     }
1228 };
1229 
// vector<vector> to vector<vector>
// Element-wise conversion between two-level vectors of identical dimensions:
// the source is flattened, converted to To with innercast and reshaped back.
template <typename To, typename From, size_t N1, size_t N2, size_t NN1, size_t NN2>
struct conversion<vec<vec<To, N1>, N2>, vec<vec<From, NN1>, NN2>>
{
    // Both dimensions must match and the element types must be scalar.
    static_assert(N1 == NN1, "");
    static_assert(N2 == NN2, "");
    static_assert(!is_compound<To>, "");
    static_assert(!is_compound<From>, "");

    static vec<vec<To, N1>, N2> cast(const vec<vec<From, N1>, N2>& value)
    {
        return vec<vec<To, N1>, N2>::from_flatten(kfr::innercast<To>(value.flatten()));
    }
};
1244 
// vector<vector<vector>> to vector<vector<vector>>
// Element-wise conversion between three-level vectors of identical dimensions:
// the source is flattened, converted to To with innercast and reshaped back.
template <typename To, typename From, size_t N1, size_t N2, size_t N3, size_t NN1, size_t NN2, size_t NN3>
struct conversion<vec<vec<vec<To, N1>, N2>, N3>, vec<vec<vec<From, NN1>, NN2>, NN3>>
{
    // All three dimensions must match and the element types must be scalar.
    static_assert(N1 == NN1, "");
    static_assert(N2 == NN2, "");
    static_assert(N3 == NN3, "");
    static_assert(!is_compound<To>, "");
    static_assert(!is_compound<From>, "");

    static vec<vec<vec<To, N1>, N2>, N3> cast(const vec<vec<vec<From, N1>, N2>, N3>& value)
    {
        return vec<vec<vec<To, N1>, N2>, N3>::from_flatten(kfr::innercast<To>(value.flatten()));
    }
};
1260 } // namespace internal
1261 
/// Matrix alias: N2 vectors of N1 elements each (vec<vec<T, N1>, N2>); square by default.
template <typename T, size_t N1, size_t N2 = N1>
using mat = vec<vec<T, N1>, N2>;
1264 
// Short aliases for 2x2 vectors of vectors.
using u8x2x2  = vec<vec<u8, 2>, 2>;
using i8x2x2  = vec<vec<i8, 2>, 2>;
using u16x2x2 = vec<vec<u16, 2>, 2>;
using i16x2x2 = vec<vec<i16, 2>, 2>;
using u32x2x2 = vec<vec<u32, 2>, 2>;
using i32x2x2 = vec<vec<i32, 2>, 2>;
using u64x2x2 = vec<vec<u64, 2>, 2>;
using i64x2x2 = vec<vec<i64, 2>, 2>;
using f32x2x2 = vec<vec<f32, 2>, 2>;
using f64x2x2 = vec<vec<f64, 2>, 2>;

// Short aliases for 4x4 vectors of vectors.
using u8x4x4  = vec<vec<u8, 4>, 4>;
using i8x4x4  = vec<vec<i8, 4>, 4>;
using u16x4x4 = vec<vec<u16, 4>, 4>;
using i16x4x4 = vec<vec<i16, 4>, 4>;
using u32x4x4 = vec<vec<u32, 4>, 4>;
using i32x4x4 = vec<vec<i32, 4>, 4>;
using u64x4x4 = vec<vec<u64, 4>, 4>;
using i64x4x4 = vec<vec<i64, 4>, 4>;
using f32x4x4 = vec<vec<f32, 4>, 4>;
using f64x4x4 = vec<vec<f64, 4>, 4>;
1286 
/// Binds both dimensions so that a two-level vector can be passed as a one-parameter
/// template: vec_vec_template<N1, N2>::type<T> is vec<vec<T, N1>, N2>.
template <size_t N1, size_t N2>
struct vec_vec_template
{
    template <typename T>
    using type = vec<vec<T, N1>, N2>;
};
1293 
1294 namespace internal
1295 {
1296 
/// Maps an element type and a list of up to three dimensions to the matching
/// (nested) vector type. The first dimension is the innermost one.
template <typename T, size_t... Ns>
struct vecx_t;

/// No dimensions: the scalar type itself.
template <typename T>
struct vecx_t<T>
{
    using type = T;
};

/// One dimension: vec<T, N1>.
template <typename T, size_t N1>
struct vecx_t<T, N1>
{
    using type = vec<T, N1>;
};

/// Two dimensions: vec<vec<T, N1>, N2>.
template <typename T, size_t N1, size_t N2>
struct vecx_t<T, N1, N2>
{
    using type = vec<vec<T, N1>, N2>;
};

/// Three dimensions: vec<vec<vec<T, N1>, N2>, N3>.
template <typename T, size_t N1, size_t N2, size_t N3>
struct vecx_t<T, N1, N2, N3>
{
    using type = vec<vec<vec<T, N1>, N2>, N3>;
};
1323 } // namespace internal
1324 
/// Scalar or nested vector type with the given dimensions (zero to three of them);
/// e.g. vecx<T, N1, N2> is vec<vec<T, N1>, N2>.
template <typename T, size_t... Ns>
using vecx = typename internal::vecx_t<T, Ns...>::type;
1327 
1328 } // namespace CMT_ARCH_NAME
/// Common type of two vectors of the same length N: delegated to
/// common_type_from_subtypes with the element types and vec_template<N>::type
/// (presumably yielding a vec of length N over the common element type).
template <typename T1, typename T2, size_t N>
struct common_type_impl<kfr::vec<T1, N>, kfr::vec<T2, N>>
    : common_type_from_subtypes<T1, T2, kfr::vec_template<N>::template type>
{
};
/// Common type of a vector (left) and another type T2 (right): delegated to
/// common_type_from_subtypes with T1, T2 and vec_template<N>::type.
template <typename T1, typename T2, size_t N>
struct common_type_impl<kfr::vec<T1, N>, T2>
    : common_type_from_subtypes<T1, T2, kfr::vec_template<N>::template type>
{
};
/// Common type of a type T1 (left) and a vector (right): delegated to
/// common_type_from_subtypes with T1, T2 and vec_template<N>::type.
template <typename T1, typename T2, size_t N>
struct common_type_impl<T1, kfr::vec<T2, N>>
    : common_type_from_subtypes<T1, T2, kfr::vec_template<N>::template type>
{
};
1344 
/// Common type of a vector (left) and a vector of vectors with the same inner length N1 (right):
/// a vector of vectors over the common element type.
/// NOTE(review): unlike the mirrored specialization, this one declares `type` explicitly
/// in addition to inheriting from common_type_from_subtypes -- confirm whether both are needed.
template <typename T1, typename T2, size_t N1, size_t N2>
struct common_type_impl<kfr::vec<T1, N1>, kfr::vec<kfr::vec<T2, N1>, N2>>
    : common_type_from_subtypes<T1, T2, kfr::vec_vec_template<N1, N2>::template type>
{
    using type = kfr::vec<kfr::vec<typename common_type_impl<T1, T2>::type, N1>, N2>;
};
/// Common type of a vector of vectors (left) and a vector with the same inner length N1 (right):
/// delegated to common_type_from_subtypes with T1, T2 and vec_vec_template<N1, N2>::type.
template <typename T1, typename T2, size_t N1, size_t N2>
struct common_type_impl<kfr::vec<kfr::vec<T1, N1>, N2>, kfr::vec<T2, N1>>
    : common_type_from_subtypes<T1, T2, kfr::vec_vec_template<N1, N2>::template type>
{
};
1356 
1357 } // namespace kfr
1358 
1359 namespace cometa
1360 {
1361 
/// Compound type traits for kfr::vec_shape<T, N>: describes it as a non-scalar
/// type made of N elements of type T.
template <typename T, size_t N>
struct compound_type_traits<kfr::vec_shape<T, N>>
{
    constexpr static size_t width      = N; // number of direct elements
    constexpr static size_t deep_width = width * compound_type_traits<T>::width; // width times the width of T
    using subtype                      = T; // direct element type
    using deep_subtype                 = cometa::deep_subtype<T>; // deep_subtype of the element type
    constexpr static bool is_scalar    = false;
    constexpr static size_t depth      = cometa::compound_type_traits<T>::depth + 1; // one level deeper than T

    /// Same shape with the direct element type replaced by U.
    template <typename U>
    using rebind = kfr::vec_shape<U, N>;
    /// Same shape with the element type replaced by T's deep_rebind<U>.
    template <typename U>
    using deep_rebind = kfr::vec_shape<typename compound_type_traits<subtype>::template deep_rebind<U>, N>;
};
1377 
/// Compound type traits for kfr::vec<T, N>: describes it as a non-scalar
/// type made of N elements of type T.
template <typename T, size_t N>
struct compound_type_traits<kfr::vec<T, N>>
{
    using subtype                      = T; // direct element type
    using deep_subtype                 = cometa::deep_subtype<T>; // deep_subtype of the element type
    constexpr static size_t width      = N; // number of direct elements
    constexpr static size_t deep_width = width * compound_type_traits<T>::width; // width times the width of T
    constexpr static bool is_scalar    = false;
    constexpr static size_t depth      = cometa::compound_type_traits<T>::depth + 1; // one level deeper than T
    /// Vector of the same length with the direct element type replaced by U.
    template <typename U>
    using rebind = kfr::vec<U, N>;
    /// Vector of the same length with the element type replaced by T's deep_rebind<U>.
    template <typename U>
    using deep_rebind = kfr::vec<typename compound_type_traits<subtype>::template deep_rebind<U>, N>;

    /// Returns the element of @p value at position @p index (value[index]).
    KFR_MEM_INTRINSIC static constexpr subtype at(const kfr::vec<T, N>& value, size_t index)
    {
        return value[index];
    }
};
1397 
1398 namespace details
1399 {
/// flt_type_impl for vectors: a vector of the same length whose element type
/// is flt_type_impl<T>::type.
template <typename T, size_t N>
struct flt_type_impl<kfr::vec<T, N>>
{
    using type = kfr::vec<typename flt_type_impl<T>::type, N>;
};
1405 } // namespace details
1406 } // namespace cometa
1407 
1408 CMT_PRAGMA_GNU(GCC diagnostic pop)
1409 CMT_PRAGMA_MSVC(warning(pop))
1410 
1411 namespace std
1412 {
1413 
/// std::tuple_size for kfr::vec<T, N>: the vector length N.
template <typename T, size_t N>
struct tuple_size<kfr::vec<T, N>> : public integral_constant<size_t, N>
{
};
1418 
1419 template <size_t I, class T, size_t N>
1420 struct tuple_element<I, kfr::vec<T, N>>
1421 {
1422     using type = T;
1423 };
1424 
1425 } // namespace std
1426