1 #ifndef NETGEN_CORE_SIMD_GENERIC_HPP
2 #define NETGEN_CORE_SIMD_GENERIC_HPP
3 
4 /**************************************************************************/
5 /* File:   simd_base.hpp                                                  */
6 /* Author: Joachim Schoeberl, Matthias Hochsteger                         */
7 /* Date:   25. Mar. 16                                                    */
8 /**************************************************************************/
9 
10 #include <type_traits>
11 #include <functional>
12 #include <tuple>
13 
14 #include "array.hpp"
15 
16 namespace ngcore
17 {
18 
GetDefaultSIMDSize()19   constexpr int GetDefaultSIMDSize() {
20 #if defined __AVX512F__
21     return 8;
22 #elif defined __AVX__
23     return 4;
24 #elif defined NETGEN_ARCH_AMD64
25     return 2;
26 #else
27     return 2;
28 #endif
29   }
30 
31 
32   template <typename T, int N=GetDefaultSIMDSize()> class SIMD;
33 
34   class mask64;
35 
36   ////////////////////////////////////////////////////////////////////////////
37   namespace detail {
38     template <typename T, size_t N, size_t... I>
array_range_impl(std::array<T,N> const & arr,size_t first,std::index_sequence<I...>)39     auto array_range_impl(std::array<T, N> const& arr,
40                    size_t first,
41                    std::index_sequence<I...>)
42     -> std::array<T, sizeof...(I)> {
43         return {arr[first + I]...};
44     }
45 
46     template <size_t S, typename T, size_t N>
array_range(std::array<T,N> const & arr,size_t first)47     auto array_range(std::array<T, N> const& arr, size_t first) {
48       return array_range_impl(arr, first, std::make_index_sequence<S>{});
49     }
50 
51   } // namespace detail
52 
53   ////////////////////////////////////////////////////////////////////////////
54   // mask
55 
56   template <>
57   class SIMD<mask64,1>
58   {
59     int64_t mask;
60   public:
SIMD(int64_t i)61     SIMD (int64_t i)
62       : mask(i > 0 ? -1 : 0) { ; }
Data() const63     bool Data() const { return mask; }
Size()64     static constexpr int Size() { return 1; }
operator [](int) const65     auto operator[] (int /* i */) const { return mask; }
66   };
67 
68 
69   template <int N>
70   class alignas(GetDefaultSIMDSize()*sizeof(int64_t)) SIMD<mask64,N>
71   {
72     static constexpr int N1 = std::min(GetDefaultSIMDSize(), N/2);
73     static constexpr int N2 = N-N1;
74 
75     SIMD<mask64,N1> lo;
76     SIMD<mask64,N2> hi;
77   public:
78 
SIMD(int64_t i)79     SIMD (int64_t i) : lo(i), hi(i-N1 ) { ; }
SIMD(SIMD<mask64,N1> lo_,SIMD<mask64,N2> hi_)80     SIMD (SIMD<mask64,N1> lo_, SIMD<mask64,N2> hi_) : lo(lo_), hi(hi_) { ; }
Lo() const81     SIMD<mask64,N1> Lo() const { return lo; }
Hi() const82     SIMD<mask64,N2> Hi() const { return hi; }
Size()83     static constexpr int Size() { return N; }
84   };
85 
86   template<int N>
operator &&(SIMD<mask64,N> a,SIMD<mask64,N> b)87   NETGEN_INLINE SIMD<mask64,N> operator&& (SIMD<mask64,N> a, SIMD<mask64,N> b)
88     {
89       if constexpr(N==1) return a.Data() && b.Data();
90       else               return { a.Lo() && b.Lo(), a.Hi() && b.Hi() };
91     }
92 
93 
94   ////////////////////////////////////////////////////////////////////////////
95   // int64
96 
97   template<>
98   class SIMD<int64_t,1>
99   {
100     int64_t data;
101 
102   public:
Size()103     static constexpr int Size() { return 1; }
SIMD()104     SIMD () {}
105     SIMD (const SIMD &) = default;
106     SIMD & operator= (const SIMD &) = default;
SIMD(int val)107     SIMD (int val) : data{val} {}
SIMD(int64_t val)108     SIMD (int64_t val) : data{val} {}
SIMD(size_t val)109     SIMD (size_t val) : data(val) {}
SIMD(std::array<int64_t,1> arr)110     explicit SIMD (std::array<int64_t, 1> arr)
111         : data{arr[0]}
112     {}
113 
operator [](int i) const114     int64_t operator[] (int i) const { return ((int64_t*)(&data))[i]; }
Data() const115     auto Data() const { return data; }
FirstInt(int64_t n0=0)116     static SIMD FirstInt(int64_t n0=0) { return {n0}; }
117     template <int I>
Get()118     int64_t Get()
119     {
120       static_assert(I==0);
121       return data;
122     }
123   };
124 
125   template<int N>
126   class alignas(GetDefaultSIMDSize()*sizeof(int64_t)) SIMD<int64_t,N>
127   {
128     static constexpr int N1 = std::min(GetDefaultSIMDSize(), N/2);
129     static constexpr int N2 = N-N1;
130 
131     SIMD<int64_t,N1> lo;
132     SIMD<int64_t,N2> high;
133 
134   public:
Size()135     static constexpr int Size() { return N; }
136 
SIMD()137     SIMD () {}
138     SIMD (const SIMD &) = default;
139     SIMD & operator= (const SIMD &) = default;
140 
SIMD(int val)141     SIMD (int val) : lo{val}, high{val} { ; }
SIMD(int64_t val)142     SIMD (int64_t val) : lo{val}, high{val} { ; }
SIMD(size_t val)143     SIMD (size_t val) : lo{val}, high{val} { ; }
SIMD(SIMD<int64_t,N1> lo_,SIMD<int64_t,N2> high_)144     SIMD (SIMD<int64_t,N1> lo_, SIMD<int64_t,N2> high_) : lo(lo_), high(high_) { ; }
145 
SIMD(std::array<int64_t,N> arr)146     explicit SIMD( std::array<int64_t, N> arr )
147         : lo(detail::array_range<N1>(arr, 0)),
148           high(detail::array_range<N2>(arr, N1))
149       {}
150 
151     template<typename ...T>
SIMD(const T...vals)152     explicit SIMD(const T... vals)
153     : lo(detail::array_range<N1>(std::array<int64_t, N>{vals...}, 0)),
154       high(detail::array_range<N2>(std::array<int64_t, N>{vals...}, N1))
155       {
156         static_assert(sizeof...(vals)==N, "wrong number of arguments");
157       }
158 
159 
160     template<typename T, typename std::enable_if<std::is_convertible<T, std::function<int64_t(int)>>::value, int>::type = 0>
SIMD(const T & func)161       SIMD (const T & func)
162     {
163       for(auto i : IntRange(N1))
164           lo[i] = func(i);
165       for(auto i : IntRange(N2))
166           high[i] = func(N1+i);
167     }
168 
Lo() const169     auto Lo() const { return lo; }
Hi() const170     auto Hi() const { return high; }
171 
operator [](int i) const172     int64_t operator[] (int i) const { return ((int64_t*)(&lo))[i]; }
173 
174     /*
175     operator tuple<int64_t&,int64_t&,int64_t&,int64_t&> ()
176     { return tuple<int64_t&,int64_t&,int64_t&,int64_t&>((*this)[0], (*this)[1], (*this)[2], (*this)[3]); }
177     */
178 
179     /*
180     static SIMD FirstInt() { return { 0, 1, 2, 3 }; }
181     */
FirstInt(int64_t n0=0)182     static SIMD FirstInt(int64_t n0=0) { return {SIMD<int64_t,N1>::FirstInt(n0), SIMD<int64_t,N2>::FirstInt(n0+N1)}; }
183     template <int I>
Get()184     int64_t Get()
185     {
186       static_assert(I>=0 && I<N, "Index out of range");
187       if constexpr(I<N1) return lo.template Get<I>();
188       else               return high.template Get<I-N1>();
189     }
190   };
191 
192 
193   ////////////////////////////////////////////////////////////////////////////
194   // double
195 
196   template<>
197   class SIMD<double,1>
198   {
199     double data;
200 
201   public:
Size()202     static constexpr int Size() { return 1; }
SIMD()203     SIMD () {}
204     SIMD (const SIMD &) = default;
205     SIMD & operator= (const SIMD &) = default;
SIMD(double val)206     SIMD (double val) { data = val; }
SIMD(int val)207     SIMD (int val)    { data = val; }
SIMD(size_t val)208     SIMD (size_t val) { data = val; }
SIMD(double const * p)209     SIMD (double const * p) { data = *p; }
SIMD(double const * p,SIMD<mask64,1> mask)210     SIMD (double const * p, SIMD<mask64,1> mask) { data = mask.Data() ? *p : 0.0; }
SIMD(std::array<double,1> arr)211     explicit SIMD (std::array<double, 1> arr)
212         : data{arr[0]}
213     {}
214 
215     template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0>
SIMD(const T & func)216     SIMD (const T & func)
217     {
218       data = func(0);
219     }
220 
221     template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0>
operator =(const T & func)222     SIMD & operator= (const T & func)
223     {
224       data = func(0);
225       return *this;
226     }
227 
Store(double * p)228     void Store (double * p) { *p = data; }
Store(double * p,SIMD<mask64,1> mask)229     void Store (double * p, SIMD<mask64,1> mask) { if (mask.Data()) *p = data; }
230 
operator [](int i) const231     double operator[] (int i) const { return ((double*)(&data))[i]; }
Data() const232     double Data() const { return data; }
233     template <int I>
Get()234     double Get()
235     {
236       static_assert(I==0);
237       return data;
238     }
239   };
240 
241 
242   template<int N>
243   class alignas(GetDefaultSIMDSize()*sizeof(double)) SIMD<double, N>
244   {
245     static constexpr int N1 = std::min(GetDefaultSIMDSize(), N/2);
246     static constexpr int N2 = N-N1;
247 
248     SIMD<double, N1> lo;
249     SIMD<double, N2> high;
250 
251   public:
Size()252     static constexpr int Size() { return N; }
SIMD()253     SIMD () {}
254     SIMD (const SIMD &) = default;
SIMD(SIMD<double,N1> lo_,SIMD<double,N2> hi_)255     SIMD (SIMD<double,N1> lo_, SIMD<double,N2> hi_) : lo(lo_), high(hi_) { ; }
256 
257     template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0>
SIMD(const T & func)258     SIMD (const T & func)
259     {
260       double  *p = (double*)this;
261       for(auto i : IntRange(N))
262           p[i] = func(i);
263     }
264 
265     template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0>
operator =(const T & func)266     SIMD & operator= (const T & func)
267     {
268       double  *p = (double*)this;
269       for(auto i : IntRange(N))
270           p[i] = func(i);
271       return *this;
272     }
273 
274 
275     SIMD & operator= (const SIMD &) = default;
276 
SIMD(double val)277     SIMD (double val) : lo{val}, high{val} { ; }
SIMD(int val)278     SIMD (int val)    : lo{val}, high{val} { ; }
SIMD(size_t val)279     SIMD (size_t val) : lo{val}, high{val} { ; }
280 
SIMD(double const * p)281     SIMD (double const * p) : lo{p}, high{p+N1} { ; }
SIMD(double const * p,SIMD<mask64,N> mask)282     SIMD (double const * p, SIMD<mask64,N> mask)
283         : lo{p, mask.Lo()}, high{p+N1, mask.Hi()}
284       { }
SIMD(double * p)285     SIMD (double * p) : lo{p}, high{p+N1} { ; }
SIMD(double * p,SIMD<mask64,N> mask)286     SIMD (double * p, SIMD<mask64,N> mask)
287         : lo{p, mask.Lo()}, high{p+N1, mask.Hi()}
288       { }
289 
SIMD(std::array<double,N> arr)290     explicit SIMD( std::array<double, N> arr )
291         : lo(detail::array_range<N1>(arr, 0)),
292           high(detail::array_range<N2>(arr, N1))
293       {}
294 
295     template<typename ...T>
SIMD(const T...vals)296     explicit SIMD(const T... vals)
297     : lo(detail::array_range<N1>(std::array<double, N>{vals...}, 0)),
298       high(detail::array_range<N2>(std::array<double, N>{vals...}, N1))
299       {
300         static_assert(sizeof...(vals)==N, "wrong number of arguments");
301       }
302 
Store(double * p)303     void Store (double * p) { lo.Store(p); high.Store(p+N1); }
Store(double * p,SIMD<mask64,N> mask)304     void Store (double * p, SIMD<mask64,N> mask)
305     {
306       lo.Store(p, mask.Lo());
307       high.Store(p+N1, mask.Hi());
308     }
309 
Lo() const310     auto Lo() const { return lo; }
Hi() const311     auto Hi() const { return high; }
312 
operator [](int i) const313     double operator[] (int i) const { return ((double*)(&lo))[i]; }
314 
315     template<typename=std::enable_if<N==2>>
operator std::tuple<double&,double&>()316     operator std::tuple<double&,double&> ()
317     {
318 	double *p = (double*)this;
319 	return std::tuple<double&,double&>(p[0], p[1]);
320     }
321 
322     template<typename=std::enable_if<N==4>>
operator std::tuple<double&,double&,double&,double&>()323     operator std::tuple<double&,double&,double&,double&> ()
324     { return std::tuple<double&,double&,double&,double&>((*this)[0], (*this)[1], (*this)[2], (*this)[3]); }
325 
326     template <int I>
Get()327     double Get()
328     {
329       static_assert(I>=0 && I<N, "Index out of range");
330       if constexpr(I<N1) return lo.template Get<I>();
331       else               return high.template Get<I-N1>();
332     }
Data() const333     auto Data() const { return *this; }
334   };
335 
336 
337   // Generic operators for any arithmetic type/simd width
338   template <typename T, int N>
operator +(SIMD<T,N> a,SIMD<T,N> b)339   NETGEN_INLINE SIMD<T,N> operator+ (SIMD<T,N> a, SIMD<T,N> b) {
340       if constexpr(N==1) return a.Data()+b.Data();
341       else               return { a.Lo()+b.Lo(), a.Hi()+b.Hi() };
342   }
343 
344   template <typename T, int N>
operator -(SIMD<T,N> a,SIMD<T,N> b)345   NETGEN_INLINE SIMD<T,N> operator- (SIMD<T,N> a, SIMD<T,N> b) {
346       if constexpr(N==1) return a.Data()-b.Data();
347       else               return { a.Lo()-b.Lo(), a.Hi()-b.Hi() };
348   }
349   template <typename T, int N>
operator -(SIMD<T,N> a)350   NETGEN_INLINE SIMD<T,N> operator- (SIMD<T,N> a) {
351       if constexpr(N==1) return -a.Data();
352       else               return { -a.Lo(), -a.Hi() };
353   }
354 
355   template <typename T, int N>
operator *(SIMD<T,N> a,SIMD<T,N> b)356   NETGEN_INLINE SIMD<T,N> operator* (SIMD<T,N> a, SIMD<T,N> b) {
357       if constexpr(N==1) return a.Data()*b.Data();
358       else               return { a.Lo()*b.Lo(), a.Hi()*b.Hi() };
359   }
360 
361   template <typename T, int N>
operator /(SIMD<T,N> a,SIMD<T,N> b)362   NETGEN_INLINE SIMD<T,N> operator/ (SIMD<T,N> a, SIMD<T,N> b) {
363       if constexpr(N==1) return a.Data()/b.Data();
364       else               return { a.Lo()/b.Lo(), a.Hi()/b.Hi() };
365   }
366 
367   template <typename T, int N>
operator <(SIMD<T,N> a,SIMD<T,N> b)368   NETGEN_INLINE SIMD<mask64,N> operator< (SIMD<T,N> a, SIMD<T,N> b)
369     {
370       if constexpr(N==1) return a.Data() < b.Data();
371       else               return { a.Lo()<b.Lo(), a.Hi()<b.Hi() };
372     }
373 
374   template <typename T, int N>
operator <=(SIMD<T,N> a,SIMD<T,N> b)375   NETGEN_INLINE SIMD<mask64,N> operator<= (SIMD<T,N> a, SIMD<T,N> b)
376     {
377       if constexpr(N==1) return a.Data() <= b.Data();
378       else               return { a.Lo()<=b.Lo(), a.Hi()<=b.Hi() };
379     }
380 
381   template <typename T, int N>
operator >(SIMD<T,N> a,SIMD<T,N> b)382   NETGEN_INLINE SIMD<mask64,N> operator> (SIMD<T,N> a, SIMD<T,N> b)
383     {
384       if constexpr(N==1) return a.Data() > b.Data();
385       else               return { a.Lo()>b.Lo(), a.Hi()>b.Hi() };
386     }
387 
388   template <typename T, int N>
operator >=(SIMD<T,N> a,SIMD<T,N> b)389   NETGEN_INLINE SIMD<mask64,N> operator>= (SIMD<T,N> a, SIMD<T,N> b)
390     {
391       if constexpr(N==1) return a.Data() >= b.Data();
392       else               return { a.Lo()>=b.Lo(), a.Hi()>=b.Hi() };
393     }
394 
395   template <typename T, int N>
operator ==(SIMD<T,N> a,SIMD<T,N> b)396   NETGEN_INLINE SIMD<mask64,N> operator== (SIMD<T,N> a, SIMD<T,N> b)
397     {
398       if constexpr(N==1) return a.Data() == b.Data();
399       else               return { a.Lo()==b.Lo(), a.Hi()==b.Hi() };
400     }
401 
402   template <typename T, int N>
operator !=(SIMD<T,N> a,SIMD<T,N> b)403   NETGEN_INLINE SIMD<mask64,N> operator!= (SIMD<T,N> a, SIMD<T,N> b)
404     {
405       if constexpr(N==1) return a.Data() != b.Data();
406       else               return { a.Lo()!=b.Lo(), a.Hi()!=b.Hi() };
407     }
408 
409   // int64_t operators with scalar operand (implement overloads to allow implicit casts for second operand)
410   template <int N>
operator +(SIMD<int64_t,N> a,int64_t b)411   NETGEN_INLINE SIMD<int64_t,N> operator+ (SIMD<int64_t,N> a, int64_t b) { return a+SIMD<int64_t,N>(b); }
412   template <int N>
operator +(int64_t a,SIMD<int64_t,N> b)413   NETGEN_INLINE SIMD<int64_t,N> operator+ (int64_t a, SIMD<int64_t,N> b) { return SIMD<int64_t,N>(a)+b; }
414   template <int N>
operator -(int64_t a,SIMD<int64_t,N> b)415   NETGEN_INLINE SIMD<int64_t,N> operator- (int64_t a, SIMD<int64_t,N> b) { return SIMD<int64_t,N>(a)-b; }
416   template <int N>
operator -(SIMD<int64_t,N> a,int64_t b)417   NETGEN_INLINE SIMD<int64_t,N> operator- (SIMD<int64_t,N> a, int64_t b) { return a-SIMD<int64_t,N>(b); }
418   template <int N>
operator *(int64_t a,SIMD<int64_t,N> b)419   NETGEN_INLINE SIMD<int64_t,N> operator* (int64_t a, SIMD<int64_t,N> b) { return SIMD<int64_t,N>(a)*b; }
420   template <int N>
operator *(SIMD<int64_t,N> b,int64_t a)421   NETGEN_INLINE SIMD<int64_t,N> operator* (SIMD<int64_t,N> b, int64_t a) { return SIMD<int64_t,N>(a)*b; }
422   template <int N>
operator /(SIMD<int64_t,N> a,int64_t b)423   NETGEN_INLINE SIMD<int64_t,N> operator/ (SIMD<int64_t,N> a, int64_t b) { return a/SIMD<int64_t,N>(b); }
424   template <int N>
operator /(int64_t a,SIMD<int64_t,N> b)425   NETGEN_INLINE SIMD<int64_t,N> operator/ (int64_t a, SIMD<int64_t,N> b) { return SIMD<int64_t,N>(a)/b; }
426   template <int N>
operator +=(SIMD<int64_t,N> & a,SIMD<int64_t,N> b)427   NETGEN_INLINE SIMD<int64_t,N> & operator+= (SIMD<int64_t,N> & a, SIMD<int64_t,N> b) { a=a+b; return a; }
428   template <int N>
operator +=(SIMD<int64_t,N> & a,int64_t b)429   NETGEN_INLINE SIMD<int64_t,N> & operator+= (SIMD<int64_t,N> & a, int64_t b) { a+=SIMD<int64_t,N>(b); return a; }
430   template <int N>
operator -=(SIMD<int64_t,N> & a,SIMD<int64_t,N> b)431   NETGEN_INLINE SIMD<int64_t,N> & operator-= (SIMD<int64_t,N> & a, SIMD<int64_t,N> b) { a = a-b; return a; }
432   template <int N>
operator -=(SIMD<int64_t,N> & a,int64_t b)433   NETGEN_INLINE SIMD<int64_t,N> & operator-= (SIMD<int64_t,N> & a, int64_t b) { a-=SIMD<int64_t,N>(b); return a; }
434   template <int N>
operator *=(SIMD<int64_t,N> & a,SIMD<int64_t,N> b)435   NETGEN_INLINE SIMD<int64_t,N> & operator*= (SIMD<int64_t,N> & a, SIMD<int64_t,N> b) { a=a*b; return a; }
436   template <int N>
operator *=(SIMD<int64_t,N> & a,int64_t b)437   NETGEN_INLINE SIMD<int64_t,N> & operator*= (SIMD<int64_t,N> & a, int64_t b) { a*=SIMD<int64_t,N>(b); return a; }
438   template <int N>
operator /=(SIMD<int64_t,N> & a,SIMD<int64_t,N> b)439   NETGEN_INLINE SIMD<int64_t,N> & operator/= (SIMD<int64_t,N> & a, SIMD<int64_t,N> b) { a = a/b; return a; }
440 
441   // double operators with scalar operand (implement overloads to allow implicit casts for second operand)
442   template <int N>
operator +(SIMD<double,N> a,double b)443   NETGEN_INLINE SIMD<double,N> operator+ (SIMD<double,N> a, double b) { return a+SIMD<double,N>(b); }
444   template <int N>
operator +(double a,SIMD<double,N> b)445   NETGEN_INLINE SIMD<double,N> operator+ (double a, SIMD<double,N> b) { return SIMD<double,N>(a)+b; }
446   template <int N>
operator -(double a,SIMD<double,N> b)447   NETGEN_INLINE SIMD<double,N> operator- (double a, SIMD<double,N> b) { return SIMD<double,N>(a)-b; }
448   template <int N>
operator -(SIMD<double,N> a,double b)449   NETGEN_INLINE SIMD<double,N> operator- (SIMD<double,N> a, double b) { return a-SIMD<double,N>(b); }
450   template <int N>
operator *(double a,SIMD<double,N> b)451   NETGEN_INLINE SIMD<double,N> operator* (double a, SIMD<double,N> b) { return SIMD<double,N>(a)*b; }
452   template <int N>
operator *(SIMD<double,N> b,double a)453   NETGEN_INLINE SIMD<double,N> operator* (SIMD<double,N> b, double a) { return SIMD<double,N>(a)*b; }
454   template <int N>
operator /(SIMD<double,N> a,double b)455   NETGEN_INLINE SIMD<double,N> operator/ (SIMD<double,N> a, double b) { return a/SIMD<double,N>(b); }
456   template <int N>
operator /(double a,SIMD<double,N> b)457   NETGEN_INLINE SIMD<double,N> operator/ (double a, SIMD<double,N> b) { return SIMD<double,N>(a)/b; }
458   template <int N>
operator +=(SIMD<double,N> & a,SIMD<double,N> b)459   NETGEN_INLINE SIMD<double,N> & operator+= (SIMD<double,N> & a, SIMD<double,N> b) { a=a+b; return a; }
460   template <int N>
operator +=(SIMD<double,N> & a,double b)461   NETGEN_INLINE SIMD<double,N> & operator+= (SIMD<double,N> & a, double b) { a+=SIMD<double,N>(b); return a; }
462   template <int N>
operator -=(SIMD<double,N> & a,SIMD<double,N> b)463   NETGEN_INLINE SIMD<double,N> & operator-= (SIMD<double,N> & a, SIMD<double,N> b) { a = a-b; return a; }
464   template <int N>
operator -=(SIMD<double,N> & a,double b)465   NETGEN_INLINE SIMD<double,N> & operator-= (SIMD<double,N> & a, double b) { a-=SIMD<double,N>(b); return a; }
466   template <int N>
operator *=(SIMD<double,N> & a,SIMD<double,N> b)467   NETGEN_INLINE SIMD<double,N> & operator*= (SIMD<double,N> & a, SIMD<double,N> b) { a=a*b; return a; }
468   template <int N>
operator *=(SIMD<double,N> & a,double b)469   NETGEN_INLINE SIMD<double,N> & operator*= (SIMD<double,N> & a, double b) { a*=SIMD<double,N>(b); return a; }
470   template <int N>
operator /=(SIMD<double,N> & a,SIMD<double,N> b)471   NETGEN_INLINE SIMD<double,N> & operator/= (SIMD<double,N> & a, SIMD<double,N> b) { a = a/b; return a; }
472 
473   // double functions
474 
475   template <int N>
L2Norm2(SIMD<double,N> a)476   NETGEN_INLINE SIMD<double,N> L2Norm2 (SIMD<double,N> a) { return a*a; }
477   template <int N>
Trans(SIMD<double,N> a)478   NETGEN_INLINE SIMD<double,N> Trans (SIMD<double,N> a) { return a; }
479 
480   template <int N>
HSum(SIMD<double,N> a)481   NETGEN_INLINE double HSum (SIMD<double,N> a)
482     {
483       if constexpr(N==1)
484           return a.Data();
485       else
486           return HSum(a.Lo()) + HSum(a.Hi());
487     }
488 
IfPos(double a,double b,double c)489   NETGEN_INLINE double IfPos (double a, double b, double c) { return a>0 ? b : c; }
IfZero(double a,double b,double c)490   NETGEN_INLINE double IfZero (double a, double b, double c) { return a==0. ? b : c; }
491 
492   template<typename T, int N>
IfPos(SIMD<T,N> a,SIMD<T,N> b,SIMD<T,N> c)493   NETGEN_INLINE SIMD<T,N> IfPos (SIMD<T,N> a, SIMD<T,N> b, SIMD<T,N> c)
494     {
495       if constexpr(N==1) return a.Data()>0.0 ? b : c;
496       else               return { IfPos(a.Lo(), b.Lo(), c.Lo()), IfPos(a.Hi(), b.Hi(), c.Hi())};
497 
498     }
499 
500   template<typename T, int N>
IfZero(SIMD<T,N> a,SIMD<T,N> b,SIMD<T,N> c)501   NETGEN_INLINE SIMD<T,N> IfZero (SIMD<T,N> a, SIMD<T,N> b, SIMD<T,N> c)
502     {
503       if constexpr(N==1) return a.Data()==0.0 ? b : c;
504       else               return { IfZero(a.Lo(), b.Lo(), c.Lo()), IfZero(a.Hi(), b.Hi(), c.Hi())};
505 
506     }
507 
508   template<typename T, int N>
If(SIMD<mask64,N> a,SIMD<T,N> b,SIMD<T,N> c)509   NETGEN_INLINE SIMD<T,N> If (SIMD<mask64,N> a, SIMD<T,N> b, SIMD<T,N> c)
510     {
511       if constexpr(N==1) return a.Data() ? b : c;
512       else               return { If(a.Lo(), b.Lo(), c.Lo()), If(a.Hi(), b.Hi(), c.Hi())};
513 
514     }
515 
516   // a*b+c
517   template <typename T1, typename T2, typename T3>
FMA(T1 a,T2 b,T3 c)518   NETGEN_INLINE auto FMA(T1 a, T2 b, T3 c)
519   {
520     return c+a*b;
521   }
522 
523   template <typename T1, typename T2, typename T3>
FNMA(T1 a,T2 b,T3 c)524   NETGEN_INLINE auto FNMA(T1 a, T2 b, T3 c)
525   {
526     return c-a*b;
527   }
528 
529   // update form of fma
530   template <int N>
FMAasm(SIMD<double,N> a,SIMD<double,N> b,SIMD<double,N> & sum)531   void FMAasm (SIMD<double,N> a, SIMD<double,N> b, SIMD<double,N> & sum)
532   {
533     sum = FMA(a,b,sum);
534   }
535 
536   // update form of fms
537   template <int N>
FNMAasm(SIMD<double,N> a,SIMD<double,N> b,SIMD<double,N> & sum)538   void FNMAasm (SIMD<double,N> a, SIMD<double,N> b, SIMD<double,N> & sum)
539   {
540     // sum -= a*b;
541     sum = FNMA(a,b,sum);
542   }
543 
544 
545   template <int i, typename T, int N>
546   T get(SIMD<T,N> a) { return a[i]; }
547 
548   template <int NUM, typename FUNC>
Iterate2(FUNC f)549   NETGEN_INLINE void Iterate2 (FUNC f)
550   {
551     if constexpr (NUM > 1) Iterate2<NUM-1> (f);
552     if constexpr (NUM >= 1) f(std::integral_constant<int,NUM-1>());
553   }
554 
555 
556   template <typename T, int N>
operator <<(ostream & ost,SIMD<T,N> simd)557   ostream & operator<< (ostream & ost, SIMD<T,N> simd)
558   {
559     /*
560     ost << simd[0];
561     for (int i = 1; i < simd.Size(); i++)
562       ost << " " << simd[i];
563     */
564     Iterate2<simd.Size()> ([&] (auto I) {
565         if (I.value != 0) ost << " ";
566         ost << get<I.value>(simd);
567       });
568     return ost;
569   }
570 
571   using std::sqrt;
572   template <int N>
sqrt(ngcore::SIMD<double,N> a)573   NETGEN_INLINE ngcore::SIMD<double,N> sqrt (ngcore::SIMD<double,N> a) {
574     return ngcore::SIMD<double>([a](int i)->double { return sqrt(a[i]); } );
575   }
576 
577   using std::fabs;
578   template <int N>
fabs(ngcore::SIMD<double,N> a)579   NETGEN_INLINE ngcore::SIMD<double,N> fabs (ngcore::SIMD<double,N> a) {
580     return ngcore::SIMD<double>([a](int i)->double { return fabs(a[i]); } );
581   }
582 
583   using std::floor;
584   template <int N>
floor(ngcore::SIMD<double,N> a)585   NETGEN_INLINE ngcore::SIMD<double,N> floor (ngcore::SIMD<double,N> a) {
586     return ngcore::SIMD<double>([a](int i)->double { return floor(a[i]); } );
587   }
588 
589   using std::ceil;
590   template <int N>
ceil(ngcore::SIMD<double,N> a)591   NETGEN_INLINE ngcore::SIMD<double,N> ceil (ngcore::SIMD<double,N> a) {
592     return ngcore::SIMD<double>([a](int i)->double { return ceil(a[i]); } );
593   }
594 
595   using std::exp;
596   template <int N>
exp(ngcore::SIMD<double,N> a)597   NETGEN_INLINE ngcore::SIMD<double,N> exp (ngcore::SIMD<double,N> a) {
598     return ngcore::SIMD<double>([a](int i)->double { return exp(a[i]); } );
599   }
600 
601   using std::log;
602   template <int N>
log(ngcore::SIMD<double,N> a)603   NETGEN_INLINE ngcore::SIMD<double,N> log (ngcore::SIMD<double,N> a) {
604     return ngcore::SIMD<double,N>([a](int i)->double { return log(a[i]); } );
605   }
606 
607   using std::pow;
608   template <int N>
pow(ngcore::SIMD<double,N> a,double x)609   NETGEN_INLINE ngcore::SIMD<double,N> pow (ngcore::SIMD<double,N> a, double x) {
610     return ngcore::SIMD<double,N>([a,x](int i)->double { return pow(a[i],x); } );
611   }
612 
613   template <int N>
pow(ngcore::SIMD<double,N> a,ngcore::SIMD<double,N> b)614   NETGEN_INLINE ngcore::SIMD<double,N> pow (ngcore::SIMD<double,N> a, ngcore::SIMD<double,N> b) {
615     return ngcore::SIMD<double,N>([a,b](int i)->double { return pow(a[i],b[i]); } );
616   }
617 
618   using std::sin;
619   template <int N>
sin(ngcore::SIMD<double,N> a)620   NETGEN_INLINE ngcore::SIMD<double,N> sin (ngcore::SIMD<double,N> a) {
621     return ngcore::SIMD<double,N>([a](int i)->double { return sin(a[i]); } );
622   }
623 
624   using std::cos;
625   template <int N>
cos(ngcore::SIMD<double,N> a)626   NETGEN_INLINE ngcore::SIMD<double,N> cos (ngcore::SIMD<double,N> a) {
627     return ngcore::SIMD<double,N>([a](int i)->double { return cos(a[i]); } );
628   }
629 
630   using std::tan;
631   template <int N>
tan(ngcore::SIMD<double,N> a)632   NETGEN_INLINE ngcore::SIMD<double,N> tan (ngcore::SIMD<double,N> a) {
633     return ngcore::SIMD<double,N>([a](int i)->double { return tan(a[i]); } );
634   }
635 
636   using std::atan;
637   template <int N>
atan(ngcore::SIMD<double,N> a)638   NETGEN_INLINE ngcore::SIMD<double,N> atan (ngcore::SIMD<double,N> a) {
639     return ngcore::SIMD<double,N>([a](int i)->double { return atan(a[i]); } );
640   }
641 
642   using std::atan2;
643   template <int N>
atan2(ngcore::SIMD<double,N> y,ngcore::SIMD<double,N> x)644   NETGEN_INLINE ngcore::SIMD<double,N> atan2 (ngcore::SIMD<double,N> y, ngcore::SIMD<double,N> x) {
645     return ngcore::SIMD<double,N>([y,x](int i)->double { return atan2(y[i], x[i]); } );
646   }
647 
648   using std::acos;
649   template <int N>
acos(ngcore::SIMD<double,N> a)650   NETGEN_INLINE ngcore::SIMD<double,N> acos (ngcore::SIMD<double,N> a) {
651     return ngcore::SIMD<double,N>([a](int i)->double { return acos(a[i]); } );
652   }
653 
654   using std::asin;
655   template <int N>
asin(ngcore::SIMD<double,N> a)656   NETGEN_INLINE ngcore::SIMD<double,N> asin (ngcore::SIMD<double,N> a) {
657     return ngcore::SIMD<double,N>([a](int i)->double { return asin(a[i]); } );
658   }
659 
660   using std::sinh;
661   template <int N>
sinh(ngcore::SIMD<double,N> a)662   NETGEN_INLINE ngcore::SIMD<double,N> sinh (ngcore::SIMD<double,N> a) {
663     return ngcore::SIMD<double,N>([a](int i)->double { return sinh(a[i]); } );
664   }
665 
666   using std::cosh;
667   template <int N>
cosh(ngcore::SIMD<double,N> a)668   NETGEN_INLINE ngcore::SIMD<double,N> cosh (ngcore::SIMD<double,N> a) {
669     return ngcore::SIMD<double,N>([a](int i)->double { return cosh(a[i]); } );
670   }
671 
672   template<int N, typename T>
673   using MultiSIMD = SIMD<T, N*GetDefaultSIMDSize()>;
674 
675   template<int N>
Unpack(SIMD<double,N> a,SIMD<double,N> b)676   NETGEN_INLINE auto Unpack (SIMD<double,N> a, SIMD<double,N> b)
677   {
678     if constexpr(N==1)
679       {
680         return std::make_tuple(SIMD<double,N>{a.Data()}, SIMD<double,N>{b.Data()} );
681       }
682     else if constexpr(N==2)
683       {
684         return std::make_tuple(SIMD<double,N>{ a.Lo(), b.Lo() },
685             SIMD<double,N>{ a.Hi(), b.Hi() });
686       }
687     else
688       {
689         auto [a1,b1] = Unpack(a.Lo(), b.Lo());
690         auto [a2,b2] = Unpack(a.Hi(), b.Hi());
691         return std::make_tuple(SIMD<double,N>{ a1, a2 },
692             SIMD<double,N>{ b1, b2 });
693       }
694   }
695 
696 }
697 
698 
699 namespace std
700 {
701   // structured binding support
702   template <typename T, int N >
703   struct tuple_size<ngcore::SIMD<T,N>> : std::integral_constant<std::size_t, N> {};
704   template<size_t N, typename T, int M> struct tuple_element<N,ngcore::SIMD<T,M>> { using type = T; };
705 }
706 
707 #endif // NETGEN_CORE_SIMD_GENERIC_HPP
708