1 #ifndef NETGEN_CORE_SIMD_GENERIC_HPP 2 #define NETGEN_CORE_SIMD_GENERIC_HPP 3 4 /**************************************************************************/ 5 /* File: simd_base.hpp */ 6 /* Author: Joachim Schoeberl, Matthias Hochsteger */ 7 /* Date: 25. Mar. 16 */ 8 /**************************************************************************/ 9 10 #include <type_traits> 11 #include <functional> 12 #include <tuple> 13 14 #include "array.hpp" 15 16 namespace ngcore 17 { 18 GetDefaultSIMDSize()19 constexpr int GetDefaultSIMDSize() { 20 #if defined __AVX512F__ 21 return 8; 22 #elif defined __AVX__ 23 return 4; 24 #elif defined NETGEN_ARCH_AMD64 25 return 2; 26 #else 27 return 2; 28 #endif 29 } 30 31 32 template <typename T, int N=GetDefaultSIMDSize()> class SIMD; 33 34 class mask64; 35 36 //////////////////////////////////////////////////////////////////////////// 37 namespace detail { 38 template <typename T, size_t N, size_t... I> array_range_impl(std::array<T,N> const & arr,size_t first,std::index_sequence<I...>)39 auto array_range_impl(std::array<T, N> const& arr, 40 size_t first, 41 std::index_sequence<I...>) 42 -> std::array<T, sizeof...(I)> { 43 return {arr[first + I]...}; 44 } 45 46 template <size_t S, typename T, size_t N> array_range(std::array<T,N> const & arr,size_t first)47 auto array_range(std::array<T, N> const& arr, size_t first) { 48 return array_range_impl(arr, first, std::make_index_sequence<S>{}); 49 } 50 51 } // namespace detail 52 53 //////////////////////////////////////////////////////////////////////////// 54 // mask 55 56 template <> 57 class SIMD<mask64,1> 58 { 59 int64_t mask; 60 public: SIMD(int64_t i)61 SIMD (int64_t i) 62 : mask(i > 0 ? -1 : 0) { ; } Data() const63 bool Data() const { return mask; } Size()64 static constexpr int Size() { return 1; } operator [](int) const65 auto operator[] (int /* i */) const { return mask; } 66 }; 67 68 69 template <int N> 70 class alignas(GetDefaultSIMDSize()*sizeof(int64_t)) SIMD<mask64,N> 71 { 72 static constexpr int N1 = std::min(GetDefaultSIMDSize(), N/2); 73 static constexpr int N2 = N-N1; 74 75 SIMD<mask64,N1> lo; 76 SIMD<mask64,N2> hi; 77 public: 78 SIMD(int64_t i)79 SIMD (int64_t i) : lo(i), hi(i-N1 ) { ; } SIMD(SIMD<mask64,N1> lo_,SIMD<mask64,N2> hi_)80 SIMD (SIMD<mask64,N1> lo_, SIMD<mask64,N2> hi_) : lo(lo_), hi(hi_) { ; } Lo() const81 SIMD<mask64,N1> Lo() const { return lo; } Hi() const82 SIMD<mask64,N2> Hi() const { return hi; } Size()83 static constexpr int Size() { return N; } 84 }; 85 86 template<int N> operator &&(SIMD<mask64,N> a,SIMD<mask64,N> b)87 NETGEN_INLINE SIMD<mask64,N> operator&& (SIMD<mask64,N> a, SIMD<mask64,N> b) 88 { 89 if constexpr(N==1) return a.Data() && b.Data(); 90 else return { a.Lo() && b.Lo(), a.Hi() && b.Hi() }; 91 } 92 93 94 //////////////////////////////////////////////////////////////////////////// 95 // int64 96 97 template<> 98 class SIMD<int64_t,1> 99 { 100 int64_t data; 101 102 public: Size()103 static constexpr int Size() { return 1; } SIMD()104 SIMD () {} 105 SIMD (const SIMD &) = default; 106 SIMD & operator= (const SIMD &) = default; SIMD(int val)107 SIMD (int val) : data{val} {} SIMD(int64_t val)108 SIMD (int64_t val) : data{val} {} SIMD(size_t val)109 SIMD (size_t val) : data(val) {} SIMD(std::array<int64_t,1> arr)110 explicit SIMD (std::array<int64_t, 1> arr) 111 : data{arr[0]} 112 {} 113 operator [](int i) const114 int64_t operator[] (int i) const { return ((int64_t*)(&data))[i]; } Data() const115 auto Data() const { return data; } FirstInt(int64_t n0=0)116 static SIMD FirstInt(int64_t n0=0) { return {n0}; } 117 template <int I> Get()118 int64_t Get() 119 { 120 static_assert(I==0); 121 return data; 122 } 123 }; 124 125 template<int N> 126 class alignas(GetDefaultSIMDSize()*sizeof(int64_t)) SIMD<int64_t,N> 127 { 128 static constexpr int N1 = std::min(GetDefaultSIMDSize(), N/2); 129 static constexpr int N2 = N-N1; 130 131 SIMD<int64_t,N1> lo; 132 SIMD<int64_t,N2> high; 133 134 public: Size()135 static constexpr int Size() { return N; } 136 SIMD()137 SIMD () {} 138 SIMD (const SIMD &) = default; 139 SIMD & operator= (const SIMD &) = default; 140 SIMD(int val)141 SIMD (int val) : lo{val}, high{val} { ; } SIMD(int64_t val)142 SIMD (int64_t val) : lo{val}, high{val} { ; } SIMD(size_t val)143 SIMD (size_t val) : lo{val}, high{val} { ; } SIMD(SIMD<int64_t,N1> lo_,SIMD<int64_t,N2> high_)144 SIMD (SIMD<int64_t,N1> lo_, SIMD<int64_t,N2> high_) : lo(lo_), high(high_) { ; } 145 SIMD(std::array<int64_t,N> arr)146 explicit SIMD( std::array<int64_t, N> arr ) 147 : lo(detail::array_range<N1>(arr, 0)), 148 high(detail::array_range<N2>(arr, N1)) 149 {} 150 151 template<typename ...T> SIMD(const T...vals)152 explicit SIMD(const T... vals) 153 : lo(detail::array_range<N1>(std::array<int64_t, N>{vals...}, 0)), 154 high(detail::array_range<N2>(std::array<int64_t, N>{vals...}, N1)) 155 { 156 static_assert(sizeof...(vals)==N, "wrong number of arguments"); 157 } 158 159 160 template<typename T, typename std::enable_if<std::is_convertible<T, std::function<int64_t(int)>>::value, int>::type = 0> SIMD(const T & func)161 SIMD (const T & func) 162 { 163 for(auto i : IntRange(N1)) 164 lo[i] = func(i); 165 for(auto i : IntRange(N2)) 166 high[i] = func(N1+i); 167 } 168 Lo() const169 auto Lo() const { return lo; } Hi() const170 auto Hi() const { return high; } 171 operator [](int i) const172 int64_t operator[] (int i) const { return ((int64_t*)(&lo))[i]; } 173 174 /* 175 operator tuple<int64_t&,int64_t&,int64_t&,int64_t&> () 176 { return tuple<int64_t&,int64_t&,int64_t&,int64_t&>((*this)[0], (*this)[1], (*this)[2], (*this)[3]); } 177 */ 178 179 /* 180 static SIMD FirstInt() { return { 0, 1, 2, 3 }; } 181 */ FirstInt(int64_t n0=0)182 static SIMD FirstInt(int64_t n0=0) { return {SIMD<int64_t,N1>::FirstInt(n0), SIMD<int64_t,N2>::FirstInt(n0+N1)}; } 183 template <int I> Get()184 int64_t Get() 185 { 186 static_assert(I>=0 && I<N, "Index out of range"); 187 if constexpr(I<N1) return lo.template Get<I>(); 188 else return high.template Get<I-N1>(); 189 } 190 }; 191 192 193 //////////////////////////////////////////////////////////////////////////// 194 // double 195 196 template<> 197 class SIMD<double,1> 198 { 199 double data; 200 201 public: Size()202 static constexpr int Size() { return 1; } SIMD()203 SIMD () {} 204 SIMD (const SIMD &) = default; 205 SIMD & operator= (const SIMD &) = default; SIMD(double val)206 SIMD (double val) { data = val; } SIMD(int val)207 SIMD (int val) { data = val; } SIMD(size_t val)208 SIMD (size_t val) { data = val; } SIMD(double const * p)209 SIMD (double const * p) { data = *p; } SIMD(double const * p,SIMD<mask64,1> mask)210 SIMD (double const * p, SIMD<mask64,1> mask) { data = mask.Data() ? *p : 0.0; } SIMD(std::array<double,1> arr)211 explicit SIMD (std::array<double, 1> arr) 212 : data{arr[0]} 213 {} 214 215 template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0> SIMD(const T & func)216 SIMD (const T & func) 217 { 218 data = func(0); 219 } 220 221 template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0> operator =(const T & func)222 SIMD & operator= (const T & func) 223 { 224 data = func(0); 225 return *this; 226 } 227 Store(double * p)228 void Store (double * p) { *p = data; } Store(double * p,SIMD<mask64,1> mask)229 void Store (double * p, SIMD<mask64,1> mask) { if (mask.Data()) *p = data; } 230 operator [](int i) const231 double operator[] (int i) const { return ((double*)(&data))[i]; } Data() const232 double Data() const { return data; } 233 template <int I> Get()234 double Get() 235 { 236 static_assert(I==0); 237 return data; 238 } 239 }; 240 241 242 template<int N> 243 class alignas(GetDefaultSIMDSize()*sizeof(double)) SIMD<double, N> 244 { 245 static constexpr int N1 = std::min(GetDefaultSIMDSize(), N/2); 246 static constexpr int N2 = N-N1; 247 248 SIMD<double, N1> lo; 249 SIMD<double, N2> high; 250 251 public: Size()252 static constexpr int Size() { return N; } SIMD()253 SIMD () {} 254 SIMD (const SIMD &) = default; SIMD(SIMD<double,N1> lo_,SIMD<double,N2> hi_)255 SIMD (SIMD<double,N1> lo_, SIMD<double,N2> hi_) : lo(lo_), high(hi_) { ; } 256 257 template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0> SIMD(const T & func)258 SIMD (const T & func) 259 { 260 double *p = (double*)this; 261 for(auto i : IntRange(N)) 262 p[i] = func(i); 263 } 264 265 template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0> operator =(const T & func)266 SIMD & operator= (const T & func) 267 { 268 double *p = (double*)this; 269 for(auto i : IntRange(N)) 270 p[i] = func(i); 271 return *this; 272 } 273 274 275 SIMD & operator= (const SIMD &) = default; 276 SIMD(double val)277 SIMD (double val) : lo{val}, high{val} { ; } SIMD(int val)278 SIMD (int val) : lo{val}, high{val} { ; } SIMD(size_t val)279 SIMD (size_t val) : lo{val}, high{val} { ; } 280 SIMD(double const * p)281 SIMD (double const * p) : lo{p}, high{p+N1} { ; } SIMD(double const * p,SIMD<mask64,N> mask)282 SIMD (double const * p, SIMD<mask64,N> mask) 283 : lo{p, mask.Lo()}, high{p+N1, mask.Hi()} 284 { } SIMD(double * p)285 SIMD (double * p) : lo{p}, high{p+N1} { ; } SIMD(double * p,SIMD<mask64,N> mask)286 SIMD (double * p, SIMD<mask64,N> mask) 287 : lo{p, mask.Lo()}, high{p+N1, mask.Hi()} 288 { } 289 SIMD(std::array<double,N> arr)290 explicit SIMD( std::array<double, N> arr ) 291 : lo(detail::array_range<N1>(arr, 0)), 292 high(detail::array_range<N2>(arr, N1)) 293 {} 294 295 template<typename ...T> SIMD(const T...vals)296 explicit SIMD(const T... vals) 297 : lo(detail::array_range<N1>(std::array<double, N>{vals...}, 0)), 298 high(detail::array_range<N2>(std::array<double, N>{vals...}, N1)) 299 { 300 static_assert(sizeof...(vals)==N, "wrong number of arguments"); 301 } 302 Store(double * p)303 void Store (double * p) { lo.Store(p); high.Store(p+N1); } Store(double * p,SIMD<mask64,N> mask)304 void Store (double * p, SIMD<mask64,N> mask) 305 { 306 lo.Store(p, mask.Lo()); 307 high.Store(p+N1, mask.Hi()); 308 } 309 Lo() const310 auto Lo() const { return lo; } Hi() const311 auto Hi() const { return high; } 312 operator [](int i) const313 double operator[] (int i) const { return ((double*)(&lo))[i]; } 314 315 template<typename=std::enable_if<N==2>> operator std::tuple<double&,double&>()316 operator std::tuple<double&,double&> () 317 { 318 double *p = (double*)this; 319 return std::tuple<double&,double&>(p[0], p[1]); 320 } 321 322 template<typename=std::enable_if<N==4>> operator std::tuple<double&,double&,double&,double&>()323 operator std::tuple<double&,double&,double&,double&> () 324 { return std::tuple<double&,double&,double&,double&>((*this)[0], (*this)[1], (*this)[2], (*this)[3]); } 325 326 template <int I> Get()327 double Get() 328 { 329 static_assert(I>=0 && I<N, "Index out of range"); 330 if constexpr(I<N1) return lo.template Get<I>(); 331 else return high.template Get<I-N1>(); 332 } Data() const333 auto Data() const { return *this; } 334 }; 335 336 337 // Generic operators for any arithmetic type/simd width 338 template <typename T, int N> operator +(SIMD<T,N> a,SIMD<T,N> b)339 NETGEN_INLINE SIMD<T,N> operator+ (SIMD<T,N> a, SIMD<T,N> b) { 340 if constexpr(N==1) return a.Data()+b.Data(); 341 else return { a.Lo()+b.Lo(), a.Hi()+b.Hi() }; 342 } 343 344 template <typename T, int N> operator -(SIMD<T,N> a,SIMD<T,N> b)345 NETGEN_INLINE SIMD<T,N> operator- (SIMD<T,N> a, SIMD<T,N> b) { 346 if constexpr(N==1) return a.Data()-b.Data(); 347 else return { a.Lo()-b.Lo(), a.Hi()-b.Hi() }; 348 } 349 template <typename T, int N> operator -(SIMD<T,N> a)350 NETGEN_INLINE SIMD<T,N> operator- (SIMD<T,N> a) { 351 if constexpr(N==1) return -a.Data(); 352 else return { -a.Lo(), -a.Hi() }; 353 } 354 355 template <typename T, int N> operator *(SIMD<T,N> a,SIMD<T,N> b)356 NETGEN_INLINE SIMD<T,N> operator* (SIMD<T,N> a, SIMD<T,N> b) { 357 if constexpr(N==1) return a.Data()*b.Data(); 358 else return { a.Lo()*b.Lo(), a.Hi()*b.Hi() }; 359 } 360 361 template <typename T, int N> operator /(SIMD<T,N> a,SIMD<T,N> b)362 NETGEN_INLINE SIMD<T,N> operator/ (SIMD<T,N> a, SIMD<T,N> b) { 363 if constexpr(N==1) return a.Data()/b.Data(); 364 else return { a.Lo()/b.Lo(), a.Hi()/b.Hi() }; 365 } 366 367 template <typename T, int N> operator <(SIMD<T,N> a,SIMD<T,N> b)368 NETGEN_INLINE SIMD<mask64,N> operator< (SIMD<T,N> a, SIMD<T,N> b) 369 { 370 if constexpr(N==1) return a.Data() < b.Data(); 371 else return { a.Lo()<b.Lo(), a.Hi()<b.Hi() }; 372 } 373 374 template <typename T, int N> operator <=(SIMD<T,N> a,SIMD<T,N> b)375 NETGEN_INLINE SIMD<mask64,N> operator<= (SIMD<T,N> a, SIMD<T,N> b) 376 { 377 if constexpr(N==1) return a.Data() <= b.Data(); 378 else return { a.Lo()<=b.Lo(), a.Hi()<=b.Hi() }; 379 } 380 381 template <typename T, int N> operator >(SIMD<T,N> a,SIMD<T,N> b)382 NETGEN_INLINE SIMD<mask64,N> operator> (SIMD<T,N> a, SIMD<T,N> b) 383 { 384 if constexpr(N==1) return a.Data() > b.Data(); 385 else return { a.Lo()>b.Lo(), a.Hi()>b.Hi() }; 386 } 387 388 template <typename T, int N> operator >=(SIMD<T,N> a,SIMD<T,N> b)389 NETGEN_INLINE SIMD<mask64,N> operator>= (SIMD<T,N> a, SIMD<T,N> b) 390 { 391 if constexpr(N==1) return a.Data() >= b.Data(); 392 else return { a.Lo()>=b.Lo(), a.Hi()>=b.Hi() }; 393 } 394 395 template <typename T, int N> operator ==(SIMD<T,N> a,SIMD<T,N> b)396 NETGEN_INLINE SIMD<mask64,N> operator== (SIMD<T,N> a, SIMD<T,N> b) 397 { 398 if constexpr(N==1) return a.Data() == b.Data(); 399 else return { a.Lo()==b.Lo(), a.Hi()==b.Hi() }; 400 } 401 402 template <typename T, int N> operator !=(SIMD<T,N> a,SIMD<T,N> b)403 NETGEN_INLINE SIMD<mask64,N> operator!= (SIMD<T,N> a, SIMD<T,N> b) 404 { 405 if constexpr(N==1) return a.Data() != b.Data(); 406 else return { a.Lo()!=b.Lo(), a.Hi()!=b.Hi() }; 407 } 408 409 // int64_t operators with scalar operand (implement overloads to allow implicit casts for second operand) 410 template <int N> operator +(SIMD<int64_t,N> a,int64_t b)411 NETGEN_INLINE SIMD<int64_t,N> operator+ (SIMD<int64_t,N> a, int64_t b) { return a+SIMD<int64_t,N>(b); } 412 template <int N> operator +(int64_t a,SIMD<int64_t,N> b)413 NETGEN_INLINE SIMD<int64_t,N> operator+ (int64_t a, SIMD<int64_t,N> b) { return SIMD<int64_t,N>(a)+b; } 414 template <int N> operator -(int64_t a,SIMD<int64_t,N> b)415 NETGEN_INLINE SIMD<int64_t,N> operator- (int64_t a, SIMD<int64_t,N> b) { return SIMD<int64_t,N>(a)-b; } 416 template <int N> operator -(SIMD<int64_t,N> a,int64_t b)417 NETGEN_INLINE SIMD<int64_t,N> operator- (SIMD<int64_t,N> a, int64_t b) { return a-SIMD<int64_t,N>(b); } 418 template <int N> operator *(int64_t a,SIMD<int64_t,N> b)419 NETGEN_INLINE SIMD<int64_t,N> operator* (int64_t a, SIMD<int64_t,N> b) { return SIMD<int64_t,N>(a)*b; } 420 template <int N> operator *(SIMD<int64_t,N> b,int64_t a)421 NETGEN_INLINE SIMD<int64_t,N> operator* (SIMD<int64_t,N> b, int64_t a) { return SIMD<int64_t,N>(a)*b; } 422 template <int N> operator /(SIMD<int64_t,N> a,int64_t b)423 NETGEN_INLINE SIMD<int64_t,N> operator/ (SIMD<int64_t,N> a, int64_t b) { return a/SIMD<int64_t,N>(b); } 424 template <int N> operator /(int64_t a,SIMD<int64_t,N> b)425 NETGEN_INLINE SIMD<int64_t,N> operator/ (int64_t a, SIMD<int64_t,N> b) { return SIMD<int64_t,N>(a)/b; } 426 template <int N> operator +=(SIMD<int64_t,N> & a,SIMD<int64_t,N> b)427 NETGEN_INLINE SIMD<int64_t,N> & operator+= (SIMD<int64_t,N> & a, SIMD<int64_t,N> b) { a=a+b; return a; } 428 template <int N> operator +=(SIMD<int64_t,N> & a,int64_t b)429 NETGEN_INLINE SIMD<int64_t,N> & operator+= (SIMD<int64_t,N> & a, int64_t b) { a+=SIMD<int64_t,N>(b); return a; } 430 template <int N> operator -=(SIMD<int64_t,N> & a,SIMD<int64_t,N> b)431 NETGEN_INLINE SIMD<int64_t,N> & operator-= (SIMD<int64_t,N> & a, SIMD<int64_t,N> b) { a = a-b; return a; } 432 template <int N> operator -=(SIMD<int64_t,N> & a,int64_t b)433 NETGEN_INLINE SIMD<int64_t,N> & operator-= (SIMD<int64_t,N> & a, int64_t b) { a-=SIMD<int64_t,N>(b); return a; } 434 template <int N> operator *=(SIMD<int64_t,N> & a,SIMD<int64_t,N> b)435 NETGEN_INLINE SIMD<int64_t,N> & operator*= (SIMD<int64_t,N> & a, SIMD<int64_t,N> b) { a=a*b; return a; } 436 template <int N> operator *=(SIMD<int64_t,N> & a,int64_t b)437 NETGEN_INLINE SIMD<int64_t,N> & operator*= (SIMD<int64_t,N> & a, int64_t b) { a*=SIMD<int64_t,N>(b); return a; } 438 template <int N> operator /=(SIMD<int64_t,N> & a,SIMD<int64_t,N> b)439 NETGEN_INLINE SIMD<int64_t,N> & operator/= (SIMD<int64_t,N> & a, SIMD<int64_t,N> b) { a = a/b; return a; } 440 441 // double operators with scalar operand (implement overloads to allow implicit casts for second operand) 442 template <int N> operator +(SIMD<double,N> a,double b)443 NETGEN_INLINE SIMD<double,N> operator+ (SIMD<double,N> a, double b) { return a+SIMD<double,N>(b); } 444 template <int N> operator +(double a,SIMD<double,N> b)445 NETGEN_INLINE SIMD<double,N> operator+ (double a, SIMD<double,N> b) { return SIMD<double,N>(a)+b; } 446 template <int N> operator -(double a,SIMD<double,N> b)447 NETGEN_INLINE SIMD<double,N> operator- (double a, SIMD<double,N> b) { return SIMD<double,N>(a)-b; } 448 template <int N> operator -(SIMD<double,N> a,double b)449 NETGEN_INLINE SIMD<double,N> operator- (SIMD<double,N> a, double b) { return a-SIMD<double,N>(b); } 450 template <int N> operator *(double a,SIMD<double,N> b)451 NETGEN_INLINE SIMD<double,N> operator* (double a, SIMD<double,N> b) { return SIMD<double,N>(a)*b; } 452 template <int N> operator *(SIMD<double,N> b,double a)453 NETGEN_INLINE SIMD<double,N> operator* (SIMD<double,N> b, double a) { return SIMD<double,N>(a)*b; } 454 template <int N> operator /(SIMD<double,N> a,double b)455 NETGEN_INLINE SIMD<double,N> operator/ (SIMD<double,N> a, double b) { return a/SIMD<double,N>(b); } 456 template <int N> operator /(double a,SIMD<double,N> b)457 NETGEN_INLINE SIMD<double,N> operator/ (double a, SIMD<double,N> b) { return SIMD<double,N>(a)/b; } 458 template <int N> operator +=(SIMD<double,N> & a,SIMD<double,N> b)459 NETGEN_INLINE SIMD<double,N> & operator+= (SIMD<double,N> & a, SIMD<double,N> b) { a=a+b; return a; } 460 template <int N> operator +=(SIMD<double,N> & a,double b)461 NETGEN_INLINE SIMD<double,N> & operator+= (SIMD<double,N> & a, double b) { a+=SIMD<double,N>(b); return a; } 462 template <int N> operator -=(SIMD<double,N> & a,SIMD<double,N> b)463 NETGEN_INLINE SIMD<double,N> & operator-= (SIMD<double,N> & a, SIMD<double,N> b) { a = a-b; return a; } 464 template <int N> operator -=(SIMD<double,N> & a,double b)465 NETGEN_INLINE SIMD<double,N> & operator-= (SIMD<double,N> & a, double b) { a-=SIMD<double,N>(b); return a; } 466 template <int N> operator *=(SIMD<double,N> & a,SIMD<double,N> b)467 NETGEN_INLINE SIMD<double,N> & operator*= (SIMD<double,N> & a, SIMD<double,N> b) { a=a*b; return a; } 468 template <int N> operator *=(SIMD<double,N> & a,double b)469 NETGEN_INLINE SIMD<double,N> & operator*= (SIMD<double,N> & a, double b) { a*=SIMD<double,N>(b); return a; } 470 template <int N> operator /=(SIMD<double,N> & a,SIMD<double,N> b)471 NETGEN_INLINE SIMD<double,N> & operator/= (SIMD<double,N> & a, SIMD<double,N> b) { a = a/b; return a; } 472 473 // double functions 474 475 template <int N> L2Norm2(SIMD<double,N> a)476 NETGEN_INLINE SIMD<double,N> L2Norm2 (SIMD<double,N> a) { return a*a; } 477 template <int N> Trans(SIMD<double,N> a)478 NETGEN_INLINE SIMD<double,N> Trans (SIMD<double,N> a) { return a; } 479 480 template <int N> HSum(SIMD<double,N> a)481 NETGEN_INLINE double HSum (SIMD<double,N> a) 482 { 483 if constexpr(N==1) 484 return a.Data(); 485 else 486 return HSum(a.Lo()) + HSum(a.Hi()); 487 } 488 IfPos(double a,double b,double c)489 NETGEN_INLINE double IfPos (double a, double b, double c) { return a>0 ? b : c; } IfZero(double a,double b,double c)490 NETGEN_INLINE double IfZero (double a, double b, double c) { return a==0. ? b : c; } 491 492 template<typename T, int N> IfPos(SIMD<T,N> a,SIMD<T,N> b,SIMD<T,N> c)493 NETGEN_INLINE SIMD<T,N> IfPos (SIMD<T,N> a, SIMD<T,N> b, SIMD<T,N> c) 494 { 495 if constexpr(N==1) return a.Data()>0.0 ? b : c; 496 else return { IfPos(a.Lo(), b.Lo(), c.Lo()), IfPos(a.Hi(), b.Hi(), c.Hi())}; 497 498 } 499 500 template<typename T, int N> IfZero(SIMD<T,N> a,SIMD<T,N> b,SIMD<T,N> c)501 NETGEN_INLINE SIMD<T,N> IfZero (SIMD<T,N> a, SIMD<T,N> b, SIMD<T,N> c) 502 { 503 if constexpr(N==1) return a.Data()==0.0 ? b : c; 504 else return { IfZero(a.Lo(), b.Lo(), c.Lo()), IfZero(a.Hi(), b.Hi(), c.Hi())}; 505 506 } 507 508 template<typename T, int N> If(SIMD<mask64,N> a,SIMD<T,N> b,SIMD<T,N> c)509 NETGEN_INLINE SIMD<T,N> If (SIMD<mask64,N> a, SIMD<T,N> b, SIMD<T,N> c) 510 { 511 if constexpr(N==1) return a.Data() ? b : c; 512 else return { If(a.Lo(), b.Lo(), c.Lo()), If(a.Hi(), b.Hi(), c.Hi())}; 513 514 } 515 516 // a*b+c 517 template <typename T1, typename T2, typename T3> FMA(T1 a,T2 b,T3 c)518 NETGEN_INLINE auto FMA(T1 a, T2 b, T3 c) 519 { 520 return c+a*b; 521 } 522 523 template <typename T1, typename T2, typename T3> FNMA(T1 a,T2 b,T3 c)524 NETGEN_INLINE auto FNMA(T1 a, T2 b, T3 c) 525 { 526 return c-a*b; 527 } 528 529 // update form of fma 530 template <int N> FMAasm(SIMD<double,N> a,SIMD<double,N> b,SIMD<double,N> & sum)531 void FMAasm (SIMD<double,N> a, SIMD<double,N> b, SIMD<double,N> & sum) 532 { 533 sum = FMA(a,b,sum); 534 } 535 536 // update form of fms 537 template <int N> FNMAasm(SIMD<double,N> a,SIMD<double,N> b,SIMD<double,N> & sum)538 void FNMAasm (SIMD<double,N> a, SIMD<double,N> b, SIMD<double,N> & sum) 539 { 540 // sum -= a*b; 541 sum = FNMA(a,b,sum); 542 } 543 544 545 template <int i, typename T, int N> 546 T get(SIMD<T,N> a) { return a[i]; } 547 548 template <int NUM, typename FUNC> Iterate2(FUNC f)549 NETGEN_INLINE void Iterate2 (FUNC f) 550 { 551 if constexpr (NUM > 1) Iterate2<NUM-1> (f); 552 if constexpr (NUM >= 1) f(std::integral_constant<int,NUM-1>()); 553 } 554 555 556 template <typename T, int N> operator <<(ostream & ost,SIMD<T,N> simd)557 ostream & operator<< (ostream & ost, SIMD<T,N> simd) 558 { 559 /* 560 ost << simd[0]; 561 for (int i = 1; i < simd.Size(); i++) 562 ost << " " << simd[i]; 563 */ 564 Iterate2<simd.Size()> ([&] (auto I) { 565 if (I.value != 0) ost << " "; 566 ost << get<I.value>(simd); 567 }); 568 return ost; 569 } 570 571 using std::sqrt; 572 template <int N> sqrt(ngcore::SIMD<double,N> a)573 NETGEN_INLINE ngcore::SIMD<double,N> sqrt (ngcore::SIMD<double,N> a) { 574 return ngcore::SIMD<double>([a](int i)->double { return sqrt(a[i]); } ); 575 } 576 577 using std::fabs; 578 template <int N> fabs(ngcore::SIMD<double,N> a)579 NETGEN_INLINE ngcore::SIMD<double,N> fabs (ngcore::SIMD<double,N> a) { 580 return ngcore::SIMD<double>([a](int i)->double { return fabs(a[i]); } ); 581 } 582 583 using std::floor; 584 template <int N> floor(ngcore::SIMD<double,N> a)585 NETGEN_INLINE ngcore::SIMD<double,N> floor (ngcore::SIMD<double,N> a) { 586 return ngcore::SIMD<double>([a](int i)->double { return floor(a[i]); } ); 587 } 588 589 using std::ceil; 590 template <int N> ceil(ngcore::SIMD<double,N> a)591 NETGEN_INLINE ngcore::SIMD<double,N> ceil (ngcore::SIMD<double,N> a) { 592 return ngcore::SIMD<double>([a](int i)->double { return ceil(a[i]); } ); 593 } 594 595 using std::exp; 596 template <int N> exp(ngcore::SIMD<double,N> a)597 NETGEN_INLINE ngcore::SIMD<double,N> exp (ngcore::SIMD<double,N> a) { 598 return ngcore::SIMD<double>([a](int i)->double { return exp(a[i]); } ); 599 } 600 601 using std::log; 602 template <int N> log(ngcore::SIMD<double,N> a)603 NETGEN_INLINE ngcore::SIMD<double,N> log (ngcore::SIMD<double,N> a) { 604 return ngcore::SIMD<double,N>([a](int i)->double { return log(a[i]); } ); 605 } 606 607 using std::pow; 608 template <int N> pow(ngcore::SIMD<double,N> a,double x)609 NETGEN_INLINE ngcore::SIMD<double,N> pow (ngcore::SIMD<double,N> a, double x) { 610 return ngcore::SIMD<double,N>([a,x](int i)->double { return pow(a[i],x); } ); 611 } 612 613 template <int N> pow(ngcore::SIMD<double,N> a,ngcore::SIMD<double,N> b)614 NETGEN_INLINE ngcore::SIMD<double,N> pow (ngcore::SIMD<double,N> a, ngcore::SIMD<double,N> b) { 615 return ngcore::SIMD<double,N>([a,b](int i)->double { return pow(a[i],b[i]); } ); 616 } 617 618 using std::sin; 619 template <int N> sin(ngcore::SIMD<double,N> a)620 NETGEN_INLINE ngcore::SIMD<double,N> sin (ngcore::SIMD<double,N> a) { 621 return ngcore::SIMD<double,N>([a](int i)->double { return sin(a[i]); } ); 622 } 623 624 using std::cos; 625 template <int N> cos(ngcore::SIMD<double,N> a)626 NETGEN_INLINE ngcore::SIMD<double,N> cos (ngcore::SIMD<double,N> a) { 627 return ngcore::SIMD<double,N>([a](int i)->double { return cos(a[i]); } ); 628 } 629 630 using std::tan; 631 template <int N> tan(ngcore::SIMD<double,N> a)632 NETGEN_INLINE ngcore::SIMD<double,N> tan (ngcore::SIMD<double,N> a) { 633 return ngcore::SIMD<double,N>([a](int i)->double { return tan(a[i]); } ); 634 } 635 636 using std::atan; 637 template <int N> atan(ngcore::SIMD<double,N> a)638 NETGEN_INLINE ngcore::SIMD<double,N> atan (ngcore::SIMD<double,N> a) { 639 return ngcore::SIMD<double,N>([a](int i)->double { return atan(a[i]); } ); 640 } 641 642 using std::atan2; 643 template <int N> atan2(ngcore::SIMD<double,N> y,ngcore::SIMD<double,N> x)644 NETGEN_INLINE ngcore::SIMD<double,N> atan2 (ngcore::SIMD<double,N> y, ngcore::SIMD<double,N> x) { 645 return ngcore::SIMD<double,N>([y,x](int i)->double { return atan2(y[i], x[i]); } ); 646 } 647 648 using std::acos; 649 template <int N> acos(ngcore::SIMD<double,N> a)650 NETGEN_INLINE ngcore::SIMD<double,N> acos (ngcore::SIMD<double,N> a) { 651 return ngcore::SIMD<double,N>([a](int i)->double { return acos(a[i]); } ); 652 } 653 654 using std::asin; 655 template <int N> asin(ngcore::SIMD<double,N> a)656 NETGEN_INLINE ngcore::SIMD<double,N> asin (ngcore::SIMD<double,N> a) { 657 return ngcore::SIMD<double,N>([a](int i)->double { return asin(a[i]); } ); 658 } 659 660 using std::sinh; 661 template <int N> sinh(ngcore::SIMD<double,N> a)662 NETGEN_INLINE ngcore::SIMD<double,N> sinh (ngcore::SIMD<double,N> a) { 663 return ngcore::SIMD<double,N>([a](int i)->double { return sinh(a[i]); } ); 664 } 665 666 using std::cosh; 667 template <int N> cosh(ngcore::SIMD<double,N> a)668 NETGEN_INLINE ngcore::SIMD<double,N> cosh (ngcore::SIMD<double,N> a) { 669 return ngcore::SIMD<double,N>([a](int i)->double { return cosh(a[i]); } ); 670 } 671 672 template<int N, typename T> 673 using MultiSIMD = SIMD<T, N*GetDefaultSIMDSize()>; 674 675 template<int N> Unpack(SIMD<double,N> a,SIMD<double,N> b)676 NETGEN_INLINE auto Unpack (SIMD<double,N> a, SIMD<double,N> b) 677 { 678 if constexpr(N==1) 679 { 680 return std::make_tuple(SIMD<double,N>{a.Data()}, SIMD<double,N>{b.Data()} ); 681 } 682 else if constexpr(N==2) 683 { 684 return std::make_tuple(SIMD<double,N>{ a.Lo(), b.Lo() }, 685 SIMD<double,N>{ a.Hi(), b.Hi() }); 686 } 687 else 688 { 689 auto [a1,b1] = Unpack(a.Lo(), b.Lo()); 690 auto [a2,b2] = Unpack(a.Hi(), b.Hi()); 691 return std::make_tuple(SIMD<double,N>{ a1, a2 }, 692 SIMD<double,N>{ b1, b2 }); 693 } 694 } 695 696 } 697 698 699 namespace std 700 { 701 // structured binding support 702 template <typename T, int N > 703 struct tuple_size<ngcore::SIMD<T,N>> : std::integral_constant<std::size_t, N> {}; 704 template<size_t N, typename T, int M> struct tuple_element<N,ngcore::SIMD<T,M>> { using type = T; }; 705 } 706 707 #endif // NETGEN_CORE_SIMD_GENERIC_HPP 708