1 #ifndef NETGEN_CORE_SIMD_HPP 2 #define NETGEN_CORE_SIMD_HPP 3 4 /**************************************************************************/ 5 /* File: simd.hpp */ 6 /* Author: Joachim Schoeberl, Matthias Hochsteger */ 7 /* Date: 25. Mar. 16 */ 8 /**************************************************************************/ 9 10 #include "ngcore_api.hpp" 11 12 #include "simd_generic.hpp" 13 14 #ifdef NETGEN_ARCH_AMD64 15 #ifndef __SSE__ 16 #define __SSE__ 17 #endif 18 #include "simd_sse.hpp" 19 #endif 20 21 #ifdef __AVX__ 22 #include "simd_avx.hpp" 23 #endif 24 25 #ifdef __AVX512F__ 26 #include "simd_avx512.hpp" 27 #endif 28 29 #ifdef __aarch64__ 30 #include "simd_arm64.hpp" 31 #endif 32 33 namespace ngcore 34 { 35 #ifdef NETGEN_ARCH_AMD64 HSum(SIMD<double,2> v1,SIMD<double,2> v2,SIMD<double,2> v3,SIMD<double,2> v4)36 NETGEN_INLINE auto HSum (SIMD<double,2> v1, SIMD<double,2> v2, SIMD<double,2> v3, SIMD<double,2> v4) 37 { 38 SIMD<double,2> hsum1 = my_mm_hadd_pd (v1.Data(), v2.Data()); 39 SIMD<double,2> hsum2 = my_mm_hadd_pd (v3.Data(), v4.Data()); 40 return SIMD<double,4> (hsum1, hsum2); 41 } 42 GetMaskFromBits(unsigned int i)43 NETGEN_INLINE auto GetMaskFromBits( unsigned int i ) 44 { 45 return SIMD<mask64>::GetMaskFromBits(i); 46 } 47 #endif 48 49 SIMDTranspose(SIMD<double,4> a1,SIMD<double,4> a2,SIMD<double,4> a3,SIMD<double,4> a4,SIMD<double,4> & b1,SIMD<double,4> & b2,SIMD<double,4> & b3,SIMD<double,4> & b4)50 NETGEN_INLINE void SIMDTranspose (SIMD<double,4> a1, SIMD<double,4> a2, SIMD <double,4> a3, SIMD<double,4> a4, 51 SIMD<double,4> & b1, SIMD<double,4> & b2, SIMD<double,4> & b3, SIMD<double,4> & b4) 52 { 53 SIMD<double,4> h1,h2,h3,h4; 54 std::tie(h1,h2) = Unpack(a1,a2); 55 std::tie(h3,h4) = Unpack(a3,a4); 56 b1 = SIMD<double,4> (h1.Lo(), h3.Lo()); 57 b2 = SIMD<double,4> (h2.Lo(), h4.Lo()); 58 b3 = SIMD<double,4> (h1.Hi(), h3.Hi()); 59 b4 = SIMD<double,4> (h2.Hi(), h4.Hi()); 60 } 61 62 template<int N> HSum(SIMD<double,N> s1,SIMD<double,N> s2)63 NETGEN_INLINE auto HSum (SIMD<double,N> s1, SIMD<double,N> s2) 64 { 65 return SIMD<double,2>(HSum(s1), HSum(s2)); 66 } 67 68 template<int N> HSum(SIMD<double,N> s1,SIMD<double,N> s2,SIMD<double,N> s3,SIMD<double,N> s4)69 NETGEN_INLINE auto HSum (SIMD<double,N> s1, SIMD<double,N> s2, SIMD<double,N> s3, SIMD<double,N> s4 ) 70 { 71 return SIMD<double,4>(HSum(s1), HSum(s2), HSum(s3), HSum(s4)); 72 } 73 } 74 75 #endif // NETGEN_CORE_SIMD_HPP 76