1 // Copyright (c) 2010-2021, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #ifndef MFEM_SIMD_SVE_HPP
13 #define MFEM_SIMD_SVE_HPP
14 
15 #if defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
16 
17 #include "../../config/tconfig.hpp"
18 #include <arm_sve.h>
19 
20 namespace mfem
21 {
22 
23 // Use this macro as a workaround for an astyle formatting issue with 'alignas'.
// It expands to alignas(64), i.e. a 64-byte alignment request; this is the same
// value that the AutoSIMD<double,8,64> specialization below exposes as
// 'align_bytes'.
24 #define MFEM_AUTOSIMD_ALIGN_SVE alignas(64)
25 
// Declaration of the primary AutoSIMD template (no definition is given in this
// header); it is required so that the explicit specialization below can be
// written.
26 template <typename,int,int> struct AutoSIMD;
27 
/** @brief Specialization of AutoSIMD for 8 doubles (64 bytes) implemented with
    ARM SVE intrinsics.

    The data lives in the plain array @a vec. Every arithmetic operation loads
    the operands into SVE registers with svld1_f64(), applies one SVE
    instruction and writes the result back with svst1_f64(). All loads, stores
    and operations use the all-true 64-bit-lane predicate svptrue_b64().

    @note NOTE(review): an all-true predicate acts on every lane of the
    hardware vector, while @a vec always holds exactly 8 doubles. The code
    therefore appears to assume a 512-bit SVE vector length -- confirm before
    using it on targets with a different vector length. */
template <> struct MFEM_AUTOSIMD_ALIGN_SVE AutoSIMD<double,8,64>
{
   typedef double scalar_type;
   /// Number of scalar entries stored in @a vec.
   static constexpr int size = 8;
   /// Alignment, in bytes, advertised by this type.
   static constexpr int align_bytes = 64;

   /// Storage for the @a size entries.
   double vec[size];

   /// Read/write access to entry @a i; the index is not checked.
   inline MFEM_ALWAYS_INLINE double &operator[](int i)
   {
      return vec[i];
   }

   /// Read-only access to entry @a i; the index is not checked.
   inline MFEM_ALWAYS_INLINE const double &operator[](int i) const
   {
      return vec[i];
   }

   /// Copy all entries of @a v into this object.
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator=(const AutoSIMD &v)
   {
      const svbool_t pg = svptrue_b64();
      svst1_f64(pg, vec, svld1_f64(pg, v.vec));
      return *this;
   }

   /// Set every entry to the scalar @a e.
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator=(const double &e)
   {
      svst1_f64(svptrue_b64(), vec, svdup_n_f64(e));
      return *this;
   }

   /// Entry-wise: vec[i] += v[i].
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator+=(const AutoSIMD &v)
   {
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      const svfloat64_t vvd = svld1_f64(pg, v.vec);
      svst1_f64(pg, vec, svadd_f64_z(pg, vd, vvd));
      return *this;
   }

   /// Entry-wise: vec[i] += e.
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator+=(const double &e)
   {
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      svst1_f64(pg, vec, svadd_f64_z(pg, vd, svdup_n_f64(e)));
      return *this;
   }

   /// Entry-wise: vec[i] -= v[i].
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator-=(const AutoSIMD &v)
   {
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      const svfloat64_t vvd = svld1_f64(pg, v.vec);
      svst1_f64(pg, vec, svsub_f64_z(pg, vd, vvd));
      return *this;
   }

   /// Entry-wise: vec[i] -= e.
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator-=(const double &e)
   {
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      svst1_f64(pg, vec, svsub_f64_z(pg, vd, svdup_n_f64(e)));
      return *this;
   }

   /// Entry-wise: vec[i] *= v[i].
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator*=(const AutoSIMD &v)
   {
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      const svfloat64_t vvd = svld1_f64(pg, v.vec);
      svst1_f64(pg, vec, svmul_f64_z(pg, vd, vvd));
      return *this;
   }

   /// Entry-wise: vec[i] *= e.
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator*=(const double &e)
   {
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      svst1_f64(pg, vec, svmul_f64_z(pg, vd, svdup_n_f64(e)));
      return *this;
   }

   /// Entry-wise: vec[i] /= v[i].
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator/=(const AutoSIMD &v)
   {
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      const svfloat64_t vvd = svld1_f64(pg, v.vec);
      svst1_f64(pg, vec, svdiv_f64_z(pg, vd, vvd));
      return *this;
   }

   /// Entry-wise: vec[i] /= e.
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator/=(const double &e)
   {
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      svst1_f64(pg, vec, svdiv_f64_z(pg, vd, svdup_n_f64(e)));
      return *this;
   }

   /// Return a new object with entries -vec[i].
   inline MFEM_ALWAYS_INLINE AutoSIMD operator-() const
   {
      AutoSIMD r;
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      svst1_f64(pg, r.vec, svneg_f64_z(pg, vd));
      return r;
   }

   /// Return a new object with entries vec[i] + v[i].
   inline MFEM_ALWAYS_INLINE AutoSIMD operator+(const AutoSIMD &v) const
   {
      AutoSIMD r;
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      const svfloat64_t vvd = svld1_f64(pg, v.vec);
      svst1_f64(pg, r.vec, svadd_f64_z(pg, vd, vvd));
      return r;
   }

   /// Return a new object with entries vec[i] + e.
   inline MFEM_ALWAYS_INLINE AutoSIMD operator+(const double &e) const
   {
      AutoSIMD r;
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      svst1_f64(pg, r.vec, svadd_f64_z(pg, vd, svdup_n_f64(e)));
      return r;
   }

   /// Return a new object with entries vec[i] - v[i].
   inline MFEM_ALWAYS_INLINE AutoSIMD operator-(const AutoSIMD &v) const
   {
      AutoSIMD r;
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      const svfloat64_t vvd = svld1_f64(pg, v.vec);
      svst1_f64(pg, r.vec, svsub_f64_z(pg, vd, vvd));
      return r;
   }

   /// Return a new object with entries vec[i] - e.
   inline MFEM_ALWAYS_INLINE AutoSIMD operator-(const double &e) const
   {
      AutoSIMD r;
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      svst1_f64(pg, r.vec, svsub_f64_z(pg, vd, svdup_n_f64(e)));
      return r;
   }

   /// Return a new object with entries vec[i] * v[i].
   inline MFEM_ALWAYS_INLINE AutoSIMD operator*(const AutoSIMD &v) const
   {
      AutoSIMD r;
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      const svfloat64_t vvd = svld1_f64(pg, v.vec);
      svst1_f64(pg, r.vec, svmul_f64_z(pg, vd, vvd));
      return r;
   }

   /// Return a new object with entries vec[i] * e.
   inline MFEM_ALWAYS_INLINE AutoSIMD operator*(const double &e) const
   {
      AutoSIMD r;
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      svst1_f64(pg, r.vec, svmul_f64_z(pg, vd, svdup_n_f64(e)));
      return r;
   }

   /// Return a new object with entries vec[i] / v[i].
   inline MFEM_ALWAYS_INLINE AutoSIMD operator/(const AutoSIMD &v) const
   {
      AutoSIMD r;
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      const svfloat64_t vvd = svld1_f64(pg, v.vec);
      svst1_f64(pg, r.vec, svdiv_f64_z(pg, vd, vvd));
      return r;
   }

   /// Return a new object with entries vec[i] / e.
   inline MFEM_ALWAYS_INLINE AutoSIMD operator/(const double &e) const
   {
      AutoSIMD r;
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      svst1_f64(pg, r.vec, svdiv_f64_z(pg, vd, svdup_n_f64(e)));
      return r;
   }

   /** @brief Fused multiply-add, entry-wise: vec[i] += v[i] * w[i].

       svmad_f64_z(pg, op1, op2, op3) evaluates op1*op2 + op3, so the two
       factors go first and the accumulator (the current content of @a vec)
       goes last -- the same operand layout used by the scalar overloads of
       fma() below. */
   inline MFEM_ALWAYS_INLINE AutoSIMD &fma(const AutoSIMD &v, const AutoSIMD &w)
   {
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      const svfloat64_t vvd = svld1_f64(pg, v.vec);
      const svfloat64_t wvd = svld1_f64(pg, w.vec);
      svst1_f64(pg, vec, svmad_f64_z(pg, vvd, wvd, vd));
      return *this;
   }

   /// Fused multiply-add, entry-wise: vec[i] += v[i] * e.
   inline MFEM_ALWAYS_INLINE AutoSIMD &fma(const AutoSIMD &v, const double &e)
   {
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      const svfloat64_t vvd = svld1_f64(pg, v.vec);
      svst1_f64(pg, vec, svmad_f64_z(pg, vvd, svdup_n_f64(e), vd));
      return *this;
   }

   /// Fused multiply-add, entry-wise: vec[i] += e * v[i].
   inline MFEM_ALWAYS_INLINE AutoSIMD &fma(const double &e, const AutoSIMD &v)
   {
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vd = svld1_f64(pg, vec);
      const svfloat64_t vvd = svld1_f64(pg, v.vec);
      svst1_f64(pg, vec, svmad_f64_z(pg, svdup_n_f64(e), vvd, vd));
      return *this;
   }

   /// Entry-wise product stored in this object: vec[i] = v[i] * w[i].
   inline MFEM_ALWAYS_INLINE AutoSIMD &mul(const AutoSIMD &v, const AutoSIMD &w)
   {
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vvd = svld1_f64(pg, v.vec);
      const svfloat64_t wvd = svld1_f64(pg, w.vec);
      svst1_f64(pg, vec, svmul_f64_z(pg, vvd, wvd));
      return *this;
   }

   /// Entry-wise product stored in this object: vec[i] = v[i] * e.
   inline MFEM_ALWAYS_INLINE AutoSIMD &mul(const AutoSIMD &v,const double &e)
   {
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vvd = svld1_f64(pg, v.vec);
      svst1_f64(pg, vec, svmul_f64_z(pg, vvd, svdup_n_f64(e)));
      return *this;
   }

   /// Entry-wise product stored in this object: vec[i] = e * v[i].
   inline MFEM_ALWAYS_INLINE AutoSIMD &mul(const double &e, const AutoSIMD &v)
   {
      const svbool_t pg = svptrue_b64();
      const svfloat64_t vvd = svld1_f64(pg, v.vec);
      svst1_f64(pg, vec, svmul_f64_z(pg, svdup_n_f64(e), vvd));
      return *this;
   }
};
241 
/// Scalar-plus-vector: returns an object whose entries are e + v[i].
inline MFEM_ALWAYS_INLINE
AutoSIMD<double,8,64> operator+(const double &e, const AutoSIMD<double,8,64> &v)
{
   const svbool_t all_lanes = svptrue_b64();
   // Broadcast the scalar to every lane and load the vector operand.
   const svfloat64_t lhs = svdup_n_f64(e);
   const svfloat64_t rhs = svld1_f64(all_lanes, v.vec);

   AutoSIMD<double,8,64> sum;
   svst1_f64(all_lanes, sum.vec, svadd_f64_z(all_lanes, lhs, rhs));
   return sum;
}
250 
/// Scalar-minus-vector: returns an object whose entries are e - v[i].
inline MFEM_ALWAYS_INLINE
AutoSIMD<double,8,64> operator-(const double &e, const AutoSIMD<double,8,64> &v)
{
   const svbool_t all_lanes = svptrue_b64();
   // The broadcast scalar is the minuend, the loaded vector the subtrahend.
   const svfloat64_t lhs = svdup_n_f64(e);
   const svfloat64_t rhs = svld1_f64(all_lanes, v.vec);

   AutoSIMD<double,8,64> diff;
   svst1_f64(all_lanes, diff.vec, svsub_f64_z(all_lanes, lhs, rhs));
   return diff;
}
259 
/// Scalar-times-vector: returns an object whose entries are e * v[i].
inline MFEM_ALWAYS_INLINE
AutoSIMD<double,8,64> operator*(const double &e, const AutoSIMD<double,8,64> &v)
{
   const svbool_t all_lanes = svptrue_b64();
   // Broadcast the scalar to every lane and load the vector operand.
   const svfloat64_t lhs = svdup_n_f64(e);
   const svfloat64_t rhs = svld1_f64(all_lanes, v.vec);

   AutoSIMD<double,8,64> prod;
   svst1_f64(all_lanes, prod.vec, svmul_f64_z(all_lanes, lhs, rhs));
   return prod;
}
268 
/// Scalar-over-vector: returns an object whose entries are e / v[i].
inline MFEM_ALWAYS_INLINE
AutoSIMD<double,8,64> operator/(const double &e, const AutoSIMD<double,8,64> &v)
{
   const svbool_t all_lanes = svptrue_b64();
   // The broadcast scalar is the dividend, the loaded vector the divisor.
   const svfloat64_t lhs = svdup_n_f64(e);
   const svfloat64_t rhs = svld1_f64(all_lanes, v.vec);

   AutoSIMD<double,8,64> quot;
   svst1_f64(all_lanes, quot.vec, svdiv_f64_z(all_lanes, lhs, rhs));
   return quot;
}
277 
278 } // namespace mfem
279 
280 #endif // __aarch64__ && __ARM_FEATURE_SVE
281 
282 #endif // MFEM_SIMD_SVE_HPP
283