1 // Copyright (c) 2010-2021, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11
12 #ifndef MFEM_SIMD_SVE_HPP
13 #define MFEM_SIMD_SVE_HPP
14
15 #if defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
16
17 #include "../../config/tconfig.hpp"
18 #include <arm_sve.h>
19
20 namespace mfem
21 {
22
23 // Use this macro as a workaround for astyle formatting issue with 'alignas'
24 #define MFEM_AUTOSIMD_ALIGN_SVE alignas(64)
25
26 template <typename,int,int> struct AutoSIMD;
27
28 template <> struct MFEM_AUTOSIMD_ALIGN_SVE AutoSIMD<double,8,64>
29 {
30 typedef double scalar_type;
31 static constexpr int size = 8;
32 static constexpr int align_bytes = 64;
33
34 double vec[size];
35
operator []mfem::AutoSIMD36 inline MFEM_ALWAYS_INLINE double &operator[](int i)
37 {
38 return vec[i];
39 }
40
operator []mfem::AutoSIMD41 inline MFEM_ALWAYS_INLINE const double &operator[](int i) const
42 {
43 return vec[i];
44 }
45
operator =mfem::AutoSIMD46 inline MFEM_ALWAYS_INLINE AutoSIMD &operator=(const AutoSIMD &v)
47 {
48 svst1_f64(svptrue_b64(), vec, svld1_f64(svptrue_b64(),v.vec));
49 return *this;
50 }
51
operator =mfem::AutoSIMD52 inline MFEM_ALWAYS_INLINE AutoSIMD &operator=(const double &e)
53 {
54 svst1_f64(svptrue_b64(), vec, svdup_n_f64(e));
55 return *this;
56 }
57
operator +=mfem::AutoSIMD58 inline MFEM_ALWAYS_INLINE AutoSIMD &operator+=(const AutoSIMD &v)
59 {
60 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
61 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
62 svst1_f64(svptrue_b64(), vec, svadd_f64_z(svptrue_b64(),vd,vvd));
63 return *this;
64 }
65
operator +=mfem::AutoSIMD66 inline MFEM_ALWAYS_INLINE AutoSIMD &operator+=(const double &e)
67 {
68 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
69 svst1_f64(svptrue_b64(), vec, svadd_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
70 return *this;
71 }
72
operator -=mfem::AutoSIMD73 inline MFEM_ALWAYS_INLINE AutoSIMD &operator-=(const AutoSIMD &v)
74 {
75 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
76 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
77 svst1_f64(svptrue_b64(), vec, svsub_f64_z(svptrue_b64(),vd,vvd));
78 return *this;
79 }
80
operator -=mfem::AutoSIMD81 inline MFEM_ALWAYS_INLINE AutoSIMD &operator-=(const double &e)
82 {
83 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
84 svst1_f64(svptrue_b64(), vec, svsub_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
85 return *this;
86 }
87
operator *=mfem::AutoSIMD88 inline MFEM_ALWAYS_INLINE AutoSIMD &operator*=(const AutoSIMD &v)
89 {
90 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
91 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
92 svst1_f64(svptrue_b64(), vec, svmul_f64_z(svptrue_b64(),vd,vvd));
93 return *this;
94 }
95
operator *=mfem::AutoSIMD96 inline MFEM_ALWAYS_INLINE AutoSIMD &operator*=(const double &e)
97 {
98 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
99 svst1_f64(svptrue_b64(), vec, svmul_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
100 return *this;
101 }
102
operator /=mfem::AutoSIMD103 inline MFEM_ALWAYS_INLINE AutoSIMD &operator/=(const AutoSIMD &v)
104 {
105 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
106 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
107 svst1_f64(svptrue_b64(), vec, svdiv_f64_z(svptrue_b64(),vd,vvd));
108 return *this;
109 }
110
operator /=mfem::AutoSIMD111 inline MFEM_ALWAYS_INLINE AutoSIMD &operator/=(const double &e)
112 {
113 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
114 svst1_f64(svptrue_b64(), vec, svdiv_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
115 return *this;
116 }
117
operator -mfem::AutoSIMD118 inline MFEM_ALWAYS_INLINE AutoSIMD operator-() const
119 {
120 AutoSIMD r;
121 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
122 svst1_f64(svptrue_b64(), r.vec, svneg_f64_z(svptrue_b64(),vd));
123 return r;
124 }
125
operator +mfem::AutoSIMD126 inline MFEM_ALWAYS_INLINE AutoSIMD operator+(const AutoSIMD &v) const
127 {
128 AutoSIMD r;
129 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
130 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
131 svst1_f64(svptrue_b64(), r.vec, svadd_f64_z(svptrue_b64(),vd,vvd));
132 return r;
133 }
134
operator +mfem::AutoSIMD135 inline MFEM_ALWAYS_INLINE AutoSIMD operator+(const double &e) const
136 {
137 AutoSIMD r;
138 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
139 svst1_f64(svptrue_b64(), r.vec, svadd_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
140 return r;
141 }
142
operator -mfem::AutoSIMD143 inline MFEM_ALWAYS_INLINE AutoSIMD operator-(const AutoSIMD &v) const
144 {
145 AutoSIMD r;
146 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
147 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
148 svst1_f64(svptrue_b64(), r.vec, svsub_f64_z(svptrue_b64(),vd,vvd));
149 return r;
150 }
151
operator -mfem::AutoSIMD152 inline MFEM_ALWAYS_INLINE AutoSIMD operator-(const double &e) const
153 {
154 AutoSIMD r;
155 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
156 svst1_f64(svptrue_b64(), r.vec, svsub_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
157 return r;
158 }
159
operator *mfem::AutoSIMD160 inline MFEM_ALWAYS_INLINE AutoSIMD operator*(const AutoSIMD &v) const
161 {
162 AutoSIMD r;
163 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
164 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
165 svst1_f64(svptrue_b64(), r.vec, svmul_f64_z(svptrue_b64(),vd,vvd));
166 return r;
167 }
168
operator *mfem::AutoSIMD169 inline MFEM_ALWAYS_INLINE AutoSIMD operator*(const double &e) const
170 {
171 AutoSIMD r;
172 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
173 svst1_f64(svptrue_b64(), r.vec, svmul_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
174 return r;
175 }
176
operator /mfem::AutoSIMD177 inline MFEM_ALWAYS_INLINE AutoSIMD operator/(const AutoSIMD &v) const
178 {
179 AutoSIMD r;
180 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
181 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
182 svst1_f64(svptrue_b64(), r.vec, svdiv_f64_z(svptrue_b64(),vd,vvd));
183 return r;
184 }
185
operator /mfem::AutoSIMD186 inline MFEM_ALWAYS_INLINE AutoSIMD operator/(const double &e) const
187 {
188 AutoSIMD r;
189 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
190 svst1_f64(svptrue_b64(), r.vec, svdiv_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
191 return r;
192 }
193
fmamfem::AutoSIMD194 inline MFEM_ALWAYS_INLINE AutoSIMD &fma(const AutoSIMD &v, const AutoSIMD &w)
195 {
196 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
197 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
198 const svfloat64_t wvd = svld1_f64(svptrue_b64(), w.vec);
199 svst1_f64(svptrue_b64(), vec, svmad_f64_z(svptrue_b64(),wvd,vd,vvd));
200 return *this;
201 }
202
fmamfem::AutoSIMD203 inline MFEM_ALWAYS_INLINE AutoSIMD &fma(const AutoSIMD &v, const double &e)
204 {
205 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
206 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
207 svst1_f64(svptrue_b64(), vec, svmad_f64_z(svptrue_b64(),vvd,svdup_n_f64(e),vd));
208 return *this;
209 }
210
fmamfem::AutoSIMD211 inline MFEM_ALWAYS_INLINE AutoSIMD &fma(const double &e, const AutoSIMD &v)
212 {
213 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
214 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
215 svst1_f64(svptrue_b64(), vec, svmad_f64_z(svptrue_b64(),svdup_n_f64(e),vvd,vd));
216 return *this;
217 }
218
mulmfem::AutoSIMD219 inline MFEM_ALWAYS_INLINE AutoSIMD &mul(const AutoSIMD &v, const AutoSIMD &w)
220 {
221 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
222 const svfloat64_t wvd = svld1_f64(svptrue_b64(), w.vec);
223 svst1_f64(svptrue_b64(), vec, svmul_f64_z(svptrue_b64(),vvd,wvd));
224 return *this;
225 }
226
mulmfem::AutoSIMD227 inline MFEM_ALWAYS_INLINE AutoSIMD &mul(const AutoSIMD &v,const double &e)
228 {
229 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
230 svst1_f64(svptrue_b64(), vec, svmul_f64_z(svptrue_b64(),vvd,svdup_n_f64(e)));
231 return *this;
232 }
233
mulmfem::AutoSIMD234 inline MFEM_ALWAYS_INLINE AutoSIMD &mul(const double &e, const AutoSIMD &v)
235 {
236 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
237 svst1_f64(svptrue_b64(), vec, svmul_f64_z(svptrue_b64(),svdup_n_f64(e),vvd));
238 return *this;
239 }
240 };
241
242 inline MFEM_ALWAYS_INLINE
operator +(const double & e,const AutoSIMD<double,8,64> & v)243 AutoSIMD<double,8,64> operator+(const double &e, const AutoSIMD<double,8,64> &v)
244 {
245 AutoSIMD<double,8,64> r;
246 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
247 svst1_f64(svptrue_b64(), r.vec, svadd_f64_z(svptrue_b64(),svdup_n_f64(e),vvd));
248 return r;
249 }
250
251 inline MFEM_ALWAYS_INLINE
operator -(const double & e,const AutoSIMD<double,8,64> & v)252 AutoSIMD<double,8,64> operator-(const double &e, const AutoSIMD<double,8,64> &v)
253 {
254 AutoSIMD<double,8,64> r;
255 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
256 svst1_f64(svptrue_b64(), r.vec, svsub_f64_z(svptrue_b64(),svdup_n_f64(e),vvd));
257 return r;
258 }
259
260 inline MFEM_ALWAYS_INLINE
operator *(const double & e,const AutoSIMD<double,8,64> & v)261 AutoSIMD<double,8,64> operator*(const double &e, const AutoSIMD<double,8,64> &v)
262 {
263 AutoSIMD<double,8,64> r;
264 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
265 svst1_f64(svptrue_b64(), r.vec, svmul_f64_z(svptrue_b64(),svdup_n_f64(e),vvd));
266 return r;
267 }
268
269 inline MFEM_ALWAYS_INLINE
operator /(const double & e,const AutoSIMD<double,8,64> & v)270 AutoSIMD<double,8,64> operator/(const double &e, const AutoSIMD<double,8,64> &v)
271 {
272 AutoSIMD<double,8,64> r;
273 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
274 svst1_f64(svptrue_b64(), r.vec, svdiv_f64_z(svptrue_b64(),svdup_n_f64(e),vvd));
275 return r;
276 }
277
278 } // namespace mfem
279
280 #endif // __aarch64__ && __ARM_FEATURE_SVE
281
282 #endif // MFEM_SIMD_SVE_HPP
283