1 /*
2 * This file is part of the GROMACS molecular simulation package.
3 *
4 * Copyright (c) 2016,2017,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
8 *
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
13 *
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 *
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
31 *
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
34 */
35 #ifndef GMX_SIMD_SCALAR_MATH_H
36 #define GMX_SIMD_SCALAR_MATH_H
37
38 #include <cmath>
39
40 #include "gromacs/math/functions.h"
41 #include "gromacs/math/utilities.h"
42 #include "gromacs/simd/scalar/scalar.h"
43
44 /*! \libinternal \file
45 *
46 * \brief Scalar math functions mimicking GROMACS SIMD math functions
47 *
48 * These versions make it possible to write functions that are templated with
49 * either a SIMD or scalar type. While some of these functions might not appear
50 * SIMD-specific, we have placed them here because the only reason to use these
51 * instead of generic function is in templated combined SIMD/non-SIMD code.
52 * It is important that these functions match the SIMD versions exactly in their
53 * arguments and template arguments so that overload resolution works correctly.
54 *
55 * \author Erik Lindahl <erik.lindahl@gmail.com>
56 *
57 * \inlibraryapi
58 * \ingroup module_simd
59 */
60
61 namespace gmx
62 {
63
64 /*****************************************************************************
65 * Single-precision floating point math functions mimicking SIMD versions *
66 *****************************************************************************/
67
68
69 /*! \brief Composes single value with the magnitude of x and the sign of y.
70 *
71 * \param x Value to set sign for
72 * \param y Value used to set sign
73 * \return Magnitude of x, sign of y
74 *
75 * \note This function might be superficially meaningless, but it helps us to
76 * write templated SIMD/non-SIMD code. For clarity it should not be used
77 * outside such code.
78 */
copysign(float x,float y)79 static inline float copysign(float x, float y)
80 {
81 return std::copysign(x, y);
82 }
83
84 // invsqrt(x) is already defined in math/functions.h
85
86 /*! \brief Calculate 1/sqrt(x) for two floats.
87 *
88 * \param x0 First argument, x0 must be positive - no argument checking.
89 * \param x1 Second argument, x1 must be positive - no argument checking.
90 * \param[out] out0 Result 1/sqrt(x0)
91 * \param[out] out1 Result 1/sqrt(x1)
92 *
93 * \note This function might be superficially meaningless, but it helps us to
94 * write templated SIMD/non-SIMD code. For clarity it should not be used
95 * outside such code.
96 */
invsqrtPair(float x0,float x1,float * out0,float * out1)97 static inline void invsqrtPair(float x0, float x1, float* out0, float* out1)
98 {
99 *out0 = invsqrt(x0);
100 *out1 = invsqrt(x1);
101 }
102
103 /*! \brief Calculate 1/x for float.
104 *
105 * \param x Argument that must be nonzero. This routine does not check arguments.
106 * \return 1/x. Result is undefined if your argument was invalid.
107 *
108 * \note This function might be superficially meaningless, but it helps us to
109 * write templated SIMD/non-SIMD code. For clarity it should not be used
110 * outside such code.
111 */
inv(float x)112 static inline float inv(float x)
113 {
114 return 1.0F / x;
115 }
116
117 /*! \brief Calculate 1/sqrt(x) for masked entry of float.
118 *
119 * This routine only evaluates 1/sqrt(x) if mask is true.
120 * Illegal values for a masked-out float will not lead to
121 * floating-point exceptions.
122 *
123 * \param x Argument that must be >0 if masked-in.
124 * \param m Mask
125 * \return 1/sqrt(x). Result is undefined if your argument was invalid or
126 * entry was not masked, and 0.0 for masked-out entries.
127 *
128 * \note This function might be superficially meaningless, but it helps us to
129 * write templated SIMD/non-SIMD code. For clarity it should not be used
130 * outside such code.
131 */
maskzInvsqrt(float x,bool m)132 static inline float maskzInvsqrt(float x, bool m)
133 {
134 return m ? invsqrt(x) : 0.0F;
135 }
136
137 /*! \brief Calculate 1/x for masked entry of float.
138 *
139 * This routine only evaluates 1/x if mask is true.
140 * Illegal values for a masked-out float will not lead to
141 * floating-point exceptions.
142 *
143 * \param x Argument that must be nonzero if masked-in.
144 * \param m Mask
145 * \return 1/x. Result is undefined if your argument was invalid or
146 * entry was not masked, and 0.0 for masked-out entries.
147 *
148 * \note This function might be superficially meaningless, but it helps us to
149 * write templated SIMD/non-SIMD code. For clarity it should not be used
150 * outside such code.
151 */
maskzInv(float x,bool m)152 static inline float maskzInv(float x, bool m)
153 {
154 return m ? inv(x) : 0.0F;
155 }
156
157 /*! \brief Float sqrt(x). This is the square root.
158 *
159 * \param x Argument, should be >= 0.
160 * \result The square root of x. Undefined if argument is invalid.
161 *
162 * \note This function might be superficially meaningless, but it helps us to
163 * write templated SIMD/non-SIMD code. For clarity it should not be used
164 * outside such code.
165 */
166 template<MathOptimization opt = MathOptimization::Safe>
sqrt(float x)167 static inline float sqrt(float x)
168 {
169 return std::sqrt(x);
170 }
171
172 /*! \brief Float log(x). This is the natural logarithm.
173 *
174 * \param x Argument, should be >0.
175 * \result The natural logarithm of x. Undefined if argument is invalid.
176 *
177 * \note This function might be superficially meaningless, but it helps us to
178 * write templated SIMD/non-SIMD code. For clarity it should not be used
179 * outside such code.
180 */
log(float x)181 static inline float log(float x)
182 {
183 return std::log(x);
184 }
185
186 /*! \brief Float 2^x.
187 *
188 * \param x Argument.
189 * \result 2^x. Undefined if input argument caused overflow.
190 *
191 * \note This function might be superficially meaningless, but it helps us to
192 * write templated SIMD/non-SIMD code. For clarity it should not be used
193 * outside such code.
194 */
195 template<MathOptimization opt = MathOptimization::Safe>
exp2(float x)196 static inline float exp2(float x)
197 {
198 return std::exp2(x);
199 }
200
201 /*! \brief Float exp(x).
202 *
203 * \param x Argument.
204 * \result exp(x). Undefined if input argument caused overflow.
205 *
206 * \note This function might be superficially meaningless, but it helps us to
207 * write templated SIMD/non-SIMD code. For clarity it should not be used
208 * outside such code.
209 */
210 template<MathOptimization opt = MathOptimization::Safe>
exp(float x)211 static inline float exp(float x)
212 {
213 return std::exp(x);
214 }
215
216 /*! \brief Float erf(x).
217 *
218 * \param x Argument.
219 * \result erf(x)
220 *
221 * \note This function might be superficially meaningless, but it helps us to
222 * write templated SIMD/non-SIMD code. For clarity it should not be used
223 * outside such code.
224 */
erf(float x)225 static inline float erf(float x)
226 {
227 return std::erf(x);
228 }
229
230 /*! \brief Float erfc(x).
231 *
232 * \param x Argument.
233 * \result erfc(x)
234 *
235 * \note This function might be superficially meaningless, but it helps us to
236 * write templated SIMD/non-SIMD code. For clarity it should not be used
237 * outside such code.
238 */
erfc(float x)239 static inline float erfc(float x)
240 {
241 return std::erfc(x);
242 }
243
244
245 /*! \brief Float sin \& cos.
246 *
247 * \param x The argument to evaluate sin/cos for
248 * \param[out] sinval Sin(x)
249 * \param[out] cosval Cos(x)
250 *
251 * \note This function might be superficially meaningless, but it helps us to
252 * write templated SIMD/non-SIMD code. For clarity it should not be used
253 * outside such code.
254 */
sincos(float x,float * sinval,float * cosval)255 static inline void sincos(float x, float* sinval, float* cosval)
256 {
257 *sinval = std::sin(x);
258 *cosval = std::cos(x);
259 }
260
261 /*! \brief Float sin.
262 *
263 * \param x The argument to evaluate sin for
264 * \result Sin(x)
265 *
266 * \note This function might be superficially meaningless, but it helps us to
267 * write templated SIMD/non-SIMD code. For clarity it should not be used
268 * outside such code.
269 */
sin(float x)270 static inline float sin(float x)
271 {
272 return std::sin(x);
273 }
274
275 /*! \brief Float cos.
276 *
277 * \param x The argument to evaluate cos for
278 * \result Cos(x)
279 *
280 * \note This function might be superficially meaningless, but it helps us to
281 * write templated SIMD/non-SIMD code. For clarity it should not be used
282 * outside such code.
283 */
cos(float x)284 static inline float cos(float x)
285 {
286 return std::cos(x);
287 }
288
289 /*! \brief Float tan.
290 *
291 * \param x The argument to evaluate tan for
292 * \result Tan(x)
293 *
294 * \note This function might be superficially meaningless, but it helps us to
295 * write templated SIMD/non-SIMD code. For clarity it should not be used
296 * outside such code.
297 */
tan(float x)298 static inline float tan(float x)
299 {
300 return std::tan(x);
301 }
302
303 /*! \brief float asin.
304 *
305 * \param x The argument to evaluate asin for
306 * \result Asin(x)
307 *
308 * \note This function might be superficially meaningless, but it helps us to
309 * write templated SIMD/non-SIMD code. For clarity it should not be used
310 * outside such code.
311 */
asin(float x)312 static inline float asin(float x)
313 {
314 return std::asin(x);
315 }
316
317 /*! \brief Float acos.
318 *
319 * \param x The argument to evaluate acos for
320 * \result Acos(x)
321 *
322 * \note This function might be superficially meaningless, but it helps us to
323 * write templated SIMD/non-SIMD code. For clarity it should not be used
324 * outside such code.
325 */
acos(float x)326 static inline float acos(float x)
327 {
328 return std::acos(x);
329 }
330
331 /*! \brief Float atan.
332 *
333 * \param x The argument to evaluate atan for
334 * \result Atan(x)
335 *
336 * \note This function might be superficially meaningless, but it helps us to
337 * write templated SIMD/non-SIMD code. For clarity it should not be used
338 * outside such code.
339 */
atan(float x)340 static inline float atan(float x)
341 {
342 return std::atan(x);
343 }
344
345 /*! \brief Float atan2(y,x).
346 *
347 * \param y Y component of vector, any quartile
348 * \param x X component of vector, any quartile
349 * \result Atan(y,x)
350 *
351 * \note This function might be superficially meaningless, but it helps us to
352 * write templated SIMD/non-SIMD code. For clarity it should not be used
353 * outside such code.
354 */
atan2(float y,float x)355 static inline float atan2(float y, float x)
356 {
357 return std::atan2(y, x);
358 }
359
360 /*! \brief Calculate the force correction due to PME analytically in float.
361 *
362 * See the SIMD version of this function for details.
363 *
364 * \param z2 input parameter
365 * \returns Correction to use on force
366 *
367 * \note This function might be superficially meaningless, but it helps us to
368 * write templated SIMD/non-SIMD code. For clarity it should not be used
369 * outside such code.
370 */
pmeForceCorrection(float z2)371 static inline float pmeForceCorrection(float z2)
372 {
373 const float FN6(-1.7357322914161492954e-8F);
374 const float FN5(1.4703624142580877519e-6F);
375 const float FN4(-0.000053401640219807709149F);
376 const float FN3(0.0010054721316683106153F);
377 const float FN2(-0.019278317264888380590F);
378 const float FN1(0.069670166153766424023F);
379 const float FN0(-0.75225204789749321333F);
380
381 const float FD4(0.0011193462567257629232F);
382 const float FD3(0.014866955030185295499F);
383 const float FD2(0.11583842382862377919F);
384 const float FD1(0.50736591960530292870F);
385 const float FD0(1.0F);
386
387 float z4;
388 float polyFN0, polyFN1, polyFD0, polyFD1;
389
390 z4 = z2 * z2;
391
392 polyFD0 = fma(FD4, z4, FD2);
393 polyFD1 = fma(FD3, z4, FD1);
394 polyFD0 = fma(polyFD0, z4, FD0);
395 polyFD0 = fma(polyFD1, z2, polyFD0);
396
397 polyFN0 = fma(FN6, z4, FN4);
398 polyFN1 = fma(FN5, z4, FN3);
399 polyFN0 = fma(polyFN0, z4, FN2);
400 polyFN1 = fma(polyFN1, z4, FN1);
401 polyFN0 = fma(polyFN0, z4, FN0);
402 polyFN0 = fma(polyFN1, z2, polyFN0);
403
404 return polyFN0 / polyFD0;
405 }
406
407 /*! \brief Calculate the potential correction due to PME analytically in float.
408 *
409 * See the SIMD version of this function for details.
410 *
411 * \param z2 input parameter
412 * \returns Correction to use on potential.
413 *
414 * \note This function might be superficially meaningless, but it helps us to
415 * write templated SIMD/non-SIMD code. For clarity it should not be used
416 * outside such code.
417 */
pmePotentialCorrection(float z2)418 static inline float pmePotentialCorrection(float z2)
419 {
420 const float VN6(1.9296833005951166339e-8F);
421 const float VN5(-1.4213390571557850962e-6F);
422 const float VN4(0.000041603292906656984871F);
423 const float VN3(-0.00013134036773265025626F);
424 const float VN2(0.038657983986041781264F);
425 const float VN1(0.11285044772717598220F);
426 const float VN0(1.1283802385263030286F);
427
428 const float VD3(0.0066752224023576045451F);
429 const float VD2(0.078647795836373922256F);
430 const float VD1(0.43336185284710920150F);
431 const float VD0(1.0F);
432
433 float z4;
434 float polyVN0, polyVN1, polyVD0, polyVD1;
435
436 z4 = z2 * z2;
437
438 polyVD1 = fma(VD3, z4, VD1);
439 polyVD0 = fma(VD2, z4, VD0);
440 polyVD0 = fma(polyVD1, z2, polyVD0);
441
442 polyVN0 = fma(VN6, z4, VN4);
443 polyVN1 = fma(VN5, z4, VN3);
444 polyVN0 = fma(polyVN0, z4, VN2);
445 polyVN1 = fma(polyVN1, z4, VN1);
446 polyVN0 = fma(polyVN0, z4, VN0);
447 polyVN0 = fma(polyVN1, z2, polyVN0);
448
449 return polyVN0 / polyVD0;
450 }
451
452 /*****************************************************************************
453 * Double-precision floating point math functions mimicking SIMD versions *
454 *****************************************************************************/
455
456
457 /*! \brief Composes double value with the magnitude of x and the sign of y.
458 *
459 * \param x Value to set sign for
460 * \param y Value used to set sign
461 * \return Magnitude of x, sign of y
462 *
463 * \note This function might be superficially meaningless, but it helps us to
464 * write templated SIMD/non-SIMD code. For clarity it should not be used
465 * outside such code.
466 */
copysign(double x,double y)467 static inline double copysign(double x, double y)
468 {
469 return std::copysign(x, y);
470 }
471
472 // invsqrt(x) is already defined in math/functions.h
473
474 /*! \brief Calculate 1/sqrt(x) for two doubles.
475 *
476 * \param x0 First argument, x0 must be positive - no argument checking.
477 * \param x1 Second argument, x1 must be positive - no argument checking.
478 * \param[out] out0 Result 1/sqrt(x0)
479 * \param[out] out1 Result 1/sqrt(x1)
480 *
481 * \note This function might be superficially meaningless, but it helps us to
482 * write templated SIMD/non-SIMD code. For clarity it should not be used
483 * outside such code.
484 */
invsqrtPair(double x0,double x1,double * out0,double * out1)485 static inline void invsqrtPair(double x0, double x1, double* out0, double* out1)
486 {
487 *out0 = invsqrt(x0);
488 *out1 = invsqrt(x1);
489 }
490
491 /*! \brief Calculate 1/x for double.
492 *
493 * \param x Argument that must be nonzero. This routine does not check arguments.
494 * \return 1/x. Result is undefined if your argument was invalid.
495 *
496 * \note This function might be superficially meaningless, but it helps us to
497 * write templated SIMD/non-SIMD code. For clarity it should not be used
498 * outside such code.
499 */
inv(double x)500 static inline double inv(double x)
501 {
502 return 1.0 / x;
503 }
504
505 /*! \brief Calculate 1/sqrt(x) for masked entry of double.
506 *
507 * This routine only evaluates 1/sqrt(x) if mask is true.
508 * Illegal values for a masked-out double will not lead to
509 * floating-point exceptions.
510 *
511 * \param x Argument that must be >0 if masked-in.
512 * \param m Mask
513 * \return 1/sqrt(x). Result is undefined if your argument was invalid or
514 * entry was not masked, and 0.0 for masked-out entries.
515 *
516 * \note This function might be superficially meaningless, but it helps us to
517 * write templated SIMD/non-SIMD code. For clarity it should not be used
518 * outside such code.
519 */
maskzInvsqrt(double x,bool m)520 static inline double maskzInvsqrt(double x, bool m)
521 {
522 return m ? invsqrt(x) : 0.0;
523 }
524
525 /*! \brief Calculate 1/x for masked entry of double.
526 *
527 * This routine only evaluates 1/x if mask is true.
528 * Illegal values for a masked-out double will not lead to
529 * floating-point exceptions.
530 *
531 * \param x Argument that must be nonzero if masked-in.
532 * \param m Mask
533 * \return 1/x. Result is undefined if your argument was invalid or
534 * entry was not masked, and 0.0 for masked-out entries.
535 *
536 * \note This function might be superficially meaningless, but it helps us to
537 * write templated SIMD/non-SIMD code. For clarity it should not be used
538 * outside such code.
539 */
maskzInv(double x,bool m)540 static inline double maskzInv(double x, bool m)
541 {
542 return m ? inv(x) : 0.0;
543 }
544
545 /*! \brief Double sqrt(x). This is the square root.
546 *
547 * \param x Argument, should be >= 0.
548 * \result The square root of x. Undefined if argument is invalid.
549 *
550 * \note This function might be superficially meaningless, but it helps us to
551 * write templated SIMD/non-SIMD code. For clarity it should not be used
552 * outside such code.
553 */
554 template<MathOptimization opt = MathOptimization::Safe>
sqrt(double x)555 static inline double sqrt(double x)
556 {
557 return std::sqrt(x);
558 }
559
560 /*! \brief Double log(x). This is the natural logarithm.
561 *
562 * \param x Argument, should be >0.
563 * \result The natural logarithm of x. Undefined if argument is invalid.
564 *
565 * \note This function might be superficially meaningless, but it helps us to
566 * write templated SIMD/non-SIMD code. For clarity it should not be used
567 * outside such code.
568 */
log(double x)569 static inline double log(double x)
570 {
571 return std::log(x);
572 }
573
574 /*! \brief Double 2^x.
575 *
576 * \param x Argument.
577 * \result 2^x. Undefined if input argument caused overflow.
578 *
579 * \note This function might be superficially meaningless, but it helps us to
580 * write templated SIMD/non-SIMD code. For clarity it should not be used
581 * outside such code.
582 */
583 template<MathOptimization opt = MathOptimization::Safe>
exp2(double x)584 static inline double exp2(double x)
585 {
586 return std::exp2(x);
587 }
588
589 /*! \brief Double exp(x).
590 *
591 * \param x Argument.
592 * \result exp(x). Undefined if input argument caused overflow.
593 *
594 * \note This function might be superficially meaningless, but it helps us to
595 * write templated SIMD/non-SIMD code. For clarity it should not be used
596 * outside such code.
597 */
598 template<MathOptimization opt = MathOptimization::Safe>
exp(double x)599 static inline double exp(double x)
600 {
601 return std::exp(x);
602 }
603
604 /*! \brief Double erf(x).
605 *
606 * \param x Argument.
607 * \result erf(x)
608 *
609 * \note This function might be superficially meaningless, but it helps us to
610 * write templated SIMD/non-SIMD code. For clarity it should not be used
611 * outside such code.
612 */
erf(double x)613 static inline double erf(double x)
614 {
615 return std::erf(x);
616 }
617
618 /*! \brief Double erfc(x).
619 *
620 * \param x Argument.
621 * \result erfc(x)
622 *
623 * \note This function might be superficially meaningless, but it helps us to
624 * write templated SIMD/non-SIMD code. For clarity it should not be used
625 * outside such code.
626 */
erfc(double x)627 static inline double erfc(double x)
628 {
629 return std::erfc(x);
630 }
631
632
633 /*! \brief Double sin \& cos.
634 *
635 * \param x The argument to evaluate sin/cos for
636 * \param[out] sinval Sin(x)
637 * \param[out] cosval Cos(x)
638 *
639 * \note This function might be superficially meaningless, but it helps us to
640 * write templated SIMD/non-SIMD code. For clarity it should not be used
641 * outside such code.
642 */
sincos(double x,double * sinval,double * cosval)643 static inline void sincos(double x, double* sinval, double* cosval)
644 {
645 *sinval = std::sin(x);
646 *cosval = std::cos(x);
647 }
648
649 /*! \brief Double sin.
650 *
651 * \param x The argument to evaluate sin for
652 * \result Sin(x)
653 *
654 * \note This function might be superficially meaningless, but it helps us to
655 * write templated SIMD/non-SIMD code. For clarity it should not be used
656 * outside such code.
657 */
sin(double x)658 static inline double sin(double x)
659 {
660 return std::sin(x);
661 }
662
663 /*! \brief Double cos.
664 *
665 * \param x The argument to evaluate cos for
666 * \result Cos(x)
667 *
668 * \note This function might be superficially meaningless, but it helps us to
669 * write templated SIMD/non-SIMD code. For clarity it should not be used
670 * outside such code.
671 */
cos(double x)672 static inline double cos(double x)
673 {
674 return std::cos(x);
675 }
676
677 /*! \brief Double tan.
678 *
679 * \param x The argument to evaluate tan for
680 * \result Tan(x)
681 *
682 * \note This function might be superficially meaningless, but it helps us to
683 * write templated SIMD/non-SIMD code. For clarity it should not be used
684 * outside such code.
685 */
tan(double x)686 static inline double tan(double x)
687 {
688 return std::tan(x);
689 }
690
691 /*! \brief Double asin.
692 *
693 * \param x The argument to evaluate asin for
694 * \result Asin(x)
695 *
696 * \note This function might be superficially meaningless, but it helps us to
697 * write templated SIMD/non-SIMD code. For clarity it should not be used
698 * outside such code.
699 */
asin(double x)700 static inline double asin(double x)
701 {
702 return std::asin(x);
703 }
704
705 /*! \brief Double acos.
706 *
707 * \param x The argument to evaluate acos for
708 * \result Acos(x)
709 *
710 * \note This function might be superficially meaningless, but it helps us to
711 * write templated SIMD/non-SIMD code. For clarity it should not be used
712 * outside such code.
713 */
acos(double x)714 static inline double acos(double x)
715 {
716 return std::acos(x);
717 }
718
719 /*! \brief Double atan.
720 *
721 * \param x The argument to evaluate atan for
722 * \result Atan(x)
723 *
724 * \note This function might be superficially meaningless, but it helps us to
725 * write templated SIMD/non-SIMD code. For clarity it should not be used
726 * outside such code.
727 */
atan(double x)728 static inline double atan(double x)
729 {
730 return std::atan(x);
731 }
732
733 /*! \brief Double atan2(y,x).
734 *
735 * \param y Y component of vector, any quartile
736 * \param x X component of vector, any quartile
737 * \result Atan(y,x)
738 *
739 * \note This function might be superficially meaningless, but it helps us to
740 * write templated SIMD/non-SIMD code. For clarity it should not be used
741 * outside such code.
742 */
atan2(double y,double x)743 static inline double atan2(double y, double x)
744 {
745 return std::atan2(y, x);
746 }
747
748 /*! \brief Calculate the force correction due to PME analytically in double.
749 *
750 * See the SIMD version of this function for details.
751 *
752 * \param z2 input parameter
753 * \returns Correction to use on force
754 *
755 * \note This function might be superficially meaningless, but it helps us to
756 * write templated SIMD/non-SIMD code. For clarity it should not be used
757 * outside such code.
758 */
pmeForceCorrection(double z2)759 static inline double pmeForceCorrection(double z2)
760 {
761 const double FN10(-8.0072854618360083154e-14);
762 const double FN9(1.1859116242260148027e-11);
763 const double FN8(-8.1490406329798423616e-10);
764 const double FN7(3.4404793543907847655e-8);
765 const double FN6(-9.9471420832602741006e-7);
766 const double FN5(0.000020740315999115847456);
767 const double FN4(-0.00031991745139313364005);
768 const double FN3(0.0035074449373659008203);
769 const double FN2(-0.031750380176100813405);
770 const double FN1(0.13884101728898463426);
771 const double FN0(-0.75225277815249618847);
772
773 const double FD5(0.000016009278224355026701);
774 const double FD4(0.00051055686934806966046);
775 const double FD3(0.0081803507497974289008);
776 const double FD2(0.077181146026670287235);
777 const double FD1(0.41543303143712535988);
778 const double FD0(1.0);
779
780 double z4;
781 double polyFN0, polyFN1, polyFD0, polyFD1;
782
783 z4 = z2 * z2;
784
785 polyFD1 = fma(FD5, z4, FD3);
786 polyFD1 = fma(polyFD1, z4, FD1);
787 polyFD1 = polyFD1 * z2;
788 polyFD0 = fma(FD4, z4, FD2);
789 polyFD0 = fma(polyFD0, z4, FD0);
790 polyFD0 = polyFD0 + polyFD1;
791
792 polyFD0 = inv(polyFD0);
793
794 polyFN0 = fma(FN10, z4, FN8);
795 polyFN0 = fma(polyFN0, z4, FN6);
796 polyFN0 = fma(polyFN0, z4, FN4);
797 polyFN0 = fma(polyFN0, z4, FN2);
798 polyFN0 = fma(polyFN0, z4, FN0);
799 polyFN1 = fma(FN9, z4, FN7);
800 polyFN1 = fma(polyFN1, z4, FN5);
801 polyFN1 = fma(polyFN1, z4, FN3);
802 polyFN1 = fma(polyFN1, z4, FN1);
803 polyFN0 = fma(polyFN1, z2, polyFN0);
804
805 return polyFN0 * polyFD0;
806 }
807
808 /*! \brief Calculate the potential correction due to PME analytically in double.
809 *
810 * See the SIMD version of this function for details.
811 *
812 * \param z2 input parameter
813 * \returns Correction to use on potential.
814 *
815 * \note This function might be superficially meaningless, but it helps us to
816 * write templated SIMD/non-SIMD code. For clarity it should not be used
817 * outside such code.
818 */
pmePotentialCorrection(double z2)819 static inline double pmePotentialCorrection(double z2)
820 {
821 const double VN9(-9.3723776169321855475e-13);
822 const double VN8(1.2280156762674215741e-10);
823 const double VN7(-7.3562157912251309487e-9);
824 const double VN6(2.6215886208032517509e-7);
825 const double VN5(-4.9532491651265819499e-6);
826 const double VN4(0.00025907400778966060389);
827 const double VN3(0.0010585044856156469792);
828 const double VN2(0.045247661136833092885);
829 const double VN1(0.11643931522926034421);
830 const double VN0(1.1283791671726767970);
831
832 const double VD5(0.000021784709867336150342);
833 const double VD4(0.00064293662010911388448);
834 const double VD3(0.0096311444822588683504);
835 const double VD2(0.085608012351550627051);
836 const double VD1(0.43652499166614811084);
837 const double VD0(1.0);
838
839 double z4;
840 double polyVN0, polyVN1, polyVD0, polyVD1;
841
842 z4 = z2 * z2;
843
844 polyVD1 = fma(VD5, z4, VD3);
845 polyVD0 = fma(VD4, z4, VD2);
846 polyVD1 = fma(polyVD1, z4, VD1);
847 polyVD0 = fma(polyVD0, z4, VD0);
848 polyVD0 = fma(polyVD1, z2, polyVD0);
849
850 polyVD0 = inv(polyVD0);
851
852 polyVN1 = fma(VN9, z4, VN7);
853 polyVN0 = fma(VN8, z4, VN6);
854 polyVN1 = fma(polyVN1, z4, VN5);
855 polyVN0 = fma(polyVN0, z4, VN4);
856 polyVN1 = fma(polyVN1, z4, VN3);
857 polyVN0 = fma(polyVN0, z4, VN2);
858 polyVN1 = fma(polyVN1, z4, VN1);
859 polyVN0 = fma(polyVN0, z4, VN0);
860 polyVN0 = fma(polyVN1, z2, polyVN0);
861
862 return polyVN0 * polyVD0;
863 }
864
865
866 /*****************************************************************************
867 * Floating point math functions mimicking SIMD versions. *
868 * Double precision data, single precision accuracy. *
869 *****************************************************************************/
870
871
872 /*! \brief Calculate 1/sqrt(x) for double, but with single accuracy.
873 *
874 * \param x Argument that must be >0. This routine does not check arguments.
875 * \return 1/sqrt(x). Result is undefined if your argument was invalid.
876 *
877 * \note This function might be superficially meaningless, but it helps us to
878 * write templated SIMD/non-SIMD code. For clarity it should not be used
879 * outside such code.
880 */
invsqrtSingleAccuracy(double x)881 static inline double invsqrtSingleAccuracy(double x)
882 {
883 return invsqrt(static_cast<float>(x));
884 }
885
886 /*! \brief Calculate 1/sqrt(x) for two doubles, but with single accuracy.
887 *
888 * \param x0 First argument, x0 must be positive - no argument checking.
889 * \param x1 Second argument, x1 must be positive - no argument checking.
890 * \param[out] out0 Result 1/sqrt(x0)
891 * \param[out] out1 Result 1/sqrt(x1)
892 *
893 * \note This function might be superficially meaningless, but it helps us to
894 * write templated SIMD/non-SIMD code. For clarity it should not be used
895 * outside such code.
896 */
invsqrtPairSingleAccuracy(double x0,double x1,double * out0,double * out1)897 static inline void invsqrtPairSingleAccuracy(double x0, double x1, double* out0, double* out1)
898 {
899 *out0 = invsqrt(static_cast<float>(x0));
900 *out1 = invsqrt(static_cast<float>(x1));
901 }
902
903 /*! \brief Calculate 1/x for double, but with single accuracy.
904 *
905 * \param x Argument that must be nonzero. This routine does not check arguments.
906 * \return 1/x. Result is undefined if your argument was invalid.
907 *
908 * \note This function might be superficially meaningless, but it helps us to
909 * write templated SIMD/non-SIMD code. For clarity it should not be used
910 * outside such code.
911 */
invSingleAccuracy(double x)912 static inline double invSingleAccuracy(double x)
913 {
914 return 1.0F / x;
915 }
916
917 /*! \brief Calculate 1/sqrt(x) for masked entry of double, but with single accuracy.
918 *
919 * This routine only evaluates 1/sqrt(x) if mask is true.
920 * Illegal values for a masked-out double will not lead to
921 * floating-point exceptions.
922 *
923 * \param x Argument that must be >0 if masked-in.
924 * \param m Mask
925 * \return 1/sqrt(x). Result is undefined if your argument was invalid or
926 * entry was not masked, and 0.0 for masked-out entries.
927 *
928 * \note This function might be superficially meaningless, but it helps us to
929 * write templated SIMD/non-SIMD code. For clarity it should not be used
930 * outside such code.
931 */
maskzInvsqrtSingleAccuracy(double x,bool m)932 static inline double maskzInvsqrtSingleAccuracy(double x, bool m)
933 {
934 return m ? invsqrtSingleAccuracy(x) : 0.0;
935 }
936
937 /*! \brief Calculate 1/x for masked entry of double, but with single accuracy.
938 *
939 * This routine only evaluates 1/x if mask is true.
940 * Illegal values for a masked-out double will not lead to
941 * floating-point exceptions.
942 *
943 * \param x Argument that must be nonzero if masked-in.
944 * \param m Mask
945 * \return 1/x. Result is undefined if your argument was invalid or
946 * entry was not masked, and 0.0 for masked-out entries.
947 *
948 * \note This function might be superficially meaningless, but it helps us to
949 * write templated SIMD/non-SIMD code. For clarity it should not be used
950 * outside such code.
951 */
maskzInvSingleAccuracy(double x,bool m)952 static inline double maskzInvSingleAccuracy(double x, bool m)
953 {
954 return m ? invSingleAccuracy(x) : 0.0;
955 }
956
957 /*! \brief Calculate sqrt(x) for double, but with single accuracy.
958 *
959 * \param x Argument that must be >=0.
960 * \return sqrt(x).
961 *
962 * \note This function might be superficially meaningless, but it helps us to
963 * write templated SIMD/non-SIMD code. For clarity it should not be used
964 * outside such code.
965 */
sqrtSingleAccuracy(double x)966 static inline double sqrtSingleAccuracy(double x)
967 {
968 return std::sqrt(static_cast<float>(x));
969 }
970
971 /*! \brief Double log(x), but with single accuracy. This is the natural logarithm.
972 *
973 * \param x Argument, should be >0.
974 * \result The natural logarithm of x. Undefined if argument is invalid.
975 *
976 * \note This function might be superficially meaningless, but it helps us to
977 * write templated SIMD/non-SIMD code. For clarity it should not be used
978 * outside such code.
979 */
logSingleAccuracy(double x)980 static inline double logSingleAccuracy(double x)
981 {
982 return std::log(static_cast<float>(x));
983 }
984
985 /*! \brief Double 2^x, but with single accuracy.
986 *
987 * \param x Argument.
988 * \result 2^x. Undefined if input argument caused overflow.
989 *
990 * \note This function might be superficially meaningless, but it helps us to
991 * write templated SIMD/non-SIMD code. For clarity it should not be used
992 * outside such code.
993 */
exp2SingleAccuracy(double x)994 static inline double exp2SingleAccuracy(double x)
995 {
996 return std::exp2(static_cast<float>(x));
997 }
998
999 /*! \brief Double exp(x), but with single accuracy.
1000 *
1001 * \param x Argument.
1002 * \result exp(x). Undefined if input argument caused overflow.
1003 *
1004 * \note This function might be superficially meaningless, but it helps us to
1005 * write templated SIMD/non-SIMD code. For clarity it should not be used
1006 * outside such code.
1007 */
expSingleAccuracy(double x)1008 static inline double expSingleAccuracy(double x)
1009 {
1010 return std::exp(static_cast<float>(x));
1011 }
1012
1013 /*! \brief Double erf(x), but with single accuracy.
1014 *
1015 * \param x Argument.
1016 * \result erf(x)
1017 *
1018 * \note This function might be superficially meaningless, but it helps us to
1019 * write templated SIMD/non-SIMD code. For clarity it should not be used
1020 * outside such code.
1021 */
erfSingleAccuracy(double x)1022 static inline double erfSingleAccuracy(double x)
1023 {
1024 return std::erf(static_cast<float>(x));
1025 }
1026
1027 /*! \brief Double erfc(x), but with single accuracy.
1028 *
1029 * \param x Argument.
1030 * \result erfc(x)
1031 *
1032 * \note This function might be superficially meaningless, but it helps us to
1033 * write templated SIMD/non-SIMD code. For clarity it should not be used
1034 * outside such code.
1035 */
erfcSingleAccuracy(double x)1036 static inline double erfcSingleAccuracy(double x)
1037 {
1038 return std::erfc(static_cast<float>(x));
1039 }
1040
1041
1042 /*! \brief Double sin \& cos, but with single accuracy.
1043 *
1044 * \param x The argument to evaluate sin/cos for
1045 * \param[out] sinval Sin(x)
1046 * \param[out] cosval Cos(x)
1047 *
1048 * \note This function might be superficially meaningless, but it helps us to
1049 * write templated SIMD/non-SIMD code. For clarity it should not be used
1050 * outside such code.
1051 */
sincosSingleAccuracy(double x,double * sinval,double * cosval)1052 static inline void sincosSingleAccuracy(double x, double* sinval, double* cosval)
1053 {
1054 // There is no single-precision sincos guaranteed in C++11, so use
1055 // separate functions and hope the compiler optimizes it for us.
1056 *sinval = std::sin(static_cast<float>(x));
1057 *cosval = std::cos(static_cast<float>(x));
1058 }
1059
1060 /*! \brief Double sin, but with single accuracy.
1061 *
1062 * \param x The argument to evaluate sin for
1063 * \result Sin(x)
1064 *
1065 * \note This function might be superficially meaningless, but it helps us to
1066 * write templated SIMD/non-SIMD code. For clarity it should not be used
1067 * outside such code.
1068 */
sinSingleAccuracy(double x)1069 static inline double sinSingleAccuracy(double x)
1070 {
1071 return std::sin(static_cast<float>(x));
1072 }
1073
1074 /*! \brief Double cos, but with single accuracy.
1075 *
1076 * \param x The argument to evaluate cos for
1077 * \result Cos(x)
1078 *
1079 * \note This function might be superficially meaningless, but it helps us to
1080 * write templated SIMD/non-SIMD code. For clarity it should not be used
1081 * outside such code.
1082 */
cosSingleAccuracy(double x)1083 static inline double cosSingleAccuracy(double x)
1084 {
1085 return std::cos(static_cast<float>(x));
1086 }
1087
1088 /*! \brief Double tan, but with single accuracy.
1089 *
1090 * \param x The argument to evaluate tan for
1091 * \result Tan(x)
1092 *
1093 * \note This function might be superficially meaningless, but it helps us to
1094 * write templated SIMD/non-SIMD code. For clarity it should not be used
1095 * outside such code.
1096 */
tanSingleAccuracy(double x)1097 static inline double tanSingleAccuracy(double x)
1098 {
1099 return std::tan(static_cast<float>(x));
1100 }
1101
1102 /*! \brief Double asin, but with single accuracy.
1103 *
1104 * \param x The argument to evaluate asin for
1105 * \result Asin(x)
1106 *
1107 * \note This function might be superficially meaningless, but it helps us to
1108 * write templated SIMD/non-SIMD code. For clarity it should not be used
1109 * outside such code.
1110 */
asinSingleAccuracy(double x)1111 static inline double asinSingleAccuracy(double x)
1112 {
1113 return std::asin(static_cast<float>(x));
1114 }
1115
1116 /*! \brief Double acos, but with single accuracy.
1117 *
1118 * \param x The argument to evaluate acos for
1119 * \result Acos(x)
1120 *
1121 * \note This function might be superficially meaningless, but it helps us to
1122 * write templated SIMD/non-SIMD code. For clarity it should not be used
1123 * outside such code.
1124 */
acosSingleAccuracy(double x)1125 static inline double acosSingleAccuracy(double x)
1126 {
1127 return std::acos(static_cast<float>(x));
1128 }
1129
1130 /*! \brief Double atan, but with single accuracy.
1131 *
1132 * \param x The argument to evaluate atan for
1133 * \result Atan(x)
1134 *
1135 * \note This function might be superficially meaningless, but it helps us to
1136 * write templated SIMD/non-SIMD code. For clarity it should not be used
1137 * outside such code.
1138 */
atanSingleAccuracy(double x)1139 static inline double atanSingleAccuracy(double x)
1140 {
1141 return std::atan(static_cast<float>(x));
1142 }
1143
1144 /*! \brief Double atan2(y,x), but with single accuracy.
1145 *
1146 * \param y Y component of vector, any quartile
1147 * \param x X component of vector, any quartile
1148 * \result Atan(y,x)
1149 *
1150 * \note This function might be superficially meaningless, but it helps us to
1151 * write templated SIMD/non-SIMD code. For clarity it should not be used
1152 * outside such code.
1153 */
atan2SingleAccuracy(double y,double x)1154 static inline double atan2SingleAccuracy(double y, double x)
1155 {
1156 return std::atan2(static_cast<float>(y), static_cast<float>(x));
1157 }
1158
1159 /*! \brief Force correction due to PME in double, but with single accuracy.
1160 *
1161 * See the SIMD version of this function for details.
1162 *
1163 * \param z2 input parameter
1164 * \returns Correction to use on force
1165 *
1166 * \note This function might be superficially meaningless, but it helps us to
1167 * write templated SIMD/non-SIMD code. For clarity it should not be used
1168 * outside such code.
1169 */
pmeForceCorrectionSingleAccuracy(double z2)1170 static inline double pmeForceCorrectionSingleAccuracy(double z2)
1171 {
1172 const float FN6(-1.7357322914161492954e-8F);
1173 const float FN5(1.4703624142580877519e-6F);
1174 const float FN4(-0.000053401640219807709149F);
1175 const float FN3(0.0010054721316683106153F);
1176 const float FN2(-0.019278317264888380590F);
1177 const float FN1(0.069670166153766424023F);
1178 const float FN0(-0.75225204789749321333F);
1179
1180 const float FD4(0.0011193462567257629232F);
1181 const float FD3(0.014866955030185295499F);
1182 const float FD2(0.11583842382862377919F);
1183 const float FD1(0.50736591960530292870F);
1184 const float FD0(1.0F);
1185
1186 float z4;
1187 float polyFN0, polyFN1, polyFD0, polyFD1;
1188
1189 float z2f = z2;
1190
1191 z4 = z2f * z2f;
1192
1193 polyFD0 = fma(FD4, z4, FD2);
1194 polyFD1 = fma(FD3, z4, FD1);
1195 polyFD0 = fma(polyFD0, z4, FD0);
1196 polyFD0 = fma(polyFD1, z2f, polyFD0);
1197
1198 polyFN0 = fma(FN6, z4, FN4);
1199 polyFN1 = fma(FN5, z4, FN3);
1200 polyFN0 = fma(polyFN0, z4, FN2);
1201 polyFN1 = fma(polyFN1, z4, FN1);
1202 polyFN0 = fma(polyFN0, z4, FN0);
1203 polyFN0 = fma(polyFN1, z2f, polyFN0);
1204
1205 return polyFN0 / polyFD0;
1206 }
1207
1208 /*! \brief Potential correction due to PME in double, but with single accuracy.
1209 *
1210 * See the SIMD version of this function for details.
1211 *
1212 * \param z2 input parameter
1213 * \returns Correction to use on potential.
1214 *
1215 * \note This function might be superficially meaningless, but it helps us to
1216 * write templated SIMD/non-SIMD code. For clarity it should not be used
1217 * outside such code.
1218 */
pmePotentialCorrectionSingleAccuracy(double z2)1219 static inline double pmePotentialCorrectionSingleAccuracy(double z2)
1220 {
1221 const float VN6(1.9296833005951166339e-8F);
1222 const float VN5(-1.4213390571557850962e-6F);
1223 const float VN4(0.000041603292906656984871F);
1224 const float VN3(-0.00013134036773265025626F);
1225 const float VN2(0.038657983986041781264F);
1226 const float VN1(0.11285044772717598220F);
1227 const float VN0(1.1283802385263030286F);
1228
1229 const float VD3(0.0066752224023576045451F);
1230 const float VD2(0.078647795836373922256F);
1231 const float VD1(0.43336185284710920150F);
1232 const float VD0(1.0F);
1233
1234 float z4;
1235 float polyVN0, polyVN1, polyVD0, polyVD1;
1236
1237 float z2f = z2;
1238
1239 z4 = z2f * z2f;
1240
1241 polyVD1 = fma(VD3, z4, VD1);
1242 polyVD0 = fma(VD2, z4, VD0);
1243 polyVD0 = fma(polyVD1, z2f, polyVD0);
1244
1245 polyVN0 = fma(VN6, z4, VN4);
1246 polyVN1 = fma(VN5, z4, VN3);
1247 polyVN0 = fma(polyVN0, z4, VN2);
1248 polyVN1 = fma(polyVN1, z4, VN1);
1249 polyVN0 = fma(polyVN0, z4, VN0);
1250 polyVN0 = fma(polyVN1, z2f, polyVN0);
1251
1252 return polyVN0 / polyVD0;
1253 }
1254
1255
1256 } // namespace gmx
1257
1258
1259 #endif // GMX_SIMD_SCALAR_MATH_H
1260