1 /*************************** vectormath_common.h ****************************
2 * Author: Agner Fog
3 * Date created: 2014-04-18
4 * Last modified: 2016-11-25
5 * Version: 1.25
6 * Project: vector classes
7 * Description:
8 * Header file containing common code for inline version of mathematical functions.
9 *
10 * Theory, methods and inspiration based partially on these sources:
11 * > Moshier, Stephen Lloyd Baluk: Methods and programs for mathematical functions.
12 * Ellis Horwood, 1989.
13 * > VDT library developed on CERN by Danilo Piparo, Thomas Hauth and
14 * Vincenzo Innocente, 2012, https://svnweb.cern.ch/trac/vdt
15 * > Cephes math library by Stephen L. Moshier 1992,
16 * http://www.netlib.org/cephes/
17 *
18 * Calculation methods:
19 * Some functions are using Pad� approximations f(x) = P(x)/Q(x)
20 * Most single precision functions are using Taylor expansions
21 *
22 * For detailed instructions, see VectorClass.pdf
23 *
24 * (c) Copyright 2014-2016 GNU General Public License http://www.gnu.org/licenses
25 ******************************************************************************/
26
27 #ifndef VECTORMATH_COMMON_H
28 #define VECTORMATH_COMMON_H 1
29
30 #ifdef VECTORMATH_LIB_H
31 #error conflicting header files: vectormath_lib.h for external math functions, other vectormath_xxx.h for inline math functions
32 #endif
33
34 #include <math.h>
35 #include "vectorclass.h"
36
37
38
39 /******************************************************************************
40 define mathematical constants
41 ******************************************************************************/
42 #define VM_PI 3.14159265358979323846 // pi
43 #define VM_PI_2 1.57079632679489661923 // pi / 2
44 #define VM_PI_4 0.785398163397448309616 // pi / 4
45 #define VM_SQRT2 1.41421356237309504880 // sqrt(2)
46 #define VM_LOG2E 1.44269504088896340736 // 1/log(2)
47 #define VM_LOG10E 0.434294481903251827651 // 1/log(10)
48 #define VM_LOG210 3.321928094887362347808 // log2(10)
49 #define VM_LN2 0.693147180559945309417 // log(2)
50 #define VM_LN10 2.30258509299404568402 // log(10)
51 #define VM_SMALLEST_NORMAL 2.2250738585072014E-308 // smallest normal number, double
52 #define VM_SMALLEST_NORMALF 1.17549435E-38f // smallest normal number, float
53
54 #ifdef VCL_NAMESPACE
55 namespace VCL_NAMESPACE {
56 #endif
57
58 /******************************************************************************
59 templates for producing infinite and nan in desired vector type
60 ******************************************************************************/
61 template <class VTYPE>
62 static inline VTYPE infinite_vec();
63
64 template <>
65 inline Vec2d infinite_vec<Vec2d>() {
66 return infinite2d();
67 }
68
69 template <>
70 inline Vec4f infinite_vec<Vec4f>() {
71 return infinite4f();
72 }
73
74 #if MAX_VECTOR_SIZE >= 256
75
76 template <>
77 inline Vec4d infinite_vec<Vec4d>() {
78 return infinite4d();
79 }
80
81 template <>
82 inline Vec8f infinite_vec<Vec8f>() {
83 return infinite8f();
84 }
85
86 #endif // MAX_VECTOR_SIZE >= 256
87
88 #if MAX_VECTOR_SIZE >= 512
89
90 template <>
91 inline Vec8d infinite_vec<Vec8d>() {
92 return infinite8d();
93 }
94
95 template <>
96 inline Vec16f infinite_vec<Vec16f>() {
97 return infinite16f();
98 }
99
100 #endif // MAX_VECTOR_SIZE >= 512
101
102
103 // template for producing quiet NAN
104 template <class VTYPE>
105 static inline VTYPE nan_vec(int n = 0x100);
106
107 template <>
108 inline Vec2d nan_vec<Vec2d>(int n) {
109 return nan2d(n);
110 }
111
112 template <>
113 inline Vec4f nan_vec<Vec4f>(int n) {
114 return nan4f(n);
115 }
116
117 #if MAX_VECTOR_SIZE >= 256
118
119 template <>
120 inline Vec4d nan_vec<Vec4d>(int n) {
121 return nan4d(n);
122 }
123
124 template <>
125 inline Vec8f nan_vec<Vec8f>(int n) {
126 return nan8f(n);
127 }
128
129 #endif // MAX_VECTOR_SIZE >= 256
130
131 #if MAX_VECTOR_SIZE >= 512
132
133 template <>
134 inline Vec8d nan_vec<Vec8d>(int n) {
135 return nan8d(n);
136 }
137
138 template <>
139 inline Vec16f nan_vec<Vec16f>(int n) {
140 return nan16f(n);
141 }
142
143 #endif // MAX_VECTOR_SIZE >= 512
144
145 // Define NAN trace values
146 #define NAN_LOG 0x101 // logarithm for x<0
147 #define NAN_POW 0x102 // negative number raised to non-integer power
148 #define NAN_HYP 0x104 // acosh for x<1 and atanh for abs(x)>1
149
150
151 /******************************************************************************
152 templates for polynomials
153 Using Estrin's scheme to make shorter dependency chains and use FMA, starting
154 longest dependency chains first.
155 ******************************************************************************/
156
157 // template <typedef VECTYPE, typedef CTYPE>
158 template <class VTYPE, class CTYPE>
polynomial_2(VTYPE const & x,CTYPE c0,CTYPE c1,CTYPE c2)159 static inline VTYPE polynomial_2(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2) {
160 // calculates polynomial c2*x^2 + c1*x + c0
161 // VTYPE may be a vector type, CTYPE is a scalar type
162 VTYPE x2 = x * x;
163 //return = x2 * c2 + (x * c1 + c0);
164 return mul_add(x2, c2, mul_add(x, c1, c0));
165 }
166
167 template<class VTYPE, class CTYPE>
polynomial_3(VTYPE const & x,CTYPE c0,CTYPE c1,CTYPE c2,CTYPE c3)168 static inline VTYPE polynomial_3(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3) {
169 // calculates polynomial c3*x^3 + c2*x^2 + c1*x + c0
170 // VTYPE may be a vector type, CTYPE is a scalar type
171 VTYPE x2 = x * x;
172 //return (c2 + c3*x)*x2 + (c1*x + c0);
173 return mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0));
174 }
175
176 template<class VTYPE, class CTYPE>
polynomial_4(VTYPE const & x,CTYPE c0,CTYPE c1,CTYPE c2,CTYPE c3,CTYPE c4)177 static inline VTYPE polynomial_4(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4) {
178 // calculates polynomial c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
179 // VTYPE may be a vector type, CTYPE is a scalar type
180 VTYPE x2 = x * x;
181 VTYPE x4 = x2 * x2;
182 //return (c2+c3*x)*x2 + ((c0+c1*x) + c4*x4);
183 return mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0) + c4*x4);
184 }
185
186 template<class VTYPE, class CTYPE>
polynomial_4n(VTYPE const & x,CTYPE c0,CTYPE c1,CTYPE c2,CTYPE c3)187 static inline VTYPE polynomial_4n(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3) {
188 // calculates polynomial 1*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
189 // VTYPE may be a vector type, CTYPE is a scalar type
190 VTYPE x2 = x * x;
191 VTYPE x4 = x2 * x2;
192 //return (c2+c3*x)*x2 + ((c0+c1*x) + x4);
193 return mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0) + x4);
194 }
195
196 template<class VTYPE, class CTYPE>
polynomial_5(VTYPE const & x,CTYPE c0,CTYPE c1,CTYPE c2,CTYPE c3,CTYPE c4,CTYPE c5)197 static inline VTYPE polynomial_5(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5) {
198 // calculates polynomial c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
199 // VTYPE may be a vector type, CTYPE is a scalar type
200 VTYPE x2 = x * x;
201 VTYPE x4 = x2 * x2;
202 //return (c2+c3*x)*x2 + ((c4+c5*x)*x4 + (c0+c1*x));
203 return mul_add(mul_add(c3, x, c2), x2, mul_add(mul_add(c5, x, c4), x4, mul_add(c1, x, c0)));
204 }
205
206 template<class VTYPE, class CTYPE>
polynomial_5n(VTYPE const & x,CTYPE c0,CTYPE c1,CTYPE c2,CTYPE c3,CTYPE c4)207 static inline VTYPE polynomial_5n(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4) {
208 // calculates polynomial 1*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
209 // VTYPE may be a vector type, CTYPE is a scalar type
210 VTYPE x2 = x * x;
211 VTYPE x4 = x2 * x2;
212 //return (c2+c3*x)*x2 + ((c4+x)*x4 + (c0+c1*x));
213 return mul_add(mul_add(c3, x, c2), x2, mul_add(c4 + x, x4, mul_add(c1, x, c0)));
214 }
215
216 template<class VTYPE, class CTYPE>
polynomial_6(VTYPE const & x,CTYPE c0,CTYPE c1,CTYPE c2,CTYPE c3,CTYPE c4,CTYPE c5,CTYPE c6)217 static inline VTYPE polynomial_6(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6) {
218 // calculates polynomial c6*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
219 // VTYPE may be a vector type, CTYPE is a scalar type
220 VTYPE x2 = x * x;
221 VTYPE x4 = x2 * x2;
222 //return (c4+c5*x+c6*x2)*x4 + ((c2+c3*x)*x2 + (c0+c1*x));
223 return mul_add(mul_add(c6, x2, mul_add(c5, x, c4)), x4, mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0)));
224 }
225
226 template<class VTYPE, class CTYPE>
polynomial_6n(VTYPE const & x,CTYPE c0,CTYPE c1,CTYPE c2,CTYPE c3,CTYPE c4,CTYPE c5)227 static inline VTYPE polynomial_6n(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5) {
228 // calculates polynomial 1*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
229 // VTYPE may be a vector type, CTYPE is a scalar type
230 VTYPE x2 = x * x;
231 VTYPE x4 = x2 * x2;
232 //return (c4+c5*x+x2)*x4 + ((c2+c3*x)*x2 + (c0+c1*x));
233 return mul_add(mul_add(c5, x, c4 + x2), x4, mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0)));
234 }
235
236 template<class VTYPE, class CTYPE>
polynomial_7(VTYPE const & x,CTYPE c0,CTYPE c1,CTYPE c2,CTYPE c3,CTYPE c4,CTYPE c5,CTYPE c6,CTYPE c7)237 static inline VTYPE polynomial_7(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7) {
238 // calculates polynomial c7*x^7 + c6*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
239 // VTYPE may be a vector type, CTYPE is a scalar type
240 VTYPE x2 = x * x;
241 VTYPE x4 = x2 * x2;
242 //return ((c6+c7*x)*x2 + (c4+c5*x))*x4 + ((c2+c3*x)*x2 + (c0+c1*x));
243 return mul_add(mul_add(mul_add(c7, x, c6), x2, mul_add(c5, x, c4)), x4, mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0)));
244 }
245
246 template<class VTYPE, class CTYPE>
polynomial_8(VTYPE const & x,CTYPE c0,CTYPE c1,CTYPE c2,CTYPE c3,CTYPE c4,CTYPE c5,CTYPE c6,CTYPE c7,CTYPE c8)247 static inline VTYPE polynomial_8(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8) {
248 // calculates polynomial c8*x^8 + c7*x^7 + c6*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
249 // VTYPE may be a vector type, CTYPE is a scalar type
250 VTYPE x2 = x * x;
251 VTYPE x4 = x2 * x2;
252 VTYPE x8 = x4 * x4;
253 //return ((c6+c7*x)*x2 + (c4+c5*x))*x4 + (c8*x8 + (c2+c3*x)*x2 + (c0+c1*x));
254 return mul_add(mul_add(mul_add(c7, x, c6), x2, mul_add(c5, x, c4)), x4,
255 mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0) + c8*x8));
256 }
257
258 template<class VTYPE, class CTYPE>
polynomial_9(VTYPE const & x,CTYPE c0,CTYPE c1,CTYPE c2,CTYPE c3,CTYPE c4,CTYPE c5,CTYPE c6,CTYPE c7,CTYPE c8,CTYPE c9)259 static inline VTYPE polynomial_9(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8, CTYPE c9) {
260 // calculates polynomial c9*x^9 + c8*x^8 + c7*x^7 + c6*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
261 // VTYPE may be a vector type, CTYPE is a scalar type
262 VTYPE x2 = x * x;
263 VTYPE x4 = x2 * x2;
264 VTYPE x8 = x4 * x4;
265 //return (((c6+c7*x)*x2 + (c4+c5*x))*x4 + (c8+c9*x)*x8) + ((c2+c3*x)*x2 + (c0+c1*x));
266 return mul_add(mul_add(c9, x, c8), x8, mul_add(
267 mul_add(mul_add(c7, x, c6), x2, mul_add(c5, x, c4)), x4,
268 mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0))));
269 }
270
271 template<class VTYPE, class CTYPE>
polynomial_10(VTYPE const & x,CTYPE c0,CTYPE c1,CTYPE c2,CTYPE c3,CTYPE c4,CTYPE c5,CTYPE c6,CTYPE c7,CTYPE c8,CTYPE c9,CTYPE c10)272 static inline VTYPE polynomial_10(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8, CTYPE c9, CTYPE c10) {
273 // calculates polynomial c10*x^10 + c9*x^9 + c8*x^8 + c7*x^7 + c6*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0
274 // VTYPE may be a vector type, CTYPE is a scalar type
275 VTYPE x2 = x * x;
276 VTYPE x4 = x2 * x2;
277 VTYPE x8 = x4 * x4;
278 //return (((c6+c7*x)*x2 + (c4+c5*x))*x4 + (c8+c9*x+c10*x2)*x8) + ((c2+c3*x)*x2 + (c0+c1*x));
279 return mul_add(mul_add(x2, c10, mul_add(c9, x, c8)), x8,
280 mul_add(mul_add(mul_add(c7, x, c6), x2, mul_add(c5, x, c4)), x4,
281 mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0))));
282 }
283
284 template<class VTYPE, class CTYPE>
polynomial_13(VTYPE const & x,CTYPE c0,CTYPE c1,CTYPE c2,CTYPE c3,CTYPE c4,CTYPE c5,CTYPE c6,CTYPE c7,CTYPE c8,CTYPE c9,CTYPE c10,CTYPE c11,CTYPE c12,CTYPE c13)285 static inline VTYPE polynomial_13(VTYPE const & x, CTYPE c0, CTYPE c1, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8, CTYPE c9, CTYPE c10, CTYPE c11, CTYPE c12, CTYPE c13) {
286 // calculates polynomial c13*x^13 + c12*x^12 + ... + c1*x + c0
287 // VTYPE may be a vector type, CTYPE is a scalar type
288 VTYPE x2 = x * x;
289 VTYPE x4 = x2 * x2;
290 VTYPE x8 = x4 * x4;
291 return mul_add(
292 mul_add(
293 mul_add(c13, x, c12), x4,
294 mul_add(mul_add(c11, x, c10), x2, mul_add(c9, x, c8))), x8,
295 mul_add(
296 mul_add(mul_add(c7, x, c6), x2, mul_add(c5, x, c4)), x4,
297 mul_add(mul_add(c3, x, c2), x2, mul_add(c1, x, c0))));
298 }
299
300
301 template<class VTYPE, class CTYPE>
polynomial_13m(VTYPE const & x,CTYPE c2,CTYPE c3,CTYPE c4,CTYPE c5,CTYPE c6,CTYPE c7,CTYPE c8,CTYPE c9,CTYPE c10,CTYPE c11,CTYPE c12,CTYPE c13)302 static inline VTYPE polynomial_13m(VTYPE const & x, CTYPE c2, CTYPE c3, CTYPE c4, CTYPE c5, CTYPE c6, CTYPE c7, CTYPE c8, CTYPE c9, CTYPE c10, CTYPE c11, CTYPE c12, CTYPE c13) {
303 // calculates polynomial c13*x^13 + c12*x^12 + ... + x + 0
304 // VTYPE may be a vector type, CTYPE is a scalar type
305 VTYPE x2 = x * x;
306 VTYPE x4 = x2 * x2;
307 VTYPE x8 = x4 * x4;
308 // return ((c8+c9*x) + (c10+c11*x)*x2 + (c12+c13*x)*x4)*x8 + (((c6+c7*x)*x2 + (c4+c5*x))*x4 + ((c2+c3*x)*x2 + x));
309 return mul_add(
310 mul_add(mul_add(c13, x, c12), x4, mul_add(mul_add(c11, x, c10), x2, mul_add(c9, x, c8))), x8,
311 mul_add(mul_add(mul_add(c7, x, c6), x2, mul_add(c5, x, c4)), x4, mul_add(mul_add(c3, x, c2), x2, x)));
312 }
313
314 #ifdef VCL_NAMESPACE
315 }
316 #endif
317
318 #endif
319