1 /**************************** vectormath_exp.h ******************************
2 * Author: Agner Fog
3 * Date created: 2014-04-18
4 * Last modified: 2016-12-26
5 * Version: 1.26
6 * Project: vector classes
7 * Description:
8 * Header file containing inline vector functions of logarithms, exponential
9 * and power functions:
10 * exp exponential function
11 * exp2 exponential function base 2
12 * exp10 exponential function base 10
13 * exmp1 exponential function minus 1
14 * log natural logarithm
15 * log2 logarithm base 2
16 * log10 logarithm base 10
17 * log1p natural logarithm of 1+x
18 * cbrt cube root
19 * pow raise vector elements to power
20 * pow_ratio raise vector elements to rational power
21 *
22 * Theory, methods and inspiration based partially on these sources:
23 * > Moshier, Stephen Lloyd Baluk: Methods and programs for mathematical functions.
24 * Ellis Horwood, 1989.
25 * > VDT library developed on CERN by Danilo Piparo, Thomas Hauth and
26 * Vincenzo Innocente, 2012, https://svnweb.cern.ch/trac/vdt
27 * > Cephes math library by Stephen L. Moshier 1992,
28 * http://www.netlib.org/cephes/
29 *
30 * For detailed instructions, see vectormath_common.h and VectorClass.pdf
31 *
32 * (c) Copyright 2014-2016 GNU General Public License http://www.gnu.org/licenses
33 ******************************************************************************/
34
35 #ifndef VECTORMATH_EXP_H
36 #define VECTORMATH_EXP_H 1
37
38 #include "vectormath_common.h"
39
40 #ifdef VCL_NAMESPACE
41 namespace VCL_NAMESPACE {
42 #endif
43
44 /******************************************************************************
45 * Exponential functions
46 ******************************************************************************/
47
48 // Helper functions, used internally:
49
50 // This function calculates pow(2,n) where n must be an integer. Does not check for overflow or underflow
vm_pow2n(Vec2d const & n)51 static inline Vec2d vm_pow2n (Vec2d const & n) {
52 const double pow2_52 = 4503599627370496.0; // 2^52
53 const double bias = 1023.0; // bias in exponent
54 Vec2d a = n + (bias + pow2_52); // put n + bias in least significant bits
55 Vec2q b = reinterpret_i(a); // bit-cast to integer
56 Vec2q c = b << 52; // shift left 52 places to get into exponent field
57 Vec2d d = reinterpret_d(c); // bit-cast back to double
58 return d;
59 }
60
vm_pow2n(Vec4f const & n)61 static inline Vec4f vm_pow2n (Vec4f const & n) {
62 const float pow2_23 = 8388608.0; // 2^23
63 const float bias = 127.0; // bias in exponent
64 Vec4f a = n + (bias + pow2_23); // put n + bias in least significant bits
65 Vec4i b = reinterpret_i(a); // bit-cast to integer
66 Vec4i c = b << 23; // shift left 23 places to get into exponent field
67 Vec4f d = reinterpret_f(c); // bit-cast back to float
68 return d;
69 }
70
71 #if MAX_VECTOR_SIZE >= 256
72
vm_pow2n(Vec4d const & n)73 static inline Vec4d vm_pow2n (Vec4d const & n) {
74 const double pow2_52 = 4503599627370496.0; // 2^52
75 const double bias = 1023.0; // bias in exponent
76 Vec4d a = n + (bias + pow2_52); // put n + bias in least significant bits
77 Vec4q b = reinterpret_i(a); // bit-cast to integer
78 Vec4q c = b << 52; // shift left 52 places to get value into exponent field
79 Vec4d d = reinterpret_d(c); // bit-cast back to double
80 return d;
81 }
82
vm_pow2n(Vec8f const & n)83 static inline Vec8f vm_pow2n (Vec8f const & n) {
84 const float pow2_23 = 8388608.0; // 2^23
85 const float bias = 127.0; // bias in exponent
86 Vec8f a = n + (bias + pow2_23); // put n + bias in least significant bits
87 Vec8i b = reinterpret_i(a); // bit-cast to integer
88 Vec8i c = b << 23; // shift left 23 places to get into exponent field
89 Vec8f d = reinterpret_f(c); // bit-cast back to float
90 return d;
91 }
92
93 #endif // MAX_VECTOR_SIZE >= 256
94
95 #if MAX_VECTOR_SIZE >= 512
96
vm_pow2n(Vec8d const & n)97 static inline Vec8d vm_pow2n (Vec8d const & n) {
98 #ifdef __AVX512ER__
99 return _mm512_exp2a23_round_pd(n, _MM_FROUND_NO_EXC); // this is exact only for integral n
100 #else
101 const double pow2_52 = 4503599627370496.0; // 2^52
102 const double bias = 1023.0; // bias in exponent
103 Vec8d a = n + (bias + pow2_52); // put n + bias in least significant bits
104 Vec8q b = Vec8q(reinterpret_i(a)); // bit-cast to integer
105 Vec8q c = b << 52; // shift left 52 places to get value into exponent field
106 Vec8d d = Vec8d(reinterpret_d(c)); // bit-cast back to double
107 return d;
108 #endif
109 }
110
vm_pow2n(Vec16f const & n)111 static inline Vec16f vm_pow2n (Vec16f const & n) {
112 #ifdef __AVX512ER__
113 return _mm512_exp2a23_round_ps(n, _MM_FROUND_NO_EXC);
114 #else
115 const float pow2_23 = 8388608.0; // 2^23
116 const float bias = 127.0; // bias in exponent
117 Vec16f a = n + (bias + pow2_23); // put n + bias in least significant bits
118 Vec16i b = Vec16i(reinterpret_i(a)); // bit-cast to integer
119 Vec16i c = b << 23; // shift left 23 places to get into exponent field
120 Vec16f d = Vec16f(reinterpret_f(c)); // bit-cast back to float
121 return d;
122 #endif
123 }
124
125 #endif // MAX_VECTOR_SIZE >= 512
126
127
128 // Template for exp function, double precision
129 // The limit of abs(x) is defined by max_x below
130 // This function does not produce denormals
131 // Template parameters:
132 // VTYPE: double vector type
133 // BVTYPE: boolean vector type
134 // M1: 0 for exp, 1 for expm1
135 // BA: 0 for exp, 1 for 0.5*exp, 2 for pow(2,x), 10 for pow(10,x)
136
137 #if 1 // choose method
138
139 // Taylor expansion
140 template<class VTYPE, class BVTYPE, int M1, int BA>
exp_d(VTYPE const & initial_x)141 static inline VTYPE exp_d(VTYPE const & initial_x) {
142
143 // Taylor coefficients, 1/n!
144 // Not using minimax approximation because we prioritize precision close to x = 0
145 const double p2 = 1./2.;
146 const double p3 = 1./6.;
147 const double p4 = 1./24.;
148 const double p5 = 1./120.;
149 const double p6 = 1./720.;
150 const double p7 = 1./5040.;
151 const double p8 = 1./40320.;
152 const double p9 = 1./362880.;
153 const double p10 = 1./3628800.;
154 const double p11 = 1./39916800.;
155 const double p12 = 1./479001600.;
156 const double p13 = 1./6227020800.;
157
158 // maximum abs(x), value depends on BA, defined below
159 // The lower limit of x is slightly more restrictive than the upper limit.
160 // We are specifying the lower limit, except for BA = 1 because it is not used for negative x
161 double max_x;
162
163 // data vectors
164 VTYPE x, r, z, n2;
165 BVTYPE inrange; // boolean vector
166
167 if (BA <= 1) { // exp(x)
168 max_x = BA == 0 ? 708.39 : 709.7; // lower limit for 0.5*exp(x) is -707.6, but we are using 0.5*exp(x) only for positive x in hyperbolic functions
169 const double ln2d_hi = 0.693145751953125;
170 const double ln2d_lo = 1.42860682030941723212E-6;
171 x = initial_x;
172 r = round(initial_x*VM_LOG2E);
173 // subtraction in two steps for higher precision
174 x = nmul_add(r, ln2d_hi, x); // x -= r * ln2d_hi;
175 x = nmul_add(r, ln2d_lo, x); // x -= r * ln2d_lo;
176 }
177 else if (BA == 2) { // pow(2,x)
178 max_x = 1022.0;
179 r = round(initial_x);
180 x = initial_x - r;
181 x *= VM_LN2;
182 }
183 else if (BA == 10) { // pow(10,x)
184 max_x = 307.65;
185 const double log10_2_hi = 0.30102999554947019; // log10(2) in two parts
186 const double log10_2_lo = 1.1451100899212592E-10;
187 x = initial_x;
188 r = round(initial_x*(VM_LOG2E*VM_LN10));
189 // subtraction in two steps for higher precision
190 x = nmul_add(r, log10_2_hi, x); // x -= r * log10_2_hi;
191 x = nmul_add(r, log10_2_lo, x); // x -= r * log10_2_lo;
192 x *= VM_LN10;
193 }
194 else { // undefined value of BA
195 return 0.;
196 }
197
198 z = polynomial_13m(x, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13);
199
200 if (BA == 1) r--; // 0.5 * exp(x)
201
202 // multiply by power of 2
203 n2 = vm_pow2n(r);
204
205 if (M1 == 0) {
206 // exp
207 z = (z + 1.0) * n2;
208 }
209 else {
210 // expm1
211 z = mul_add(z, n2, n2 - 1.0); // z = z * n2 + (n2 - 1.0);
212 }
213
214 // check for overflow
215 inrange = abs(initial_x) < max_x;
216 // check for INF and NAN
217 inrange &= is_finite(initial_x);
218
219 if (horizontal_and(inrange)) {
220 // fast normal path
221 return z;
222 }
223 else {
224 // overflow, underflow and NAN
225 r = select(sign_bit(initial_x), 0.-M1, infinite_vec<VTYPE>()); // value in case of +/- overflow or INF
226 z = select(inrange, z, r); // +/- underflow
227 z = select(is_nan(initial_x), initial_x, z); // NAN goes through
228 return z;
229 }
230 }
231
232 #else
233
234 // Pade expansion uses less code and fewer registers, but is slower
235 template<class VTYPE, class BVTYPE, int M1, int BA>
exp_d(VTYPE const & initial_x)236 static inline VTYPE exp_d(VTYPE const & initial_x) {
237
238 // define constants
239 const double ln2p1 = 0.693145751953125;
240 const double ln2p2 = 1.42860682030941723212E-6;
241 const double log2e = VM_LOG2E;
242 const double max_exp = 708.39;
243 // coefficients of pade polynomials
244 const double P0exp = 9.99999999999999999910E-1;
245 const double P1exp = 3.02994407707441961300E-2;
246 const double P2exp = 1.26177193074810590878E-4;
247 const double Q0exp = 2.00000000000000000009E0;
248 const double Q1exp = 2.27265548208155028766E-1;
249 const double Q2exp = 2.52448340349684104192E-3;
250 const double Q3exp = 3.00198505138664455042E-6;
251
252 VTYPE x, r, xx, px, qx, y, n2; // data vectors
253 BVTYPE inrange; // boolean vector
254
255 x = initial_x;
256 r = round(initial_x*log2e);
257
258 // subtraction in one step would gives loss of precision
259 x -= r * ln2p1;
260 x -= r * ln2p2;
261
262 xx = x * x;
263
264 // px = x * P(x^2).
265 px = polynomial_2(xx, P0exp, P1exp, P2exp) * x;
266
267 // Evaluate Q(x^2).
268 qx = polynomial_3(xx, Q0exp, Q1exp, Q2exp, Q3exp);
269
270 // e^x = 1 + 2*P(x^2)/( Q(x^2) - P(x^2) )
271 y = (2.0 * px) / (qx - px);
272
273 // Get 2^n in double.
274 // n = round_to_int64_limited(r);
275 // n2 = exp2(n);
276 n2 = vm_pow2n(r); // this is faster
277
278 if (M1 == 0) {
279 // exp
280 y = (y + 1.0) * n2;
281 }
282 else {
283 // expm1
284 y = y * n2 + (n2 - 1.0);
285 }
286
287 // overflow
288 inrange = abs(initial_x) < max_exp;
289 // check for INF and NAN
290 inrange &= is_finite(initial_x);
291
292 if (horizontal_and(inrange)) {
293 // fast normal path
294 return y;
295 }
296 else {
297 // overflow, underflow and NAN
298 r = select(sign_bit(initial_x), 0.-M1, infinite_vec<VTYPE>()); // value in case of overflow or INF
299 y = select(inrange, y, r); // +/- overflow
300 y = select(is_nan(initial_x), initial_x, y); // NAN goes through
301 return y;
302 }
303 }
304 #endif
305
306 // instances of exp_d template
exp(Vec2d const & x)307 static inline Vec2d exp(Vec2d const & x) {
308 return exp_d<Vec2d, Vec2db, 0, 0>(x);
309 }
310
expm1(Vec2d const & x)311 static inline Vec2d expm1(Vec2d const & x) {
312 return exp_d<Vec2d, Vec2db, 1, 0>(x);
313 }
314
exp2(Vec2d const & x)315 static inline Vec2d exp2(Vec2d const & x) {
316 return exp_d<Vec2d, Vec2db, 0, 2>(x);
317 }
318
exp10(Vec2d const & x)319 static inline Vec2d exp10(Vec2d const & x) {
320 return exp_d<Vec2d, Vec2db, 0, 10>(x);
321 }
322
323 #if MAX_VECTOR_SIZE >= 256
324
exp(Vec4d const & x)325 static inline Vec4d exp(Vec4d const & x) {
326 return exp_d<Vec4d, Vec4db, 0, 0>(x);
327 }
328
expm1(Vec4d const & x)329 static inline Vec4d expm1(Vec4d const & x) {
330 return exp_d<Vec4d, Vec4db, 1, 0>(x);
331 }
332
exp2(Vec4d const & x)333 static inline Vec4d exp2(Vec4d const & x) {
334 return exp_d<Vec4d, Vec4db, 0, 2>(x);
335 }
336
exp10(Vec4d const & x)337 static inline Vec4d exp10(Vec4d const & x) {
338 return exp_d<Vec4d, Vec4db, 0, 10>(x);
339 }
340
341 #endif // MAX_VECTOR_SIZE >= 256
342
343 #if MAX_VECTOR_SIZE >= 512
344
exp(Vec8d const & x)345 static inline Vec8d exp(Vec8d const & x) {
346 return exp_d<Vec8d, Vec8db, 0, 0>(x);
347 }
348
expm1(Vec8d const & x)349 static inline Vec8d expm1(Vec8d const & x) {
350 return exp_d<Vec8d, Vec8db, 1, 0>(x);
351 }
352
exp2(Vec8d const & x)353 static inline Vec8d exp2(Vec8d const & x) {
354 return exp_d<Vec8d, Vec8db, 0, 2>(x);
355 }
356
exp10(Vec8d const & x)357 static inline Vec8d exp10(Vec8d const & x) {
358 return exp_d<Vec8d, Vec8db, 0, 10>(x);
359 }
360
361 #endif // MAX_VECTOR_SIZE >= 512
362
363 // Template for exp function, single precision
364 // The limit of abs(x) is defined by max_x below
365 // This function does not produce denormals
366 // Template parameters:
367 // VTYPE: float vector type
368 // BVTYPE: boolean vector type
369 // M1: 0 for exp, 1 for expm1
370 // BA: 0 for exp, 1 for 0.5*exp, 2 for pow(2,x), 10 for pow(10,x)
371
372 template<class VTYPE, class BVTYPE, int M1, int BA>
exp_f(VTYPE const & initial_x)373 static inline VTYPE exp_f(VTYPE const & initial_x) {
374
375 // Taylor coefficients
376 const float P0expf = 1.f/2.f;
377 const float P1expf = 1.f/6.f;
378 const float P2expf = 1.f/24.f;
379 const float P3expf = 1.f/120.f;
380 const float P4expf = 1.f/720.f;
381 const float P5expf = 1.f/5040.f;
382
383 VTYPE x, r, x2, z, n2; // data vectors
384 BVTYPE inrange; // boolean vector
385
386 // maximum abs(x), value depends on BA, defined below
387 // The lower limit of x is slightly more restrictive than the upper limit.
388 // We are specifying the lower limit, except for BA = 1 because it is not used for negative x
389 float max_x;
390
391 if (BA <= 1) { // exp(x)
392 const float ln2f_hi = 0.693359375f;
393 const float ln2f_lo = -2.12194440e-4f;
394 max_x = (BA == 0) ? 87.3f : 89.0f;
395
396 x = initial_x;
397 r = round(initial_x*float(VM_LOG2E));
398 x = nmul_add(r, VTYPE(ln2f_hi), x); // x -= r * ln2f_hi;
399 x = nmul_add(r, VTYPE(ln2f_lo), x); // x -= r * ln2f_lo;
400 }
401 else if (BA == 2) { // pow(2,x)
402 max_x = 126.f;
403 r = round(initial_x);
404 x = initial_x - r;
405 x = x * (float)VM_LN2;
406 }
407 else if (BA == 10) { // pow(10,x)
408 max_x = 37.9f;
409 const float log10_2_hi = 0.301025391f; // log10(2) in two parts
410 const float log10_2_lo = 4.60503907E-6f;
411 x = initial_x;
412 r = round(initial_x*float(VM_LOG2E*VM_LN10));
413 x = nmul_add(r, VTYPE(log10_2_hi), x); // x -= r * log10_2_hi;
414 x = nmul_add(r, VTYPE(log10_2_lo), x); // x -= r * log10_2_lo;
415 x = x * (float)VM_LN10;
416 }
417 else { // undefined value of BA
418 return 0.;
419 }
420
421 x2 = x * x;
422 z = polynomial_5(x,P0expf,P1expf,P2expf,P3expf,P4expf,P5expf);
423 z = mul_add(z, x2, x); // z *= x2; z += x;
424
425 if (BA == 1) r--; // 0.5 * exp(x)
426
427 // multiply by power of 2
428 n2 = vm_pow2n(r);
429
430 if (M1 == 0) {
431 // exp
432 z = (z + 1.0f) * n2;
433 }
434 else {
435 // expm1
436 z = mul_add(z, n2, n2 - 1.0f); // z = z * n2 + (n2 - 1.0f);
437 }
438
439 // check for overflow
440 inrange = abs(initial_x) < max_x;
441 // check for INF and NAN
442 inrange &= is_finite(initial_x);
443
444 if (horizontal_and(inrange)) {
445 // fast normal path
446 return z;
447 }
448 else {
449 // overflow, underflow and NAN
450 r = select(sign_bit(initial_x), 0.f-M1, infinite_vec<VTYPE>()); // value in case of +/- overflow or INF
451 z = select(inrange, z, r); // +/- underflow
452 z = select(is_nan(initial_x), initial_x, z); // NAN goes through
453 return z;
454 }
455 }
456 #if defined(__AVX512ER__) && MAX_VECTOR_SIZE >= 512
457 // forward declarations of fast 512 bit versions
458 static Vec16f exp(Vec16f const & x);
459 static Vec16f exp2(Vec16f const & x);
460 static Vec16f exp10(Vec16f const & x);
461 #endif
462
463 // instances of exp_f template
exp(Vec4f const & x)464 static inline Vec4f exp(Vec4f const & x) {
465 #if defined(__AVX512ER__) && MAX_VECTOR_SIZE >= 512 // use faster 512 bit version
466 return _mm512_castps512_ps128(exp(Vec16f(_mm512_castps128_ps512(x))));
467 #else
468 return exp_f<Vec4f, Vec4fb, 0, 0>(x);
469 #endif
470 }
471
expm1(Vec4f const & x)472 static inline Vec4f expm1(Vec4f const & x) {
473 return exp_f<Vec4f, Vec4fb, 1, 0>(x);
474 }
475
exp2(Vec4f const & x)476 static inline Vec4f exp2(Vec4f const & x) {
477 #if defined(__AVX512ER__) && MAX_VECTOR_SIZE >= 512 // use faster 512 bit version
478 return _mm512_castps512_ps128(exp2(Vec16f(_mm512_castps128_ps512(x))));
479 #else
480 return exp_f<Vec4f, Vec4fb, 0, 2>(x);
481 #endif
482 }
483
exp10(Vec4f const & x)484 static inline Vec4f exp10(Vec4f const & x) {
485 #if defined(__AVX512ER__) && MAX_VECTOR_SIZE >= 512 // use faster 512 bit version
486 return _mm512_castps512_ps128(exp10(Vec16f(_mm512_castps128_ps512(x))));
487 #else
488 return exp_f<Vec4f, Vec4fb, 0, 10>(x);
489 #endif
490 }
491
492 #if MAX_VECTOR_SIZE >= 256
493
exp(Vec8f const & x)494 static inline Vec8f exp(Vec8f const & x) {
495 #if defined(__AVX512ER__) && MAX_VECTOR_SIZE >= 512 // use faster 512 bit version
496 return _mm512_castps512_ps256(exp(Vec16f(_mm512_castps256_ps512(x))));
497 #else
498 return exp_f<Vec8f, Vec8fb, 0, 0>(x);
499 #endif
500 }
501
expm1(Vec8f const & x)502 static inline Vec8f expm1(Vec8f const & x) {
503 return exp_f<Vec8f, Vec8fb, 1, 0>(x);
504 }
505
exp2(Vec8f const & x)506 static inline Vec8f exp2(Vec8f const & x) {
507 #if defined(__AVX512ER__) && MAX_VECTOR_SIZE >= 512 // use faster 512 bit version
508 return _mm512_castps512_ps256(exp2(Vec16f(_mm512_castps256_ps512(x))));
509 #else
510 return exp_f<Vec8f, Vec8fb, 0, 2>(x);
511 #endif
512 }
513
exp10(Vec8f const & x)514 static inline Vec8f exp10(Vec8f const & x) {
515 #if defined(__AVX512ER__) && MAX_VECTOR_SIZE >= 512 // use faster 512 bit version
516 return _mm512_castps512_ps256(exp10(Vec16f(_mm512_castps256_ps512(x))));
517 #else
518 return exp_f<Vec8f, Vec8fb, 0, 10>(x);
519 #endif
520 }
521
522 #endif // MAX_VECTOR_SIZE >= 256
523
524 #if MAX_VECTOR_SIZE >= 512
525
exp(Vec16f const & x)526 static inline Vec16f exp(Vec16f const & x) {
527 #ifdef __AVX512ER__ // AVX512ER instruction set includes fast exponential function
528 #ifdef VCL_FASTEXP
529 // very fast, but less precise for large x:
530 return _mm512_exp2a23_round_ps(x*float(VM_LOG2E), _MM_FROUND_NO_EXC);
531 #else
532 // best precision, also for large x:
533 const Vec16f log2e = float(VM_LOG2E);
534 const float ln2f_hi = 0.693359375f;
535 const float ln2f_lo = -2.12194440e-4f;
536 Vec16f x1 = x, r, y;
537 r = round(x1*log2e);
538 x1 = nmul_add(r, Vec16f(ln2f_hi), x1); // x -= r * ln2f_hi;
539 x1 = nmul_add(r, Vec16f(ln2f_lo), x1); // x -= r * ln2f_lo;
540 x1 = x1 * log2e;
541 y = _mm512_exp2a23_round_ps(r, _MM_FROUND_NO_EXC);
542 // y = vm_pow2n(r);
543 return y * _mm512_exp2a23_round_ps(x1, _MM_FROUND_NO_EXC);
544 #endif // VCL_FASTEXP
545 #else // no AVX512ER, use above template
546 return exp_f<Vec16f, Vec16fb, 0, 0>(x);
547 #endif
548 }
549
expm1(Vec16f const & x)550 static inline Vec16f expm1(Vec16f const & x) {
551 return exp_f<Vec16f, Vec16fb, 1, 0>(x);
552 }
553
exp2(Vec16f const & x)554 static inline Vec16f exp2(Vec16f const & x) {
555 #ifdef __AVX512ER__
556 return Vec16f(_mm512_exp2a23_round_ps(x, _MM_FROUND_NO_EXC));
557 #else
558 return exp_f<Vec16f, Vec16fb, 0, 2>(x);
559 #endif
560 }
561
exp10(Vec16f const & x)562 static inline Vec16f exp10(Vec16f const & x) {
563 #ifdef __AVX512ER__ // AVX512ER instruction set includes fast exponential function
564 #ifdef VCL_FASTEXP
565 // very fast, but less precise for large x:
566 return _mm512_exp2a23_round_ps(x*float(VM_LOG210), _MM_FROUND_NO_EXC);
567 #else
568 // best precision, also for large x:
569 const float log10_2_hi = 0.301025391f; // log10(2) in two parts
570 const float log10_2_lo = 4.60503907E-6f;
571 Vec16f x1 = x, r, y;
572 Vec16f log210 = float(VM_LOG210);
573 r = round(x1*log210);
574 x1 = nmul_add(r, Vec16f(log10_2_hi), x1); // x -= r * log10_2_hi
575 x1 = nmul_add(r, Vec16f(log10_2_lo), x1); // x -= r * log10_2_lo
576 x1 = x1 * log210;
577 // y = vm_pow2n(r);
578 y = _mm512_exp2a23_round_ps(r, _MM_FROUND_NO_EXC);
579 return y * _mm512_exp2a23_round_ps(x1, _MM_FROUND_NO_EXC);
580 #endif // VCL_FASTEXP
581 #else // no AVX512ER, use above template
582 return exp_f<Vec16f, Vec16fb, 0, 10>(x);
583 #endif
584 }
585
586 #endif // MAX_VECTOR_SIZE >= 512
587
588
589 /******************************************************************************
590 * Logarithm functions
591 ******************************************************************************/
592
593 // Helper functions: fraction_2(x) = fraction(x)*0.5
594
595 // Modified fraction function:
596 // Extract the fraction part of a floating point number, and divide by 2
597 // The fraction function is defined in vectorf128.h etc.
598 // fraction_2(x) = fraction(x)*0.5
599 // This version gives half the fraction without extra delay
600 // Does not work for x = 0
fraction_2(Vec4f const & a)601 static inline Vec4f fraction_2(Vec4f const & a) {
602 Vec4ui t1 = _mm_castps_si128(a); // reinterpret as 32-bit integer
603 Vec4ui t2 = Vec4ui((t1 & 0x007FFFFF) | 0x3F000000); // set exponent to 0 + bias
604 return _mm_castsi128_ps(t2);
605 }
606
fraction_2(Vec2d const & a)607 static inline Vec2d fraction_2(Vec2d const & a) {
608 Vec2uq t1 = _mm_castpd_si128(a); // reinterpret as 64-bit integer
609 Vec2uq t2 = Vec2uq((t1 & 0x000FFFFFFFFFFFFFll) | 0x3FE0000000000000ll); // set exponent to 0 + bias
610 return _mm_castsi128_pd(t2);
611 }
612
613 #if MAX_VECTOR_SIZE >= 256
614
fraction_2(Vec8f const & a)615 static inline Vec8f fraction_2(Vec8f const & a) {
616 #if defined (VECTORI256_H) && VECTORI256_H > 2 // 256 bit integer vectors are available, AVX2
617 Vec8ui t1 = _mm256_castps_si256(a); // reinterpret as 32-bit integer
618 Vec8ui t2 = (t1 & 0x007FFFFF) | 0x3F000000; // set exponent to 0 + bias
619 return _mm256_castsi256_ps(t2);
620 #else
621 return Vec8f(fraction_2(a.get_low()), fraction_2(a.get_high()));
622 #endif
623 }
624
fraction_2(Vec4d const & a)625 static inline Vec4d fraction_2(Vec4d const & a) {
626 #if VECTORI256_H > 1 // AVX2
627 Vec4uq t1 = _mm256_castpd_si256(a); // reinterpret as 64-bit integer
628 Vec4uq t2 = Vec4uq((t1 & 0x000FFFFFFFFFFFFFll) | 0x3FE0000000000000ll); // set exponent to 0 + bias
629 return _mm256_castsi256_pd(t2);
630 #else
631 return Vec4d(fraction_2(a.get_low()), fraction_2(a.get_high()));
632 #endif
633 }
634
635 #endif // MAX_VECTOR_SIZE >= 256
636
637 #if MAX_VECTOR_SIZE >= 512
638
fraction_2(Vec16f const & a)639 static inline Vec16f fraction_2(Vec16f const & a) {
640 #if INSTRSET >= 9 // 512 bit integer vectors are available, AVX512
641 return _mm512_getmant_ps(a, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_zero);
642 //return Vec16f(_mm512_getmant_ps(a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_zero)) * 0.5f;
643 #else
644 return Vec16f(fraction_2(a.get_low()), fraction_2(a.get_high()));
645 #endif
646 }
647
fraction_2(Vec8d const & a)648 static inline Vec8d fraction_2(Vec8d const & a) {
649 #if INSTRSET >= 9 // 512 bit integer vectors are available, AVX512
650 return _mm512_getmant_pd(a, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_zero);
651 //return Vec8d(_mm512_getmant_pd(a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_zero)) * 0.5;
652 #else
653 return Vec8d(fraction_2(a.get_low()), fraction_2(a.get_high()));
654 #endif
655 }
656
657 #endif // MAX_VECTOR_SIZE >= 512
658
659
660 // Helper functions: exponent_f(x) = exponent(x) as floating point number
661
662 union vm_ufi {
663 float f;
664 uint32_t i;
665 };
666
667 union vm_udi {
668 double d;
669 uint64_t i;
670 };
671
672 // extract exponent of a positive number x as a floating point number
673 //Note: the AVX512 version return -inf for x=0, the non-AVX versions return a negative number
exponent_f(Vec4f const & x)674 static inline Vec4f exponent_f(Vec4f const & x) {
675 #ifdef __AVX512VL__ // AVX512VL
676 // note: this version returns -inf for x=0
677 return _mm_getexp_ps(x);
678 #else
679 const float pow2_23 = 8388608.0f; // 2^23
680 const float bias = 127.f; // bias in exponent
681 const vm_ufi upow2_23 = {pow2_23};
682 Vec4ui a = reinterpret_i(x); // bit-cast x to integer
683 Vec4ui b = a >> 23; // shift down exponent to low bits
684 Vec4ui c = b | Vec4ui(upow2_23.i); // insert new exponent
685 Vec4f d = reinterpret_f(c); // bit-cast back to double
686 Vec4f e = d - (pow2_23 + bias); // subtract magic number and bias
687 return e;
688 #endif
689 }
690
exponent_f(Vec2d const & x)691 static inline Vec2d exponent_f(Vec2d const & x) {
692 #ifdef __AVX512VL__ // AVX512VL
693 // note: this version returns -inf for x=0
694 return _mm_getexp_pd(x);
695 #else
696 const double pow2_52 = 4503599627370496.0; // 2^52
697 const double bias = 1023.0; // bias in exponent
698 const vm_udi upow2_52 = {pow2_52};
699
700 Vec2uq a = reinterpret_i(x); // bit-cast x to integer
701 Vec2uq b = a >> 52; // shift down exponent to low bits
702 Vec2uq c = b | Vec2uq(upow2_52.i); // insert new exponent
703 Vec2d d = reinterpret_d(c); // bit-cast back to double
704 Vec2d e = d - (pow2_52 + bias); // subtract magic number and bias
705 return e;
706 #endif
707 }
708
709 #if MAX_VECTOR_SIZE >= 256
710
exponent_f(Vec8f const & x)711 static inline Vec8f exponent_f(Vec8f const & x) {
712 #ifdef __AVX512VL__ // AVX512VL
713 // note: this version returns -inf for x=0
714 return _mm256_getexp_ps(x);
715 #else
716 const float pow2_23 = 8388608.0f; // 2^23
717 const float bias = 127.f; // bias in exponent
718 const vm_ufi upow2_23 = {pow2_23};
719 Vec8ui a = reinterpret_i(x); // bit-cast x to integer
720 Vec8ui b = a >> 23; // shift down exponent to low bits
721 Vec8ui c = b | Vec8ui(upow2_23.i); // insert new exponent
722 Vec8f d = reinterpret_f(c); // bit-cast back to double
723 Vec8f e = d - (pow2_23 + bias); // subtract magic number and bias
724 return e;
725 #endif
726 }
727
728 // extract exponent of a positive number x as a floating point number
exponent_f(Vec4d const & x)729 static inline Vec4d exponent_f(Vec4d const & x) {
730 #ifdef __AVX512VL__ // AVX512VL
731 return _mm256_getexp_pd(x);
732 #else
733 const double pow2_52 = 4503599627370496.0; // 2^52
734 const double bias = 1023.0; // bias in exponent
735 const vm_udi upow2_52 = {pow2_52};
736
737 Vec4uq a = reinterpret_i(x); // bit-cast x to integer
738 Vec4uq b = a >> 52; // shift down exponent to low bits
739 Vec4uq c = b | Vec4uq(upow2_52.i); // insert new exponent
740 Vec4d d = reinterpret_d(c); // bit-cast back to double
741 Vec4d e = d - (pow2_52 + bias); // subtract magic number and bias
742 return e;
743 #endif
744 }
745
746 #endif // MAX_VECTOR_SIZE >= 256
747
748 #if MAX_VECTOR_SIZE >= 512
749
exponent_f(Vec16f const & x)750 static inline Vec16f exponent_f(Vec16f const & x) {
751 #if INSTRSET >= 9 // AVX512
752 // note: this version returns -inf for x=0
753 return _mm512_getexp_ps(x);
754 #else
755 return Vec16f(exponent_f(x.get_low()), exponent_f(x.get_high()));
756 #endif
757 }
758
759 // extract exponent of a positive number x as a floating point number
exponent_f(Vec8d const & x)760 static inline Vec8d exponent_f(Vec8d const & x) {
761 #if INSTRSET >= 9 // AVX512
762 // note: this returns -inf for x=0
763 return _mm512_getexp_pd(x);
764 #else
765 return Vec8d(exponent_f(x.get_low()), exponent_f(x.get_high()));
766 #endif
767 }
768
769 #endif // MAX_VECTOR_SIZE >= 512
770
771
772 // log function, double precision
773 // template parameters:
774 // VTYPE: f.p. vector type
775 // BVTYPE: boolean vector type
776 // M1: 0 for log, 1 for log1p
777 template<class VTYPE, class BVTYPE, int M1>
log_d(VTYPE const & initial_x)778 static inline VTYPE log_d(VTYPE const & initial_x) {
779
780 // define constants
781 const double ln2_hi = 0.693359375;
782 const double ln2_lo = -2.121944400546905827679E-4;
783 const double P0log = 7.70838733755885391666E0;
784 const double P1log = 1.79368678507819816313E1;
785 const double P2log = 1.44989225341610930846E1;
786 const double P3log = 4.70579119878881725854E0;
787 const double P4log = 4.97494994976747001425E-1;
788 const double P5log = 1.01875663804580931796E-4;
789 const double Q0log = 2.31251620126765340583E1;
790 const double Q1log = 7.11544750618563894466E1;
791 const double Q2log = 8.29875266912776603211E1;
792 const double Q3log = 4.52279145837532221105E1;
793 const double Q4log = 1.12873587189167450590E1;
794
795 VTYPE x1, x, x2, px, qx, res, fe; // data vectors
796 BVTYPE blend, overflow, underflow; // boolean vectors
797
798 if (M1 == 0) {
799 x1 = initial_x; // log(x)
800 }
801 else {
802 x1 = initial_x + 1.0; // log(x+1)
803 }
804 // separate mantissa from exponent
805 // VTYPE x = fraction(x1) * 0.5;
806 x = fraction_2(x1);
807 fe = exponent_f(x1);
808
809 blend = x > VM_SQRT2*0.5;
810 x = if_add(!blend, x, x); // conditional add
811 fe = if_add(blend, fe, 1.); // conditional add
812
813 if (M1 == 0) {
814 // log(x). Expand around 1.0
815 x -= 1.0;
816 }
817 else {
818 // log(x+1). Avoid loss of precision when adding 1 and later subtracting 1 if exponent = 0
819 x = select(fe==0., initial_x, x - 1.0);
820 }
821
822 // rational form
823 px = polynomial_5 (x, P0log, P1log, P2log, P3log, P4log, P5log);
824 x2 = x * x;
825 px *= x * x2;
826 qx = polynomial_5n(x, Q0log, Q1log, Q2log, Q3log, Q4log);
827 res = px / qx ;
828
829 // add exponent
830 res = mul_add(fe, ln2_lo, res); // res += fe * ln2_lo;
831 res += nmul_add(x2, 0.5, x); // res += x - 0.5 * x2;
832 res = mul_add(fe, ln2_hi, res); // res += fe * ln2_hi;
833
834 overflow = !is_finite(x1);
835 underflow = x1 < VM_SMALLEST_NORMAL; // denormals not supported by this functions
836
837 if (!horizontal_or(overflow | underflow)) {
838 // normal path
839 return res;
840 }
841 else {
842 // overflow and underflow
843 res = select(underflow, nan_vec<VTYPE>(NAN_LOG), res); // x1 < 0 gives NAN
844 res = select(x1 == 0. || is_subnormal(x1), -infinite_vec<VTYPE>(), res); // x1 == 0 gives -INF
845 res = select(overflow, x1, res); // INF or NAN goes through
846 res = select(is_inf(x1)&sign_bit(x1), nan_vec<VTYPE>(NAN_LOG), res); // -INF gives NAN
847 return res;
848 }
849 }
850
851
log(Vec2d const & x)852 static inline Vec2d log(Vec2d const & x) {
853 return log_d<Vec2d, Vec2db, 0>(x);
854 }
855
log1p(Vec2d const & x)856 static inline Vec2d log1p(Vec2d const & x) {
857 return log_d<Vec2d, Vec2db, 1>(x);
858 }
859
log2(Vec2d const & x)860 static inline Vec2d log2(Vec2d const & x) {
861 return VM_LOG2E * log_d<Vec2d, Vec2db, 0>(x);
862 }
863
log10(Vec2d const & x)864 static inline Vec2d log10(Vec2d const & x) {
865 return VM_LOG10E * log_d<Vec2d, Vec2db, 0>(x);
866 }
867
868 #if MAX_VECTOR_SIZE >= 256
869
log(Vec4d const & x)870 static inline Vec4d log(Vec4d const & x) {
871 return log_d<Vec4d, Vec4db, 0>(x);
872 }
873
log1p(Vec4d const & x)874 static inline Vec4d log1p(Vec4d const & x) {
875 return log_d<Vec4d, Vec4db, 1>(x);
876 }
877
log2(Vec4d const & x)878 static inline Vec4d log2(Vec4d const & x) {
879 return VM_LOG2E * log_d<Vec4d, Vec4db, 0>(x);
880 }
881
log10(Vec4d const & x)882 static inline Vec4d log10(Vec4d const & x) {
883 return VM_LOG10E * log_d<Vec4d, Vec4db, 0>(x);
884 }
885
886 #endif // MAX_VECTOR_SIZE >= 256
887
888 #if MAX_VECTOR_SIZE >= 512
889
log(Vec8d const & x)890 static inline Vec8d log(Vec8d const & x) {
891 return log_d<Vec8d, Vec8db, 0>(x);
892 }
893
log1p(Vec8d const & x)894 static inline Vec8d log1p(Vec8d const & x) {
895 return log_d<Vec8d, Vec8db, 1>(x);
896 }
897
log2(Vec8d const & x)898 static inline Vec8d log2(Vec8d const & x) {
899 return VM_LOG2E * log_d<Vec8d, Vec8db, 0>(x);
900 }
901
log10(Vec8d const & x)902 static inline Vec8d log10(Vec8d const & x) {
903 return VM_LOG10E * log_d<Vec8d, Vec8db, 0>(x);
904 }
905
906 #endif // MAX_VECTOR_SIZE >= 512
907
908
909
910 // log function, single precision
911 // template parameters:
912 // VTYPE: f.p. vector type
913 // ITYPE: integer vector type with same element size
914 // BVTYPE: boolean vector type
915 // BTYPEI: boolean vector type for ITYPE
916 // M1: 0 for log, 1 for log1p
917 template<class VTYPE, class ITYPE, class BVTYPE, class BTYPEI, int M1>
log_f(VTYPE const & initial_x)918 static inline VTYPE log_f(VTYPE const & initial_x) {
919
920 // define constants
921 const float ln2f_hi = 0.693359375f;
922 const float ln2f_lo = -2.12194440E-4f;
923 const float P0logf = 3.3333331174E-1f;
924 const float P1logf = -2.4999993993E-1f;
925 const float P2logf = 2.0000714765E-1f;
926 const float P3logf = -1.6668057665E-1f;
927 const float P4logf = 1.4249322787E-1f;
928 const float P5logf = -1.2420140846E-1f;
929 const float P6logf = 1.1676998740E-1f;
930 const float P7logf = -1.1514610310E-1f;
931 const float P8logf = 7.0376836292E-2f;
932
933 VTYPE x1, x, res, x2, fe; // data vectors
934 ITYPE e; // integer vector
935 BVTYPE blend, overflow, underflow; // boolean vectors
936
937 if (M1 == 0) {
938 x1 = initial_x; // log(x)
939 }
940 else {
941 x1 = initial_x + 1.0f; // log(x+1)
942 }
943
944 // separate mantissa from exponent
945 x = fraction_2(x1);
946 e = exponent(x1);
947
948 blend = x > float(VM_SQRT2*0.5);
949 x = if_add(!blend, x, x); // conditional add
950 e = if_add(BTYPEI(blend), e, ITYPE(1)); // conditional add
951 fe = to_float(e);
952
953 if (M1 == 0) {
954 // log(x). Expand around 1.0
955 x -= 1.0f;
956 }
957 else {
958 // log(x+1). Avoid loss of precision when adding 1 and later subtracting 1 if exponent = 0
959 x = select(BVTYPE(e==0), initial_x, x - 1.0f);
960 }
961
962 // Taylor expansion
963 res = polynomial_8(x, P0logf, P1logf, P2logf, P3logf, P4logf, P5logf, P6logf, P7logf, P8logf);
964 x2 = x*x;
965 res *= x2*x;
966
967 // add exponent
968 res = mul_add(fe, ln2f_lo, res); // res += ln2f_lo * fe;
969 res += nmul_add(x2, 0.5f, x); // res += x - 0.5f * x2;
970 res = mul_add(fe, ln2f_hi, res); // res += ln2f_hi * fe;
971
972 overflow = !is_finite(x1);
973 underflow = x1 < VM_SMALLEST_NORMALF; // denormals not supported by this functions
974
975 if (!horizontal_or(overflow | underflow)) {
976 // normal path
977 return res;
978 }
979 else {
980 // overflow and underflow
981 res = select(underflow, nan_vec<VTYPE>(NAN_LOG), res); // x1 < 0 gives NAN
982 res = select(x1 == 0.f || is_subnormal(x1), -infinite_vec<VTYPE>(), res); // x1 == 0 or denormal gives -INF
983 res = select(overflow, x1, res); // INF or NAN goes through
984 res = select(is_inf(x1)&sign_bit(x1), nan_vec<VTYPE>(NAN_LOG), res); // -INF gives NAN
985 return res;
986 }
987 }
988
log(Vec4f const & x)989 static inline Vec4f log(Vec4f const & x) {
990 return log_f<Vec4f, Vec4i, Vec4fb, Vec4ib, 0>(x);
991 }
992
log1p(Vec4f const & x)993 static inline Vec4f log1p(Vec4f const & x) {
994 return log_f<Vec4f, Vec4i, Vec4fb, Vec4ib, 1>(x);
995 }
996
log2(Vec4f const & x)997 static inline Vec4f log2(Vec4f const & x) {
998 return float(VM_LOG2E) * log_f<Vec4f, Vec4i, Vec4fb, Vec4ib, 0>(x);
999 }
1000
log10(Vec4f const & x)1001 static inline Vec4f log10(Vec4f const & x) {
1002 return float(VM_LOG10E) * log_f<Vec4f, Vec4i, Vec4fb, Vec4ib, 0>(x);
1003 }
1004
1005 #if MAX_VECTOR_SIZE >= 256
1006
log(Vec8f const & x)1007 static inline Vec8f log(Vec8f const & x) {
1008 return log_f<Vec8f, Vec8i, Vec8fb, Vec8ib, 0>(x);
1009 }
1010
log1p(Vec8f const & x)1011 static inline Vec8f log1p(Vec8f const & x) {
1012 return log_f<Vec8f, Vec8i, Vec8fb, Vec8ib, 1>(x);
1013 }
1014
log2(Vec8f const & x)1015 static inline Vec8f log2(Vec8f const & x) {
1016 return float(VM_LOG2E) * log_f<Vec8f, Vec8i, Vec8fb, Vec8ib, 0>(x);
1017 }
1018
log10(Vec8f const & x)1019 static inline Vec8f log10(Vec8f const & x) {
1020 return float(VM_LOG10E) * log_f<Vec8f, Vec8i, Vec8fb, Vec8ib, 0>(x);
1021 }
1022
1023 #endif // MAX_VECTOR_SIZE >= 256
1024
1025 #if MAX_VECTOR_SIZE >= 512
1026
log(Vec16f const & x)1027 static inline Vec16f log(Vec16f const & x) {
1028 return log_f<Vec16f, Vec16i, Vec16fb, Vec16ib, 0>(x);
1029 }
1030
log1p(Vec16f const & x)1031 static inline Vec16f log1p(Vec16f const & x) {
1032 return log_f<Vec16f, Vec16i, Vec16fb, Vec16ib, 1>(x);
1033 }
1034
log2(Vec16f const & x)1035 static inline Vec16f log2(Vec16f const & x) {
1036 return float(VM_LOG2E) * log_f<Vec16f, Vec16i, Vec16fb, Vec16ib, 0>(x);
1037 }
1038
log10(Vec16f const & x)1039 static inline Vec16f log10(Vec16f const & x) {
1040 return float(VM_LOG10E) * log_f<Vec16f, Vec16i, Vec16fb, Vec16ib, 0>(x);
1041 }
1042
1043 #endif // MAX_VECTOR_SIZE >= 512
1044
1045
1046 /******************************************************************************
1047 * Cube root and reciprocal cube root
1048 ******************************************************************************/
1049
1050 // cube root template, double precision
1051 // template parameters:
1052 // VTYPE: f.p. vector type
1053 // ITYPE: uint32_t integer vector type with same total number of bits
1054 // ITYPE2: uint64_t integer vector type with same total number of bits
1055 // BVTYPE: boolean vector type
1056 // CR: -1 for reciprocal cube root, 1 for cube root, 2 for cube root squared
1057 template<class VTYPE, class ITYPE, class ITYPE2, class BVTYPE, int CR>
cbrt_d(VTYPE const & x)1058 static inline VTYPE cbrt_d(VTYPE const & x) {
1059 const int iter = 7; // iteration count of x^(-1/3) loop
1060 int i;
1061 VTYPE xa, xa3, a, a2;
1062 ITYPE m1, m2;
1063 BVTYPE underflow;
1064 ITYPE2 q1(0x5540000000000000ULL); // exponent bias
1065 ITYPE2 q2(0x0005555500000000ULL); // exponent multiplier for 1/3
1066 ITYPE2 q3(0x0010000000000000ULL); // denormal limit
1067 const double one_third = 1./3.;
1068 const double four_third = 4./3.;
1069
1070 xa = abs(x);
1071 xa3 = one_third*xa;
1072
1073 // multiply exponent by -1/3
1074 m1 = reinterpret_i(xa);
1075 m2 = ITYPE(q1) - (m1 >> 20) * ITYPE(q2);
1076 a = reinterpret_d(m2);
1077 underflow = BVTYPE(ITYPE2(m1) < q3); // true if denormal or zero
1078
1079 // Newton Raphson iteration
1080 for (i = 0; i < iter-1; i++) {
1081 a2 = a * a;
1082 a = nmul_add(xa3, a2*a2, four_third*a); // a = four_third*a - xa3*a2*a2;
1083 }
1084 // last iteration with better precision
1085 a2 = a * a;
1086 a = mul_add(one_third, nmul_add(xa, a2*a2, a), a); // a = a + one_third*(a - xa*a2*a2);
1087
1088 if (CR == -1) { // reciprocal cube root
1089 // (note: gives wrong sign when input is INF)
1090 // generate INF if underflow
1091 a = select(underflow, infinite_vec<VTYPE>(), a);
1092 // get sign
1093 a = sign_combine(a, x);
1094 }
1095 else if (CR == 1) { // cube root
1096 a = a * a * x;
1097 // generate 0 if underflow
1098 a = select(underflow, 0., a);
1099 }
1100 else if (CR == 2) { // cube root squared
1101 // (note: gives wrong sign when input is INF)
1102 a = a * xa;
1103 // generate 0 if underflow
1104 a = select(underflow, 0., a);
1105 }
1106 return a;
1107 }
1108
1109 // template instances for cbrt and reciprocal_cbrt
1110
1111 // cube root
cbrt(Vec2d const & x)1112 static inline Vec2d cbrt(Vec2d const & x) {
1113 return cbrt_d<Vec2d, Vec4ui, Vec2uq, Vec2db, 1> (x);
1114 }
1115
1116 // reciprocal cube root
reciprocal_cbrt(Vec2d const & x)1117 static inline Vec2d reciprocal_cbrt(Vec2d const & x) {
1118 return cbrt_d<Vec2d, Vec4ui, Vec2uq, Vec2db, -1> (x);
1119 }
1120
1121 // square cube root
square_cbrt(Vec2d const & x)1122 static inline Vec2d square_cbrt(Vec2d const & x) {
1123 return cbrt_d<Vec2d, Vec4ui, Vec2uq, Vec2db, 2> (x);
1124 }
1125
1126 #if MAX_VECTOR_SIZE >= 256
1127
cbrt(Vec4d const & x)1128 static inline Vec4d cbrt(Vec4d const & x) {
1129 return cbrt_d<Vec4d, Vec8ui, Vec4uq, Vec4db, 1> (x);
1130 }
1131
reciprocal_cbrt(Vec4d const & x)1132 static inline Vec4d reciprocal_cbrt(Vec4d const & x) {
1133 return cbrt_d<Vec4d, Vec8ui, Vec4uq, Vec4db, -1> (x);
1134 }
1135
square_cbrt(Vec4d const & x)1136 static inline Vec4d square_cbrt(Vec4d const & x) {
1137 return cbrt_d<Vec4d, Vec8ui, Vec4uq, Vec4db, 2> (x);
1138 }
1139
1140 #endif // MAX_VECTOR_SIZE >= 256
1141
1142 #if MAX_VECTOR_SIZE >= 512
1143
cbrt(Vec8d const & x)1144 static inline Vec8d cbrt(Vec8d const & x) {
1145 return cbrt_d<Vec8d, Vec16ui, Vec8uq, Vec8db, 1> (x);
1146 }
1147
reciprocal_cbrt(Vec8d const & x)1148 static inline Vec8d reciprocal_cbrt(Vec8d const & x) {
1149 return cbrt_d<Vec8d, Vec16ui, Vec8uq, Vec8db, -1> (x);
1150 }
1151
square_cbrt(Vec8d const & x)1152 static inline Vec8d square_cbrt(Vec8d const & x) {
1153 return cbrt_d<Vec8d, Vec16ui, Vec8uq, Vec8db, 2> (x);
1154 }
1155
1156 #endif // MAX_VECTOR_SIZE >= 512
1157
1158
1159 // cube root template, single precision
1160 // template parameters:
1161 // VTYPE: f.p. vector type
1162 // ITYPE: uint32_t integer vector type
1163 // BVTYPE: boolean vector type
1164 // CR: -1 for reciprocal cube root, 1 for cube root, 2 for cube root squared
1165 template<class VTYPE, class ITYPE, class BVTYPE, int CR>
cbrt_f(VTYPE const & x)1166 static inline VTYPE cbrt_f(VTYPE const & x) {
1167
1168 const int iter = 6; // iteration count of x^(-1/3) loop
1169 int i;
1170 VTYPE xa, xa3, a, a2;
1171 ITYPE m1, m2;
1172 BVTYPE underflow;
1173 ITYPE q1(0x54800000U); // exponent bias
1174 ITYPE q2(0x002AAAAAU); // exponent multiplier for 1/3
1175 ITYPE q3(0x00800000U); // denormal limit
1176 const float one_third = float(1./3.);
1177 const float four_third = float(4./3.);
1178
1179 xa = abs(x);
1180 xa3 = one_third*xa;
1181
1182 // multiply exponent by -1/3
1183 m1 = reinterpret_i(xa);
1184 m2 = q1 - (m1 >> 23) * q2;
1185 a = reinterpret_f(m2);
1186
1187 underflow = BVTYPE(m1 < q3); // true if denormal or zero
1188
1189 // Newton Raphson iteration
1190 for (i = 0; i < iter-1; i++) {
1191 a2 = a*a;
1192 a = nmul_add(xa3, a2*a2, four_third*a); // a = four_third*a - xa3*a2*a2;
1193 }
1194 // last iteration with better precision
1195 a2 = a*a;
1196 a = mul_add(one_third, nmul_add(xa, a2*a2, a), a); //a = a + one_third*(a - xa*a2*a2);
1197
1198 if (CR == -1) { // reciprocal cube root
1199 // generate INF if underflow
1200 a = select(underflow, infinite_vec<VTYPE>(), a);
1201 // get sign
1202 a = sign_combine(a, x);
1203 }
1204 else if (CR == 1) { // cube root
1205 a = a * a * x;
1206 // generate 0 if underflow
1207 a = select(underflow, 0., a);
1208 }
1209 else if (CR == 2) { // cube root squared
1210 a = a * xa;
1211 // generate 0 if underflow
1212 a = select(underflow, 0., a);
1213 }
1214 return a;
1215 }
1216
1217 // template instances for cbrt and reciprocal_cbrt
1218
1219 // cube root
cbrt(Vec4f const & x)1220 static inline Vec4f cbrt(Vec4f const & x) {
1221 return cbrt_f<Vec4f, Vec4ui, Vec4fb, 1> (x);
1222 }
1223
1224 // reciprocal cube root
reciprocal_cbrt(Vec4f const & x)1225 static inline Vec4f reciprocal_cbrt(Vec4f const & x) {
1226 return cbrt_f<Vec4f, Vec4ui, Vec4fb, -1> (x);
1227 }
1228
1229 // square cube root
square_cbrt(Vec4f const & x)1230 static inline Vec4f square_cbrt(Vec4f const & x) {
1231 return cbrt_f<Vec4f, Vec4ui, Vec4fb, 2> (x);
1232 }
1233
1234 #if MAX_VECTOR_SIZE >= 256
1235
cbrt(Vec8f const & x)1236 static inline Vec8f cbrt(Vec8f const & x) {
1237 return cbrt_f<Vec8f, Vec8ui, Vec8fb, 1> (x);
1238 }
1239
reciprocal_cbrt(Vec8f const & x)1240 static inline Vec8f reciprocal_cbrt(Vec8f const & x) {
1241 return cbrt_f<Vec8f, Vec8ui, Vec8fb, -1> (x);
1242 }
1243
square_cbrt(Vec8f const & x)1244 static inline Vec8f square_cbrt(Vec8f const & x) {
1245 return cbrt_f<Vec8f, Vec8ui, Vec8fb, 2> (x);
1246 }
1247
1248 #endif // MAX_VECTOR_SIZE >= 256
1249
1250 #if MAX_VECTOR_SIZE >= 512
1251
cbrt(Vec16f const & x)1252 static inline Vec16f cbrt(Vec16f const & x) {
1253 return cbrt_f<Vec16f, Vec16ui, Vec16fb, 1> (x);
1254 }
1255
reciprocal_cbrt(Vec16f const & x)1256 static inline Vec16f reciprocal_cbrt(Vec16f const & x) {
1257 return cbrt_f<Vec16f, Vec16ui, Vec16fb, -1> (x);
1258 }
1259
square_cbrt(Vec16f const & x)1260 static inline Vec16f square_cbrt(Vec16f const & x) {
1261 return cbrt_f<Vec16f, Vec16ui, Vec16fb, 2> (x);
1262 }
1263
1264 // Helper function for power function: insert special values of pow(x,y) when x=0:
1265 // y<0 -> inf, y=0 -> 1, y>0 -> 0, y=nan -> nan
wm_pow_case_x0(Vec8db const & xiszero,Vec8d const & y,Vec8d const & z)1266 static inline Vec8d wm_pow_case_x0(Vec8db const & xiszero, Vec8d const & y, Vec8d const & z) {
1267 #if INSTRSET >= 9
1268 const __m512i table = Vec8q(0x85858A00);
1269 return _mm512_mask_fixupimm_pd(z, xiszero, y, table, 0);
1270 #else
1271 return select(xiszero, select(y < 0., infinite_vec<Vec8d>(), select(y == 0., Vec8d(1.), Vec8d(0.))), z);
1272 #endif
1273 }
1274
1275 #endif // MAX_VECTOR_SIZE >= 512
1276
1277 #if MAX_VECTOR_SIZE >= 256
1278
wm_pow_case_x0(Vec4db const & xiszero,Vec4d const & y,Vec4d const & z)1279 static inline Vec4d wm_pow_case_x0(Vec4db const & xiszero, Vec4d const & y, Vec4d const & z) {
1280 //#if defined __AVX512VL__ && defined ?
1281 // const __m256i table = Vec4q(0x85858A00);
1282 // return _mm256_mask_fixupimm_pd(z, xiszero, y, table, 0);
1283 //#else
1284 return select(xiszero, select(y < 0., infinite_vec<Vec4d>(), select(y == 0., Vec4d(1.), Vec4d(0.))), z);
1285 //#endif
1286 }
1287 #endif
1288
wm_pow_case_x0(Vec2db const & xiszero,Vec2d const & y,Vec2d const & z)1289 static inline Vec2d wm_pow_case_x0(Vec2db const & xiszero, Vec2d const & y, Vec2d const & z) {
1290 //#if defined __AVX512VL__ && defined ?
1291 // const __m128i table = Vec2q(0x85858A00);
1292 // return _mm_mask_fixupimm_pd(z, xiszero, y, table, 0);
1293 //#else
1294 return select(xiszero, select(y < 0., infinite_vec<Vec2d>(), select(y == 0., Vec2d(1.), Vec2d(0.))), z);
1295 //#endif
1296 }
1297
1298 // ****************************************************************************
1299 // pow template, double precision
1300 // ****************************************************************************
1301 // Calculate x to the power of y.
1302
1303 // Precision is important here because rounding errors get multiplied by y.
1304 // The logarithm is calculated with extra precision, and the exponent is
1305 // calculated separately.
1306 // The logarithm is calculated by Pad\E9 approximation with 6'th degree
1307 // polynomials. A 7'th degree would be preferred for best precision by high y.
1308 // The alternative method: log(x) = z + z^3*R(z)/S(z), where z = 2(x-1)/(x+1)
1309 // did not give better precision.
1310
1311 // Template parameters:
1312 // VTYPE: data vector type
1313 // ITYPE: signed integer vector type
1314 // BVTYPE: boolean vector type
1315 template <class VTYPE, class ITYPE, class BVTYPE>
pow_template_d(VTYPE const & x0,VTYPE const & y)1316 static inline VTYPE pow_template_d(VTYPE const & x0, VTYPE const & y) {
1317
1318 // define constants
1319 const double ln2d_hi = 0.693145751953125; // log(2) in extra precision, high bits
1320 const double ln2d_lo = 1.42860682030941723212E-6; // low bits of log(2)
1321 const double log2e = VM_LOG2E; // 1/log(2)
1322 const double pow2_52 = 4503599627370496.0; // 2^52
1323
1324 // coefficients for Pad\E9 polynomials
1325 const double P0logl = 2.0039553499201281259648E1;
1326 const double P1logl = 5.7112963590585538103336E1;
1327 const double P2logl = 6.0949667980987787057556E1;
1328 const double P3logl = 2.9911919328553073277375E1;
1329 const double P4logl = 6.5787325942061044846969E0;
1330 const double P5logl = 4.9854102823193375972212E-1;
1331 const double P6logl = 4.5270000862445199635215E-5;
1332 const double Q0logl = 6.0118660497603843919306E1;
1333 const double Q1logl = 2.1642788614495947685003E2;
1334 const double Q2logl = 3.0909872225312059774938E2;
1335 const double Q3logl = 2.2176239823732856465394E2;
1336 const double Q4logl = 8.3047565967967209469434E1;
1337 const double Q5logl = 1.5062909083469192043167E1;
1338
1339 // Taylor coefficients for exp function, 1/n!
1340 const double p2 = 1./2.;
1341 const double p3 = 1./6.;
1342 const double p4 = 1./24.;
1343 const double p5 = 1./120.;
1344 const double p6 = 1./720.;
1345 const double p7 = 1./5040.;
1346 const double p8 = 1./40320.;
1347 const double p9 = 1./362880.;
1348 const double p10 = 1./3628800.;
1349 const double p11 = 1./39916800.;
1350 const double p12 = 1./479001600.;
1351 const double p13 = 1./6227020800.;
1352
1353 // data vectors
1354 VTYPE x, x1, x2;
1355 VTYPE px, qx, ef, yr, v, z, z1;
1356 VTYPE lg, lg1, lg2;
1357 VTYPE lgerr, x2err;
1358 VTYPE e1, e2, e3, ee;
1359 // integer vectors
1360 ITYPE ei, ej, yodd;
1361 // boolean vectors
1362 BVTYPE blend, xzero, xnegative;
1363 BVTYPE overflow, underflow, xfinite, yfinite, efinite;
1364
1365 // remove sign
1366 x1 = abs(x0);
1367
1368 // Separate mantissa from exponent
1369 // This gives the mantissa * 0.5
1370 x = fraction_2(x1);
1371
1372 // reduce range of x = +/- sqrt(2)/2
1373 blend = x > VM_SQRT2*0.5;
1374 x = if_add(!blend, x, x); // conditional add
1375
1376 // Pade approximation
1377 // Higher precision than in log function. Still higher precision wanted
1378 x -= 1.0;
1379 x2 = x*x;
1380 px = polynomial_6 (x, P0logl, P1logl, P2logl, P3logl, P4logl, P5logl, P6logl);
1381 px *= x * x2;
1382 qx = polynomial_6n (x, Q0logl, Q1logl, Q2logl, Q3logl, Q4logl, Q5logl);
1383 lg1 = px / qx;
1384
1385 // extract exponent
1386 ef = exponent_f(x1);
1387 ef = if_add(blend, ef, 1.); // conditional add
1388
1389 // multiply exponent by y
1390 // nearest integer e1 goes into exponent of result, remainder yr is added to log
1391 e1 = round(ef * y);
1392 yr = mul_sub_x(ef, y, e1); // calculate remainder yr. precision very important here
1393
1394 // add initial terms to Pade expansion
1395 lg = nmul_add(0.5, x2, x) + lg1; // lg = (x - 0.5 * x2) + lg1;
1396 // calculate rounding errors in lg
1397 // rounding error in multiplication 0.5*x*x
1398 x2err = mul_sub_x(0.5*x, x, 0.5*x2);
1399 // rounding error in additions and subtractions
1400 lgerr = mul_add(0.5, x2, lg - x) - lg1; // lgerr = ((lg - x) + 0.5 * x2) - lg1;
1401
1402 // extract something for the exponent
1403 e2 = round(lg * y * VM_LOG2E);
1404 // subtract this from lg, with extra precision
1405 v = mul_sub_x(lg, y, e2 * ln2d_hi);
1406 v = nmul_add(e2, ln2d_lo, v); // v -= e2 * ln2d_lo;
1407
1408 // add remainder from ef * y
1409 v = mul_add(yr, VM_LN2, v); // v += yr * VM_LN2;
1410
1411 // correct for previous rounding errors
1412 v = nmul_add(lgerr + x2err, y, v); // v -= (lgerr + x2err) * y;
1413
1414 // exp function
1415
1416 // extract something for the exponent if possible
1417 x = v;
1418 e3 = round(x*log2e);
1419 // high precision multiplication not needed here because abs(e3) <= 1
1420 x = nmul_add(e3, VM_LN2, x); // x -= e3 * VM_LN2;
1421
1422 z = polynomial_13m(x, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13);
1423 z = z + 1.0;
1424
1425 // contributions to exponent
1426 ee = e1 + e2 + e3;
1427 ei = round_to_int64_limited(ee);
1428 // biased exponent of result:
1429 ej = ei + (ITYPE(reinterpret_i(z)) >> 52);
1430 // check exponent for overflow and underflow
1431 overflow = BVTYPE(ej >= 0x07FF) | (ee > 3000.);
1432 underflow = BVTYPE(ej <= 0x0000) | (ee < -3000.);
1433
1434 // add exponent by integer addition
1435 z = reinterpret_d(ITYPE(reinterpret_i(z)) + (ei << 52));
1436
1437 // check for special cases
1438 xfinite = is_finite(x0);
1439 yfinite = is_finite(y);
1440 efinite = is_finite(ee);
1441 xzero = is_zero_or_subnormal(x0);
1442 xnegative = x0 < 0.;
1443
1444 // check for overflow and underflow
1445 if (horizontal_or(overflow | underflow)) {
1446 // handle errors
1447 z = select(underflow, VTYPE(0.), z);
1448 z = select(overflow, infinite_vec<VTYPE>(), z);
1449 }
1450
1451 // check for x == 0
1452 z = wm_pow_case_x0(xzero, y, z);
1453 //z = select(xzero, select(y < 0., infinite_vec<VTYPE>(), select(y == 0., VTYPE(1.), VTYPE(0.))), z);
1454
1455 // check for x < 0. y must be integer
1456 if (horizontal_or(xnegative)) {
1457 // test if y odd
1458 yodd = ITYPE(reinterpret_i(abs(y) + pow2_52)) << 63; // convert y to integer and shift bit 0 to position of sign bit
1459 z1 = z | (x0 & VTYPE(reinterpret_d(yodd))); // apply sign if y odd
1460 z1 = select(y == round(y), z1, nan_vec<VTYPE>(NAN_POW)); // NAN if y not integer
1461 z = select(xnegative, z1, z);
1462 }
1463
1464 // check for range errors
1465 if (horizontal_and(xfinite & yfinite & (efinite | xzero))) {
1466 // fast return if no special cases
1467 return z;
1468 }
1469
1470 // handle special error cases
1471 z = select(yfinite & efinite, z, select(x1 == 1., VTYPE(1.), select((x1 > 1.) ^ sign_bit(y), infinite_vec<VTYPE>(), 0.)));
1472 yodd = ITYPE(reinterpret_i(abs(y) + pow2_52)) << 63; // same as above
1473 z = select(xfinite, z, select(y == 0., VTYPE(1.), select(y < 0., VTYPE(0.), infinite_vec<VTYPE>() | ( VTYPE(reinterpret_d(yodd)) & x0))));
1474 z = select(is_nan(x0), select(is_nan(y), x0 | y, x0), select(is_nan(y), y, z));
1475 return z;
1476 }
1477
1478
1479 //This template is in vectorf128.h to prevent implicit conversion of float y to int when float version is not defined:
1480 //template <typename TT> static Vec2d pow(Vec2d const & a, TT n);
1481
1482 // instantiations of pow_template_d:
1483 template <>
1484 inline Vec2d pow<Vec2d>(Vec2d const & x, Vec2d const & y) {
1485 return pow_template_d<Vec2d, Vec2q, Vec2db>(x, y);
1486 }
1487
1488 template <>
1489 inline Vec2d pow<double>(Vec2d const & x, double const & y) {
1490 return pow_template_d<Vec2d, Vec2q, Vec2db>(x, y);
1491 }
1492 template <>
1493 inline Vec2d pow<float>(Vec2d const & x, float const & y) {
1494 return pow_template_d<Vec2d, Vec2q, Vec2db>(x, (double)y);
1495 }
1496
1497 #if MAX_VECTOR_SIZE >= 256
1498
1499 template <>
1500 inline Vec4d pow<Vec4d>(Vec4d const & x, Vec4d const & y) {
1501 return pow_template_d<Vec4d, Vec4q, Vec4db>(x, y);
1502 }
1503
1504 template <>
1505 inline Vec4d pow<double>(Vec4d const & x, double const & y) {
1506 return pow_template_d<Vec4d, Vec4q, Vec4db>(x, y);
1507 }
1508
1509 template <>
1510 inline Vec4d pow<float>(Vec4d const & x, float const & y) {
1511 return pow_template_d<Vec4d, Vec4q, Vec4db>(x, (double)y);
1512 }
1513
1514 #endif // MAX_VECTOR_SIZE >= 256
1515
1516 #if MAX_VECTOR_SIZE >= 512
1517
1518 template <>
1519 inline Vec8d pow<Vec8d>(Vec8d const & x, Vec8d const & y) {
1520 return pow_template_d<Vec8d, Vec8q, Vec8db>(x, y);
1521 }
1522
1523 template <>
1524 inline Vec8d pow<double>(Vec8d const & x, double const & y) {
1525 return pow_template_d<Vec8d, Vec8q, Vec8db>(x, y);
1526 }
1527
1528 template <>
1529 inline Vec8d pow<float>(Vec8d const & x, float const & y) {
1530 return pow_template_d<Vec8d, Vec8q, Vec8db>(x, (double)y);
1531 }
1532
1533 // Helper function for power function: insert special values of pow(x,y) when x=0:
1534 // y<0 -> inf, y=0 -> 1, y>0 -> 0, y=nan -> nan
wm_pow_case_x0(Vec16fb const & xiszero,Vec16f const & y,Vec16f const & z)1535 static inline Vec16f wm_pow_case_x0(Vec16fb const & xiszero, Vec16f const & y, Vec16f const & z) {
1536 #if INSTRSET >= 9
1537 const __m512i table = Vec16ui(0x85858A00);
1538 return _mm512_mask_fixupimm_ps(z, xiszero, y, table, 0);
1539 #else
1540 return select(xiszero, select(y < 0.f, infinite_vec<Vec16f>(), select(y == 0.f, Vec16f(1.f), Vec16f(0.f))), z);
1541 #endif
1542 }
1543
1544 #endif // MAX_VECTOR_SIZE >= 512
1545
1546 #if MAX_VECTOR_SIZE >= 256
wm_pow_case_x0(Vec8fb const & xiszero,Vec8f const & y,Vec8f const & z)1547 static inline Vec8f wm_pow_case_x0(Vec8fb const & xiszero, Vec8f const & y, Vec8f const & z) {
1548 return select(xiszero, select(y < 0.f, infinite_vec<Vec8f>(), select(y == 0.f, Vec8f(1.f), Vec8f(0.f))), z);
1549 }
1550 #endif
1551
wm_pow_case_x0(Vec4fb const & xiszero,Vec4f const & y,Vec4f const & z)1552 static inline Vec4f wm_pow_case_x0(Vec4fb const & xiszero, Vec4f const & y, Vec4f const & z) {
1553 return select(xiszero, select(y < 0.f, infinite_vec<Vec4f>(), select(y == 0.f, Vec4f(1.f), Vec4f(0.f))), z);
1554 }
1555
1556 // ****************************************************************************
1557 // pow template, single precision
1558 // ****************************************************************************
1559
1560 // Template parameters:
1561 // VTYPE: data vector type
1562 // ITYPE: signed integer vector type
1563 // BVTYPE: boolean vector type
1564 // Calculate x to the power of y
1565 template <class VTYPE, class ITYPE, class BVTYPE>
pow_template_f(VTYPE const & x0,VTYPE const & y)1566 static inline VTYPE pow_template_f(VTYPE const & x0, VTYPE const & y) {
1567
1568 // define constants
1569 const float ln2f_hi = 0.693359375f;
1570 const float ln2f_lo = -2.12194440e-4f;
1571 //const float max_expf = 87.3f;
1572 const float log2e = float(VM_LOG2E); // 1/log(2)
1573 const float pow2_23 = 8388608.0f; // 2^23
1574
1575 const float P0logf = 3.3333331174E-1f;
1576 const float P1logf = -2.4999993993E-1f;
1577 const float P2logf = 2.0000714765E-1f;
1578 const float P3logf = -1.6668057665E-1f;
1579 const float P4logf = 1.4249322787E-1f;
1580 const float P5logf = -1.2420140846E-1f;
1581 const float P6logf = 1.1676998740E-1f;
1582 const float P7logf = -1.1514610310E-1f;
1583 const float P8logf = 7.0376836292E-2f;
1584
1585 // Taylor coefficients for exp function, 1/n!
1586 const float p2expf = 1.f/2.f;
1587 const float p3expf = 1.f/6.f;
1588 const float p4expf = 1.f/24.f;
1589 const float p5expf = 1.f/120.f;
1590 const float p6expf = 1.f/720.f;
1591 const float p7expf = 1.f/5040.f;
1592
1593 // data vectors
1594 VTYPE x, x1, x2;
1595 VTYPE ef, yr, v, z, z1;
1596 VTYPE lg, lg1;
1597 VTYPE lgerr, x2err;
1598 VTYPE e1, e2, e3, ee;
1599 // integer vectors
1600 ITYPE ei, ej, yodd;
1601 // boolean vectors
1602 BVTYPE blend, xzero, xnegative;
1603 BVTYPE overflow, underflow, xfinite, yfinite, efinite;
1604
1605 // remove sign
1606 x1 = abs(x0);
1607
1608 // Separate mantissa from exponent
1609 // This gives the mantissa * 0.5
1610 x = fraction_2(x1);
1611
1612 // reduce range of x = +/- sqrt(2)/2
1613 blend = x > float(VM_SQRT2 * 0.5);
1614 x = if_add(!blend, x, x); // conditional add
1615
1616 // Taylor expansion, high precision
1617 x -= 1.0f;
1618 x2 = x * x;
1619 lg1 = polynomial_8(x, P0logf, P1logf, P2logf, P3logf, P4logf, P5logf, P6logf, P7logf, P8logf);
1620 lg1 *= x2 * x;
1621
1622 // extract exponent
1623 ef = exponent_f(x1);
1624 ef = if_add(blend, ef, 1.0f); // conditional add
1625
1626 // multiply exponent by y
1627 // nearest integer e1 goes into exponent of result, remainder yr is added to log
1628 e1 = round(ef * y);
1629 yr = mul_sub_x(ef, y, e1); // calculate remainder yr. precision very important here
1630
1631 // add initial terms to expansion
1632 lg = nmul_add(0.5f, x2, x) + lg1; // lg = (x - 0.5f * x2) + lg1;
1633
1634 // calculate rounding errors in lg
1635 // rounding error in multiplication 0.5*x*x
1636 x2err = mul_sub_x(0.5f*x, x, 0.5f * x2);
1637 // rounding error in additions and subtractions
1638 lgerr = mul_add(0.5f, x2, lg - x) - lg1; // lgerr = ((lg - x) + 0.5f * x2) - lg1;
1639
1640 // extract something for the exponent
1641 e2 = round(lg * y * float(VM_LOG2E));
1642 // subtract this from lg, with extra precision
1643 v = mul_sub_x(lg, y, e2 * ln2f_hi);
1644 v = nmul_add(e2, ln2f_lo, v); // v -= e2 * ln2f_lo;
1645
1646 // correct for previous rounding errors
1647 v -= mul_sub(lgerr + x2err, y, yr * float(VM_LN2)); // v -= (lgerr + x2err) * y - yr * float(VM_LN2) ;
1648
1649 // exp function
1650
1651 // extract something for the exponent if possible
1652 x = v;
1653 e3 = round(x*log2e);
1654 // high precision multiplication not needed here because abs(e3) <= 1
1655 x = nmul_add(e3, float(VM_LN2), x); // x -= e3 * float(VM_LN2);
1656
1657 // Taylor polynomial
1658 x2 = x * x;
1659 z = polynomial_5(x, p2expf, p3expf, p4expf, p5expf, p6expf, p7expf)*x2 + x + 1.0f;
1660
1661 // contributions to exponent
1662 ee = e1 + e2 + e3;
1663 ei = round_to_int(ee);
1664 // biased exponent of result:
1665 ej = ei + (ITYPE(reinterpret_i(z)) >> 23);
1666 // check exponent for overflow and underflow
1667 overflow = BVTYPE(ej >= 0x0FF) | (ee > 300.f);
1668 underflow = BVTYPE(ej <= 0x000) | (ee < -300.f);
1669
1670 // add exponent by integer addition
1671 z = reinterpret_f(ITYPE(reinterpret_i(z)) + (ei << 23)); // the extra 0x10000 is shifted out here
1672
1673 // check for special cases
1674 xfinite = is_finite(x0);
1675 yfinite = is_finite(y);
1676 efinite = is_finite(ee);
1677
1678 xzero = is_zero_or_subnormal(x0);
1679 xnegative = x0 < 0.f;
1680
1681 // check for overflow and underflow
1682 if (horizontal_or(overflow | underflow)) {
1683 // handle errors
1684 z = select(underflow, VTYPE(0.f), z);
1685 z = select(overflow, infinite_vec<VTYPE>(), z);
1686 }
1687
1688 // check for x == 0
1689 z = wm_pow_case_x0(xzero, y, z);
1690 //z = select(xzero, select(y < 0.f, infinite_vec<VTYPE>(), select(y == 0.f, VTYPE(1.f), VTYPE(0.f))), z);
1691
1692 // check for x < 0. y must be integer
1693 if (horizontal_or(xnegative)) {
1694 // test if y odd
1695 yodd = ITYPE(reinterpret_i(abs(y) + pow2_23)) << 31; // convert y to integer and shift bit 0 to position of sign bit
1696 z1 = z | (x0 & VTYPE(reinterpret_f(yodd))); // apply sign if y odd
1697 z1 = select(y == round(y), z1, nan_vec<VTYPE>(NAN_POW)); // NAN if y not integer
1698 z = select(xnegative, z1, z);
1699 }
1700
1701 // check for range errors
1702 if (horizontal_and(xfinite & yfinite & (efinite | xzero))) {
1703 // fast return if no special cases
1704 return z;
1705 }
1706
1707 // handle special error cases
1708 z = select(yfinite & efinite, z, select(x1 == 1.f, VTYPE(1.f), select((x1 > 1.f) ^ sign_bit(y), infinite_vec<VTYPE>(), 0.f)));
1709 yodd = ITYPE(reinterpret_i(abs(y) + pow2_23)) << 31; // same as above
1710 z = select(xfinite, z, select(y == 0.f, VTYPE(1.f), select(y < 0.f, VTYPE(0.f), infinite_vec<VTYPE>() | (VTYPE(reinterpret_f(yodd)) & x0))));
1711 z = select(is_nan(x0), select(is_nan(y), x0 | y, x0), select(is_nan(y), y, z));
1712 return z;
1713 }
1714
1715 //This template is in vectorf128.h to prevent implicit conversion of float y to int when float version is not defined:
1716 //template <typename TT> static Vec4f pow(Vec4f const & a, TT n);
1717
1718 template <>
1719 inline Vec4f pow<Vec4f>(Vec4f const & x, Vec4f const & y) {
1720 return pow_template_f<Vec4f, Vec4i, Vec4fb>(x, y);
1721 }
1722
1723 template <>
1724 inline Vec4f pow<float>(Vec4f const & x, float const & y) {
1725 return pow_template_f<Vec4f, Vec4i, Vec4fb>(x, y);
1726 }
1727
1728 template <>
1729 inline Vec4f pow<double>(Vec4f const & x, double const & y) {
1730 return pow_template_f<Vec4f, Vec4i, Vec4fb>(x, (float)y);
1731 }
1732
1733 #if MAX_VECTOR_SIZE >= 256
1734
1735 template <>
1736 inline Vec8f pow<Vec8f>(Vec8f const & x, Vec8f const & y) {
1737 return pow_template_f<Vec8f, Vec8i, Vec8fb>(x, y);
1738 }
1739
1740 template <>
1741 inline Vec8f pow<float>(Vec8f const & x, float const & y) {
1742 return pow_template_f<Vec8f, Vec8i, Vec8fb>(x, y);
1743 }
1744 template <>
1745 inline Vec8f pow<double>(Vec8f const & x, double const & y) {
1746 return pow_template_f<Vec8f, Vec8i, Vec8fb>(x, (float)y);
1747 }
1748
1749 #endif // MAX_VECTOR_SIZE >= 256
1750
1751 #if MAX_VECTOR_SIZE >= 512
1752
1753 template <>
1754 inline Vec16f pow<Vec16f>(Vec16f const & x, Vec16f const & y) {
1755 return pow_template_f<Vec16f, Vec16i, Vec16fb>(x, y);
1756 }
1757
1758 template <>
1759 inline Vec16f pow<float>(Vec16f const & x, float const & y) {
1760 return pow_template_f<Vec16f, Vec16i, Vec16fb>(x, y);
1761 }
1762
1763 template <>
1764 inline Vec16f pow<double>(Vec16f const & x, double const & y) {
1765 return pow_template_f<Vec16f, Vec16i, Vec16fb>(x, (float)y);
1766 }
1767
1768 #endif // MAX_VECTOR_SIZE >= 512
1769
1770
1771 // *************************************************************
1772 // power function with rational exponent
1773 // *************************************************************
1774 // Power function with rational exponent: x^(a/b)
1775 // Template must be defined as class to allow partial template specialization
1776 template <int a, int b>
1777 class Power_rational {
1778 public:
1779 // overloaded member function for each vector type
pow(Vec4f const & x)1780 Vec4f pow(Vec4f const & x) {
1781 Vec4f y = x;
1782 // negative x allowed when b odd or a even
1783 // (if a is even then either b is odd or a/b can be reduced,
1784 // but we can check a even anyway at no cost to be sure)
1785 if (a == 0) return 1.f;
1786 if ((b | ~a) & 1) y = abs(y);
1787 y = pow(y, float(double(a)/double(b)));
1788 if (a & b & 1) y = sign_combine(y, x); // apply sign if a and b both odd
1789 if ((a ^ b) >= 0) y = select(x == 0.f, 0.f, y); // zero allowed for positive a and b
1790 return y;
1791 }
pow(Vec2d const & x)1792 Vec2d pow(Vec2d const & x) {
1793 Vec2d y = x;
1794 if (a == 0) return 1.;
1795 if ((b | ~a) & 1) y = abs(y);
1796 y = pow(y, double((long double)a/(long double)b));
1797 if (a & b & 1) y = sign_combine(y, x);
1798 if ((a ^ b) >= 0) y = select(x == 0., 0., y);
1799 return y;
1800 }
1801 #if MAX_VECTOR_SIZE >= 256
pow(Vec8f const & x)1802 Vec8f pow(Vec8f const & x) {
1803 Vec8f y = x;
1804 if (a == 0) return 1.f;
1805 if ((b | ~a) & 1) y = abs(y);
1806 y = pow(y, float(double(a)/double(b)));
1807 if (a & b & 1) y = sign_combine(y, x);
1808 if ((a ^ b) >= 0) y = select(x == 0.f, 0.f, y);
1809 return y;
1810 }
pow(Vec4d const & x)1811 Vec4d pow(Vec4d const & x) {
1812 Vec4d y = x;
1813 if (a == 0) return 1.;
1814 if ((b | ~a) & 1) y = abs(y);
1815 y = pow(y, double((long double)a/(long double)b));
1816 if (a & b & 1) y = sign_combine(y, x);
1817 if ((a ^ b) >= 0) y = select(x == 0., 0., y);
1818 return y;
1819 }
1820 #endif // MAX_VECTOR_SIZE >= 256
1821 #if MAX_VECTOR_SIZE >= 512
pow(Vec16f const & x)1822 Vec16f pow(Vec16f const & x) {
1823 Vec16f y = x;
1824 if (a == 0) return 1.f;
1825 if ((b | ~a) & 1) y = abs(y);
1826 y = pow(y, float(double(a)/double(b)));
1827 if (a & b & 1) y = sign_combine(y, x);
1828 if ((a ^ b) >= 0) y = select(x == 0.f, 0.f, y);
1829 return y;
1830 }
pow(Vec8d const & x)1831 Vec8d pow(Vec8d const & x) {
1832 Vec8d y = x;
1833 if (a == 0) return 1.;
1834 if ((b | ~a) & 1) y = abs(y);
1835 y = pow(y, double((long double)a/(long double)b));
1836 if (a & b & 1) y = sign_combine(y, x);
1837 if ((a ^ b) >= 0) y = select(x == 0., 0., y);
1838 return y;
1839 }
1840 #endif // MAX_VECTOR_SIZE >= 512
1841 };
1842
1843 // partial specialization for b = 1
1844 template<int a>
1845 class Power_rational<a,1> {
1846 public:
1847 template<class VTYPE>
pow(VTYPE const & x)1848 VTYPE pow(VTYPE const & x) {return pow_n<a>(x);}
1849 };
1850
1851 // partial specialization for b = 2
1852 template<int a>
1853 class Power_rational<a,2> {
1854 public:
1855 template<class VTYPE>
pow(VTYPE const & x)1856 VTYPE pow(VTYPE const & x) {
1857 VTYPE y = pow_n<(a > 0 ? a/2 : (a-1)/2)>(x);
1858 if (a & 1) y *= sqrt(x);
1859 return y;
1860 }
1861 };
1862
1863 // full specialization for a = 1, b = 2
1864 template<>
1865 class Power_rational<1,2> {
1866 public:
1867 template<class VTYPE>
pow(VTYPE const & x)1868 VTYPE pow(VTYPE const & x) {
1869 return sqrt(x);
1870 }
1871 };
1872
1873 // full specialization for a = -1, b = 2
1874 template<>
1875 class Power_rational<-1,2> {
1876 public:
1877 template<class VTYPE>
pow(VTYPE const & x)1878 VTYPE pow(VTYPE const & x) {
1879 // (this is faster than iteration method on modern CPUs)
1880 return VTYPE(1.f) / sqrt(x);
1881 }
1882 };
1883
1884 // partial specialization for b = 3
1885 template<int a>
1886 class Power_rational<a,3> {
1887 public:
1888 template<class VTYPE>
pow(VTYPE const & x)1889 VTYPE pow(VTYPE const & x) {
1890 VTYPE t;
1891 switch (a % 3) {
1892 case -2:
1893 t = reciprocal_cbrt(x);
1894 t *= t;
1895 if (a == -2) return t;
1896 t = t / pow_n<(-a-2)/3>(x);
1897 break;
1898 case -1:
1899 t = reciprocal_cbrt(x);
1900 if (a == -1) return t;
1901 t = t / pow_n<(-a-1)/3>(x);
1902 break;
1903 case 0:
1904 t = pow_n<a/3>(x);
1905 break;
1906 case 1:
1907 t = cbrt(x);
1908 if (a == 1) return t;
1909 t = t * pow_n<a/3>(x);
1910 break;
1911 case 2:
1912 t = square_cbrt(x);
1913 if (a == 2) return t;
1914 t = t * pow_n<a/3>(x);
1915 break;
1916 }
1917 return t;
1918 }
1919 };
1920
1921 // partial specialization for b = 4
1922 template<int a>
1923 class Power_rational<a,4> {
1924 public:
1925 template<class VTYPE>
pow(VTYPE const & x)1926 VTYPE pow(VTYPE const & x) {
1927 VTYPE t, s1, s2;
1928 s1 = sqrt(x);
1929 if (a & 1) s2 = sqrt(s1);
1930 switch (a % 4) {
1931 case -3:
1932 t = s2 / pow_n<1+(-a)/4>(x);
1933 break;
1934 case -2:
1935 t = s1 / pow_n<1+(-a)/4>(x);
1936 break;
1937 case -1:
1938 if (a != -1) s2 *= pow_n<(-a)/4>(x);
1939 t = VTYPE(1.f) / s2;
1940 break;
1941 case 0: default:
1942 t = pow_n<a/4>(x);
1943 break;
1944 case 1:
1945 t = s2;
1946 if (a != 1) t *= pow_n<a/4>(x);
1947 break;
1948 case 2:
1949 t = s1;
1950 if (a != 2) t *= pow_n<a/4>(x);
1951 break;
1952 case 3:
1953 t = s1 * s2;
1954 if (a != 3) t *= pow_n<a/4>(x);
1955 break;
1956 }
1957 return t;
1958 }
1959 };
1960
1961 // partial specialization for b = 6
1962 template<int a>
1963 class Power_rational<a,6> {
1964 public:
1965 template<class VTYPE>
pow(VTYPE const & x)1966 VTYPE pow(VTYPE const & x) {
1967 VTYPE t, s1, s2, s3;
1968 switch (a % 6) {
1969 case -5:
1970 t = reciprocal_cbrt(x);
1971 t = t * t * sqrt(t);
1972 if (a != -5) t /= pow_n<(-a)/6>(x);
1973 break;
1974 case -4:
1975 t = reciprocal_cbrt(x);
1976 t *= t;
1977 if (a != -4) t /= pow_n<(-a)/6>(x);
1978 break;
1979 case -3:
1980 t = pow_n<a/6>(x);
1981 t /= sqrt(x);
1982 break;
1983 case -2:
1984 t = reciprocal_cbrt(x);
1985 if (a != -2) t /= pow_n<(-a)/6>(x);
1986 break;
1987 case -1:
1988 t = sqrt(reciprocal_cbrt(x));
1989 if (a != -1) t /= pow_n<(-a)/6>(x);
1990 break;
1991 case 0: default:
1992 t = pow_n<a/6>(x);
1993 break;
1994 case 1:
1995 t = sqrt(cbrt(x));
1996 if (a != 1) t *= pow_n<a/6>(x);
1997 break;
1998 case 2:
1999 t = cbrt(x);
2000 if (a != 2) t *= pow_n<a/6>(x);
2001 break;
2002 case 3:
2003 t = sqrt(x);
2004 if (a != 3) t *= pow_n<a/6>(x);
2005 break;
2006 case 4:
2007 t = square_cbrt(x);
2008 if (a != 4) t *= pow_n<a/6>(x);
2009 break;
2010 case 5:
2011 t = cbrt(x);
2012 t = t * t * sqrt(t);
2013 if (a != 5) t *= pow_n<a/6>(x);
2014 break;
2015 }
2016 return t;
2017 }
2018 };
2019
2020 // partial specialization for b = 8
2021 template<int a>
2022 class Power_rational<a,8> {
2023 public:
2024 template<class VTYPE>
pow(VTYPE const & x)2025 VTYPE pow(VTYPE const & x) {
2026 VTYPE t, s1, s2, s3;
2027 s1 = sqrt(x); // x^(1/2)
2028 if (a & 3) s2 = sqrt(s1); // x^(1/4)
2029 if (a & 1) s3 = sqrt(s2); // x^(1/8)
2030 switch (a % 8) {
2031 case -7:
2032 t = s3 / pow_n<1+(-a)/8>(x);
2033 break;
2034 case -6:
2035 t = s2 / pow_n<1+(-a)/8>(x);
2036 break;
2037 case -5:
2038 t = s3 * (s2 / pow_n<1+(-a)/8>(x));
2039 break;
2040 case -4:
2041 t = s1 / pow_n<1+(-a)/8>(x);
2042 break;
2043 case -3:
2044 t = s3 * (s1 / pow_n<1+(-a)/8>(x));
2045 break;
2046 case -2:
2047 if (a != -2) s2 *= pow_n<(-a)/8>(x);
2048 t = VTYPE(1.f) / s2;
2049 break;
2050 case -1:
2051 if (a != -1) s3 *= pow_n<(-a)/8>(x);
2052 t = VTYPE(1.f) / s3;
2053 break;
2054 case 0: default:
2055 t = pow_n<a/8>(x);
2056 break;
2057 case 1:
2058 t = s3;
2059 if (a != 1) t *= pow_n<a/8>(x);
2060 break;
2061 case 2:
2062 t = s2;
2063 if (a != 2) t *= pow_n<a/8>(x);
2064 break;
2065 case 3:
2066 t = s2 * s3;
2067 if (a != 3) t *= pow_n<a/8>(x);
2068 break;
2069 case 4:
2070 t = s1;
2071 if (a != 4) t *= pow_n<a/8>(x);
2072 break;
2073 case 5:
2074 t = s1 * s3;
2075 if (a != 5) t *= pow_n<a/8>(x);
2076 break;
2077 case 6:
2078 t = s1 * s2;
2079 if (a != 6) t *= pow_n<a/8>(x);
2080 break;
2081 case 7:
2082 t = s2 * s3;
2083 if (a != 7) s1 *= pow_n<a/8>(x);
2084 t *= s1;
2085 break;
2086
2087 }
2088 return t;
2089 }
2090 };
2091
2092 // macro to call template class member function pow
2093 #define pow_ratio(x, a, b) (Power_rational<(b)<0 ? -(a):(a), (b)<0 ? -(b):(b)> ().pow(x))
2094
2095
2096 /******************************************************************************
2097 * Detect NAN codes
2098 *
2099 * These functions return the code hidden in a NAN. The sign bit is ignored
2100 ******************************************************************************/
2101
nan_code(Vec4f const & x)2102 static inline Vec4i nan_code(Vec4f const & x) {
2103 Vec4i a = reinterpret_i(x);
2104 Vec4ib b = (a & 0x7F800000) == 0x7F800000; // check if NAN/INF
2105 return a & 0x007FFFFF & Vec4i(b); // isolate NAN code bits
2106 }
2107
2108 // This function returns the code hidden in a NAN. The sign bit is ignored
nan_code(Vec2d const & x)2109 static inline Vec2q nan_code(Vec2d const & x) {
2110 Vec2q a = reinterpret_i(x);
2111 Vec2q const m = 0x7FF0000000000000;
2112 Vec2q const n = 0x000FFFFFFFFFFFFF;
2113 Vec2qb b = (a & m) == m; // check if NAN/INF
2114 return a & n & Vec2q(b); // isolate NAN code bits
2115 }
2116
2117 #if MAX_VECTOR_SIZE >= 256
2118
2119 // This function returns the code hidden in a NAN. The sign bit is ignored
nan_code(Vec8f const & x)2120 static inline Vec8i nan_code(Vec8f const & x) {
2121 Vec8i a = reinterpret_i(x);
2122 Vec8ib b = (a & 0x7F800000) == 0x7F800000; // check if NAN/INF
2123 return a & 0x007FFFFF & Vec8i(b); // isolate NAN code bits
2124 }
2125
2126 // This function returns the code hidden in a NAN. The sign bit is ignored
nan_code(Vec4d const & x)2127 static inline Vec4q nan_code(Vec4d const & x) {
2128 Vec4q a = reinterpret_i(x);
2129 Vec4q const m = 0x7FF0000000000000;
2130 Vec4q const n = 0x000FFFFFFFFFFFFF;
2131 Vec4qb b = (a & m) == m; // check if NAN/INF
2132 return a & n & Vec4q(b); // isolate NAN code bits
2133 }
2134
2135 #endif // MAX_VECTOR_SIZE >= 256
2136 #if MAX_VECTOR_SIZE >= 512
2137
2138 // This function returns the code hidden in a NAN. The sign bit is ignored
nan_code(Vec16f const & x)2139 static inline Vec16i nan_code(Vec16f const & x) {
2140 Vec16i a = Vec16i(reinterpret_i(x));
2141 Vec16ib b = (a & 0x7F800000) == 0x7F800000; // check if NAN/INF
2142 return a & 0x007FFFFF & Vec16i(b); // isolate NAN code bits
2143 }
2144
2145 // This function returns the code hidden in a NAN. The sign bit is ignored
nan_code(Vec8d const & x)2146 static inline Vec8q nan_code(Vec8d const & x) {
2147 Vec8q a = Vec8q(reinterpret_i(x));
2148 Vec8q const m = 0x7FF0000000000000;
2149 Vec8q const n = 0x000FFFFFFFFFFFFF;
2150 Vec8qb b = (a & m) == m; // check if NAN/INF
2151 return a & n & Vec8q(b); // isolate NAN code bits
2152 }
2153
2154 #endif // MAX_VECTOR_SIZE >= 512
2155
2156 #ifdef VCL_NAMESPACE
2157 }
2158 #endif
2159
2160 #endif // VECTORMATH_EXP_H
2161