1 /**************************** vectormath_hyp.h ******************************
2 * Author: Agner Fog
3 * Date created: 2014-07-09
4 * Last modified: 2015-02-10
5 * Version: 1.16
6 * Project: vector classes
7 * Description:
8 * Header file containing inline vector functions of hyperbolic and inverse
9 * hyperbolic functions:
10 * sinh hyperbolic sine
11 * cosh hyperbolic cosine
12 * tanh hyperbolic tangent
13 * asinh inverse hyperbolic sine
14 * acosh inverse hyperbolic cosine
15 * atanh inverse hyperbolic tangent
16 *
17 * Theory, methods and inspiration based partially on these sources:
18 * > Moshier, Stephen Lloyd Baluk: Methods and programs for mathematical functions.
19 * Ellis Horwood, 1989.
20 * > VDT library developed on CERN by Danilo Piparo, Thomas Hauth and
21 * Vincenzo Innocente, 2012, https://svnweb.cern.ch/trac/vdt
22 * > Cephes math library by Stephen L. Moshier 1992,
23 * http://www.netlib.org/cephes/
24 *
25 * For detailed instructions, see vectormath_common.h and VectorClass.pdf
26 *
27 * (c) Copyright 2014-2016 GNU General Public License http://www.gnu.org/licenses
28 ******************************************************************************/
29
30 #ifndef VECTORMATH_HYP_H
31 #define VECTORMATH_HYP_H 1
32
33 #include "vectormath_exp.h"
34
35 #ifdef VCL_NAMESPACE
36 namespace VCL_NAMESPACE {
37 #endif
38
39 /******************************************************************************
40 * Hyperbolic functions
41 ******************************************************************************/
42
43 // Template for sinh function, double precision
44 // This function does not produce denormals
45 // Template parameters:
46 // VTYPE: double vector type
47 // BVTYPE: boolean vector type
48 template<class VTYPE, class BVTYPE>
sinh_d(VTYPE const & x0)49 static inline VTYPE sinh_d(VTYPE const & x0) {
50 // The limit of abs(x) is 709.7, as defined by max_x in vectormath_exp.h for 0.5*exp(x).
51
52 // Coefficients
53 const double p0 = -3.51754964808151394800E5;
54 const double p1 = -1.15614435765005216044E4;
55 const double p2 = -1.63725857525983828727E2;
56 const double p3 = -7.89474443963537015605E-1;
57
58 const double q0 = -2.11052978884890840399E6;
59 const double q1 = 3.61578279834431989373E4;
60 const double q2 = -2.77711081420602794433E2;
61 const double q3 = 1.0;
62
63 // data vectors
64 VTYPE x, x2, y1, y2;
65 BVTYPE x_small; // boolean vector
66
67 x = abs(x0);
68 x_small = x <= 1.0; // use Pade approximation if abs(x) <= 1
69
70 if (horizontal_or(x_small)) {
71 // At least one element needs small method
72 x2 = x*x;
73 y1 = polynomial_3(x2, p0, p1, p2, p3) / polynomial_3(x2, q0, q1, q2, q3);
74 y1 = mul_add(y1, x*x2, x); // y1 = x + x2*(x*y1);
75 }
76 if (!horizontal_and(x_small)) {
77 // At least one element needs big method
78 y2 = exp_d<VTYPE, BVTYPE, 0, 1>(x); // 0.5 * exp(x)
79 y2 -= 0.25 / y2; // - 0.5 * exp(-x)
80 }
81 y1 = select(x_small, y1, y2); // choose method
82 y1 = sign_combine(y1, x0); // get original sign
83
84 return y1;
85 }
86
87 // instances of sinh_d template
sinh(Vec2d const & x)88 static inline Vec2d sinh(Vec2d const & x) {
89 return sinh_d<Vec2d, Vec2db>(x);
90 }
91
92 #if MAX_VECTOR_SIZE >= 256
sinh(Vec4d const & x)93 static inline Vec4d sinh(Vec4d const & x) {
94 return sinh_d<Vec4d, Vec4db>(x);
95 }
96 #endif // MAX_VECTOR_SIZE >= 256
97
98 #if MAX_VECTOR_SIZE >= 512
sinh(Vec8d const & x)99 static inline Vec8d sinh(Vec8d const & x) {
100 return sinh_d<Vec8d, Vec8db>(x);
101 }
102 #endif // MAX_VECTOR_SIZE >= 512
103
104
105 // Template for sinh function, single precision
106 // This function does not produce denormals
107 // Template parameters:
108 // VTYPE: double vector type
109 // BVTYPE: boolean vector type
110 template<class VTYPE, class BVTYPE>
sinh_f(VTYPE const & x0)111 static inline VTYPE sinh_f(VTYPE const & x0) {
112 // The limit of abs(x) is 89.0, as defined by max_x in vectormath_exp.h for 0.5*exp(x).
113
114 // Coefficients
115 const float r0 = 1.66667160211E-1f;
116 const float r1 = 8.33028376239E-3f;
117 const float r2 = 2.03721912945E-4f;
118
119 // data vectors
120 VTYPE x, x2, y1, y2;
121 BVTYPE x_small; // boolean vector
122
123 x = abs(x0);
124 x_small = x <= 1.0f; // use polynomial approximation if abs(x) <= 1
125
126 if (horizontal_or(x_small)) {
127 // At least one element needs small method
128 x2 = x*x;
129 y1 = polynomial_2(x2, r0, r1, r2);
130 y1 = mul_add(y1, x2*x, x); // y1 = x + x2*(x*y1);
131 }
132 if (!horizontal_and(x_small)) {
133 // At least one element needs big method
134 y2 = exp_f<VTYPE, BVTYPE, 0, 1>(x); // 0.5 * exp(x)
135 y2 -= 0.25f / y2; // - 0.5 * exp(-x)
136 }
137 y1 = select(x_small, y1, y2); // choose method
138 y1 = sign_combine(y1, x0); // get original sign
139
140 return y1;
141 }
142
143 // instances of sinh_f template
sinh(Vec4f const & x)144 static inline Vec4f sinh(Vec4f const & x) {
145 return sinh_f<Vec4f, Vec4fb>(x);
146 }
147
148 #if MAX_VECTOR_SIZE >= 256
sinh(Vec8f const & x)149 static inline Vec8f sinh(Vec8f const & x) {
150 return sinh_f<Vec8f, Vec8fb>(x);
151 }
152 #endif // MAX_VECTOR_SIZE >= 256
153
154 #if MAX_VECTOR_SIZE >= 512
sinh(Vec16f const & x)155 static inline Vec16f sinh(Vec16f const & x) {
156 return sinh_f<Vec16f, Vec16fb>(x);
157 }
158 #endif // MAX_VECTOR_SIZE >= 512
159
160
161 // Template for cosh function, double precision
162 // This function does not produce denormals
163 // Template parameters:
164 // VTYPE: double vector type
165 // BVTYPE: boolean vector type
166 template<class VTYPE, class BVTYPE>
cosh_d(VTYPE const & x0)167 static inline VTYPE cosh_d(VTYPE const & x0) {
168 // The limit of abs(x) is 709.7, as defined by max_x in vectormath_exp.h for 0.5*exp(x).
169
170 // data vectors
171 VTYPE x, y;
172
173 x = abs(x0);
174 y = exp_d<VTYPE, BVTYPE, 0, 1>(x); // 0.5 * exp(x)
175 y += 0.25 / y; // + 0.5 * exp(-x)
176 return y;
177 }
178
179 // instances of sinh_d template
cosh(Vec2d const & x)180 static inline Vec2d cosh(Vec2d const & x) {
181 return cosh_d<Vec2d, Vec2db>(x);
182 }
183
184 #if MAX_VECTOR_SIZE >= 256
cosh(Vec4d const & x)185 static inline Vec4d cosh(Vec4d const & x) {
186 return cosh_d<Vec4d, Vec4db>(x);
187 }
188 #endif // MAX_VECTOR_SIZE >= 256
189
190 #if MAX_VECTOR_SIZE >= 512
cosh(Vec8d const & x)191 static inline Vec8d cosh(Vec8d const & x) {
192 return cosh_d<Vec8d, Vec8db>(x);
193 }
194 #endif // MAX_VECTOR_SIZE >= 512
195
196
197 // Template for cosh function, single precision
198 // This function does not produce denormals
199 // Template parameters:
200 // VTYPE: double vector type
201 // BVTYPE: boolean vector type
202 template<class VTYPE, class BVTYPE>
cosh_f(VTYPE const & x0)203 static inline VTYPE cosh_f(VTYPE const & x0) {
204 // The limit of abs(x) is 89.0, as defined by max_x in vectormath_exp.h for 0.5*exp(x).
205
206 // data vectors
207 VTYPE x, y;
208
209 x = abs(x0);
210 y = exp_f<VTYPE, BVTYPE, 0, 1>(x); // 0.5 * exp(x)
211 y += 0.25f / y; // + 0.5 * exp(-x)
212 return y;
213 }
214
215 // instances of sinh_d template
cosh(Vec4f const & x)216 static inline Vec4f cosh(Vec4f const & x) {
217 return cosh_f<Vec4f, Vec4fb>(x);
218 }
219
220 #if MAX_VECTOR_SIZE >= 256
cosh(Vec8f const & x)221 static inline Vec8f cosh(Vec8f const & x) {
222 return cosh_f<Vec8f, Vec8fb>(x);
223 }
224 #endif // MAX_VECTOR_SIZE >= 256
225
226 #if MAX_VECTOR_SIZE >= 512
cosh(Vec16f const & x)227 static inline Vec16f cosh(Vec16f const & x) {
228 return cosh_f<Vec16f, Vec16fb>(x);
229 }
230 #endif // MAX_VECTOR_SIZE >= 512
231
232
233 // Template for tanh function, double precision
234 // This function does not produce denormals
235 // Template parameters:
236 // VTYPE: double vector type
237 // BVTYPE: boolean vector type
238 template<class VTYPE, class BVTYPE>
tanh_d(VTYPE const & x0)239 static inline VTYPE tanh_d(VTYPE const & x0) {
240
241 // Coefficients
242 const double p0 = -1.61468768441708447952E3;
243 const double p1 = -9.92877231001918586564E1;
244 const double p2 = -9.64399179425052238628E-1;
245
246 const double q0 = 4.84406305325125486048E3;
247 const double q1 = 2.23548839060100448583E3;
248 const double q2 = 1.12811678491632931402E2;
249 const double q3 = 1.0;
250
251 // data vectors
252 VTYPE x, x2, y1, y2;
253 BVTYPE x_small, x_big; // boolean vectors
254
255 x = abs(x0);
256 x_small = x <= 0.625; // use Pade approximation if abs(x) <= 5/8
257
258 if (horizontal_or(x_small)) {
259 // At least one element needs small method
260 x2 = x*x;
261 y1 = polynomial_2(x2, p0, p1, p2) / polynomial_3(x2, q0, q1, q2, q3);
262 y1 = mul_add(y1, x2*x, x); // y1 = x + x2*(x*y1);
263 }
264 if (!horizontal_and(x_small)) {
265 // At least one element needs big method
266 y2 = exp(x+x); // exp(2*x)
267 y2 = 1.0 - 2.0 / (y2 + 1.0); // tanh(x)
268 }
269 x_big = x > 350.;
270 y1 = select(x_small, y1, y2); // choose method
271 y1 = select(x_big, 1.0, y1); // avoid overflow
272 y1 = sign_combine(y1, x0); // get original sign
273
274 return y1;
275 }
276
277 // instances of tanh_d template
tanh(Vec2d const & x)278 static inline Vec2d tanh(Vec2d const & x) {
279 return tanh_d<Vec2d, Vec2db>(x);
280 }
281
282 #if MAX_VECTOR_SIZE >= 256
tanh(Vec4d const & x)283 static inline Vec4d tanh(Vec4d const & x) {
284 return tanh_d<Vec4d, Vec4db>(x);
285 }
286 #endif // MAX_VECTOR_SIZE >= 256
287
288 #if MAX_VECTOR_SIZE >= 512
tanh(Vec8d const & x)289 static inline Vec8d tanh(Vec8d const & x) {
290 return tanh_d<Vec8d, Vec8db>(x);
291 }
292 #endif // MAX_VECTOR_SIZE >= 512
293
294
295 // Template for tanh function, single precision
296 // This function does not produce denormals
297 // Template parameters:
298 // VTYPE: double vector type
299 // BVTYPE: boolean vector type
300 template<class VTYPE, class BVTYPE>
tanh_f(VTYPE const & x0)301 static inline VTYPE tanh_f(VTYPE const & x0) {
302 // The limit of abs(x) is 89.0, as defined by max_x in vectormath_exp.h for 0.5*exp(x).
303
304 // Coefficients
305 const float r0 = -3.33332819422E-1f;
306 const float r1 = 1.33314422036E-1f;
307 const float r2 = -5.37397155531E-2f;
308 const float r3 = 2.06390887954E-2f;
309 const float r4 = -5.70498872745E-3f;
310
311 // data vectors
312 VTYPE x, x2, y1, y2;
313 BVTYPE x_small, x_big; // boolean vectors
314
315 x = abs(x0);
316 x_small = x <= 0.625f; // use polynomial approximation if abs(x) <= 5/8
317
318 if (horizontal_or(x_small)) {
319 // At least one element needs small method
320 x2 = x*x;
321 y1 = polynomial_4(x2, r0, r1, r2, r3, r4);
322 y1 = mul_add(y1, x2*x, x); // y1 = x + (x2*x)*y1;
323 }
324 if (!horizontal_and(x_small)) {
325 // At least one element needs big method
326 y2 = exp(x+x); // exp(2*x)
327 y2 = 1.0f - 2.0f / (y2 + 1.0f); // tanh(x)
328 }
329 x_big = x > 44.4f;
330 y1 = select(x_small, y1, y2); // choose method
331 y1 = select(x_big, 1.0f, y1); // avoid overflow
332 y1 = sign_combine(y1, x0); // get original sign
333
334 return y1;
335 }
336
337 // instances of tanh_f template
tanh(Vec4f const & x)338 static inline Vec4f tanh(Vec4f const & x) {
339 return tanh_f<Vec4f, Vec4fb>(x);
340 }
341
342 #if MAX_VECTOR_SIZE >= 256
tanh(Vec8f const & x)343 static inline Vec8f tanh(Vec8f const & x) {
344 return tanh_f<Vec8f, Vec8fb>(x);
345 }
346 #endif // MAX_VECTOR_SIZE >= 256
347
348 #if MAX_VECTOR_SIZE >= 512
tanh(Vec16f const & x)349 static inline Vec16f tanh(Vec16f const & x) {
350 return tanh_f<Vec16f, Vec16fb>(x);
351 }
352 #endif // MAX_VECTOR_SIZE >= 512
353
354
355
356 /******************************************************************************
357 * Inverse hyperbolic functions
358 ******************************************************************************/
359
360 // Template for asinh function, double precision
361 // This function does not produce denormals
362 // Template parameters:
363 // VTYPE: double vector type
364 // BVTYPE: boolean vector type
365 template<class VTYPE, class BVTYPE>
asinh_d(VTYPE const & x0)366 static inline VTYPE asinh_d(VTYPE const & x0) {
367
368 // Coefficients
369 const double p0 = -5.56682227230859640450E0;
370 const double p1 = -9.09030533308377316566E0;
371 const double p2 = -4.37390226194356683570E0;
372 const double p3 = -5.91750212056387121207E-1;
373 const double p4 = -4.33231683752342103572E-3;
374
375 const double q0 = 3.34009336338516356383E1;
376 const double q1 = 6.95722521337257608734E1;
377 const double q2 = 4.86042483805291788324E1;
378 const double q3 = 1.28757002067426453537E1;
379 const double q4 = 1.0;
380
381 // data vectors
382 VTYPE x, x2, y1, y2;
383 BVTYPE x_small, x_huge; // boolean vectors
384
385 x2 = x0 * x0;
386 x = abs(x0);
387 x_small = x <= 0.533; // use Pade approximation if abs(x) <= 0.5
388 // both methods give the highest error close to 0.5. this limit is adjusted for minimum error
389 x_huge = x > 1.E20; // simple approximation, avoid overflow
390
391 if (horizontal_or(x_small)) {
392 // At least one element needs small method
393 y1 = polynomial_4(x2, p0, p1, p2, p3, p4) / polynomial_4(x2, q0, q1, q2, q3, q4);
394 y1 = mul_add(y1, x2*x, x); // y1 = x + (x2*x)*y1;
395 }
396 if (!horizontal_and(x_small)) {
397 // At least one element needs big method
398 y2 = log(x + sqrt(x2 + 1.0));
399 if (horizontal_or(x_huge)) {
400 // At least one element needs huge method to avoid overflow
401 y2 = select(x_huge, log(x) + VM_LN2, y2);
402 }
403 }
404 y1 = select(x_small, y1, y2); // choose method
405 y1 = sign_combine(y1, x0); // get original sign
406
407 return y1;
408 }
409
410 // instances of asinh_d template
asinh(Vec2d const & x)411 static inline Vec2d asinh(Vec2d const & x) {
412 return asinh_d<Vec2d, Vec2db>(x);
413 }
414
415 #if MAX_VECTOR_SIZE >= 256
asinh(Vec4d const & x)416 static inline Vec4d asinh(Vec4d const & x) {
417 return asinh_d<Vec4d, Vec4db>(x);
418 }
419 #endif // MAX_VECTOR_SIZE >= 256
420
421 #if MAX_VECTOR_SIZE >= 512
asinh(Vec8d const & x)422 static inline Vec8d asinh(Vec8d const & x) {
423 return asinh_d<Vec8d, Vec8db>(x);
424 }
425 #endif // MAX_VECTOR_SIZE >= 512
426
427
428 // Template for asinh function, single precision
429 // This function does not produce denormals
430 // Template parameters:
431 // VTYPE: double vector type
432 // BVTYPE: boolean vector type
433 template<class VTYPE, class BVTYPE>
asinh_f(VTYPE const & x0)434 static inline VTYPE asinh_f(VTYPE const & x0) {
435
436 // Coefficients
437 const float r0 = -1.6666288134E-1f;
438 const float r1 = 7.4847586088E-2f;
439 const float r2 = -4.2699340972E-2f;
440 const float r3 = 2.0122003309E-2f;
441
442 // data vectors
443 VTYPE x, x2, y1, y2;
444 BVTYPE x_small, x_huge; // boolean vectors
445
446 x2 = x0 * x0;
447 x = abs(x0);
448 x_small = x <= 0.51f; // use polynomial approximation if abs(x) <= 0.5
449 x_huge = x > 1.E10f; // simple approximation, avoid overflow
450
451 if (horizontal_or(x_small)) {
452 // At least one element needs small method
453 y1 = polynomial_3(x2, r0, r1, r2, r3);
454 y1 = mul_add(y1, x2*x, x); // y1 = x + (x2*x)*y1;
455 }
456 if (!horizontal_and(x_small)) {
457 // At least one element needs big method
458 y2 = log(x + sqrt(x2 + 1.0f));
459 if (horizontal_or(x_huge)) {
460 // At least one element needs huge method to avoid overflow
461 y2 = select(x_huge, log(x) + (float)VM_LN2, y2);
462 }
463 }
464 y1 = select(x_small, y1, y2); // choose method
465 y1 = sign_combine(y1, x0); // get original sign
466
467 return y1;
468 }
469
470 // instances of asinh_f template
asinh(Vec4f const & x)471 static inline Vec4f asinh(Vec4f const & x) {
472 return asinh_f<Vec4f, Vec4fb>(x);
473 }
474
475 #if MAX_VECTOR_SIZE >= 256
asinh(Vec8f const & x)476 static inline Vec8f asinh(Vec8f const & x) {
477 return asinh_f<Vec8f, Vec8fb>(x);
478 }
479 #endif // MAX_VECTOR_SIZE >= 256
480
481 #if MAX_VECTOR_SIZE >= 512
asinh(Vec16f const & x)482 static inline Vec16f asinh(Vec16f const & x) {
483 return asinh_f<Vec16f, Vec16fb>(x);
484 }
485 #endif // MAX_VECTOR_SIZE >= 512
486
487
488 // Template for acosh function, double precision
489 // This function does not produce denormals
490 // Template parameters:
491 // VTYPE: double vector type
492 // BVTYPE: boolean vector type
493 template<class VTYPE, class BVTYPE>
acosh_d(VTYPE const & x0)494 static inline VTYPE acosh_d(VTYPE const & x0) {
495
496 // Coefficients
497 const double p0 = 1.10855947270161294369E5;
498 const double p1 = 1.08102874834699867335E5;
499 const double p2 = 3.43989375926195455866E4;
500 const double p3 = 3.94726656571334401102E3;
501 const double p4 = 1.18801130533544501356E2;
502
503 const double q0 = 7.83869920495893927727E4;
504 const double q1 = 8.29725251988426222434E4;
505 const double q2 = 2.97683430363289370382E4;
506 const double q3 = 4.15352677227719831579E3;
507 const double q4 = 1.86145380837903397292E2;
508 const double q5 = 1.0;
509
510 // data vectors
511 VTYPE x1, y1, y2;
512 BVTYPE x_small, x_huge, undef; // boolean vectors
513
514 x1 = x0 - 1.0;
515 undef = x0 < 1.0; // result is NAN
516 x_small = x1 < 0.49; // use Pade approximation if abs(x-1) < 0.5
517 x_huge = x1 > 1.E20; // simple approximation, avoid overflow
518
519 if (horizontal_or(x_small)) {
520 // At least one element needs small method
521 y1 = sqrt(x1) * (polynomial_4(x1, p0, p1, p2, p3, p4) / polynomial_5(x1, q0, q1, q2, q3, q4, q5));
522 // x < 1 generates NAN
523 y1 = select(undef, nan_vec<VTYPE>(NAN_HYP), y1);
524 }
525 if (!horizontal_and(x_small)) {
526 // At least one element needs big method
527 y2 = log(x0 + sqrt(mul_sub(x0,x0,1.0)));
528 if (horizontal_or(x_huge)) {
529 // At least one element needs huge method to avoid overflow
530 y2 = select(x_huge, log(x0) + VM_LN2, y2);
531 }
532 }
533 y1 = select(x_small, y1, y2); // choose method
534 return y1;
535 }
536
537 // instances of acosh_d template
acosh(Vec2d const & x)538 static inline Vec2d acosh(Vec2d const & x) {
539 return acosh_d<Vec2d, Vec2db>(x);
540 }
541
542 #if MAX_VECTOR_SIZE >= 256
acosh(Vec4d const & x)543 static inline Vec4d acosh(Vec4d const & x) {
544 return acosh_d<Vec4d, Vec4db>(x);
545 }
546 #endif // MAX_VECTOR_SIZE >= 256
547
548 #if MAX_VECTOR_SIZE >= 512
acosh(Vec8d const & x)549 static inline Vec8d acosh(Vec8d const & x) {
550 return acosh_d<Vec8d, Vec8db>(x);
551 }
552 #endif // MAX_VECTOR_SIZE >= 512
553
554
555 // Template for acosh function, single precision
556 // This function does not produce denormals
557 // Template parameters:
558 // VTYPE: double vector type
559 // BVTYPE: boolean vector type
560 template<class VTYPE, class BVTYPE>
acosh_f(VTYPE const & x0)561 static inline VTYPE acosh_f(VTYPE const & x0) {
562
563 // Coefficients
564 const float r0 = 1.4142135263E0f;
565 const float r1 = -1.1784741703E-1f;
566 const float r2 = 2.6454905019E-2f;
567 const float r3 = -7.5272886713E-3f;
568 const float r4 = 1.7596881071E-3f;
569
570 // data vectors
571 VTYPE x1, y1, y2;
572 BVTYPE x_small, x_huge, undef; // boolean vectors
573
574 x1 = x0 - 1.0f;
575 undef = x0 < 1.0f; // result is NAN
576 x_small = x1 < 0.49f; // use Pade approximation if abs(x-1) < 0.5
577 x_huge = x1 > 1.E10f; // simple approximation, avoid overflow
578
579 if (horizontal_or(x_small)) {
580 // At least one element needs small method
581 y1 = sqrt(x1) * polynomial_4(x1, r0, r1, r2, r3, r4);
582 // x < 1 generates NAN
583 y1 = select(undef, nan_vec<VTYPE>(NAN_HYP), y1);
584 }
585 if (!horizontal_and(x_small)) {
586 // At least one element needs big method
587 y2 = log(x0 + sqrt(mul_sub(x0,x0,1.0)));
588 if (horizontal_or(x_huge)) {
589 // At least one element needs huge method to avoid overflow
590 y2 = select(x_huge, log(x0) + (float)VM_LN2, y2);
591 }
592 }
593 y1 = select(x_small, y1, y2); // choose method
594 return y1;
595 }
596
597 // instances of acosh_f template
acosh(Vec4f const & x)598 static inline Vec4f acosh(Vec4f const & x) {
599 return acosh_f<Vec4f, Vec4fb>(x);
600 }
601
602 #if MAX_VECTOR_SIZE >= 256
acosh(Vec8f const & x)603 static inline Vec8f acosh(Vec8f const & x) {
604 return acosh_f<Vec8f, Vec8fb>(x);
605 }
606 #endif // MAX_VECTOR_SIZE >= 256
607
608 #if MAX_VECTOR_SIZE >= 512
acosh(Vec16f const & x)609 static inline Vec16f acosh(Vec16f const & x) {
610 return acosh_f<Vec16f, Vec16fb>(x);
611 }
612 #endif // MAX_VECTOR_SIZE >= 512
613
614
615 // Template for atanh function, double precision
616 // This function does not produce denormals
617 // Template parameters:
618 // VTYPE: double vector type
619 // BVTYPE: boolean vector type
620 template<class VTYPE, class BVTYPE>
atanh_d(VTYPE const & x0)621 static inline VTYPE atanh_d(VTYPE const & x0) {
622
623 // Coefficients
624 const double p0 = -3.09092539379866942570E1;
625 const double p1 = 6.54566728676544377376E1;
626 const double p2 = -4.61252884198732692637E1;
627 const double p3 = 1.20426861384072379242E1;
628 const double p4 = -8.54074331929669305196E-1;
629
630 const double q0 = -9.27277618139601130017E1;
631 const double q1 = 2.52006675691344555838E2;
632 const double q2 = -2.49839401325893582852E2;
633 const double q3 = 1.08938092147140262656E2;
634 const double q4 = -1.95638849376911654834E1;
635 const double q5 = 1.0;
636
637 // data vectors
638 VTYPE x, x2, y1, y2, y3;
639 BVTYPE x_small; // boolean vector
640
641 x = abs(x0);
642 x_small = x < 0.5; // use Pade approximation if abs(x) < 0.5
643
644 if (horizontal_or(x_small)) {
645 // At least one element needs small method
646 x2 = x * x;
647 y1 = polynomial_4(x2, p0, p1, p2, p3, p4) / polynomial_5(x2, q0, q1, q2, q3, q4, q5);
648 y1 = mul_add(y1, x2*x, x);
649 }
650 if (!horizontal_and(x_small)) {
651 // At least one element needs big method
652 y2 = log((1.0+x)/(1.0-x)) * 0.5;
653 // check if out of range
654 y3 = select(x == 1.0, infinite_vec<VTYPE>(), nan_vec<VTYPE>(NAN_HYP));
655 y2 = select(x >= 1.0, y3, y2);
656 }
657 y1 = select(x_small, y1, y2); // choose method
658 y1 = sign_combine(y1, x0); // get original sign
659
660 return y1;
661 }
662
663 // instances of atanh_d template
atanh(Vec2d const & x)664 static inline Vec2d atanh(Vec2d const & x) {
665 return atanh_d<Vec2d, Vec2db>(x);
666 }
667
668 #if MAX_VECTOR_SIZE >= 256
atanh(Vec4d const & x)669 static inline Vec4d atanh(Vec4d const & x) {
670 return atanh_d<Vec4d, Vec4db>(x);
671 }
672 #endif // MAX_VECTOR_SIZE >= 256
673
674 #if MAX_VECTOR_SIZE >= 512
atanh(Vec8d const & x)675 static inline Vec8d atanh(Vec8d const & x) {
676 return atanh_d<Vec8d, Vec8db>(x);
677 }
678 #endif // MAX_VECTOR_SIZE >= 512
679
680
681 // Template for atanh function, single precision
682 // This function does not produce denormals
683 // Template parameters:
684 // VTYPE: double vector type
685 // BVTYPE: boolean vector type
686 template<class VTYPE, class BVTYPE>
atanh_f(VTYPE const & x0)687 static inline VTYPE atanh_f(VTYPE const & x0) {
688
689 // Coefficients
690 const float r0 = 3.33337300303E-1f;
691 const float r1 = 1.99782164500E-1f;
692 const float r2 = 1.46691431730E-1f;
693 const float r3 = 8.24370301058E-2f;
694 const float r4 = 1.81740078349E-1f;
695
696 // data vectors
697 VTYPE x, x2, y1, y2, y3;
698 BVTYPE x_small; // boolean vector
699
700 x = abs(x0);
701 x_small = x < 0.5f; // use polynomial approximation if abs(x) < 0.5
702
703 if (horizontal_or(x_small)) {
704 // At least one element needs small method
705 x2 = x * x;
706 y1 = polynomial_4(x2, r0, r1, r2, r3, r4);
707 y1 = mul_add(y1, x2*x, x);
708 }
709 if (!horizontal_and(x_small)) {
710 // At least one element needs big method
711 y2 = log((1.0f+x)/(1.0f-x)) * 0.5f;
712 // check if out of range
713 y3 = select(x == 1.0f, infinite_vec<VTYPE>(), nan_vec<VTYPE>(NAN_HYP));
714 y2 = select(x >= 1.0f, y3, y2);
715 }
716 y1 = select(x_small, y1, y2); // choose method
717 y1 = sign_combine(y1, x0); // get original sign
718
719 return y1;
720 }
721
722 // instances of atanh_f template
atanh(Vec4f const & x)723 static inline Vec4f atanh(Vec4f const & x) {
724 return atanh_f<Vec4f, Vec4fb>(x);
725 }
726
727 #if MAX_VECTOR_SIZE >= 256
atanh(Vec8f const & x)728 static inline Vec8f atanh(Vec8f const & x) {
729 return atanh_f<Vec8f, Vec8fb>(x);
730 }
731 #endif // MAX_VECTOR_SIZE >= 256
732
733 #if MAX_VECTOR_SIZE >= 512
atanh(Vec16f const & x)734 static inline Vec16f atanh(Vec16f const & x) {
735 return atanh_f<Vec16f, Vec16fb>(x);
736 }
737 #endif // MAX_VECTOR_SIZE >= 512
738
739 #ifdef VCL_NAMESPACE
740 }
741 #endif
742
743 #endif
744