1 /****************************  vectormath_hyp.h   ******************************
2 * Author:        Agner Fog
3 * Date created:  2014-07-09
4 * Last modified: 2015-02-10
5 * Version:       1.16
6 * Project:       vector classes
7 * Description:
8 * Header file containing inline vector functions of hyperbolic and inverse
9 * hyperbolic functions:
10 * sinh        hyperbolic sine
11 * cosh        hyperbolic cosine
12 * tanh        hyperbolic tangent
13 * asinh       inverse hyperbolic sine
14 * acosh       inverse hyperbolic cosine
15 * atanh       inverse hyperbolic tangent
16 *
17 * Theory, methods and inspiration based partially on these sources:
18 * > Moshier, Stephen Lloyd Baluk: Methods and programs for mathematical functions.
19 *   Ellis Horwood, 1989.
20 * > VDT library developed on CERN by Danilo Piparo, Thomas Hauth and
21 *   Vincenzo Innocente, 2012, https://svnweb.cern.ch/trac/vdt
22 * > Cephes math library by Stephen L. Moshier 1992,
23 *   http://www.netlib.org/cephes/
24 *
25 * For detailed instructions, see vectormath_common.h and VectorClass.pdf
26 *
27 * (c) Copyright 2014-2016 GNU General Public License http://www.gnu.org/licenses
28 ******************************************************************************/
29 
30 #ifndef VECTORMATH_HYP_H
31 #define VECTORMATH_HYP_H  1
32 
33 #include "vectormath_exp.h"
34 
35 #ifdef VCL_NAMESPACE
36 namespace VCL_NAMESPACE {
37 #endif
38 
39 /******************************************************************************
40 *                 Hyperbolic functions
41 ******************************************************************************/
42 
43 // Template for sinh function, double precision
44 // This function does not produce denormals
45 // Template parameters:
46 // VTYPE:  double vector type
47 // BVTYPE: boolean vector type
48 template<class VTYPE, class BVTYPE>
sinh_d(VTYPE const & x0)49 static inline VTYPE sinh_d(VTYPE const & x0) {
50 // The limit of abs(x) is 709.7, as defined by max_x in vectormath_exp.h for 0.5*exp(x).
51 
52     // Coefficients
53     const double p0 = -3.51754964808151394800E5;
54     const double p1 = -1.15614435765005216044E4;
55     const double p2 = -1.63725857525983828727E2;
56     const double p3 = -7.89474443963537015605E-1;
57 
58     const double q0 = -2.11052978884890840399E6;
59     const double q1 =  3.61578279834431989373E4;
60     const double q2 = -2.77711081420602794433E2;
61     const double q3 =  1.0;
62 
63     // data vectors
64     VTYPE  x, x2, y1, y2;
65     BVTYPE x_small;                              // boolean vector
66 
67     x = abs(x0);
68     x_small = x <= 1.0;                          // use Pade approximation if abs(x) <= 1
69 
70     if (horizontal_or(x_small)) {
71         // At least one element needs small method
72         x2 = x*x;
73         y1 = polynomial_3(x2, p0, p1, p2, p3) / polynomial_3(x2, q0, q1, q2, q3);
74         y1 = mul_add(y1, x*x2, x);               // y1 = x + x2*(x*y1);
75     }
76     if (!horizontal_and(x_small)) {
77         // At least one element needs big method
78         y2 =  exp_d<VTYPE, BVTYPE, 0, 1>(x);     //   0.5 * exp(x)
79         y2 -= 0.25 / y2;                         // - 0.5 * exp(-x)
80     }
81     y1 = select(x_small, y1, y2);                // choose method
82     y1 = sign_combine(y1, x0);                   // get original sign
83 
84     return y1;
85 }
86 
87 // instances of sinh_d template
sinh(Vec2d const & x)88 static inline Vec2d sinh(Vec2d const & x) {
89     return sinh_d<Vec2d, Vec2db>(x);
90 }
91 
92 #if MAX_VECTOR_SIZE >= 256
sinh(Vec4d const & x)93 static inline Vec4d sinh(Vec4d const & x) {
94     return sinh_d<Vec4d, Vec4db>(x);
95 }
96 #endif // MAX_VECTOR_SIZE >= 256
97 
98 #if MAX_VECTOR_SIZE >= 512
sinh(Vec8d const & x)99 static inline Vec8d sinh(Vec8d const & x) {
100     return sinh_d<Vec8d, Vec8db>(x);
101 }
102 #endif // MAX_VECTOR_SIZE >= 512
103 
104 
105 // Template for sinh function, single precision
106 // This function does not produce denormals
107 // Template parameters:
108 // VTYPE:  double vector type
109 // BVTYPE: boolean vector type
110 template<class VTYPE, class BVTYPE>
sinh_f(VTYPE const & x0)111 static inline VTYPE sinh_f(VTYPE const & x0) {
112 // The limit of abs(x) is 89.0, as defined by max_x in vectormath_exp.h for 0.5*exp(x).
113 
114     // Coefficients
115     const float r0 = 1.66667160211E-1f;
116     const float r1 = 8.33028376239E-3f;
117     const float r2 = 2.03721912945E-4f;
118 
119     // data vectors
120     VTYPE x, x2, y1, y2;
121     BVTYPE x_small;                              // boolean vector
122 
123     x = abs(x0);
124     x_small = x <= 1.0f;                         // use polynomial approximation if abs(x) <= 1
125 
126     if (horizontal_or(x_small)) {
127         // At least one element needs small method
128         x2 = x*x;
129         y1 = polynomial_2(x2, r0, r1, r2);
130         y1 = mul_add(y1, x2*x, x);               // y1 = x + x2*(x*y1);
131     }
132     if (!horizontal_and(x_small)) {
133         // At least one element needs big method
134         y2 =  exp_f<VTYPE, BVTYPE, 0, 1>(x);     //   0.5 * exp(x)
135         y2 -= 0.25f / y2;                        // - 0.5 * exp(-x)
136     }
137     y1 = select(x_small, y1, y2);                // choose method
138     y1 = sign_combine(y1, x0);                   // get original sign
139 
140     return y1;
141 }
142 
143 // instances of sinh_f template
sinh(Vec4f const & x)144 static inline Vec4f sinh(Vec4f const & x) {
145     return sinh_f<Vec4f, Vec4fb>(x);
146 }
147 
148 #if MAX_VECTOR_SIZE >= 256
sinh(Vec8f const & x)149 static inline Vec8f sinh(Vec8f const & x) {
150     return sinh_f<Vec8f, Vec8fb>(x);
151 }
152 #endif // MAX_VECTOR_SIZE >= 256
153 
154 #if MAX_VECTOR_SIZE >= 512
sinh(Vec16f const & x)155 static inline Vec16f sinh(Vec16f const & x) {
156     return sinh_f<Vec16f, Vec16fb>(x);
157 }
158 #endif // MAX_VECTOR_SIZE >= 512
159 
160 
161 // Template for cosh function, double precision
162 // This function does not produce denormals
163 // Template parameters:
164 // VTYPE:  double vector type
165 // BVTYPE: boolean vector type
166 template<class VTYPE, class BVTYPE>
cosh_d(VTYPE const & x0)167 static inline VTYPE cosh_d(VTYPE const & x0) {
168 // The limit of abs(x) is 709.7, as defined by max_x in vectormath_exp.h for 0.5*exp(x).
169 
170     // data vectors
171     VTYPE x, y;
172 
173     x  = abs(x0);
174     y  = exp_d<VTYPE, BVTYPE, 0, 1>(x);          //   0.5 * exp(x)
175     y += 0.25 / y;                               // + 0.5 * exp(-x)
176     return y;
177 }
178 
179 // instances of sinh_d template
cosh(Vec2d const & x)180 static inline Vec2d cosh(Vec2d const & x) {
181     return cosh_d<Vec2d, Vec2db>(x);
182 }
183 
184 #if MAX_VECTOR_SIZE >= 256
cosh(Vec4d const & x)185 static inline Vec4d cosh(Vec4d const & x) {
186     return cosh_d<Vec4d, Vec4db>(x);
187 }
188 #endif // MAX_VECTOR_SIZE >= 256
189 
190 #if MAX_VECTOR_SIZE >= 512
cosh(Vec8d const & x)191 static inline Vec8d cosh(Vec8d const & x) {
192     return cosh_d<Vec8d, Vec8db>(x);
193 }
194 #endif // MAX_VECTOR_SIZE >= 512
195 
196 
197 // Template for cosh function, single precision
198 // This function does not produce denormals
199 // Template parameters:
200 // VTYPE:  double vector type
201 // BVTYPE: boolean vector type
202 template<class VTYPE, class BVTYPE>
cosh_f(VTYPE const & x0)203 static inline VTYPE cosh_f(VTYPE const & x0) {
204 // The limit of abs(x) is 89.0, as defined by max_x in vectormath_exp.h for 0.5*exp(x).
205 
206     // data vectors
207     VTYPE x, y;
208 
209     x  = abs(x0);
210     y  = exp_f<VTYPE, BVTYPE, 0, 1>(x);          //   0.5 * exp(x)
211     y += 0.25f / y;                              // + 0.5 * exp(-x)
212     return y;
213 }
214 
215 // instances of sinh_d template
cosh(Vec4f const & x)216 static inline Vec4f cosh(Vec4f const & x) {
217     return cosh_f<Vec4f, Vec4fb>(x);
218 }
219 
220 #if MAX_VECTOR_SIZE >= 256
cosh(Vec8f const & x)221 static inline Vec8f cosh(Vec8f const & x) {
222     return cosh_f<Vec8f, Vec8fb>(x);
223 }
224 #endif // MAX_VECTOR_SIZE >= 256
225 
226 #if MAX_VECTOR_SIZE >= 512
cosh(Vec16f const & x)227 static inline Vec16f cosh(Vec16f const & x) {
228     return cosh_f<Vec16f, Vec16fb>(x);
229 }
230 #endif // MAX_VECTOR_SIZE >= 512
231 
232 
233 // Template for tanh function, double precision
234 // This function does not produce denormals
235 // Template parameters:
236 // VTYPE:  double vector type
237 // BVTYPE: boolean vector type
238 template<class VTYPE, class BVTYPE>
tanh_d(VTYPE const & x0)239 static inline VTYPE tanh_d(VTYPE const & x0) {
240 
241     // Coefficients
242     const double p0 = -1.61468768441708447952E3;
243     const double p1 = -9.92877231001918586564E1;
244     const double p2 = -9.64399179425052238628E-1;
245 
246     const double q0 =  4.84406305325125486048E3;
247     const double q1 =  2.23548839060100448583E3;
248     const double q2 =  1.12811678491632931402E2;
249     const double q3 =  1.0;
250 
251     // data vectors
252     VTYPE  x, x2, y1, y2;
253     BVTYPE x_small, x_big;                       // boolean vectors
254 
255     x = abs(x0);
256     x_small = x <= 0.625;                        // use Pade approximation if abs(x) <= 5/8
257 
258     if (horizontal_or(x_small)) {
259         // At least one element needs small method
260         x2 = x*x;
261         y1 = polynomial_2(x2, p0, p1, p2) / polynomial_3(x2, q0, q1, q2, q3);
262         y1 = mul_add(y1, x2*x, x);               // y1 = x + x2*(x*y1);
263     }
264     if (!horizontal_and(x_small)) {
265         // At least one element needs big method
266         y2 = exp(x+x);                           // exp(2*x)
267         y2 = 1.0 - 2.0 / (y2 + 1.0);             // tanh(x)
268     }
269     x_big = x > 350.;
270     y1 = select(x_small, y1, y2);                // choose method
271     y1 = select(x_big,  1.0, y1);                // avoid overflow
272     y1 = sign_combine(y1, x0);                   // get original sign
273 
274     return y1;
275 }
276 
277 // instances of tanh_d template
tanh(Vec2d const & x)278 static inline Vec2d tanh(Vec2d const & x) {
279     return tanh_d<Vec2d, Vec2db>(x);
280 }
281 
282 #if MAX_VECTOR_SIZE >= 256
tanh(Vec4d const & x)283 static inline Vec4d tanh(Vec4d const & x) {
284     return tanh_d<Vec4d, Vec4db>(x);
285 }
286 #endif // MAX_VECTOR_SIZE >= 256
287 
288 #if MAX_VECTOR_SIZE >= 512
tanh(Vec8d const & x)289 static inline Vec8d tanh(Vec8d const & x) {
290     return tanh_d<Vec8d, Vec8db>(x);
291 }
292 #endif // MAX_VECTOR_SIZE >= 512
293 
294 
295 // Template for tanh function, single precision
296 // This function does not produce denormals
297 // Template parameters:
298 // VTYPE:  double vector type
299 // BVTYPE: boolean vector type
300 template<class VTYPE, class BVTYPE>
tanh_f(VTYPE const & x0)301 static inline VTYPE tanh_f(VTYPE const & x0) {
302 // The limit of abs(x) is 89.0, as defined by max_x in vectormath_exp.h for 0.5*exp(x).
303 
304     // Coefficients
305     const float r0 = -3.33332819422E-1f;
306     const float r1 =  1.33314422036E-1f;
307     const float r2 = -5.37397155531E-2f;
308     const float r3 =  2.06390887954E-2f;
309     const float r4 = -5.70498872745E-3f;
310 
311     // data vectors
312     VTYPE x, x2, y1, y2;
313     BVTYPE x_small, x_big;                       // boolean vectors
314 
315     x = abs(x0);
316     x_small = x <= 0.625f;                       // use polynomial approximation if abs(x) <= 5/8
317 
318     if (horizontal_or(x_small)) {
319         // At least one element needs small method
320         x2 = x*x;
321         y1 = polynomial_4(x2, r0, r1, r2, r3, r4);
322         y1 = mul_add(y1, x2*x, x);               // y1 = x + (x2*x)*y1;
323     }
324     if (!horizontal_and(x_small)) {
325         // At least one element needs big method
326         y2 = exp(x+x);                           // exp(2*x)
327         y2 = 1.0f - 2.0f / (y2 + 1.0f);          // tanh(x)
328     }
329     x_big = x > 44.4f;
330     y1 = select(x_small, y1, y2);                // choose method
331     y1 = select(x_big,  1.0f, y1);               // avoid overflow
332     y1 = sign_combine(y1, x0);                   // get original sign
333 
334     return y1;
335 }
336 
337 // instances of tanh_f template
tanh(Vec4f const & x)338 static inline Vec4f tanh(Vec4f const & x) {
339     return tanh_f<Vec4f, Vec4fb>(x);
340 }
341 
342 #if MAX_VECTOR_SIZE >= 256
tanh(Vec8f const & x)343 static inline Vec8f tanh(Vec8f const & x) {
344     return tanh_f<Vec8f, Vec8fb>(x);
345 }
346 #endif // MAX_VECTOR_SIZE >= 256
347 
348 #if MAX_VECTOR_SIZE >= 512
tanh(Vec16f const & x)349 static inline Vec16f tanh(Vec16f const & x) {
350     return tanh_f<Vec16f, Vec16fb>(x);
351 }
352 #endif // MAX_VECTOR_SIZE >= 512
353 
354 
355 
356 /******************************************************************************
357 *                 Inverse hyperbolic functions
358 ******************************************************************************/
359 
360 // Template for asinh function, double precision
361 // This function does not produce denormals
362 // Template parameters:
363 // VTYPE:  double vector type
364 // BVTYPE: boolean vector type
365 template<class VTYPE, class BVTYPE>
asinh_d(VTYPE const & x0)366 static inline VTYPE asinh_d(VTYPE const & x0) {
367 
368     // Coefficients
369     const double p0 = -5.56682227230859640450E0;
370     const double p1 = -9.09030533308377316566E0;
371     const double p2 = -4.37390226194356683570E0;
372     const double p3 = -5.91750212056387121207E-1;
373     const double p4 = -4.33231683752342103572E-3;
374 
375     const double q0 =  3.34009336338516356383E1;
376     const double q1 =  6.95722521337257608734E1;
377     const double q2 =  4.86042483805291788324E1;
378     const double q3 =  1.28757002067426453537E1;
379     const double q4 =  1.0;
380 
381     // data vectors
382     VTYPE  x, x2, y1, y2;
383     BVTYPE x_small, x_huge;                      // boolean vectors
384 
385     x2 = x0 * x0;
386     x  = abs(x0);
387     x_small = x <= 0.533;                        // use Pade approximation if abs(x) <= 0.5
388                                                  // both methods give the highest error close to 0.5. this limit is adjusted for minimum error
389     x_huge  = x > 1.E20;                         // simple approximation, avoid overflow
390 
391     if (horizontal_or(x_small)) {
392         // At least one element needs small method
393         y1 = polynomial_4(x2, p0, p1, p2, p3, p4) / polynomial_4(x2, q0, q1, q2, q3, q4);
394         y1 = mul_add(y1, x2*x, x);               // y1 = x + (x2*x)*y1;
395     }
396     if (!horizontal_and(x_small)) {
397         // At least one element needs big method
398         y2 = log(x + sqrt(x2 + 1.0));
399         if (horizontal_or(x_huge)) {
400             // At least one element needs huge method to avoid overflow
401             y2 = select(x_huge, log(x) + VM_LN2, y2);
402         }
403     }
404     y1 = select(x_small, y1, y2);                // choose method
405     y1 = sign_combine(y1, x0);                   // get original sign
406 
407     return y1;
408 }
409 
410 // instances of asinh_d template
asinh(Vec2d const & x)411 static inline Vec2d asinh(Vec2d const & x) {
412     return asinh_d<Vec2d, Vec2db>(x);
413 }
414 
415 #if MAX_VECTOR_SIZE >= 256
asinh(Vec4d const & x)416 static inline Vec4d asinh(Vec4d const & x) {
417     return asinh_d<Vec4d, Vec4db>(x);
418 }
419 #endif // MAX_VECTOR_SIZE >= 256
420 
421 #if MAX_VECTOR_SIZE >= 512
asinh(Vec8d const & x)422 static inline Vec8d asinh(Vec8d const & x) {
423     return asinh_d<Vec8d, Vec8db>(x);
424 }
425 #endif // MAX_VECTOR_SIZE >= 512
426 
427 
428 // Template for asinh function, single precision
429 // This function does not produce denormals
430 // Template parameters:
431 // VTYPE:  double vector type
432 // BVTYPE: boolean vector type
433 template<class VTYPE, class BVTYPE>
asinh_f(VTYPE const & x0)434 static inline VTYPE asinh_f(VTYPE const & x0) {
435 
436     // Coefficients
437     const float r0 = -1.6666288134E-1f;
438     const float r1 =  7.4847586088E-2f;
439     const float r2 = -4.2699340972E-2f;
440     const float r3 =  2.0122003309E-2f;
441 
442     // data vectors
443     VTYPE  x, x2, y1, y2;
444     BVTYPE x_small, x_huge;                      // boolean vectors
445 
446     x2 = x0 * x0;
447     x  = abs(x0);
448     x_small = x <= 0.51f;                        // use polynomial approximation if abs(x) <= 0.5
449     x_huge  = x > 1.E10f;                        // simple approximation, avoid overflow
450 
451     if (horizontal_or(x_small)) {
452         // At least one element needs small method
453         y1 = polynomial_3(x2, r0, r1, r2, r3);
454         y1 = mul_add(y1, x2*x, x);               // y1 = x + (x2*x)*y1;
455     }
456     if (!horizontal_and(x_small)) {
457         // At least one element needs big method
458         y2 = log(x + sqrt(x2 + 1.0f));
459         if (horizontal_or(x_huge)) {
460             // At least one element needs huge method to avoid overflow
461             y2 = select(x_huge, log(x) + (float)VM_LN2, y2);
462         }
463     }
464     y1 = select(x_small, y1, y2);                // choose method
465     y1 = sign_combine(y1, x0);                   // get original sign
466 
467     return y1;
468 }
469 
470 // instances of asinh_f template
asinh(Vec4f const & x)471 static inline Vec4f asinh(Vec4f const & x) {
472     return asinh_f<Vec4f, Vec4fb>(x);
473 }
474 
475 #if MAX_VECTOR_SIZE >= 256
asinh(Vec8f const & x)476 static inline Vec8f asinh(Vec8f const & x) {
477     return asinh_f<Vec8f, Vec8fb>(x);
478 }
479 #endif // MAX_VECTOR_SIZE >= 256
480 
481 #if MAX_VECTOR_SIZE >= 512
asinh(Vec16f const & x)482 static inline Vec16f asinh(Vec16f const & x) {
483     return asinh_f<Vec16f, Vec16fb>(x);
484 }
485 #endif // MAX_VECTOR_SIZE >= 512
486 
487 
488 // Template for acosh function, double precision
489 // This function does not produce denormals
490 // Template parameters:
491 // VTYPE:  double vector type
492 // BVTYPE: boolean vector type
493 template<class VTYPE, class BVTYPE>
acosh_d(VTYPE const & x0)494 static inline VTYPE acosh_d(VTYPE const & x0) {
495 
496     // Coefficients
497     const double p0 = 1.10855947270161294369E5;
498     const double p1 = 1.08102874834699867335E5;
499     const double p2 = 3.43989375926195455866E4;
500     const double p3 = 3.94726656571334401102E3;
501     const double p4 = 1.18801130533544501356E2;
502 
503     const double q0 = 7.83869920495893927727E4;
504     const double q1 = 8.29725251988426222434E4;
505     const double q2 = 2.97683430363289370382E4;
506     const double q3 = 4.15352677227719831579E3;
507     const double q4 = 1.86145380837903397292E2;
508     const double q5 = 1.0;
509 
510     // data vectors
511     VTYPE  x1, y1, y2;
512     BVTYPE x_small, x_huge, undef;               // boolean vectors
513 
514     x1      = x0 - 1.0;
515     undef   = x0 < 1.0;                          // result is NAN
516     x_small = x1 < 0.49;                         // use Pade approximation if abs(x-1) < 0.5
517     x_huge  = x1 > 1.E20;                        // simple approximation, avoid overflow
518 
519     if (horizontal_or(x_small)) {
520         // At least one element needs small method
521         y1 = sqrt(x1) * (polynomial_4(x1, p0, p1, p2, p3, p4) / polynomial_5(x1, q0, q1, q2, q3, q4, q5));
522         // x < 1 generates NAN
523         y1 = select(undef, nan_vec<VTYPE>(NAN_HYP), y1);
524     }
525     if (!horizontal_and(x_small)) {
526         // At least one element needs big method
527         y2 = log(x0 + sqrt(mul_sub(x0,x0,1.0)));
528         if (horizontal_or(x_huge)) {
529             // At least one element needs huge method to avoid overflow
530             y2 = select(x_huge, log(x0) + VM_LN2, y2);
531         }
532     }
533     y1 = select(x_small, y1, y2);                // choose method
534     return y1;
535 }
536 
537 // instances of acosh_d template
acosh(Vec2d const & x)538 static inline Vec2d acosh(Vec2d const & x) {
539     return acosh_d<Vec2d, Vec2db>(x);
540 }
541 
542 #if MAX_VECTOR_SIZE >= 256
acosh(Vec4d const & x)543 static inline Vec4d acosh(Vec4d const & x) {
544     return acosh_d<Vec4d, Vec4db>(x);
545 }
546 #endif // MAX_VECTOR_SIZE >= 256
547 
548 #if MAX_VECTOR_SIZE >= 512
acosh(Vec8d const & x)549 static inline Vec8d acosh(Vec8d const & x) {
550     return acosh_d<Vec8d, Vec8db>(x);
551 }
552 #endif // MAX_VECTOR_SIZE >= 512
553 
554 
555 // Template for acosh function, single precision
556 // This function does not produce denormals
557 // Template parameters:
558 // VTYPE:  double vector type
559 // BVTYPE: boolean vector type
560 template<class VTYPE, class BVTYPE>
acosh_f(VTYPE const & x0)561 static inline VTYPE acosh_f(VTYPE const & x0) {
562 
563     // Coefficients
564     const float r0 =  1.4142135263E0f;
565     const float r1 = -1.1784741703E-1f;
566     const float r2 =  2.6454905019E-2f;
567     const float r3 = -7.5272886713E-3f;
568     const float r4 =  1.7596881071E-3f;
569 
570     // data vectors
571     VTYPE  x1, y1, y2;
572     BVTYPE x_small, x_huge, undef;               // boolean vectors
573 
574     x1      = x0 - 1.0f;
575     undef   = x0 < 1.0f;                         // result is NAN
576     x_small = x1 < 0.49f;                        // use Pade approximation if abs(x-1) < 0.5
577     x_huge  = x1 > 1.E10f;                       // simple approximation, avoid overflow
578 
579     if (horizontal_or(x_small)) {
580         // At least one element needs small method
581         y1 = sqrt(x1) * polynomial_4(x1, r0, r1, r2, r3, r4);
582         // x < 1 generates NAN
583         y1 = select(undef, nan_vec<VTYPE>(NAN_HYP), y1);
584     }
585     if (!horizontal_and(x_small)) {
586         // At least one element needs big method
587         y2 = log(x0 + sqrt(mul_sub(x0,x0,1.0)));
588         if (horizontal_or(x_huge)) {
589             // At least one element needs huge method to avoid overflow
590             y2 = select(x_huge, log(x0) + (float)VM_LN2, y2);
591         }
592     }
593     y1 = select(x_small, y1, y2);                // choose method
594     return y1;
595 }
596 
597 // instances of acosh_f template
acosh(Vec4f const & x)598 static inline Vec4f acosh(Vec4f const & x) {
599     return acosh_f<Vec4f, Vec4fb>(x);
600 }
601 
602 #if MAX_VECTOR_SIZE >= 256
acosh(Vec8f const & x)603 static inline Vec8f acosh(Vec8f const & x) {
604     return acosh_f<Vec8f, Vec8fb>(x);
605 }
606 #endif // MAX_VECTOR_SIZE >= 256
607 
608 #if MAX_VECTOR_SIZE >= 512
acosh(Vec16f const & x)609 static inline Vec16f acosh(Vec16f const & x) {
610     return acosh_f<Vec16f, Vec16fb>(x);
611 }
612 #endif // MAX_VECTOR_SIZE >= 512
613 
614 
615 // Template for atanh function, double precision
616 // This function does not produce denormals
617 // Template parameters:
618 // VTYPE:  double vector type
619 // BVTYPE: boolean vector type
620 template<class VTYPE, class BVTYPE>
atanh_d(VTYPE const & x0)621 static inline VTYPE atanh_d(VTYPE const & x0) {
622 
623     // Coefficients
624     const double p0 = -3.09092539379866942570E1;
625     const double p1 =  6.54566728676544377376E1;
626     const double p2 = -4.61252884198732692637E1;
627     const double p3 =  1.20426861384072379242E1;
628     const double p4 = -8.54074331929669305196E-1;
629 
630     const double q0 = -9.27277618139601130017E1;
631     const double q1 =  2.52006675691344555838E2;
632     const double q2 = -2.49839401325893582852E2;
633     const double q3 =  1.08938092147140262656E2;
634     const double q4 = -1.95638849376911654834E1;
635     const double q5 =  1.0;
636 
637     // data vectors
638     VTYPE  x, x2, y1, y2, y3;
639     BVTYPE x_small;                              // boolean vector
640 
641     x  = abs(x0);
642     x_small = x < 0.5;                           // use Pade approximation if abs(x) < 0.5
643 
644     if (horizontal_or(x_small)) {
645         // At least one element needs small method
646         x2 = x * x;
647         y1 = polynomial_4(x2, p0, p1, p2, p3, p4) / polynomial_5(x2, q0, q1, q2, q3, q4, q5);
648         y1 = mul_add(y1, x2*x, x);
649     }
650     if (!horizontal_and(x_small)) {
651         // At least one element needs big method
652         y2 = log((1.0+x)/(1.0-x)) * 0.5;
653         // check if out of range
654         y3 = select(x == 1.0, infinite_vec<VTYPE>(), nan_vec<VTYPE>(NAN_HYP));
655         y2 = select(x >= 1.0, y3, y2);
656     }
657     y1 = select(x_small, y1, y2);                // choose method
658     y1 = sign_combine(y1, x0);                   // get original sign
659 
660     return y1;
661 }
662 
663 // instances of atanh_d template
atanh(Vec2d const & x)664 static inline Vec2d atanh(Vec2d const & x) {
665     return atanh_d<Vec2d, Vec2db>(x);
666 }
667 
668 #if MAX_VECTOR_SIZE >= 256
atanh(Vec4d const & x)669 static inline Vec4d atanh(Vec4d const & x) {
670     return atanh_d<Vec4d, Vec4db>(x);
671 }
672 #endif // MAX_VECTOR_SIZE >= 256
673 
674 #if MAX_VECTOR_SIZE >= 512
atanh(Vec8d const & x)675 static inline Vec8d atanh(Vec8d const & x) {
676     return atanh_d<Vec8d, Vec8db>(x);
677 }
678 #endif // MAX_VECTOR_SIZE >= 512
679 
680 
681 // Template for atanh function, single precision
682 // This function does not produce denormals
683 // Template parameters:
684 // VTYPE:  double vector type
685 // BVTYPE: boolean vector type
686 template<class VTYPE, class BVTYPE>
atanh_f(VTYPE const & x0)687 static inline VTYPE atanh_f(VTYPE const & x0) {
688 
689     // Coefficients
690     const float r0 = 3.33337300303E-1f;
691     const float r1 = 1.99782164500E-1f;
692     const float r2 = 1.46691431730E-1f;
693     const float r3 = 8.24370301058E-2f;
694     const float r4 = 1.81740078349E-1f;
695 
696     // data vectors
697     VTYPE  x, x2, y1, y2, y3;
698     BVTYPE x_small;                              // boolean vector
699 
700     x  = abs(x0);
701     x_small = x < 0.5f;                          // use polynomial approximation if abs(x) < 0.5
702 
703     if (horizontal_or(x_small)) {
704         // At least one element needs small method
705         x2 = x * x;
706         y1 = polynomial_4(x2, r0, r1, r2, r3, r4);
707         y1 = mul_add(y1, x2*x, x);
708     }
709     if (!horizontal_and(x_small)) {
710         // At least one element needs big method
711         y2 = log((1.0f+x)/(1.0f-x)) * 0.5f;
712         // check if out of range
713         y3 = select(x == 1.0f, infinite_vec<VTYPE>(), nan_vec<VTYPE>(NAN_HYP));
714         y2 = select(x >= 1.0f, y3, y2);
715     }
716     y1 = select(x_small, y1, y2);                // choose method
717     y1 = sign_combine(y1, x0);                   // get original sign
718 
719     return y1;
720 }
721 
722 // instances of atanh_f template
atanh(Vec4f const & x)723 static inline Vec4f atanh(Vec4f const & x) {
724     return atanh_f<Vec4f, Vec4fb>(x);
725 }
726 
727 #if MAX_VECTOR_SIZE >= 256
atanh(Vec8f const & x)728 static inline Vec8f atanh(Vec8f const & x) {
729     return atanh_f<Vec8f, Vec8fb>(x);
730 }
731 #endif // MAX_VECTOR_SIZE >= 256
732 
733 #if MAX_VECTOR_SIZE >= 512
atanh(Vec16f const & x)734 static inline Vec16f atanh(Vec16f const & x) {
735     return atanh_f<Vec16f, Vec16fb>(x);
736 }
737 #endif // MAX_VECTOR_SIZE >= 512
738 
739 #ifdef VCL_NAMESPACE
740 }
741 #endif
742 
743 #endif
744