1 //          Copyright Naoki Shibata 2010 - 2017.
2 // Distributed under the Boost Software License, Version 1.0.
3 //    (See accompanying file LICENSE.txt or copy at
4 //          http://www.boost.org/LICENSE_1_0.txt)
5 
6 // Always use -ffp-contract=off option to compile SLEEF.
7 
8 #include <stdint.h>
9 #include <limits.h>
10 #include <float.h>
11 
12 #define ENABLE_BUILTIN_MATH
13 
14 #ifndef ENABLE_BUILTIN_MATH
15 #include <math.h>
16 #define SQRT sqrt
17 #else
18 #define SQRT __builtin_sqrt
19 #endif
20 
21 #include "misc.h"
22 
23 // debug prints using fprintf
24 #define NDEBUG
25 
26 #if (defined(_MSC_VER))
27 #pragma fp_contract (off)
28 #endif
29 
30 #include "helpers.h"
31 
doubleToRawLongBits(double d)32 static INLINE CONST int64_t doubleToRawLongBits(double d) {
33   union {
34     double f;
35     int64_t i;
36   } tmp;
37   tmp.f = d;
38   return tmp.i;
39 }
40 
longBitsToDouble(int64_t i)41 static INLINE CONST double longBitsToDouble(int64_t i) {
42   union {
43     double f;
44     int64_t i;
45   } tmp;
46   tmp.i = i;
47   return tmp.f;
48 }
49 
fabsk(double x)50 static INLINE CONST double fabsk(double x) {
51   return longBitsToDouble(0x7fffffffffffffffLL & doubleToRawLongBits(x));
52 }
53 
mulsign(double x,double y)54 static INLINE CONST double mulsign(double x, double y) {
55   return longBitsToDouble(doubleToRawLongBits(x) ^ (doubleToRawLongBits(y) & (1LL << 63)));
56 }
57 
copysignk(double x,double y)58 static INLINE CONST double copysignk(double x, double y) {
59   return longBitsToDouble((doubleToRawLongBits(x) & ~(1LL << 63)) ^ (doubleToRawLongBits(y) & (1LL << 63)));
60 }
61 
sign(double d)62 static INLINE CONST double sign(double d) { return mulsign(1, d); }
mla(double x,double y,double z)63 static INLINE CONST double mla(double x, double y, double z) { return x * y + z; }
rintk(double x)64 static INLINE CONST double rintk(double x) { return x < 0 ? (int)(x - 0.5) : (int)(x + 0.5); }
ceilk(double x)65 static INLINE CONST int ceilk(double x) { return (int)x + (x < 0 ? 0 : 1); }
trunck(double x)66 static INLINE CONST double trunck(double x) { return (double)(int)x; }
fmink(double x,double y)67 static INLINE CONST double fmink(double x, double y) { return x < y ? x : y; }
fmaxk(double x,double y)68 static INLINE CONST double fmaxk(double x, double y) { return x > y ? x : y; }
69 
xisnan(double x)70 static INLINE CONST int xisnan(double x) { return x != x; }
xisinf(double x)71 static INLINE CONST int xisinf(double x) { return x == SLEEF_INFINITY || x == -SLEEF_INFINITY; }
xisminf(double x)72 static INLINE CONST int xisminf(double x) { return x == -SLEEF_INFINITY; }
xispinf(double x)73 static INLINE CONST int xispinf(double x) { return x == SLEEF_INFINITY; }
xisnegzero(double x)74 static INLINE CONST int xisnegzero(double x) { return doubleToRawLongBits(x) == doubleToRawLongBits(-0.0); }
xisnumber(double x)75 static INLINE CONST int xisnumber(double x) { return !xisinf(x) && !xisnan(x); }
76 
xisint(double d)77 static INLINE CONST int xisint(double d) {
78   double x = d - (double)(1LL << 31) * (int)(d * (1.0 / (1LL << 31)));
79   return (x == (int)x) || (fabsk(d) >= (double)(1LL << 53));
80 }
81 
xisodd(double d)82 static INLINE CONST int xisodd(double d) {
83   double x = d - (double)(1LL << 31) * (int)(d * (1.0 / (1LL << 31)));
84   return (1 & (int)x) != 0 && fabsk(d) < (double)(1LL << 53);
85 }
86 
pow2i(int q)87 static INLINE CONST double pow2i(int q) {
88   return longBitsToDouble(((int64_t)(q + 0x3ff)) << 52);
89 }
90 
ldexpk(double x,int q)91 static INLINE CONST double ldexpk(double x, int q) {
92   double u;
93   int m;
94   m = q >> 31;
95   m = (((m + q) >> 9) - m) << 7;
96   q = q - (m << 2);
97   m += 0x3ff;
98   m = m < 0     ? 0     : m;
99   m = m > 0x7ff ? 0x7ff : m;
100   u = longBitsToDouble(((int64_t)m) << 52);
101   x = x * u * u * u * u;
102   u = longBitsToDouble(((int64_t)(q + 0x3ff)) << 52);
103   return x * u;
104 }
105 
ldexp2k(double d,int e)106 static INLINE CONST double ldexp2k(double d, int e) { // faster than ldexpk, short reach
107   return d * pow2i(e >> 1) * pow2i(e - (e >> 1));
108 }
109 
ldexp3k(double d,int e)110 static INLINE CONST double ldexp3k(double d, int e) { // very fast, no denormal
111   return longBitsToDouble(doubleToRawLongBits(d) + (((int64_t)e) << 52));
112 }
113 
xldexp(double x,int exp)114 EXPORT CONST double xldexp(double x, int exp) {
115   if (exp >  2100) exp =  2100;
116   if (exp < -2100) exp = -2100;
117 
118   int e0 = exp >> 2;
119   if (exp < 0) e0++;
120   if (-100 < exp && exp < 100) e0 = 0;
121   int e1 = exp - (e0 << 2);
122 
123   double p = pow2i(e0);
124   double ret = x * pow2i(e1) * p * p * p * p;
125 
126   return ret;
127 }
128 
ilogbk(double d)129 static INLINE CONST int ilogbk(double d) {
130   int m = d < 4.9090934652977266E-91;
131   d = m ? 2.037035976334486E90 * d : d;
132   int q = (doubleToRawLongBits(d) >> 52) & 0x7ff;
133   q = m ? q - (300 + 0x03ff) : q - 0x03ff;
134   return q;
135 }
136 
137 // ilogb2k is similar to ilogbk, but the argument has to be a
138 // normalized FP value.
ilogb2k(double d)139 static INLINE CONST int ilogb2k(double d) {
140   return ((doubleToRawLongBits(d) >> 52) & 0x7ff) - 0x3ff;
141 }
142 
xilogb(double d)143 EXPORT CONST int xilogb(double d) {
144   int e = ilogbk(fabsk(d));
145   e = d == 0.0  ? SLEEF_FP_ILOGB0 : e;
146   e = xisnan(d) ? SLEEF_FP_ILOGBNAN : e;
147   e = xisinf(d) ? INT_MAX : e;
148   return e;
149 }
150 
151 //
152 
153 #ifndef NDEBUG
checkfp(double x)154 static int checkfp(double x) {
155   if (xisinf(x) || xisnan(x)) return 1;
156   return 0;
157 }
158 #endif
159 
upper(double d)160 static INLINE CONST double upper(double d) {
161   return longBitsToDouble(doubleToRawLongBits(d) & 0xfffffffff8000000LL);
162 }
163 
dd(double h,double l)164 static INLINE CONST Sleef_double2 dd(double h, double l) {
165   Sleef_double2 ret;
166   ret.x = h; ret.y = l;
167   return ret;
168 }
169 
ddnormalize_d2_d2(Sleef_double2 t)170 static INLINE CONST Sleef_double2 ddnormalize_d2_d2(Sleef_double2 t) {
171   Sleef_double2 s;
172 
173   s.x = t.x + t.y;
174   s.y = t.x - s.x + t.y;
175 
176   return s;
177 }
178 
ddscale_d2_d2_d(Sleef_double2 d,double s)179 static INLINE CONST Sleef_double2 ddscale_d2_d2_d(Sleef_double2 d, double s) {
180   Sleef_double2 r;
181 
182   r.x = d.x * s;
183   r.y = d.y * s;
184 
185   return r;
186 }
187 
ddneg_d2_d2(Sleef_double2 d)188 static INLINE CONST Sleef_double2 ddneg_d2_d2(Sleef_double2 d) {
189   Sleef_double2 r;
190 
191   r.x = -d.x;
192   r.y = -d.y;
193 
194   return r;
195 }
196 
ddabs_d2_d2(Sleef_double2 x)197 static INLINE CONST Sleef_double2 ddabs_d2_d2(Sleef_double2 x) {
198   return dd(x.x < 0 ? -x.x : x.x, x.x < 0 ? -x.y : x.y);
199 }
200 
201 /*
202  * ddadd and ddadd2 are functions for double-double addition.  ddadd
203  * is simpler and faster than ddadd2, but it requires the absolute
204  * value of first argument to be larger than the second argument. The
205  * exact condition that should be met is checked if NDEBUG macro is
206  * not defined.
207  *
208  * Please note that if the results won't be used, it is no problem to
209  * feed arguments that do not meet this condition. You will see
210  * warning messages if you turn off NDEBUG macro and run tester2, but
211  * this is normal.
212  *
213  * Please see :
214  * Jonathan Richard Shewchuk, Adaptive Precision Floating-Point
215  * Arithmetic and Fast Robust Geometric Predicates, Discrete &
216  * Computational Geometry 18:305-363, 1997.
217  */
218 
ddadd_d2_d_d(double x,double y)219 static INLINE CONST Sleef_double2 ddadd_d2_d_d(double x, double y) {
220   // |x| >= |y|
221 
222   Sleef_double2 r;
223 
224 #ifndef NDEBUG
225   if (!(checkfp(x) || checkfp(y) || fabsk(x) >= fabsk(y) || (fabs(x+y) <= fabs(x) && fabs(x+y) <= fabs(y)))) {
226     fprintf(stderr, "[ddadd_d2_d_d : %g, %g]\n", x, y);
227     fflush(stderr);
228   }
229 #endif
230 
231   r.x = x + y;
232   r.y = x - r.x + y;
233 
234   return r;
235 }
236 
ddadd2_d2_d_d(double x,double y)237 static INLINE CONST Sleef_double2 ddadd2_d2_d_d(double x, double y) {
238   Sleef_double2 r;
239 
240   r.x = x + y;
241   double v = r.x - x;
242   r.y = (x - (r.x - v)) + (y - v);
243 
244   return r;
245 }
246 
ddadd_d2_d2_d(Sleef_double2 x,double y)247 static INLINE CONST Sleef_double2 ddadd_d2_d2_d(Sleef_double2 x, double y) {
248   // |x| >= |y|
249 
250   Sleef_double2 r;
251 
252 #ifndef NDEBUG
253   if (!(checkfp(x.x) || checkfp(y) || fabsk(x.x) >= fabsk(y) || (fabs(x.x+y) <= fabs(x.x) && fabs(x.x+y) <= fabs(y)))) {
254     fprintf(stderr, "[ddadd_d2_d2_d : %g %g]\n", x.x, y);
255     fflush(stderr);
256   }
257 #endif
258 
259   r.x = x.x + y;
260   r.y = x.x - r.x + y + x.y;
261 
262   return r;
263 }
264 
ddadd2_d2_d2_d(Sleef_double2 x,double y)265 static INLINE CONST Sleef_double2 ddadd2_d2_d2_d(Sleef_double2 x, double y) {
266   Sleef_double2 r;
267 
268   r.x  = x.x + y;
269   double v = r.x - x.x;
270   r.y = (x.x - (r.x - v)) + (y - v);
271   r.y += x.y;
272 
273   return r;
274 }
275 
ddadd_d2_d_d2(double x,Sleef_double2 y)276 static INLINE CONST Sleef_double2 ddadd_d2_d_d2(double x, Sleef_double2 y) {
277   // |x| >= |y|
278 
279   Sleef_double2 r;
280 
281 #ifndef NDEBUG
282   if (!(checkfp(x) || checkfp(y.x) || fabsk(x) >= fabsk(y.x) || (fabs(x+y.x) <= fabs(x) && fabs(x+y.x) <= fabs(y.x)))) {
283     fprintf(stderr, "[ddadd_d2_d_d2 : %g %g]\n", x, y.x);
284     fflush(stderr);
285   }
286 #endif
287 
288   r.x = x + y.x;
289   r.y = x - r.x + y.x + y.y;
290 
291   return r;
292 }
293 
ddadd2_d2_d_d2(double x,Sleef_double2 y)294 static INLINE CONST Sleef_double2 ddadd2_d2_d_d2(double x, Sleef_double2 y) {
295   Sleef_double2 r;
296 
297   r.x  = x + y.x;
298   double v = r.x - x;
299   r.y = (x - (r.x - v)) + (y.x - v) + y.y;
300 
301   return r;
302 }
303 
ddadd2_d_d_d2(double x,Sleef_double2 y)304 static INLINE CONST double ddadd2_d_d_d2(double x, Sleef_double2 y) { return y.y + y.x + x; }
305 
ddadd_d2_d2_d2(Sleef_double2 x,Sleef_double2 y)306 static INLINE CONST Sleef_double2 ddadd_d2_d2_d2(Sleef_double2 x, Sleef_double2 y) {
307   // |x| >= |y|
308 
309   Sleef_double2 r;
310 
311 #ifndef NDEBUG
312   if (!(checkfp(x.x) || checkfp(y.x) || fabsk(x.x) >= fabsk(y.x) || (fabs(x.x+y.x) <= fabs(x.x) && fabs(x.x+y.x) <= fabs(y.x)))) {
313     fprintf(stderr, "[ddadd_d2_d2_d2 : %g %g]\n", x.x, y.x);
314     fflush(stderr);
315   }
316 #endif
317 
318   r.x = x.x + y.x;
319   r.y = x.x - r.x + y.x + x.y + y.y;
320 
321   return r;
322 }
323 
ddadd2_d2_d2_d2(Sleef_double2 x,Sleef_double2 y)324 static INLINE CONST Sleef_double2 ddadd2_d2_d2_d2(Sleef_double2 x, Sleef_double2 y) {
325   Sleef_double2 r;
326 
327   r.x  = x.x + y.x;
328   double v = r.x - x.x;
329   r.y = (x.x - (r.x - v)) + (y.x - v);
330   r.y += x.y + y.y;
331 
332   return r;
333 }
334 
ddsub_d2_d2_d2(Sleef_double2 x,Sleef_double2 y)335 static INLINE CONST Sleef_double2 ddsub_d2_d2_d2(Sleef_double2 x, Sleef_double2 y) {
336   // |x| >= |y|
337 
338   Sleef_double2 r;
339 
340 #ifndef NDEBUG
341   if (!(checkfp(x.x) || checkfp(y.x) || fabsk(x.x) >= fabsk(y.x) || (fabs(x.x-y.x) <= fabs(x.x) && fabs(x.x-y.x) <= fabs(y.x)))) {
342     fprintf(stderr, "[ddsub_d2_d2_d2 : %g %g]\n", x.x, y.x);
343     fflush(stderr);
344   }
345 #endif
346 
347   r.x = x.x - y.x;
348   r.y = x.x - r.x - y.x + x.y - y.y;
349 
350   return r;
351 }
352 
dddiv_d2_d2_d2(Sleef_double2 n,Sleef_double2 d)353 static INLINE CONST Sleef_double2 dddiv_d2_d2_d2(Sleef_double2 n, Sleef_double2 d) {
354   double t = 1.0 / d.x;
355   double dh  = upper(d.x), dl  = d.x - dh;
356   double th  = upper(t  ), tl  = t   - th;
357   double nhh = upper(n.x), nhl = n.x - nhh;
358 
359   Sleef_double2 q;
360 
361   q.x = n.x * t;
362 
363   double u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl +
364     q.x * (1 - dh * th - dh * tl - dl * th - dl * tl);
365 
366   q.y = t * (n.y - q.x * d.y) + u;
367 
368   return q;
369 }
370 
ddmul_d2_d_d(double x,double y)371 static INLINE CONST Sleef_double2 ddmul_d2_d_d(double x, double y) {
372   double xh = upper(x), xl = x - xh;
373   double yh = upper(y), yl = y - yh;
374   Sleef_double2 r;
375 
376   r.x = x * y;
377   r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl;
378 
379   return r;
380 }
381 
ddmul_d2_d2_d(Sleef_double2 x,double y)382 static INLINE CONST Sleef_double2 ddmul_d2_d2_d(Sleef_double2 x, double y) {
383   double xh = upper(x.x), xl = x.x - xh;
384   double yh = upper(y  ), yl = y   - yh;
385   Sleef_double2 r;
386 
387   r.x = x.x * y;
388   r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y;
389 
390   return r;
391 }
392 
ddmul_d2_d2_d2(Sleef_double2 x,Sleef_double2 y)393 static INLINE CONST Sleef_double2 ddmul_d2_d2_d2(Sleef_double2 x, Sleef_double2 y) {
394   double xh = upper(x.x), xl = x.x - xh;
395   double yh = upper(y.x), yl = y.x - yh;
396   Sleef_double2 r;
397 
398   r.x = x.x * y.x;
399   r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x;
400 
401   return r;
402 }
403 
ddmul_d_d2_d2(Sleef_double2 x,Sleef_double2 y)404 static INLINE CONST double ddmul_d_d2_d2(Sleef_double2 x, Sleef_double2 y) {
405   double xh = upper(x.x), xl = x.x - xh;
406   double yh = upper(y.x), yl = y.x - yh;
407 
408   return x.y * yh + xh * y.y + xl * yl + xh * yl + xl * yh + xh * yh;
409 }
410 
ddsqu_d2_d2(Sleef_double2 x)411 static INLINE CONST Sleef_double2 ddsqu_d2_d2(Sleef_double2 x) {
412   double xh = upper(x.x), xl = x.x - xh;
413   Sleef_double2 r;
414 
415   r.x = x.x * x.x;
416   r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y);
417 
418   return r;
419 }
420 
ddsqu_d_d2(Sleef_double2 x)421 static INLINE CONST double ddsqu_d_d2(Sleef_double2 x) {
422   double xh = upper(x.x), xl = x.x - xh;
423 
424   return xh * x.y + xh * x.y + xl * xl + (xh * xl + xh * xl) + xh * xh;
425 }
426 
ddrec_d2_d(double d)427 static INLINE CONST Sleef_double2 ddrec_d2_d(double d) {
428   double t = 1.0 / d;
429   double dh = upper(d), dl = d - dh;
430   double th = upper(t), tl = t - th;
431   Sleef_double2 q;
432 
433   q.x = t;
434   q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl);
435 
436   return q;
437 }
438 
ddrec_d2_d2(Sleef_double2 d)439 static INLINE CONST Sleef_double2 ddrec_d2_d2(Sleef_double2 d) {
440   double t = 1.0 / d.x;
441   double dh = upper(d.x), dl = d.x - dh;
442   double th = upper(t  ), tl = t   - th;
443   Sleef_double2 q;
444 
445   q.x = t;
446   q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl - d.y * t);
447 
448   return q;
449 }
450 
ddsqrt_d2_d2(Sleef_double2 d)451 static INLINE CONST Sleef_double2 ddsqrt_d2_d2(Sleef_double2 d) {
452   double t = SQRT(d.x + d.y);
453   return ddscale_d2_d2_d(ddmul_d2_d2_d2(ddadd2_d2_d2_d2(d, ddmul_d2_d_d(t, t)), ddrec_d2_d(t)), 0.5);
454 }
455 
ddsqrt_d2_d(double d)456 static INLINE CONST Sleef_double2 ddsqrt_d2_d(double d) {
457   double t = SQRT(d);
458   return ddscale_d2_d2_d(ddmul_d2_d2_d2(ddadd2_d2_d_d2(d, ddmul_d2_d_d(t, t)), ddrec_d2_d(t)), 0.5);
459 }
460 
461 //
462 
atan2k(double y,double x)463 static INLINE CONST double atan2k(double y, double x) {
464   double s, t, u;
465   int q = 0;
466 
467   if (x < 0) { x = -x; q = -2; }
468   if (y > x) { t = x; x = y; y = -t; q += 1; }
469 
470   s = y / x;
471   t = s * s;
472 
473   u = -1.88796008463073496563746e-05;
474   u = mla(u, t, 0.000209850076645816976906797);
475   u = mla(u, t, -0.00110611831486672482563471);
476   u = mla(u, t, 0.00370026744188713119232403);
477   u = mla(u, t, -0.00889896195887655491740809);
478   u = mla(u, t, 0.016599329773529201970117);
479   u = mla(u, t, -0.0254517624932312641616861);
480   u = mla(u, t, 0.0337852580001353069993897);
481   u = mla(u, t, -0.0407629191276836500001934);
482   u = mla(u, t, 0.0466667150077840625632675);
483   u = mla(u, t, -0.0523674852303482457616113);
484   u = mla(u, t, 0.0587666392926673580854313);
485   u = mla(u, t, -0.0666573579361080525984562);
486   u = mla(u, t, 0.0769219538311769618355029);
487   u = mla(u, t, -0.090908995008245008229153);
488   u = mla(u, t, 0.111111105648261418443745);
489   u = mla(u, t, -0.14285714266771329383765);
490   u = mla(u, t, 0.199999999996591265594148);
491   u = mla(u, t, -0.333333333333311110369124);
492 
493   t = u * t * s + s;
494   t = q * (M_PI/2) + t;
495 
496   return t;
497 }
498 
xatan2(double y,double x)499 EXPORT CONST double xatan2(double y, double x) {
500   double r = atan2k(fabsk(y), x);
501 
502   r = mulsign(r, x);
503   if (xisinf(x) || x == 0) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI  /2)) : 0);
504   if (xisinf(y)          ) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI*1/4)) : 0);
505   if (             y == 0) r = (sign(x) == -1 ? M_PI : 0);
506 
507   return xisnan(x) || xisnan(y) ? SLEEF_NAN : mulsign(r, y);
508 }
509 
xasin(double d)510 EXPORT CONST double xasin(double d) {
511   int o = fabsk(d) < 0.5;
512   double x2 = o ? (d*d) : ((1-fabsk(d))*0.5), x = o ? fabsk(d) : SQRT(x2), u;
513 
514   u = +0.3161587650653934628e-1;
515   u = mla(u, x2, -0.1581918243329996643e-1);
516   u = mla(u, x2, +0.1929045477267910674e-1);
517   u = mla(u, x2, +0.6606077476277170610e-2);
518   u = mla(u, x2, +0.1215360525577377331e-1);
519   u = mla(u, x2, +0.1388715184501609218e-1);
520   u = mla(u, x2, +0.1735956991223614604e-1);
521   u = mla(u, x2, +0.2237176181932048341e-1);
522   u = mla(u, x2, +0.3038195928038132237e-1);
523   u = mla(u, x2, +0.4464285681377102438e-1);
524   u = mla(u, x2, +0.7500000000378581611e-1);
525   u = mla(u, x2, +0.1666666666666497543e+0);
526   u = mla(u, x * x2, x);
527 
528   double r = o ? u : (M_PI/2 - 2*u);
529   r = mulsign(r, d);
530 
531   return r;
532 }
533 
xacos(double d)534 EXPORT CONST double xacos(double d) {
535   int o = fabsk(d) < 0.5;
536   double x2 = o ? (d*d) : ((1-fabsk(d))*0.5), u;
537   double x = o ? fabsk(d) : SQRT(x2);
538   x = fabsk(d) == 1.0 ? 0 : x;
539 
540   u = +0.3161587650653934628e-1;
541   u = mla(u, x2, -0.1581918243329996643e-1);
542   u = mla(u, x2, +0.1929045477267910674e-1);
543   u = mla(u, x2, +0.6606077476277170610e-2);
544   u = mla(u, x2, +0.1215360525577377331e-1);
545   u = mla(u, x2, +0.1388715184501609218e-1);
546   u = mla(u, x2, +0.1735956991223614604e-1);
547   u = mla(u, x2, +0.2237176181932048341e-1);
548   u = mla(u, x2, +0.3038195928038132237e-1);
549   u = mla(u, x2, +0.4464285681377102438e-1);
550   u = mla(u, x2, +0.7500000000378581611e-1);
551   u = mla(u, x2, +0.1666666666666497543e+0);
552 
553   u *= x * x2;
554 
555   double y = 3.1415926535897932/2 - (mulsign(x, d) + mulsign(u, d));
556   x += u;
557   double r = o ? y : (x*2);
558   if (!o && d < 0) r = ddadd_d2_d2_d(dd(3.141592653589793116, 1.2246467991473532072e-16), -r).x;
559 
560   return r;
561 }
562 
563 
xatan(double s)564 EXPORT CONST double xatan(double s) {
565   double t, u;
566   int q = 0;
567 
568   if (sign(s) == -1) { s = -s; q = 2; }
569   if (s > 1) { s = 1.0 / s; q |= 1; }
570 
571   t = s * s;
572 
573   u = -1.88796008463073496563746e-05;
574   u = mla(u, t, 0.000209850076645816976906797);
575   u = mla(u, t, -0.00110611831486672482563471);
576   u = mla(u, t, 0.00370026744188713119232403);
577   u = mla(u, t, -0.00889896195887655491740809);
578   u = mla(u, t, 0.016599329773529201970117);
579   u = mla(u, t, -0.0254517624932312641616861);
580   u = mla(u, t, 0.0337852580001353069993897);
581   u = mla(u, t, -0.0407629191276836500001934);
582   u = mla(u, t, 0.0466667150077840625632675);
583   u = mla(u, t, -0.0523674852303482457616113);
584   u = mla(u, t, 0.0587666392926673580854313);
585   u = mla(u, t, -0.0666573579361080525984562);
586   u = mla(u, t, 0.0769219538311769618355029);
587   u = mla(u, t, -0.090908995008245008229153);
588   u = mla(u, t, 0.111111105648261418443745);
589   u = mla(u, t, -0.14285714266771329383765);
590   u = mla(u, t, 0.199999999996591265594148);
591   u = mla(u, t, -0.333333333333311110369124);
592 
593   t = s + s * (t * u);
594 
595   if ((q & 1) != 0) t = 1.570796326794896557998982 - t;
596   if ((q & 2) != 0) t = -t;
597 
598   return t;
599 }
600 
atan2k_u1(Sleef_double2 y,Sleef_double2 x)601 static Sleef_double2 atan2k_u1(Sleef_double2 y, Sleef_double2 x) {
602   double u;
603   Sleef_double2 s, t;
604   int q = 0;
605 
606   if (x.x < 0) { x.x = -x.x; x.y = -x.y; q = -2; }
607   if (y.x > x.x) { t = x; x = y; y.x = -t.x; y.y = -t.y; q += 1; }
608 
609   s = dddiv_d2_d2_d2(y, x);
610   t = ddsqu_d2_d2(s);
611   t = ddnormalize_d2_d2(t);
612 
613   u = 1.06298484191448746607415e-05;
614   u = mla(u, t.x, -0.000125620649967286867384336);
615   u = mla(u, t.x, 0.00070557664296393412389774);
616   u = mla(u, t.x, -0.00251865614498713360352999);
617   u = mla(u, t.x, 0.00646262899036991172313504);
618   u = mla(u, t.x, -0.0128281333663399031014274);
619   u = mla(u, t.x, 0.0208024799924145797902497);
620   u = mla(u, t.x, -0.0289002344784740315686289);
621   u = mla(u, t.x, 0.0359785005035104590853656);
622   u = mla(u, t.x, -0.041848579703592507506027);
623   u = mla(u, t.x, 0.0470843011653283988193763);
624   u = mla(u, t.x, -0.0524914210588448421068719);
625   u = mla(u, t.x, 0.0587946590969581003860434);
626   u = mla(u, t.x, -0.0666620884778795497194182);
627   u = mla(u, t.x, 0.0769225330296203768654095);
628   u = mla(u, t.x, -0.0909090442773387574781907);
629   u = mla(u, t.x, 0.111111108376896236538123);
630   u = mla(u, t.x, -0.142857142756268568062339);
631   u = mla(u, t.x, 0.199999999997977351284817);
632   u = mla(u, t.x, -0.333333333333317605173818);
633 
634   t = ddmul_d2_d2_d(t, u);
635   t = ddmul_d2_d2_d2(s, ddadd_d2_d_d2(1, t));
636   if (fabsk(s.x) < 1e-200) t = s;
637   t = ddadd2_d2_d2_d2(ddmul_d2_d2_d(dd(1.570796326794896557998982, 6.12323399573676603586882e-17), q), t);
638 
639   return t;
640 }
641 
xatan2_u1(double y,double x)642 EXPORT CONST double xatan2_u1(double y, double x) {
643   if (fabsk(x) < 5.5626846462680083984e-309) { y *= (1ULL << 53); x *= (1ULL << 53); } // nexttoward((1.0 / DBL_MAX), 1)
644   Sleef_double2 d = atan2k_u1(dd(fabsk(y), 0), dd(x, 0));
645   double r = d.x + d.y;
646 
647   r = mulsign(r, x);
648   if (xisinf(x) || x == 0) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI  /2)) : 0);
649   if (xisinf(y)          ) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI*1/4)) : 0);
650   if (             y == 0) r = (sign(x) == -1 ? M_PI : 0);
651 
652   return xisnan(x) || xisnan(y) ? SLEEF_NAN : mulsign(r, y);
653 }
654 
xasin_u1(double d)655 EXPORT CONST double xasin_u1(double d) {
656   int o = fabsk(d) < 0.5;
657   double x2 = o ? (d*d) : ((1-fabsk(d))*0.5), u;
658   Sleef_double2 x = o ? dd(fabsk(d), 0) : ddsqrt_d2_d(x2);
659   x = fabsk(d) == 1.0 ? dd(0, 0) : x;
660 
661   u = +0.3161587650653934628e-1;
662   u = mla(u, x2, -0.1581918243329996643e-1);
663   u = mla(u, x2, +0.1929045477267910674e-1);
664   u = mla(u, x2, +0.6606077476277170610e-2);
665   u = mla(u, x2, +0.1215360525577377331e-1);
666   u = mla(u, x2, +0.1388715184501609218e-1);
667   u = mla(u, x2, +0.1735956991223614604e-1);
668   u = mla(u, x2, +0.2237176181932048341e-1);
669   u = mla(u, x2, +0.3038195928038132237e-1);
670   u = mla(u, x2, +0.4464285681377102438e-1);
671   u = mla(u, x2, +0.7500000000378581611e-1);
672   u = mla(u, x2, +0.1666666666666497543e+0);
673   u *= x2 * x.x;
674 
675   Sleef_double2 y = ddadd_d2_d2_d(ddsub_d2_d2_d2(dd(3.141592653589793116/4, 1.2246467991473532072e-16/4), x), -u);
676   double r = o ? (u + x.x) : ((y.x + y.y)*2);
677   r = mulsign(r, d);
678 
679   return r;
680 }
681 
xacos_u1(double d)682 EXPORT CONST double xacos_u1(double d) {
683   int o = fabsk(d) < 0.5;
684   double x2 = o ? (d*d) : ((1-fabsk(d))*0.5), u;
685   Sleef_double2 x = o ? dd(fabsk(d), 0) : ddsqrt_d2_d(x2), w;
686   x = fabsk(d) == 1.0 ? dd(0, 0) : x;
687 
688   u = +0.3161587650653934628e-1;
689   u = mla(u, x2, -0.1581918243329996643e-1);
690   u = mla(u, x2, +0.1929045477267910674e-1);
691   u = mla(u, x2, +0.6606077476277170610e-2);
692   u = mla(u, x2, +0.1215360525577377331e-1);
693   u = mla(u, x2, +0.1388715184501609218e-1);
694   u = mla(u, x2, +0.1735956991223614604e-1);
695   u = mla(u, x2, +0.2237176181932048341e-1);
696   u = mla(u, x2, +0.3038195928038132237e-1);
697   u = mla(u, x2, +0.4464285681377102438e-1);
698   u = mla(u, x2, +0.7500000000378581611e-1);
699   u = mla(u, x2, +0.1666666666666497543e+0);
700 
701   u *= x.x * x2;
702 
703   Sleef_double2 y = ddsub_d2_d2_d2(dd(3.141592653589793116/2, 1.2246467991473532072e-16/2),
704                                    ddadd_d2_d_d(mulsign(x.x, d), mulsign(u, d)));
705   x = ddadd_d2_d2_d(x, u);
706   y = o ? y : ddscale_d2_d2_d(x, 2);
707   if (!o && d < 0) y = ddsub_d2_d2_d2(dd(3.141592653589793116, 1.2246467991473532072e-16), y);
708 
709   return y.x + y.y;
710 }
711 
xatan_u1(double d)712 EXPORT CONST double xatan_u1(double d) {
713   Sleef_double2 d2 = atan2k_u1(dd(fabsk(d), 0), dd(1, 0));
714   double r = d2.x + d2.y;
715   if (xisinf(d)) r = 1.570796326794896557998982;
716   return mulsign(r, d);
717 }
718 
xsin(double d)719 EXPORT CONST double xsin(double d) {
720   double u, s, t = d;
721 
722   double dqh = trunck(d * (M_1_PI / (1 << 24))) * (double)(1 << 24);
723   int ql = rintk(mla(d, M_1_PI, -dqh));
724 
725   d = mla(dqh, -PI_A, d);
726   d = mla( ql, -PI_A, d);
727   d = mla(dqh, -PI_B, d);
728   d = mla( ql, -PI_B, d);
729   d = mla(dqh, -PI_C, d);
730   d = mla( ql, -PI_C, d);
731   d = mla(dqh + ql, -PI_D, d);
732 
733   s = d * d;
734 
735   if ((ql & 1) != 0) d = -d;
736 
737   u = -7.97255955009037868891952e-18;
738   u = mla(u, s, 2.81009972710863200091251e-15);
739   u = mla(u, s, -7.64712219118158833288484e-13);
740   u = mla(u, s, 1.60590430605664501629054e-10);
741   u = mla(u, s, -2.50521083763502045810755e-08);
742   u = mla(u, s, 2.75573192239198747630416e-06);
743   u = mla(u, s, -0.000198412698412696162806809);
744   u = mla(u, s, 0.00833333333333332974823815);
745   u = mla(u, s, -0.166666666666666657414808);
746 
747   u = mla(s, u * d, d);
748 
749   if (!xisinf(t) && (xisnegzero(t) || fabsk(t) > TRIGRANGEMAX)) u = -0.0;
750 
751   return u;
752 }
753 
xsin_u1(double d)754 EXPORT CONST double xsin_u1(double d) {
755   double u;
756   Sleef_double2 s, t, x;
757   int ql;
758 
759   if (fabsk(d) < TRIGRANGEMAX2) {
760     ql = rintk(d * M_1_PI);
761     u = mla(ql, -PI_A2, d);
762     s = ddadd_d2_d_d (u,  ql * -PI_B2);
763   } else {
764     const double dqh = trunck(d * (M_1_PI / (1 << 24))) * (double)(1 << 24);
765     ql = rintk(mla(d, M_1_PI, -dqh));
766 
767     u = mla(dqh, -PI_A, d);
768     s = ddadd_d2_d_d  (u,  ql * -PI_A);
769     s = ddadd2_d2_d2_d(s, dqh * -PI_B);
770     s = ddadd2_d2_d2_d(s,  ql * -PI_B);
771     s = ddadd2_d2_d2_d(s, dqh * -PI_C);
772     s = ddadd2_d2_d2_d(s,  ql * -PI_C);
773     s = ddadd_d2_d2_d (s, (dqh + ql) * -PI_D);
774   }
775 
776   t = s;
777   s = ddsqu_d2_d2(s);
778 
779   u = 2.72052416138529567917983e-15;
780   u = mla(u, s.x, -7.6429259411395447190023e-13);
781   u = mla(u, s.x, 1.60589370117277896211623e-10);
782   u = mla(u, s.x, -2.5052106814843123359368e-08);
783   u = mla(u, s.x, 2.75573192104428224777379e-06);
784   u = mla(u, s.x, -0.000198412698412046454654947);
785   u = mla(u, s.x, 0.00833333333333318056201922);
786 
787   x = ddadd_d2_d_d2(1, ddmul_d2_d2_d2(ddadd_d2_d_d(-0.166666666666666657414808, u * s.x), s));
788 
789   u = ddmul_d_d2_d2(t, x);
790 
791   if ((ql & 1) != 0) u = -u;
792   if (!xisinf(d) && (xisnegzero(d) || fabsk(d) > TRIGRANGEMAX)) u = -0.0;
793 
794   return u;
795 }
796 
xcos(double d)797 EXPORT CONST double xcos(double d) {
798   double u, s, t = d;
799 
800   double dqh = trunck(d * (M_1_PI / (1LL << 23)) - 0.5 * (M_1_PI / (1LL << 23)));
801   int ql = 2*rintk(d * M_1_PI - 0.5 - dqh * (double)(1LL << 23))+1;
802   dqh *= 1 << 24;
803 
804   d = mla(dqh, -PI_A*0.5, d);
805   d = mla( ql, -PI_A*0.5, d);
806   d = mla(dqh, -PI_B*0.5, d);
807   d = mla( ql, -PI_B*0.5, d);
808   d = mla(dqh, -PI_C*0.5, d);
809   d = mla( ql, -PI_C*0.5, d);
810   d = mla(dqh + ql , -PI_D*0.5, d);
811 
812   s = d * d;
813 
814   if ((ql & 2) == 0) d = -d;
815 
816   u = -7.97255955009037868891952e-18;
817   u = mla(u, s, 2.81009972710863200091251e-15);
818   u = mla(u, s, -7.64712219118158833288484e-13);
819   u = mla(u, s, 1.60590430605664501629054e-10);
820   u = mla(u, s, -2.50521083763502045810755e-08);
821   u = mla(u, s, 2.75573192239198747630416e-06);
822   u = mla(u, s, -0.000198412698412696162806809);
823   u = mla(u, s, 0.00833333333333332974823815);
824   u = mla(u, s, -0.166666666666666657414808);
825 
826   u = mla(s, u * d, d);
827 
828   if (!xisinf(t) && fabsk(t) > TRIGRANGEMAX) u = 1.0;
829 
830   return u;
831 }
832 
xcos_u1(double d)833 EXPORT CONST double xcos_u1(double d) {
834   double u;
835   Sleef_double2 s, t, x;
836   int ql;
837 
838   d = fabsk(d);
839 
840   if (d < TRIGRANGEMAX2) {
841     ql = mla(2, rintk(d * M_1_PI - 0.5), 1);
842     s = ddadd2_d2_d_d(d, ql * (-PI_A2*0.5));
843     s = ddadd_d2_d2_d(s, ql * (-PI_B2*0.5));
844   } else {
845     double dqh = trunck(d * (M_1_PI / (1LL << 23)) - 0.5 * (M_1_PI / (1LL << 23)));
846     ql = 2*rintk(d * M_1_PI - 0.5 - dqh * (double)(1LL << 23))+1;
847     dqh *= 1 << 24;
848 
849     u = mla(dqh, -PI_A*0.5, d);
850     s = ddadd2_d2_d_d (u,  ql * (-PI_A*0.5));
851     s = ddadd2_d2_d2_d(s, dqh * (-PI_B*0.5));
852     s = ddadd2_d2_d2_d(s,  ql * (-PI_B*0.5));
853     s = ddadd2_d2_d2_d(s, dqh * (-PI_C*0.5));
854     s = ddadd2_d2_d2_d(s,  ql * (-PI_C*0.5));
855     s = ddadd_d2_d2_d(s, (dqh + ql) * (-PI_D*0.5));
856   }
857 
858   t = s;
859   s = ddsqu_d2_d2(s);
860 
861   u = 2.72052416138529567917983e-15;
862   u = mla(u, s.x, -7.6429259411395447190023e-13);
863   u = mla(u, s.x, 1.60589370117277896211623e-10);
864   u = mla(u, s.x, -2.5052106814843123359368e-08);
865   u = mla(u, s.x, 2.75573192104428224777379e-06);
866   u = mla(u, s.x, -0.000198412698412046454654947);
867   u = mla(u, s.x, 0.00833333333333318056201922);
868 
869   x = ddadd_d2_d_d2(1, ddmul_d2_d2_d2(ddadd_d2_d_d(-0.166666666666666657414808, u * s.x), s));
870 
871   u = ddmul_d_d2_d2(t, x);
872 
873   if ((((int)ql) & 2) == 0) u = -u;
874   if (!xisinf(d) && d > TRIGRANGEMAX) u = 1.0;
875 
876   return u;
877 }
878 
xsincos(double d)879 EXPORT CONST Sleef_double2 xsincos(double d) {
880   double u, s, t;
881   Sleef_double2 r;
882 
883   s = d;
884 
885   double dqh = trunck(d * ((2 * M_1_PI) / (1 << 24))) * (double)(1 << 24);
886   int ql = rintk(d * (2 * M_1_PI) - dqh);
887 
888   s = mla(dqh, -PI_A * 0.5, s);
889   s = mla( ql, -PI_A * 0.5, s);
890   s = mla(dqh, -PI_B * 0.5, s);
891   s = mla( ql, -PI_B * 0.5, s);
892   s = mla(dqh, -PI_C * 0.5, s);
893   s = mla( ql, -PI_C * 0.5, s);
894   s = mla(dqh + ql, -PI_D * 0.5, s);
895 
896   t = s;
897 
898   s = s * s;
899 
900   u = 1.58938307283228937328511e-10;
901   u = mla(u, s, -2.50506943502539773349318e-08);
902   u = mla(u, s, 2.75573131776846360512547e-06);
903   u = mla(u, s, -0.000198412698278911770864914);
904   u = mla(u, s, 0.0083333333333191845961746);
905   u = mla(u, s, -0.166666666666666130709393);
906   u = u * s * t;
907 
908   r.x = t + u;
909 
910   if (xisnegzero(d)) r.x = -0.0;
911 
912   u = -1.13615350239097429531523e-11;
913   u = mla(u, s, 2.08757471207040055479366e-09);
914   u = mla(u, s, -2.75573144028847567498567e-07);
915   u = mla(u, s, 2.48015872890001867311915e-05);
916   u = mla(u, s, -0.00138888888888714019282329);
917   u = mla(u, s, 0.0416666666666665519592062);
918   u = mla(u, s, -0.5);
919 
920   r.y = u * s + 1;
921 
922   if ((ql & 1) != 0) { s = r.y; r.y = r.x; r.x = s; }
923   if ((ql & 2) != 0) { r.x = -r.x; }
924   if (((ql+1) & 2) != 0) { r.y = -r.y; }
925 
926   if (fabsk(d) > TRIGRANGEMAX) { r.x = 0; r.y = 1; }
927   if (xisinf(d)) { r.x = r.y = SLEEF_NAN; }
928 
929   return r;
930 }
931 
xsincos_u1(double d)932 EXPORT CONST Sleef_double2 xsincos_u1(double d) {
933   double u;
934   Sleef_double2 r, s, t, x;
935   int ql;
936 
937   if (fabsk(d) < TRIGRANGEMAX2) {
938     ql = rintk(d * (2 * M_1_PI));
939     u = mla(ql, -PI_A2*0.5, d);
940     s = ddadd_d2_d_d (u,  ql * (-PI_B2*0.5));
941   } else {
942     const double dqh = trunck(d * ((2 * M_1_PI) / (1 << 24))) * (double)(1 << 24);
943     ql = rintk(d * (2 * M_1_PI) - dqh);
944 
945     u = mla(dqh, -PI_A*0.5, d);
946     s = ddadd_d2_d_d(u, ql * (-PI_A*0.5));
947     s = ddadd2_d2_d2_d(s, dqh * (-PI_B*0.5));
948     s = ddadd2_d2_d2_d(s, ql * (-PI_B*0.5));
949     s = ddadd2_d2_d2_d(s, dqh * (-PI_C*0.5));
950     s = ddadd2_d2_d2_d(s, ql * (-PI_C*0.5));
951     s = ddadd_d2_d2_d(s, (dqh + ql) * (-PI_D*0.5));
952   }
953 
954   t = s;
955 
956   s.x = ddsqu_d_d2(s);
957 
958   u = 1.58938307283228937328511e-10;
959   u = mla(u, s.x, -2.50506943502539773349318e-08);
960   u = mla(u, s.x, 2.75573131776846360512547e-06);
961   u = mla(u, s.x, -0.000198412698278911770864914);
962   u = mla(u, s.x, 0.0083333333333191845961746);
963   u = mla(u, s.x, -0.166666666666666130709393);
964 
965   u *= s.x * t.x;
966 
967   x = ddadd_d2_d2_d(t, u);
968   r.x = x.x + x.y;
969 
970   if (xisnegzero(d)) r.x = -0.0;
971 
972   u = -1.13615350239097429531523e-11;
973   u = mla(u, s.x, 2.08757471207040055479366e-09);
974   u = mla(u, s.x, -2.75573144028847567498567e-07);
975   u = mla(u, s.x, 2.48015872890001867311915e-05);
976   u = mla(u, s.x, -0.00138888888888714019282329);
977   u = mla(u, s.x, 0.0416666666666665519592062);
978   u = mla(u, s.x, -0.5);
979 
980   x = ddadd_d2_d_d2(1, ddmul_d2_d_d(s.x, u));
981   r.y = x.x + x.y;
982 
983   if ((ql & 1) != 0) { u = r.y; r.y = r.x; r.x = u; }
984   if ((ql & 2) != 0) { r.x = -r.x; }
985   if (((ql+1) & 2) != 0) { r.y = -r.y; }
986 
987   if (fabsk(d) > TRIGRANGEMAX) { r.x = 0; r.y = 1; }
988   if (xisinf(d)) { r.x = r.y = SLEEF_NAN; }
989 
990   return r;
991 }
992 
xsincospi_u05(double d)993 EXPORT CONST Sleef_double2 xsincospi_u05(double d) {
994   double u, s, t;
995   Sleef_double2 r, x, s2;
996 
997   u = d * 4;
998   int q = ceilk(u) & ~(int)1;
999 
1000   s = u - (double)q;
1001   t = s;
1002   s = s * s;
1003   s2 = ddmul_d2_d_d(t, t);
1004 
1005   //
1006 
1007   u = -2.02461120785182399295868e-14;
1008   u = mla(u, s, 6.94821830580179461327784e-12);
1009   u = mla(u, s, -1.75724749952853179952664e-09);
1010   u = mla(u, s, 3.13361688966868392878422e-07);
1011   u = mla(u, s, -3.6576204182161551920361e-05);
1012   u = mla(u, s, 0.00249039457019271850274356);
1013   x = ddadd2_d2_d_d2(u * s, dd(-0.0807455121882807852484731, 3.61852475067037104849987e-18));
1014   x = ddadd2_d2_d2_d2(ddmul_d2_d2_d2(s2, x), dd(0.785398163397448278999491, 3.06287113727155002607105e-17));
1015 
1016   x = ddmul_d2_d2_d(x, t);
1017   r.x = x.x + x.y;
1018 
1019   if (xisnegzero(d)) r.x = -0.0;
1020 
1021   //
1022 
1023   u = 9.94480387626843774090208e-16;
1024   u = mla(u, s, -3.89796226062932799164047e-13);
1025   u = mla(u, s, 1.15011582539996035266901e-10);
1026   u = mla(u, s, -2.4611369501044697495359e-08);
1027   u = mla(u, s, 3.59086044859052754005062e-06);
1028   u = mla(u, s, -0.000325991886927389905997954);
1029   x = ddadd2_d2_d_d2(u * s, dd(0.0158543442438155018914259, -1.04693272280631521908845e-18));
1030   x = ddadd2_d2_d2_d2(ddmul_d2_d2_d2(s2, x), dd(-0.308425137534042437259529, -1.95698492133633550338345e-17));
1031 
1032   x = ddadd2_d2_d2_d(ddmul_d2_d2_d2(x, s2), 1);
1033   r.y = x.x + x.y;
1034 
1035   //
1036 
1037   if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; }
1038   if ((q & 4) != 0) { r.x = -r.x; }
1039   if (((q+2) & 4) != 0) { r.y = -r.y; }
1040 
1041   if (fabsk(d) > TRIGRANGEMAX3/4) { r.x = 0; r.y = 1; }
1042   if (xisinf(d)) { r.x = r.y = SLEEF_NAN; }
1043 
1044   return r;
1045 }
1046 
xsincospi_u35(double d)1047 EXPORT CONST Sleef_double2 xsincospi_u35(double d) {
1048   double u, s, t;
1049   Sleef_double2 r;
1050 
1051   u = d * 4;
1052   int q = ceilk(u) & ~(int)1;
1053 
1054   s = u - (double)q;
1055   t = s;
1056   s = s * s;
1057 
1058   //
1059 
1060   u = +0.6880638894766060136e-11;
1061   u = mla(u, s, -0.1757159564542310199e-8);
1062   u = mla(u, s, +0.3133616327257867311e-6);
1063   u = mla(u, s, -0.3657620416388486452e-4);
1064   u = mla(u, s, +0.2490394570189932103e-2);
1065   u = mla(u, s, -0.8074551218828056320e-1);
1066   u = mla(u, s, +0.7853981633974482790e+0);
1067 
1068   r.x = u * t;
1069 
1070   //
1071 
1072   u = -0.3860141213683794352e-12;
1073   u = mla(u, s, +0.1150057888029681415e-9);
1074   u = mla(u, s, -0.2461136493006663553e-7);
1075   u = mla(u, s, +0.3590860446623516713e-5);
1076   u = mla(u, s, -0.3259918869269435942e-3);
1077   u = mla(u, s, +0.1585434424381541169e-1);
1078   u = mla(u, s, -0.3084251375340424373e+0);
1079   u = mla(u, s, 1);
1080 
1081   r.y = u;
1082 
1083   //
1084 
1085   if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; }
1086   if ((q & 4) != 0) { r.x = -r.x; }
1087   if (((q+2) & 4) != 0) { r.y = -r.y; }
1088 
1089   if (fabsk(d) > TRIGRANGEMAX3/4) { r.x = 0; r.y = 1; }
1090   if (xisinf(d)) { r.x = r.y = SLEEF_NAN; }
1091 
1092   return r;
1093 }
1094 
sinpik(double d)1095 static INLINE CONST Sleef_double2 sinpik(double d) {
1096   double u, s, t;
1097   Sleef_double2 x, s2;
1098 
1099   u = d * 4;
1100   int q = ceilk(u) & ~1;
1101   int o = (q & 2) != 0;
1102 
1103   s = u - (double)q;
1104   t = s;
1105   s = s * s;
1106   s2 = ddmul_d2_d_d(t, t);
1107 
1108   //
1109 
1110   u = o ? 9.94480387626843774090208e-16 : -2.02461120785182399295868e-14;
1111   u = mla(u, s, o ? -3.89796226062932799164047e-13 : 6.94821830580179461327784e-12);
1112   u = mla(u, s, o ? 1.15011582539996035266901e-10 : -1.75724749952853179952664e-09);
1113   u = mla(u, s, o ? -2.4611369501044697495359e-08 : 3.13361688966868392878422e-07);
1114   u = mla(u, s, o ? 3.59086044859052754005062e-06 : -3.6576204182161551920361e-05);
1115   u = mla(u, s, o ? -0.000325991886927389905997954 : 0.00249039457019271850274356);
1116   x = ddadd2_d2_d_d2(u * s, o ? dd(0.0158543442438155018914259, -1.04693272280631521908845e-18) :
1117          dd(-0.0807455121882807852484731, 3.61852475067037104849987e-18));
1118   x = ddadd2_d2_d2_d2(ddmul_d2_d2_d2(s2, x), o ? dd(-0.308425137534042437259529, -1.95698492133633550338345e-17) :
1119           dd(0.785398163397448278999491, 3.06287113727155002607105e-17));
1120 
1121   x = ddmul_d2_d2_d2(x, o ? s2 : dd(t, 0));
1122   x = o ? ddadd2_d2_d2_d(x, 1) : x;
1123 
1124   //
1125 
1126   if ((q & 4) != 0) { x.x = -x.x; x.y = -x.y; }
1127 
1128   return x;
1129 }
1130 
xsinpi_u05(double d)1131 EXPORT CONST double xsinpi_u05(double d) {
1132   Sleef_double2 x = sinpik(d);
1133   double r = x.x + x.y;
1134 
1135   if (xisnegzero(d)) r = -0.0;
1136   if (fabsk(d) > TRIGRANGEMAX3/4) r = 0;
1137   if (xisinf(d)) r = SLEEF_NAN;
1138 
1139   return r;
1140 }
1141 
cospik(double d)1142 static INLINE CONST Sleef_double2 cospik(double d) {
1143   double u, s, t;
1144   Sleef_double2 x, s2;
1145 
1146   u = d * 4;
1147   int q = ceilk(u) & ~1;
1148   int o = (q & 2) == 0;
1149 
1150   s = u - (double)q;
1151   t = s;
1152   s = s * s;
1153   s2 = ddmul_d2_d_d(t, t);
1154 
1155   //
1156 
1157   u = o ? 9.94480387626843774090208e-16 : -2.02461120785182399295868e-14;
1158   u = mla(u, s, o ? -3.89796226062932799164047e-13 : 6.94821830580179461327784e-12);
1159   u = mla(u, s, o ? 1.15011582539996035266901e-10 : -1.75724749952853179952664e-09);
1160   u = mla(u, s, o ? -2.4611369501044697495359e-08 : 3.13361688966868392878422e-07);
1161   u = mla(u, s, o ? 3.59086044859052754005062e-06 : -3.6576204182161551920361e-05);
1162   u = mla(u, s, o ? -0.000325991886927389905997954 : 0.00249039457019271850274356);
1163   x = ddadd2_d2_d_d2(u * s, o ? dd(0.0158543442438155018914259, -1.04693272280631521908845e-18) :
1164          dd(-0.0807455121882807852484731, 3.61852475067037104849987e-18));
1165   x = ddadd2_d2_d2_d2(ddmul_d2_d2_d2(s2, x), o ? dd(-0.308425137534042437259529, -1.95698492133633550338345e-17) :
1166           dd(0.785398163397448278999491, 3.06287113727155002607105e-17));
1167 
1168   x = ddmul_d2_d2_d2(x, o ? s2 : dd(t, 0));
1169   x = o ? ddadd2_d2_d2_d(x, 1) : x;
1170 
1171   //
1172 
1173   if (((q+2) & 4) != 0) { x.x = -x.x; x.y = -x.y; }
1174 
1175   return x;
1176 }
1177 
xcospi_u05(double d)1178 EXPORT CONST double xcospi_u05(double d) {
1179   Sleef_double2 x = cospik(d);
1180   double r = x.x + x.y;
1181 
1182   if (fabsk(d) > TRIGRANGEMAX3/4) r = 1;
1183   if (xisinf(d)) r = SLEEF_NAN;
1184 
1185   return r;
1186 }
1187 
xtan(double d)1188 EXPORT CONST double xtan(double d) {
1189   double u, s, x;
1190 
1191   double dqh = trunck(d * ((2 * M_1_PI) / (1 << 24))) * (double)(1 << 24);
1192   int ql = rintk(d * (2 * M_1_PI) - dqh);
1193 
1194   x = mla(dqh, -PI_A * 0.5, d);
1195   x = mla( ql, -PI_A * 0.5, x);
1196   x = mla(dqh, -PI_B * 0.5, x);
1197   x = mla( ql, -PI_B * 0.5, x);
1198   x = mla(dqh, -PI_C * 0.5, x);
1199   x = mla( ql, -PI_C * 0.5, x);
1200   x = mla(dqh + ql, -PI_D * 0.5, x);
1201 
1202   s = x * x;
1203 
1204   if ((ql & 1) != 0) x = -x;
1205 
1206   u = 9.99583485362149960784268e-06;
1207   u = mla(u, s, -4.31184585467324750724175e-05);
1208   u = mla(u, s, 0.000103573238391744000389851);
1209   u = mla(u, s, -0.000137892809714281708733524);
1210   u = mla(u, s, 0.000157624358465342784274554);
1211   u = mla(u, s, -6.07500301486087879295969e-05);
1212   u = mla(u, s, 0.000148898734751616411290179);
1213   u = mla(u, s, 0.000219040550724571513561967);
1214   u = mla(u, s, 0.000595799595197098359744547);
1215   u = mla(u, s, 0.00145461240472358871965441);
1216   u = mla(u, s, 0.0035923150771440177410343);
1217   u = mla(u, s, 0.00886321546662684547901456);
1218   u = mla(u, s, 0.0218694899718446938985394);
1219   u = mla(u, s, 0.0539682539049961967903002);
1220   u = mla(u, s, 0.133333333334818976423364);
1221   u = mla(u, s, 0.333333333333320047664472);
1222 
1223   u = mla(s, u * x, x);
1224 
1225   if ((ql & 1) != 0) u = 1.0 / u;
1226 
1227   if (xisinf(d)) u = SLEEF_NAN;
1228 
1229   return u;
1230 }
1231 
xtan_u1(double d)1232 EXPORT CONST double xtan_u1(double d) {
1233   double u;
1234   Sleef_double2 s, t, x;
1235   int ql;
1236 
1237   if (fabsk(d) < TRIGRANGEMAX2) {
1238     ql = rintk(d * (2 * M_1_PI));
1239     u = mla(ql, -PI_A2*0.5, d);
1240     s = ddadd_d2_d_d(u,  ql * (-PI_B2*0.5));
1241   } else {
1242     const double dqh = trunck(d * (M_2_PI / (1 << 24))) * (double)(1 << 24);
1243     s = ddadd2_d2_d2_d(ddmul_d2_d2_d(dd(M_2_PI_H, M_2_PI_L), d), (d < 0 ? -0.5 : 0.5) - dqh);
1244     ql = s.x + s.y;
1245 
1246     u = mla(dqh, -PI_A*0.5, d);
1247     s = ddadd_d2_d_d  (u,  ql * (-PI_A*0.5));
1248     s = ddadd2_d2_d2_d(s, dqh * (-PI_B*0.5));
1249     s = ddadd2_d2_d2_d(s,  ql * (-PI_B*0.5));
1250     s = ddadd2_d2_d2_d(s, dqh * (-PI_C*0.5));
1251     s = ddadd2_d2_d2_d(s,  ql * (-PI_C*0.5));
1252     s = ddadd_d2_d2_d(s, (dqh + ql) * (-PI_D*0.5));
1253   }
1254 
1255   if ((ql & 1) != 0) s = ddneg_d2_d2(s);
1256 
1257   t = s;
1258   s = ddsqu_d2_d2(s);
1259 
1260   u = 1.01419718511083373224408e-05;
1261   u = mla(u, s.x, -2.59519791585924697698614e-05);
1262   u = mla(u, s.x, 5.23388081915899855325186e-05);
1263   u = mla(u, s.x, -3.05033014433946488225616e-05);
1264   u = mla(u, s.x, 7.14707504084242744267497e-05);
1265   u = mla(u, s.x, 8.09674518280159187045078e-05);
1266   u = mla(u, s.x, 0.000244884931879331847054404);
1267   u = mla(u, s.x, 0.000588505168743587154904506);
1268   u = mla(u, s.x, 0.00145612788922812427978848);
1269   u = mla(u, s.x, 0.00359208743836906619142924);
1270   u = mla(u, s.x, 0.00886323944362401618113356);
1271   u = mla(u, s.x, 0.0218694882853846389592078);
1272   u = mla(u, s.x, 0.0539682539781298417636002);
1273   u = mla(u, s.x, 0.133333333333125941821962);
1274 
1275   x = ddadd_d2_d_d2(1, ddmul_d2_d2_d2(ddadd_d2_d_d(0.333333333333334980164153, u * s.x), s));
1276   x = ddmul_d2_d2_d2(t, x);
1277 
1278   if ((ql & 1) != 0) x = ddrec_d2_d2(x);
1279 
1280   u = x.x + x.y;
1281 
1282   if (!xisinf(d) && (xisnegzero(d) || fabsk(d) > TRIGRANGEMAX)) u = -0.0;
1283 
1284   return u;
1285 }
1286 
xlog(double d)1287 EXPORT CONST double xlog(double d) {
1288   double x, x2, t, m;
1289   int e;
1290 
1291   int o = d < DBL_MIN;
1292   if (o) d *= (double)(1LL << 32) * (double)(1LL << 32);
1293 
1294   e = ilogb2k(d * (1.0/0.75));
1295   m = ldexp3k(d, -e);
1296 
1297   if (o) e -= 64;
1298 
1299   x = (m-1) / (m+1);
1300   x2 = x * x;
1301 
1302   t = 0.153487338491425068243146;
1303   t = mla(t, x2, 0.152519917006351951593857);
1304   t = mla(t, x2, 0.181863266251982985677316);
1305   t = mla(t, x2, 0.222221366518767365905163);
1306   t = mla(t, x2, 0.285714294746548025383248);
1307   t = mla(t, x2, 0.399999999950799600689777);
1308   t = mla(t, x2, 0.6666666666667778740063);
1309   t = mla(t, x2, 2);
1310 
1311   x = x * t + 0.693147180559945286226764 * e;
1312 
1313   if (xisinf(d)) x = SLEEF_INFINITY;
1314   if (d < 0 || xisnan(d)) x = SLEEF_NAN;
1315   if (d == 0) x = -SLEEF_INFINITY;
1316 
1317   return x;
1318 }
1319 
xexp(double d)1320 EXPORT CONST double xexp(double d) {
1321   int q = (int)rintk(d * R_LN2);
1322   double s, u;
1323 
1324   s = mla(q, -L2U, d);
1325   s = mla(q, -L2L, s);
1326 
1327   u = 2.08860621107283687536341e-09;
1328   u = mla(u, s, 2.51112930892876518610661e-08);
1329   u = mla(u, s, 2.75573911234900471893338e-07);
1330   u = mla(u, s, 2.75572362911928827629423e-06);
1331   u = mla(u, s, 2.4801587159235472998791e-05);
1332   u = mla(u, s, 0.000198412698960509205564975);
1333   u = mla(u, s, 0.00138888888889774492207962);
1334   u = mla(u, s, 0.00833333333331652721664984);
1335   u = mla(u, s, 0.0416666666666665047591422);
1336   u = mla(u, s, 0.166666666666666851703837);
1337   u = mla(u, s, 0.5);
1338 
1339   u = s * s * u + s + 1;
1340   u = ldexp2k(u, q);
1341 
1342   if (d > 709.78271114955742909217217426) u = SLEEF_INFINITY;
1343   if (d < -1000) u = 0;
1344 
1345   return u;
1346 }
1347 
logk(double d)1348 static INLINE CONST Sleef_double2 logk(double d) {
1349   Sleef_double2 x, x2, s;
1350   double m, t;
1351   int e;
1352 
1353   int o = d < DBL_MIN;
1354   if (o) d *= (double)(1LL << 32) * (double)(1LL << 32);
1355 
1356   e = ilogb2k(d * (1.0/0.75));
1357   m = ldexp3k(d, -e);
1358 
1359   if (o) e -= 64;
1360 
1361   x = dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m));
1362   x2 = ddsqu_d2_d2(x);
1363 
1364   t = 0.116255524079935043668677;
1365   t = mla(t, x2.x, 0.103239680901072952701192);
1366   t = mla(t, x2.x, 0.117754809412463995466069);
1367   t = mla(t, x2.x, 0.13332981086846273921509);
1368   t = mla(t, x2.x, 0.153846227114512262845736);
1369   t = mla(t, x2.x, 0.181818180850050775676507);
1370   t = mla(t, x2.x, 0.222222222230083560345903);
1371   t = mla(t, x2.x, 0.285714285714249172087875);
1372   t = mla(t, x2.x, 0.400000000000000077715612);
1373   Sleef_double2 c = dd(0.666666666666666629659233, 3.80554962542412056336616e-17);
1374 
1375   s = ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), e);
1376   s = ddadd_d2_d2_d2(s, ddscale_d2_d2_d(x, 2));
1377   s = ddadd_d2_d2_d2(s, ddmul_d2_d2_d2(ddmul_d2_d2_d2(x2, x),
1378                                       ddadd2_d2_d2_d2(ddmul_d2_d2_d(x2, t), c)));
1379   return s;
1380 }
1381 
xlog_u1(double d)1382 EXPORT CONST double xlog_u1(double d) {
1383   Sleef_double2 x, s;
1384   double m, t, x2;
1385   int e;
1386 
1387   int o = d < DBL_MIN;
1388   if (o) d *= (double)(1LL << 32) * (double)(1LL << 32);
1389 
1390   e = ilogb2k(d * (1.0/0.75));
1391   m = ldexp3k(d, -e);
1392 
1393   if (o) e -= 64;
1394 
1395   x = dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m));
1396   x2 = x.x * x.x;
1397 
1398   t = 0.1532076988502701353e+0;
1399   t = mla(t, x2, 0.1525629051003428716e+0);
1400   t = mla(t, x2, 0.1818605932937785996e+0);
1401   t = mla(t, x2, 0.2222214519839380009e+0);
1402   t = mla(t, x2, 0.2857142932794299317e+0);
1403   t = mla(t, x2, 0.3999999999635251990e+0);
1404   t = mla(t, x2, 0.6666666666667333541e+0);
1405 
1406   s = ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), (double)e);
1407   s = ddadd_d2_d2_d2(s, ddscale_d2_d2_d(x, 2));
1408   s = ddadd_d2_d2_d(s, x2 * x.x * t);
1409 
1410   double r = s.x + s.y;
1411 
1412   if (xisinf(d)) r = SLEEF_INFINITY;
1413   if (d < 0 || xisnan(d)) r = SLEEF_NAN;
1414   if (d == 0) r = -SLEEF_INFINITY;
1415 
1416   return r;
1417 }
1418 
expk(Sleef_double2 d)1419 static INLINE CONST double expk(Sleef_double2 d) {
1420   int q = (int)rintk((d.x + d.y) * R_LN2);
1421   Sleef_double2 s, t;
1422   double u;
1423 
1424   s = ddadd2_d2_d2_d(d, q * -L2U);
1425   s = ddadd2_d2_d2_d(s, q * -L2L);
1426 
1427   s = ddnormalize_d2_d2(s);
1428 
1429   u = 2.51069683420950419527139e-08;
1430   u = mla(u, s.x, 2.76286166770270649116855e-07);
1431   u = mla(u, s.x, 2.75572496725023574143864e-06);
1432   u = mla(u, s.x, 2.48014973989819794114153e-05);
1433   u = mla(u, s.x, 0.000198412698809069797676111);
1434   u = mla(u, s.x, 0.0013888888939977128960529);
1435   u = mla(u, s.x, 0.00833333333332371417601081);
1436   u = mla(u, s.x, 0.0416666666665409524128449);
1437   u = mla(u, s.x, 0.166666666666666740681535);
1438   u = mla(u, s.x, 0.500000000000000999200722);
1439 
1440   t = ddadd_d2_d2_d2(s, ddmul_d2_d2_d(ddsqu_d2_d2(s), u));
1441 
1442   t = ddadd_d2_d_d2(1, t);
1443 
1444   u = ldexpk(t.x + t.y, q);
1445 
1446   if (d.x < -1000) u = 0;
1447 
1448   return u;
1449 }
1450 
xpow(double x,double y)1451 EXPORT CONST double xpow(double x, double y) {
1452   int yisint = xisint(y);
1453   int yisodd = yisint && xisodd(y);
1454 
1455   Sleef_double2 d = ddmul_d2_d2_d(logk(fabsk(x)), y);
1456   double result = expk(d);
1457   if (d.x > 709.78271114955742909217217426) result = SLEEF_INFINITY;
1458 
1459   result = xisnan(result) ? SLEEF_INFINITY : result;
1460   result *= (x > 0 ? 1 : (!yisint ? SLEEF_NAN : (yisodd ? -1 : 1)));
1461 
1462   double efx = mulsign(fabsk(x) - 1, y);
1463   if (xisinf(y)) result = efx < 0 ? 0.0 : (efx == 0 ? 1.0 : SLEEF_INFINITY);
1464   if (xisinf(x) || x == 0) result = (yisodd ? sign(x) : 1) * ((x == 0 ? -y : y) < 0 ? 0 : SLEEF_INFINITY);
1465   if (xisnan(x) || xisnan(y)) result = SLEEF_NAN;
1466   if (y == 0 || x == 1) result = 1;
1467 
1468   return result;
1469 }
1470 
xpown(double x,int y)1471 EXPORT CONST double xpown(double x, int y) {
1472   return xpow(x, (double)y);
1473 }
1474 
xpowr(double x,double y)1475 EXPORT CONST double xpowr(double x, double y) {
1476   if (x < 0.0)
1477     return SLEEF_NAN;
1478   if (xisnan(y))
1479     return y;
1480   return xpow(x, y);
1481 }
1482 
expk2(Sleef_double2 d)1483 static INLINE CONST Sleef_double2 expk2(Sleef_double2 d) {
1484   int q = (int)rintk((d.x + d.y) * R_LN2);
1485   Sleef_double2 s, t;
1486   double u;
1487 
1488   s = ddadd2_d2_d2_d(d, q * -L2U);
1489   s = ddadd2_d2_d2_d(s, q * -L2L);
1490 
1491   u = +0.1602472219709932072e-9;
1492   u = mla(u, s.x, +0.2092255183563157007e-8);
1493   u = mla(u, s.x, +0.2505230023782644465e-7);
1494   u = mla(u, s.x, +0.2755724800902135303e-6);
1495   u = mla(u, s.x, +0.2755731892386044373e-5);
1496   u = mla(u, s.x, +0.2480158735605815065e-4);
1497   u = mla(u, s.x, +0.1984126984148071858e-3);
1498   u = mla(u, s.x, +0.1388888888886763255e-2);
1499   u = mla(u, s.x, +0.8333333333333347095e-2);
1500   u = mla(u, s.x, +0.4166666666666669905e-1);
1501 
1502   t = ddadd2_d2_d2_d(ddmul_d2_d2_d(s, u), +0.1666666666666666574e+0);
1503   t = ddadd2_d2_d2_d(ddmul_d2_d2_d2(s, t), 0.5);
1504   t = ddadd2_d2_d2_d2(s, ddmul_d2_d2_d2(ddsqu_d2_d2(s), t));
1505 
1506   t = ddadd2_d2_d_d2(1, t);
1507 
1508   t.x = ldexp2k(t.x, q);
1509   t.y = ldexp2k(t.y, q);
1510 
1511   return d.x < -1000 ? dd(0, 0) : t;
1512 }
1513 
xsinh(double x)1514 EXPORT CONST double xsinh(double x) {
1515   double y = fabsk(x);
1516   Sleef_double2 d = expk2(dd(y, 0));
1517   d = ddsub_d2_d2_d2(d, ddrec_d2_d2(d));
1518   y = (d.x + d.y) * 0.5;
1519 
1520   y = fabsk(x) > 710 ? SLEEF_INFINITY : y;
1521   y = xisnan(y) ? SLEEF_INFINITY : y;
1522   y = mulsign(y, x);
1523   y = xisnan(x) ? SLEEF_NAN : y;
1524 
1525   return y;
1526 }
1527 
xcosh(double x)1528 EXPORT CONST double xcosh(double x) {
1529   double y = fabsk(x);
1530   Sleef_double2 d = expk2(dd(y, 0));
1531   d = ddadd_d2_d2_d2(d, ddrec_d2_d2(d));
1532   y = (d.x + d.y) * 0.5;
1533 
1534   y = fabsk(x) > 710 ? SLEEF_INFINITY : y;
1535   y = xisnan(y) ? SLEEF_INFINITY : y;
1536   y = xisnan(x) ? SLEEF_NAN : y;
1537 
1538   return y;
1539 }
1540 
xtanh(double x)1541 EXPORT CONST double xtanh(double x) {
1542   double y = fabsk(x);
1543   Sleef_double2 d = expk2(dd(y, 0));
1544   Sleef_double2 e = ddrec_d2_d2(d);
1545   d = dddiv_d2_d2_d2(ddsub_d2_d2_d2(d, e), ddadd_d2_d2_d2(d, e));
1546   y = d.x + d.y;
1547 
1548   y = fabsk(x) > 18.714973875 ? 1.0 : y;
1549   y = xisnan(y) ? 1.0 : y;
1550   y = mulsign(y, x);
1551   y = xisnan(x) ? SLEEF_NAN : y;
1552 
1553   return y;
1554 }
1555 
logk2(Sleef_double2 d)1556 static INLINE CONST Sleef_double2 logk2(Sleef_double2 d) {
1557   Sleef_double2 x, x2, m, s;
1558   double t;
1559   int e;
1560 
1561   e = ilogbk(d.x * (1.0/0.75));
1562 
1563   m.x = ldexp2k(d.x, -e);
1564   m.y = ldexp2k(d.y, -e);
1565 
1566   x = dddiv_d2_d2_d2(ddadd2_d2_d2_d(m, -1), ddadd2_d2_d2_d(m, 1));
1567   x2 = ddsqu_d2_d2(x);
1568 
1569   t = 0.13860436390467167910856;
1570   t = mla(t, x2.x, 0.131699838841615374240845);
1571   t = mla(t, x2.x, 0.153914168346271945653214);
1572   t = mla(t, x2.x, 0.181816523941564611721589);
1573   t = mla(t, x2.x, 0.22222224632662035403996);
1574   t = mla(t, x2.x, 0.285714285511134091777308);
1575   t = mla(t, x2.x, 0.400000000000914013309483);
1576   t = mla(t, x2.x, 0.666666666666664853302393);
1577 
1578   s = ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), e);
1579   s = ddadd_d2_d2_d2(s, ddscale_d2_d2_d(x, 2));
1580   s = ddadd_d2_d2_d2(s, ddmul_d2_d2_d(ddmul_d2_d2_d2(x2, x), t));
1581 
1582   return s;
1583 }
1584 
xasinh(double x)1585 EXPORT CONST double xasinh(double x) {
1586   double y = fabsk(x);
1587   Sleef_double2 d;
1588 
1589   d = y > 1 ? ddrec_d2_d(x) : dd(y, 0);
1590   d = ddsqrt_d2_d2(ddadd2_d2_d2_d(ddsqu_d2_d2(d), 1));
1591   d = y > 1 ? ddmul_d2_d2_d(d, y) : d;
1592 
1593   d = logk2(ddnormalize_d2_d2(ddadd_d2_d2_d(d, x)));
1594   y = d.x + d.y;
1595 
1596   y = (fabsk(x) > SQRT_DBL_MAX || xisnan(y)) ? mulsign(SLEEF_INFINITY, x) : y;
1597   y = xisnan(x) ? SLEEF_NAN : y;
1598   y = xisnegzero(x) ? -0.0 : y;
1599 
1600   return y;
1601 }
1602 
xacosh(double x)1603 EXPORT CONST double xacosh(double x) {
1604   Sleef_double2 d = logk2(ddadd2_d2_d2_d(ddmul_d2_d2_d2(ddsqrt_d2_d2(ddadd2_d2_d_d(x, 1)), ddsqrt_d2_d2(ddadd2_d2_d_d(x, -1))), x));
1605   double y = d.x + d.y;
1606 
1607   y = (x > SQRT_DBL_MAX || xisnan(y)) ? SLEEF_INFINITY : y;
1608   y = x == 1.0 ? 0.0 : y;
1609   y = x < 1.0 ? SLEEF_NAN : y;
1610   y = xisnan(x) ? SLEEF_NAN : y;
1611 
1612   return y;
1613 }
1614 
xatanh(double x)1615 EXPORT CONST double xatanh(double x) {
1616   double y = fabsk(x);
1617   Sleef_double2 d = logk2(dddiv_d2_d2_d2(ddadd2_d2_d_d(1, y), ddadd2_d2_d_d(1, -y)));
1618   y = y > 1.0 ? SLEEF_NAN : (y == 1.0 ? SLEEF_INFINITY : (d.x + d.y) * 0.5);
1619 
1620   y = mulsign(y, x);
1621   y = (xisinf(x) || xisnan(y)) ? SLEEF_NAN : y;
1622 
1623   return y;
1624 }
1625 
1626 //
1627 
xcbrt(double d)1628 EXPORT CONST double xcbrt(double d) { // max error : 2 ulps
1629   double x, y, q = 1.0;
1630   int e, r;
1631 
1632   e = ilogbk(fabsk(d))+1;
1633   d = ldexp2k(d, -e);
1634   r = (e + 6144) % 3;
1635   q = (r == 1) ? 1.2599210498948731647672106 : q;
1636   q = (r == 2) ? 1.5874010519681994747517056 : q;
1637   q = ldexp2k(q, (e + 6144) / 3 - 2048);
1638 
1639   q = mulsign(q, d);
1640   d = fabsk(d);
1641 
1642   x = -0.640245898480692909870982;
1643   x = mla(x, d, 2.96155103020039511818595);
1644   x = mla(x, d, -5.73353060922947843636166);
1645   x = mla(x, d, 6.03990368989458747961407);
1646   x = mla(x, d, -3.85841935510444988821632);
1647   x = mla(x, d, 2.2307275302496609725722);
1648 
1649   y = x * x; y = y * y; x -= (d * y - x) * (1.0 / 3.0);
1650   y = d * x * x;
1651   y = (y - (2.0 / 3.0) * y * (y * x - 1)) * q;
1652 
1653   return y;
1654 }
1655 
xcbrt_u1(double d)1656 EXPORT CONST double xcbrt_u1(double d) {
1657   double x, y, z;
1658   Sleef_double2 q2 = dd(1, 0), u, v;
1659   int e, r;
1660 
1661   e = ilogbk(fabsk(d))+1;
1662   d = ldexp2k(d, -e);
1663   r = (e + 6144) % 3;
1664   q2 = (r == 1) ? dd(1.2599210498948731907, -2.5899333753005069177e-17) : q2;
1665   q2 = (r == 2) ? dd(1.5874010519681995834, -1.0869008194197822986e-16) : q2;
1666 
1667   q2.x = mulsign(q2.x, d); q2.y = mulsign(q2.y, d);
1668   d = fabsk(d);
1669 
1670   x = -0.640245898480692909870982;
1671   x = mla(x, d, 2.96155103020039511818595);
1672   x = mla(x, d, -5.73353060922947843636166);
1673   x = mla(x, d, 6.03990368989458747961407);
1674   x = mla(x, d, -3.85841935510444988821632);
1675   x = mla(x, d, 2.2307275302496609725722);
1676 
1677   y = x * x; y = y * y; x -= (d * y - x) * (1.0 / 3.0);
1678 
1679   z = x;
1680 
1681   u = ddmul_d2_d_d(x, x);
1682   u = ddmul_d2_d2_d2(u, u);
1683   u = ddmul_d2_d2_d(u, d);
1684   u = ddadd2_d2_d2_d(u, -x);
1685   y = u.x + u.y;
1686 
1687   y = -2.0 / 3.0 * y * z;
1688   v = ddadd2_d2_d2_d(ddmul_d2_d_d(z, z), y);
1689   v = ddmul_d2_d2_d(v, d);
1690   v = ddmul_d2_d2_d2(v, q2);
1691   z = ldexp2k(v.x + v.y, (e + 6144) / 3 - 2048);
1692 
1693   if (xisinf(d)) { z = mulsign(SLEEF_INFINITY, q2.x); }
1694   if (d == 0) { z = mulsign(0, q2.x); }
1695 
1696   return z;
1697 }
1698 
xexp2(double d)1699 EXPORT CONST double xexp2(double d) {
1700   int q = (int)rintk(d);
1701   double s, u;
1702 
1703   s = d - q;
1704 
1705   u = +0.4434359082926529454e-9;
1706   u = mla(u, s, +0.7073164598085707425e-8);
1707   u = mla(u, s, +0.1017819260921760451e-6);
1708   u = mla(u, s, +0.1321543872511327615e-5);
1709   u = mla(u, s, +0.1525273353517584730e-4);
1710   u = mla(u, s, +0.1540353045101147808e-3);
1711   u = mla(u, s, +0.1333355814670499073e-2);
1712   u = mla(u, s, +0.9618129107597600536e-2);
1713   u = mla(u, s, +0.5550410866482046596e-1);
1714   u = mla(u, s, +0.2402265069591012214e+0);
1715   u = mla(u, s, +0.6931471805599452862e+0);
1716   u = ddnormalize_d2_d2(ddadd_d2_d_d2(1, ddmul_d2_d_d(u, s))).x;
1717 
1718   u = ldexp2k(u, q);
1719 
1720   if (d >= 1024) u = SLEEF_INFINITY;
1721   if (d < -2000) u = 0;
1722 
1723   return u;
1724 }
1725 
xexp10(double d)1726 EXPORT CONST double xexp10(double d) {
1727   int q = (int)rintk(d * LOG10_2);
1728   double s, u;
1729 
1730   s = mla(q, -L10U, d);
1731   s = mla(q, -L10L, s);
1732 
1733   u = +0.2411463498334267652e-3;
1734   u = mla(u, s, +0.1157488415217187375e-2);
1735   u = mla(u, s, +0.5013975546789733659e-2);
1736   u = mla(u, s, +0.1959762320720533080e-1);
1737   u = mla(u, s, +0.6808936399446784138e-1);
1738   u = mla(u, s, +0.2069958494722676234e+0);
1739   u = mla(u, s, +0.5393829292058536229e+0);
1740   u = mla(u, s, +0.1171255148908541655e+1);
1741   u = mla(u, s, +0.2034678592293432953e+1);
1742   u = mla(u, s, +0.2650949055239205876e+1);
1743   u = mla(u, s, +0.2302585092994045901e+1);
1744   u = ddnormalize_d2_d2(ddadd_d2_d_d2(1, ddmul_d2_d_d(u, s))).x;
1745 
1746   u = ldexp2k(u, q);
1747 
1748   if (d > 308.25471555991671) u = SLEEF_INFINITY; // log10(DBL_MAX)
1749   if (d < -350) u = 0;
1750 
1751   return u;
1752 }
1753 
xexpm1(double a)1754 EXPORT CONST double xexpm1(double a) {
1755   Sleef_double2 d = ddadd2_d2_d2_d(expk2(dd(a, 0)), -1.0);
1756   double x = d.x + d.y;
1757   if (a > 709.782712893383996732223) x = SLEEF_INFINITY; // log(DBL_MAX)
1758   if (a < -36.736800569677101399113302437) x = -1; // log(1 - nexttoward(1, 0))
1759   if (xisnegzero(a)) x = -0.0;
1760   return x;
1761 }
1762 
xlog10(double d)1763 EXPORT CONST double xlog10(double d) {
1764   Sleef_double2 x, s;
1765   double m, t, x2;
1766   int e;
1767 
1768   int o = d < DBL_MIN;
1769   if (o) d *= (double)(1LL << 32) * (double)(1LL << 32);
1770 
1771   e = ilogb2k(d * (1.0/0.75));
1772   m = ldexp3k(d, -e);
1773 
1774   if (o) e -= 64;
1775 
1776   x = dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m));
1777   x2 = x.x * x.x;
1778 
1779   t = +0.6653725819576758460e-1;
1780   t = mla(t, x2, +0.6625722782820833712e-1);
1781   t = mla(t, x2, +0.7898105214313944078e-1);
1782   t = mla(t, x2, +0.9650955035715275132e-1);
1783   t = mla(t, x2, +0.1240841409721444993e+0);
1784   t = mla(t, x2, +0.1737177927454605086e+0);
1785   t = mla(t, x2, +0.2895296546021972617e+0);
1786 
1787   s = ddmul_d2_d2_d(dd(0.30102999566398119802, -2.803728127785170339e-18), (double)e);
1788   s = ddadd_d2_d2_d2(s, ddmul_d2_d2_d2(x, dd(0.86858896380650363334, 1.1430059694096389311e-17)));
1789   s = ddadd_d2_d2_d(s, x2 * x.x * t);
1790 
1791   double r = s.x + s.y;
1792 
1793   if (xisinf(d)) r = SLEEF_INFINITY;
1794   if (d < 0 || xisnan(d)) r = SLEEF_NAN;
1795   if (d == 0) r = -SLEEF_INFINITY;
1796 
1797   return r;
1798 }
1799 
xlog1p_fast(double d)1800 static INLINE CONST double xlog1p_fast(double d) {
1801   Sleef_double2 x, s;
1802   double m, t, x2;
1803   int e;
1804 
1805   double dp1 = d + 1;
1806 
1807   int o = dp1 < DBL_MIN;
1808   if (o) dp1 *= (double)(1LL << 32) * (double)(1LL << 32);
1809 
1810   e = ilogb2k(dp1 * (1.0/0.75));
1811 
1812   t = ldexp3k(1, -e);
1813   m = mla(d, t, t - 1);
1814 
1815   if (o) e -= 64;
1816 
1817   x = dddiv_d2_d2_d2(dd(m, 0), ddadd_d2_d_d(2, m));
1818   x2 = x.x * x.x;
1819 
1820   t = 0.1532076988502701353e+0;
1821   t = mla(t, x2, 0.1525629051003428716e+0);
1822   t = mla(t, x2, 0.1818605932937785996e+0);
1823   t = mla(t, x2, 0.2222214519839380009e+0);
1824   t = mla(t, x2, 0.2857142932794299317e+0);
1825   t = mla(t, x2, 0.3999999999635251990e+0);
1826   t = mla(t, x2, 0.6666666666667333541e+0);
1827 
1828   s = ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), (double)e);
1829   s = ddadd_d2_d2_d2(s, ddscale_d2_d2_d(x, 2));
1830   s = ddadd_d2_d2_d(s, x2 * x.x * t);
1831 
1832   double r = s.x + s.y;
1833 
1834   if (d == SLEEF_INFINITY) r = SLEEF_INFINITY;
1835   if (d < -1) r = SLEEF_NAN;
1836   if (d == -1) r = -SLEEF_INFINITY;
1837   if (xisnegzero(d)) r = -0.0;
1838   if (xisnan(d)) r = d;
1839 
1840   return r;
1841 }
1842 
xlog2(double d)1843 EXPORT CONST double xlog2(double d) {
1844   Sleef_double2 x, s;
1845   double m, t, x2;
1846   int e;
1847 
1848   int o = d < DBL_MIN;
1849   if (o) d *= (double)(1LL << 32) * (double)(1LL << 32);
1850 
1851   e = ilogb2k(d * (1.0/0.75));
1852   m = ldexp3k(d, -e);
1853 
1854   if (o) e -= 64;
1855 
1856   x = dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m));
1857   x2 = x.x * x.x;
1858 
1859   t = +0.2211941750456081490e+0;
1860   t = mla(t, x2, +0.2200768693152277689e+0);
1861   t = mla(t, x2, +0.2623708057488514656e+0);
1862   t = mla(t, x2, +0.3205977477944495502e+0);
1863   t = mla(t, x2, +0.4121985945485324709e+0);
1864   t = mla(t, x2, +0.5770780162997058982e+0);
1865   t = mla(t, x2, +0.96179669392608091449  );
1866   s = ddadd2_d2_d_d2(e, ddmul_d2_d2_d2(x, dd(2.885390081777926774, 6.0561604995516736434e-18)));
1867   s = ddadd2_d2_d2_d(s, x2 * x.x * t);
1868 
1869   double r = s.x + s.y;
1870 
1871   if (xisinf(d)) r = SLEEF_INFINITY;
1872   if (d < 0 || xisnan(d)) r = SLEEF_NAN;
1873   if (d == 0) r = -SLEEF_INFINITY;
1874 
1875   return r;
1876 }
1877 
xlog1p(double d)1878 EXPORT CONST double xlog1p(double d) {
1879   if (d > 0x1.0p+1000)
1880     return xlog(d);
1881   else
1882     return xlog1p_fast(d);
1883 }
1884 
1885 //
1886 
xfma(double x,double y,double z)1887 EXPORT CONST double xfma(double x, double y, double z) {
1888 #if __has_builtin(__builtin_fma)
1889   return __builtin_fma(x, y, z);
1890 #else
1891 #warning Using software FMA
1892   double h2 = x * y + z, q = 1;
1893   if (fabsk(h2) < 1e-300) {
1894     const double c0 = 1ULL << 54, c1 = c0 * c0, c2 = c1 * c1;
1895     x *= c1;
1896     y *= c1;
1897     z *= c2;
1898     q = 1.0 / c2;
1899   }
1900   if (fabsk(h2) > 1e+300) {
1901     const double c0 = 1ULL << 54, c1 = c0 * c0, c2 = c1 * c1;
1902     x *= 1.0 / c1;
1903     y *= 1.0 / c1;
1904     z *= 1. / c2;
1905     q = c2;
1906   }
1907   Sleef_double2 d = ddmul_d2_d_d(x, y);
1908   d = ddadd2_d2_d2_d(d, z);
1909   double ret = (x == 0 || y == 0) ? z : (d.x + d.y);
1910   if ((xisinf(z) && !xisinf(x) && !xisnan(x) && !xisinf(y) && !xisnan(y))) h2 = z;
1911   return (xisinf(h2) || xisnan(h2)) ? h2 : ret*q;
1912 #endif
1913 }
1914 
xsqrt_u05(double d)1915 EXPORT CONST double xsqrt_u05(double d) {
1916 #if __has_builtin(__builtin_sqrt)
1917   return __builtin_sqrt(d);
1918 #else
1919 #warning Using software SQRT
1920   double q = 0.5;
1921 
1922   d = d < 0 ? SLEEF_NAN : d;
1923 
1924   if (d < 8.636168555094445E-78) {
1925     d *= 1.157920892373162E77;
1926     q = 2.9387358770557188E-39 * 0.5;
1927   }
1928 
1929   if (d > 1.3407807929942597e+154) {
1930     d *= 7.4583407312002070e-155;
1931     q = 1.1579208923731620e+77 * 0.5;
1932   }
1933 
1934   // http://en.wikipedia.org/wiki/Fast_inverse_square_root
1935   double x = longBitsToDouble(0x5fe6ec85e7de30da - (doubleToRawLongBits(d + 1e-320) >> 1));
1936 
1937   x = x * (1.5 - 0.5 * d * x * x);
1938   x = x * (1.5 - 0.5 * d * x * x);
1939   x = x * (1.5 - 0.5 * d * x * x) * d;
1940 
1941   Sleef_double2 d2 = ddmul_d2_d2_d2(ddadd2_d2_d_d2(d, ddmul_d2_d_d(x, x)), ddrec_d2_d(x));
1942 
1943   double ret = (d2.x + d2.y) * q;
1944 
1945   ret = d == SLEEF_INFINITY ? SLEEF_INFINITY : ret;
1946   ret = d == 0 ? d : ret;
1947 
1948   return ret;
1949 #endif
1950 }
1951 
xsqrt_u35(double d)1952 EXPORT CONST double xsqrt_u35(double d) { return xsqrt_u05(d); }
xsqrt(double d)1953 EXPORT CONST double xsqrt(double d) { return SQRT(d); }
1954 
xfabs(double x)1955 EXPORT CONST double xfabs(double x) { return fabsk(x); }
1956 
xcopysign(double x,double y)1957 EXPORT CONST double xcopysign(double x, double y) { return copysignk(x, y); }
1958 
xfmax(double x,double y)1959 EXPORT CONST double xfmax(double x, double y) {
1960   return y != y ? x : (x > y ? x : y);
1961 }
1962 
xfmin(double x,double y)1963 EXPORT CONST double xfmin(double x, double y) {
1964   return y != y ? x : (x < y ? x : y);
1965 }
1966 
xfdim(double x,double y)1967 EXPORT CONST double xfdim(double x, double y) {
1968   double ret = x - y;
1969   if (ret < 0 || x == y) ret = 0;
1970   return ret;
1971 }
1972 
xtrunc(double x)1973 EXPORT CONST double xtrunc(double x) {
1974   double fr = x - (double)(1LL << 31) * (int32_t)(x * (1.0 / (1LL << 31)));
1975   fr = fr - (int32_t)fr;
1976   return (xisinf(x) || fabsk(x) >= (double)(1LL << 52)) ? x : copysignk(x - fr, x);
1977 }
1978 
xfloor(double x)1979 EXPORT CONST double xfloor(double x) {
1980   double fr = x - (double)(1LL << 31) * (int32_t)(x * (1.0 / (1LL << 31)));
1981   fr = fr - (int32_t)fr;
1982   fr = fr < 0 ? fr+1.0 : fr;
1983   return (xisinf(x) || fabsk(x) >= (double)(1LL << 52)) ? x : copysignk(x - fr, x);
1984 }
1985 
xceil(double x)1986 EXPORT CONST double xceil(double x) {
1987   double fr = x - (double)(1LL << 31) * (int32_t)(x * (1.0 / (1LL << 31)));
1988   fr = fr - (int32_t)fr;
1989   fr = fr <= 0 ? fr : fr-1.0;
1990   return (xisinf(x) || fabsk(x) >= (double)(1LL << 52)) ? x : copysignk(x - fr, x);
1991 }
1992 
xround(double d)1993 EXPORT CONST double xround(double d) {
1994   double x = d + 0.5;
1995   double fr = x - (double)(1LL << 31) * (int32_t)(x * (1.0 / (1LL << 31)));
1996   fr = fr - (int32_t)fr;
1997   if (fr == 0 && x <= 0) x--;
1998   fr = fr < 0 ? fr+1.0 : fr;
1999   x = d == 0.49999999999999994449 ? 0 : x;  // nextafter(0.5, 0)
2000   return (xisinf(d) || fabsk(d) >= (double)(1LL << 52)) ? d : copysignk(x - fr, d);
2001 }
2002 
xrint(double d)2003 EXPORT CONST double xrint(double d) {
2004   double x = d + 0.5;
2005   double fr = x - (double)(1LL << 31) * (int32_t)(x * (1.0 / (1LL << 31)));
2006   int32_t isodd = (1 & (int32_t)fr) != 0;
2007   fr = fr - (int32_t)fr;
2008   fr = (fr < 0 || (fr == 0 && isodd)) ? fr+1.0 : fr;
2009   x = d == 0.50000000000000011102 ? 0 : x;  // nextafter(0.5, 1)
2010   return (xisinf(d) || fabsk(d) >= (double)(1LL << 52)) ? d : copysignk(x - fr, d);
2011 }
2012 
xhypot_u05(double x,double y)2013 EXPORT CONST double xhypot_u05(double x, double y) {
2014   x = fabsk(x);
2015   y = fabsk(y);
2016   double min = fmink(x, y), n = min;
2017   double max = fmaxk(x, y), d = max;
2018 
2019   if (max < DBL_MIN) { n *= 1ULL << 54; d *= 1ULL << 54; }
2020   Sleef_double2 t = dddiv_d2_d2_d2(dd(n, 0), dd(d, 0));
2021   t = ddmul_d2_d2_d(ddsqrt_d2_d2(ddadd2_d2_d2_d(ddsqu_d2_d2(t), 1)), max);
2022   double ret = t.x + t.y;
2023   if (xisnan(ret)) ret = SLEEF_INFINITY;
2024   if (min == 0) ret = max;
2025   if (xisnan(x) || xisnan(y)) ret = SLEEF_NAN;
2026   if (x == SLEEF_INFINITY || y == SLEEF_INFINITY) ret = SLEEF_INFINITY;
2027   return ret;
2028 }
2029 
xhypot_u35(double x,double y)2030 EXPORT CONST double xhypot_u35(double x, double y) {
2031   x = fabsk(x);
2032   y = fabsk(y);
2033   double min = fmink(x, y);
2034   double max = fmaxk(x, y);
2035 
2036   double t = min / max;
2037   double ret = max * SQRT(1 + t*t);
2038   if (min == 0) ret = max;
2039   if (xisnan(x) || xisnan(y)) ret = SLEEF_NAN;
2040   if (x == SLEEF_INFINITY || y == SLEEF_INFINITY) ret = SLEEF_INFINITY;
2041   return ret;
2042 }
2043 
xnextafter(double x,double y)2044 EXPORT CONST double xnextafter(double x, double y) {
2045   union {
2046     double f;
2047     int64_t i;
2048   } cx;
2049 
2050   x = x == 0 ? mulsign(0, y) : x;
2051   cx.f = x;
2052   int c = (cx.i < 0) == (y < x);
2053   if (c) cx.i = -(cx.i ^ (1ULL << 63));
2054 
2055   if (x != y) cx.i--;
2056 
2057   if (c) cx.i = -(cx.i ^ (1ULL << 63));
2058 
2059   if (cx.f == 0 && x != 0) cx.f = mulsign(0, x);
2060   if (x == 0 && y == 0) cx.f = y;
2061   if (xisnan(x) || xisnan(y)) cx.f = SLEEF_NAN;
2062 
2063   return cx.f;
2064 }
2065 
xfrfrexp(double x)2066 EXPORT CONST double xfrfrexp(double x) {
2067   union {
2068     double f;
2069     uint64_t u;
2070   } cx;
2071 
2072   if (xisnan(x)) return x;
2073 
2074   if (fabsk(x) < DBL_MIN) x *= (1ULL << 63);
2075 
2076   cx.f = x;
2077   cx.u &= ~0x7ff0000000000000ULL;
2078   cx.u |=  0x3fe0000000000000ULL;
2079 
2080   if (xisinf(x)) cx.f = mulsign(SLEEF_INFINITY, x);
2081   if (x == 0) cx.f = x;
2082 
2083   return cx.f;
2084 }
2085 
xexpfrexp(double x)2086 EXPORT CONST int xexpfrexp(double x) {
2087   union {
2088     double f;
2089     uint64_t u;
2090   } cx;
2091 
2092   int ret = 0;
2093 
2094   if (fabsk(x) < DBL_MIN) { x *= (1ULL << 63); ret = -63; }
2095 
2096   cx.f = x;
2097   ret += (int32_t)(((cx.u >> 52) & 0x7ff)) - 0x3fe;
2098 
2099   if (x == 0 || xisnan(x) || xisinf(x)) ret = 0;
2100 
2101   return ret;
2102 }
2103 
toward0(double d)2104 static INLINE CONST double toward0(double d) {
2105   return d == 0 ? 0 : longBitsToDouble(doubleToRawLongBits(d)-1);
2106 }
2107 
removelsb(double d)2108 static INLINE CONST double removelsb(double d) {
2109   return longBitsToDouble(doubleToRawLongBits(d) & 0xfffffffffffffffeLL);
2110 }
2111 
ptrunc(double x)2112 static INLINE CONST double ptrunc(double x) {
2113   double fr = mla(-(double)(1LL << 31), (int32_t)(x * (1.0 / (1LL << 31))), x);
2114   return fabsk(x) >= (double)(1LL << 52) ? x : (x - (fr - (int32_t)fr));
2115 }
2116 
xfmod(double x,double y)2117 EXPORT CONST double xfmod(double x, double y) {
2118   double nu = fabsk(x), de = fabsk(y), s = 1, q;
2119   if (de < DBL_MIN) { nu *= 1ULL << 54; de *= 1ULL << 54; s = 1.0 / (1ULL << 54); }
2120   Sleef_double2 r = dd(nu, 0);
2121   double rde = toward0(1.0 / de);
2122 
2123   for(int i=0;i < 21;i++) { // ceil(log2(DBL_MAX) / 51) + 1
2124     q = (de+de > r.x && r.x >= de) ? 1 : (toward0(r.x) * rde);
2125     r = ddnormalize_d2_d2(ddadd2_d2_d2_d2(r, ddmul_d2_d_d(removelsb(ptrunc(q)), -de)));
2126     if (r.x < de) break;
2127   }
2128 
2129   double ret = r.x * s;
2130   if (r.x + r.y == de) ret = 0;
2131   ret = mulsign(ret, x);
2132   if (nu < de) ret = x;
2133   if (de == 0) ret = SLEEF_NAN;
2134 
2135   return ret;
2136 }
2137 
2138 
xmodf(double x)2139 EXPORT CONST Sleef_double2 xmodf(double x) {
2140   double fr = x - (double)(1LL << 31) * (int32_t)(x * (1.0 / (1LL << 31)));
2141   fr = fr - (int32_t)fr;
2142   fr = fabsk(x) >= (double)(1LL << 52) ? 0 : fr;
2143   Sleef_double2 ret = { copysignk(fr, x), copysignk(x - fr, x) };
2144   return ret;
2145 }
2146 
2147 typedef struct {
2148   Sleef_double2 a, b;
2149 } dd2;
2150 
gammak(double a)2151 static CONST dd2 gammak(double a) {
2152   Sleef_double2 clc = dd(0, 0), clln = dd(1, 0), clld = dd(1, 0), v = dd(1, 0), x, y, z;
2153   double t, u;
2154 
2155   int otiny = fabsk(a) < 1e-306, oref = a < 0.5;
2156 
2157   x = otiny ? dd(0, 0) : (oref ? ddadd2_d2_d_d(1, -a) : dd(a, 0));
2158 
2159   int o0 = (0.5 <= x.x && x.x <= 1.1), o2 = 2.3 < x.x;
2160 
2161   y = ddnormalize_d2_d2(ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, 1), x));
2162   y = ddnormalize_d2_d2(ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, 2), y));
2163   y = ddnormalize_d2_d2(ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, 3), y));
2164   y = ddnormalize_d2_d2(ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, 4), y));
2165 
2166   clln = (o2 && x.x <= 7) ? y : clln;
2167 
2168   x = (o2 && x.x <= 7) ? ddadd2_d2_d2_d(x, 5) : x;
2169   t = o2 ? (1.0 / x.x) : ddnormalize_d2_d2(ddadd2_d2_d2_d(x, o0 ? -1 : -2)).x;
2170 
2171   u = o2 ? -156.801412704022726379848862 : (o0 ? +0.2947916772827614196e+2 : +0.7074816000864609279e-7);
2172   u = mla(u, t, o2 ? +1.120804464289911606838558160000 : (o0 ? +0.1281459691827820109e+3 : +0.4009244333008730443e-6));
2173   u = mla(u, t, o2 ? +13.39798545514258921833306020000 : (o0 ? +0.2617544025784515043e+3 : +0.1040114641628246946e-5));
2174   u = mla(u, t, o2 ? -0.116546276599463200848033357000 : (o0 ? +0.3287022855685790432e+3 : +0.1508349150733329167e-5));
2175   u = mla(u, t, o2 ? -1.391801093265337481495562410000 : (o0 ? +0.2818145867730348186e+3 : +0.1288143074933901020e-5));
2176   u = mla(u, t, o2 ? +0.015056113040026424412918973400 : (o0 ? +0.1728670414673559605e+3 : +0.4744167749884993937e-6));
2177   u = mla(u, t, o2 ? +0.179540117061234856098844714000 : (o0 ? +0.7748735764030416817e+2 : -0.6554816306542489902e-7));
2178   u = mla(u, t, o2 ? -0.002481743600264997730942489280 : (o0 ? +0.2512856643080930752e+2 : -0.3189252471452599844e-6));
2179   u = mla(u, t, o2 ? -0.029527880945699120504851034100 : (o0 ? +0.5766792106140076868e+1 : +0.1358883821470355377e-6));
2180   u = mla(u, t, o2 ? +0.000540164767892604515196325186 : (o0 ? +0.7270275473996180571e+0 : -0.4343931277157336040e-6));
2181   u = mla(u, t, o2 ? +0.006403362833808069794787256200 : (o0 ? +0.8396709124579147809e-1 : +0.9724785897406779555e-6));
2182   u = mla(u, t, o2 ? -0.000162516262783915816896611252 : (o0 ? -0.8211558669746804595e-1 : -0.2036886057225966011e-5));
2183   u = mla(u, t, o2 ? -0.001914438498565477526465972390 : (o0 ? +0.6828831828341884458e-1 : +0.4373363141819725815e-5));
2184   u = mla(u, t, o2 ? +7.20489541602001055898311517e-05 : (o0 ? -0.7712481339961671511e-1 : -0.9439951268304008677e-5));
2185   u = mla(u, t, o2 ? +0.000839498720672087279971000786 : (o0 ? +0.8337492023017314957e-1 : +0.2050727030376389804e-4));
2186   u = mla(u, t, o2 ? -5.17179090826059219329394422e-05 : (o0 ? -0.9094964931456242518e-1 : -0.4492620183431184018e-4));
2187   u = mla(u, t, o2 ? -0.000592166437353693882857342347 : (o0 ? +0.1000996313575929358e+0 : +0.9945751236071875931e-4));
2188   u = mla(u, t, o2 ? +6.97281375836585777403743539e-05 : (o0 ? -0.1113342861544207724e+0 : -0.2231547599034983196e-3));
2189   u = mla(u, t, o2 ? +0.000784039221720066627493314301 : (o0 ? +0.1255096673213020875e+0 : +0.5096695247101967622e-3));
2190   u = mla(u, t, o2 ? -0.000229472093621399176949318732 : (o0 ? -0.1440498967843054368e+0 : -0.1192753911667886971e-2));
2191   u = mla(u, t, o2 ? -0.002681327160493827160473958490 : (o0 ? +0.1695571770041949811e+0 : +0.2890510330742210310e-2));
2192   u = mla(u, t, o2 ? +0.003472222222222222222175164840 : (o0 ? -0.2073855510284092762e+0 : -0.7385551028674461858e-2));
2193   u = mla(u, t, o2 ? +0.083333333333333333335592087900 : (o0 ? +0.2705808084277815939e+0 : +0.2058080842778455335e-1));
2194 
2195   y = ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, -0.5), logk2(x));
2196   y = ddadd2_d2_d2_d2(y, ddneg_d2_d2(x));
2197   y = ddadd2_d2_d2_d2(y, dd(0.91893853320467278056, -3.8782941580672414498e-17)); // 0.5*log(2*M_PI)
2198 
2199   z = ddadd2_d2_d2_d(ddmul_d2_d_d (u, t), o0 ? -0.4006856343865314862e+0 : -0.6735230105319810201e-1);
2200   z = ddadd2_d2_d2_d(ddmul_d2_d2_d(z, t), o0 ? +0.8224670334241132030e+0 : +0.3224670334241132030e+0);
2201   z = ddadd2_d2_d2_d(ddmul_d2_d2_d(z, t), o0 ? -0.5772156649015328655e+0 : +0.4227843350984671345e+0);
2202   z = ddmul_d2_d2_d(z, t);
2203 
2204   clc = o2 ? y : z;
2205 
2206   clld = o2 ? ddadd2_d2_d2_d(ddmul_d2_d_d(u, t), 1) : clld;
2207 
2208   y = clln;
2209 
2210   clc = otiny ? dd(83.1776616671934334590333, 3.67103459631568507221878e-15) : // log(2^120)
2211     (oref ? ddadd2_d2_d2_d2(dd(1.1447298858494001639, 1.026595116270782638e-17), ddneg_d2_d2(clc)) : clc); // log(M_PI)
2212   clln = otiny ? dd(1, 0) : (oref ? clln : clld);
2213 
2214   if (oref) x = ddmul_d2_d2_d2(clld, sinpik(a - (double)(1LL << 28) * (int32_t)(a * (1.0 / (1LL << 28)))));
2215 
2216   clld = otiny ? dd(a*((1LL << 60)*(double)(1LL << 60)), 0) : (oref ? x : y);
2217 
2218   dd2 ret = { clc, dddiv_d2_d2_d2(clln, clld) };
2219 
2220   return ret;
2221 }
2222 
xtgamma_u1(double a)2223 EXPORT CONST double xtgamma_u1(double a) {
2224   dd2 d = gammak(a);
2225   Sleef_double2 y = ddmul_d2_d2_d2(expk2(d.a), d.b);
2226   double r = y.x + y.y;
2227   r = (a == -SLEEF_INFINITY || (a < 0 && xisint(a)) || (xisnumber(a) && a < 0 && xisnan(r))) ? SLEEF_NAN : r;
2228   r = ((a == SLEEF_INFINITY || xisnumber(a)) && a >= -DBL_MIN && (a == 0 || a > 200 || xisnan(r))) ? mulsign(SLEEF_INFINITY, a) : r;
2229   return r;
2230 }
2231 
xlgamma_u1(double a)2232 EXPORT CONST double xlgamma_u1(double a) {
2233   dd2 d = gammak(a);
2234   Sleef_double2 y = ddadd2_d2_d2_d2(d.a, logk2(ddabs_d2_d2(d.b)));
2235   double r = y.x + y.y;
2236   r = (xisinf(a) || (a <= 0 && xisint(a)) || (xisnumber(a) && xisnan(r))) ? SLEEF_INFINITY : r;
2237   return r;
2238 }
2239 
xlgamma_r_u1(double a)2240 EXPORT CONST Sleef_double2 xlgamma_r_u1(double a) {
2241   dd2 d = gammak(a);
2242   Sleef_double2 y = ddadd2_d2_d2_d2(d.a, logk2(ddabs_d2_d2(d.b)));
2243   double r = y.x + y.y;
2244   r = (xisinf(a) || (a <= 0 && xisint(a)) || (xisnumber(a) && xisnan(r))) ? SLEEF_INFINITY : r;
2245   Sleef_double2 ret;
2246   ret.x = r;
2247   ret.y = longBitsToDouble((doubleToRawLongBits(d.b.x) & (1LL << 63)) | (0x3ff0000000000000LL));
2248   return ret;
2249 }
2250 
2251 
xerf_u1(double a)2252 EXPORT CONST double xerf_u1(double a) {
2253   double s = a, t, u;
2254   Sleef_double2 d;
2255 
2256   a = fabsk(a);
2257   int o0 = a < 1.0, o1 = a < 3.7, o2 = a < 6.0;
2258   u = o0 ? (a*a) : a;
2259 
2260   t = o0 ? +0.6801072401395392157e-20 : o1 ? +0.2830954522087717660e-13 : -0.5846750404269610493e-17;
2261   t = mla(t, u, o0 ? -0.2161766247570056391e-18 : o1 ? -0.1509491946179481940e-11 : +0.6076691048812607898e-15);
2262   t = mla(t, u, o0 ? +0.4695919173301598752e-17 : o1 ? +0.3827857177807173152e-10 : -0.3007518609604893831e-13);
2263   t = mla(t, u, o0 ? -0.9049140419888010819e-16 : o1 ? -0.6139733921558987241e-09 : +0.9427906260824646063e-12);
2264   t = mla(t, u, o0 ? +0.1634018903557411517e-14 : o1 ? +0.6985387934608038824e-08 : -0.2100110908269393629e-10);
2265   t = mla(t, u, o0 ? -0.2783485786333455216e-13 : o1 ? -0.5988224513034371474e-07 : +0.3534639523461223473e-09);
2266   t = mla(t, u, o0 ? +0.4463221276786412722e-12 : o1 ? +0.4005716952355346640e-06 : -0.4664967728285395926e-08);
2267   t = mla(t, u, o0 ? -0.6711366622850138987e-11 : o1 ? -0.2132190104575784400e-05 : +0.4943823283769000532e-07);
2268   t = mla(t, u, o0 ? +0.9422759050232658346e-10 : o1 ? +0.9092461304042630325e-05 : -0.4271203394761148254e-06);
2269   t = mla(t, u, o0 ? -0.1229055530100228477e-08 : o1 ? -0.3079188080966205457e-04 : +0.3034067677404915895e-05);
2270   t = mla(t, u, o0 ? +0.1480719281585085023e-07 : o1 ? +0.7971413443082370762e-04 : -0.1776295289066871135e-04);
2271   t = mla(t, u, o0 ? -0.1636584469123402714e-06 : o1 ? -0.1387853215225442864e-03 : +0.8524547630559505050e-04);
2272   t = mla(t, u, o0 ? +0.1646211436588923363e-05 : o1 ? +0.6469678026257590965e-04 : -0.3290582944961784398e-03);
2273   t = mla(t, u, o0 ? -0.1492565035840624866e-04 : o1 ? +0.4996645280372945860e-03 : +0.9696966068789101157e-03);
2274   t = mla(t, u, o0 ? +0.1205533298178966496e-03 : o1 ? -0.1622802482842520535e-02 : -0.1812527628046986137e-02);
2275   t = mla(t, u, o0 ? -0.8548327023450851166e-03 : o1 ? +0.1615320557049377171e-03 : -0.4725409828123619017e-03);
2276   t = mla(t, u, o0 ? +0.5223977625442188799e-02 : o1 ? +0.1915262325574875607e-01 : +0.2090315427924229266e-01);
2277   t = mla(t, u, o0 ? -0.2686617064513125569e-01 : o1 ? -0.1027818298486033455e+00 : -0.1052041921842776645e+00);
2278   t = mla(t, u, o0 ? +0.1128379167095512753e+00 : o1 ? -0.6366172819842503827e+00 : -0.6345351808766568347e+00);
2279   t = mla(t, u, o0 ? -0.3761263890318375380e+00 : o1 ? -0.1128379590648910469e+01 : -0.1129442929103524396e+01);
2280   d = ddmul_d2_d_d(t, u);
2281   d = ddadd2_d2_d2_d2(d, o0 ? dd(1.1283791670955125586, 1.5335459613165822674e-17) :
2282           o1 ? dd(3.4110644736196137587e-08, -2.4875650708323294246e-24) :
2283           dd(0.00024963035690526438285, -5.4362665034856259795e-21));
2284   d = o0 ? ddmul_d2_d2_d(d, a) : ddadd_d2_d_d2(1.0, ddneg_d2_d2(expk2(d)));
2285   u = mulsign(o2 ? (d.x + d.y) : 1, s);
2286   u = xisnan(a) ? SLEEF_NAN : u;
2287   return u;
2288 }
2289 
xerfc_u15(double a)2290 EXPORT CONST double xerfc_u15(double a) {
2291   double s = a, r = 0, t;
2292   Sleef_double2 u, d, x;
2293   a = fabsk(a);
2294   int o0 = a < 1.0, o1 = a < 2.2, o2 = a < 4.2, o3 = a < 27.3;
2295   u = o0 ? ddmul_d2_d_d(a, a) : o1 ? dd(a, 0) : dddiv_d2_d2_d2(dd(1, 0), dd(a, 0));
2296 
2297   t = o0 ? +0.6801072401395386139e-20 : o1 ? +0.3438010341362585303e-12 : o2 ? -0.5757819536420710449e+2 : +0.2334249729638701319e+5;
2298   t = mla(t, u.x, o0 ? -0.2161766247570055669e-18 : o1 ? -0.1237021188160598264e-10 : o2 ? +0.4669289654498104483e+3 : -0.4695661044933107769e+5);
2299   t = mla(t, u.x, o0 ? +0.4695919173301595670e-17 : o1 ? +0.2117985839877627852e-09 : o2 ? -0.1796329879461355858e+4 : +0.3173403108748643353e+5);
2300   t = mla(t, u.x, o0 ? -0.9049140419888007122e-16 : o1 ? -0.2290560929177369506e-08 : o2 ? +0.4355892193699575728e+4 : +0.3242982786959573787e+4);
2301   t = mla(t, u.x, o0 ? +0.1634018903557410728e-14 : o1 ? +0.1748931621698149538e-07 : o2 ? -0.7456258884965764992e+4 : -0.2014717999760347811e+5);
2302   t = mla(t, u.x, o0 ? -0.2783485786333451745e-13 : o1 ? -0.9956602606623249195e-07 : o2 ? +0.9553977358167021521e+4 : +0.1554006970967118286e+5);
2303   t = mla(t, u.x, o0 ? +0.4463221276786415752e-12 : o1 ? +0.4330010240640327080e-06 : o2 ? -0.9470019905444229153e+4 : -0.6150874190563554293e+4);
2304   t = mla(t, u.x, o0 ? -0.6711366622850136563e-11 : o1 ? -0.1435050600991763331e-05 : o2 ? +0.7387344321849855078e+4 : +0.1240047765634815732e+4);
2305   t = mla(t, u.x, o0 ? +0.9422759050232662223e-10 : o1 ? +0.3460139479650695662e-05 : o2 ? -0.4557713054166382790e+4 : -0.8210325475752699731e+2);
2306   t = mla(t, u.x, o0 ? -0.1229055530100229098e-08 : o1 ? -0.4988908180632898173e-05 : o2 ? +0.2207866967354055305e+4 : +0.3242443880839930870e+2);
2307   t = mla(t, u.x, o0 ? +0.1480719281585086512e-07 : o1 ? -0.1308775976326352012e-05 : o2 ? -0.8217975658621754746e+3 : -0.2923418863833160586e+2);
2308   t = mla(t, u.x, o0 ? -0.1636584469123399803e-06 : o1 ? +0.2825086540850310103e-04 : o2 ? +0.2268659483507917400e+3 : +0.3457461732814383071e+0);
2309   t = mla(t, u.x, o0 ? +0.1646211436588923575e-05 : o1 ? -0.6393913713069986071e-04 : o2 ? -0.4633361260318560682e+2 : +0.5489730155952392998e+1);
2310   t = mla(t, u.x, o0 ? -0.1492565035840623511e-04 : o1 ? -0.2566436514695078926e-04 : o2 ? +0.9557380123733945965e+1 : +0.1559934132251294134e-2);
2311   t = mla(t, u.x, o0 ? +0.1205533298178967851e-03 : o1 ? +0.5895792375659440364e-03 : o2 ? -0.2958429331939661289e+1 : -0.1541741566831520638e+1);
2312   t = mla(t, u.x, o0 ? -0.8548327023450850081e-03 : o1 ? -0.1695715579163588598e-02 : o2 ? +0.1670329508092765480e+0 : +0.2823152230558364186e-5);
2313   t = mla(t, u.x, o0 ? +0.5223977625442187932e-02 : o1 ? +0.2089116434918055149e-03 : o2 ? +0.6096615680115419211e+0 : +0.6249999184195342838e+0);
2314   t = mla(t, u.x, o0 ? -0.2686617064513125222e-01 : o1 ? +0.1912855949584917753e-01 : o2 ? +0.1059212443193543585e-2 : +0.1741749416408701288e-8);
2315 
2316   d = ddmul_d2_d2_d(u, t);
2317   d = ddadd2_d2_d2_d2(d, o0 ? dd(0.11283791670955126141, -4.0175691625932118483e-18) :
2318           o1 ? dd(-0.10277263343147646779, -6.2338714083404900225e-18) :
2319           o2 ? dd(-0.50005180473999022439, 2.6362140569041995803e-17) :
2320           dd(-0.5000000000258444377, -4.0074044712386992281e-17));
2321   d = ddmul_d2_d2_d2(d, u);
2322   d = ddadd2_d2_d2_d2(d, o0 ? dd(-0.37612638903183753802, 1.3391897206042552387e-17) :
2323           o1 ? dd(-0.63661976742916359662, 7.6321019159085724662e-18) :
2324           o2 ? dd(1.601106273924963368e-06, 1.1974001857764476775e-23) :
2325           dd(2.3761973137523364792e-13, -1.1670076950531026582e-29));
2326   d = ddmul_d2_d2_d2(d, u);
2327   d = ddadd2_d2_d2_d2(d, o0 ? dd(1.1283791670955125586, 1.5335459613165822674e-17) :
2328           o1 ? dd(-1.1283791674717296161, 8.0896847755965377194e-17) :
2329           o2 ? dd(-0.57236496645145429341, 3.0704553245872027258e-17) :
2330           dd(-0.57236494292470108114, -2.3984352208056898003e-17));
2331 
2332   x = ddmul_d2_d2_d(o1 ? d : dd(-a, 0), a);
2333   x = o1 ? x : ddadd2_d2_d2_d2(x, d);
2334   x = o0 ? ddsub_d2_d2_d2(dd(1, 0), x) : expk2(x);
2335   x = o1 ? x : ddmul_d2_d2_d2(x, u);
2336 
2337   r = o3 ? (x.x + x.y) : 0;
2338   if (s < 0) r = 2 - r;
2339   r = xisnan(s) ? SLEEF_NAN : r;
2340   return r;
2341 }
2342 
2343 #ifdef ENABLE_MAIN
2344 // gcc -w -DENABLE_MAIN -I../common sleefdp.c -lm
2345 #include <stdlib.h>
main(int argc,char ** argv)2346 int main(int argc, char **argv) {
2347   double d1 = atof(argv[1]);
2348   printf("arg1 = %.20g\n", d1);
2349   //int i1 = atoi(argv[1]);
2350   //double d2 = atof(argv[2]);
2351   //printf("arg2 = %.20g\n", d2);
2352   //printf("%d\n", (int)d2);
2353 #if 0
2354   double d3 = atof(argv[3]);
2355   printf("arg3 = %.20g\n", d3);
2356 #endif
2357   //printf("%g\n", pow2i(i1));
2358   //int exp = xexpfrexp(d1);
2359   //double r = xnextafter(d1, d2);
2360   //double r = xfma(d1, d2, d3);
2361   printf("test = %.20g\n", xcos_u1(d1));
2362   //printf("test = %.20g\n", xlog(d1));
2363   //r = nextafter(d1, d2);
2364   printf("corr = %.20g\n", cos(d1));
2365   //printf("%.20g %.20g\n", xround(d1), xrint(d1));
2366   //Sleef_double2 r = xsincospi_u35(d);
2367   //printf("%g, %g\n", (double)r.x, (double)r.y);
2368 }
2369 #endif
2370