1 ////////////////////////////////////////////////////////////////
2 //
3 // this code was taken from http://shibatch.sourceforge.net/
4 // Many thanks to the author of original version: Naoki Shibata
5 //
6 // Copyright Naoki Shibata and contributors 2010 - 2021.
7 // Distributed under the Boost Software License, Version 1.0.
8 // (See accompanying file sleef_LICENSE.txt or copy at
9 // http://www.boost.org/LICENSE_1_0.txt)
10 //
11 // This version contains modifications made by Ingo Weyrich
12 //
13 ////////////////////////////////////////////////////////////////
14 #pragma once
15
16 #include <assert.h>
17 #include <stdint.h>
18
19 #ifndef INLINE
20 #define INLINE extern inline
21 #endif
22
23 #define RT_PI 3.14159265358979323846 // pi
24 #define RT_PI_2 1.57079632679489661923 // pi/2
25 #define RT_PI_180 0.017453292519943295769 // pi/180
26 #define RT_1_PI 0.31830988618379067154 // 1/pi
27 #define RT_2_PI 0.63661977236758134308 // 2/pi
28 #define RT_SQRT1_2 0.70710678118654752440 // 1/sqrt(2)
29
30 #define RT_PI_F 3.14159265358979323846f // pi
31 #define RT_PI_F_2 1.57079632679489661923f // pi/2
32 #define RT_PI_F_180 0.017453292519943295769f // pi/180
33 #define RT_1_PI_F 0.31830988618379067154f // 1/pi
34 #define RT_2_PI_F 0.63661977236758134308f // 2/pi
35
36 #define RT_INFINITY __builtin_infl()
37 #define RT_NAN __builtin_nanl("")
38
39 #define RT_INFINITY_F __builtin_inff()
40 #define RT_NAN_F __builtin_nanf("")
41
42
43
44 #define PI4_A .7853981554508209228515625
45 #define PI4_B .794662735614792836713604629039764404296875e-8
46 #define PI4_C .306161699786838294306516483068750264552437361480769e-16
47 #define M_4_PI 1.273239544735162542821171882678754627704620361328125
48
49 #define L2U .69314718055966295651160180568695068359375
50 #define L2L .28235290563031577122588448175013436025525412068e-12
51 #define R_LN2 1.442695040888963407359924681001892137426645954152985934135449406931
52 #define pow_F(a,b) (xexpf(b*xlogf(a)))
53
doubleToRawLongBits(double d)54 __inline __attribute__((always_inline)) int64_t doubleToRawLongBits(double d) {
55 union {
56 double f;
57 int64_t i;
58 } tmp;
59 tmp.f = d;
60 return tmp.i;
61 }
62
longBitsToDouble(int64_t i)63 __inline __attribute__((always_inline)) double longBitsToDouble(int64_t i) {
64 union {
65 double f;
66 int64_t i;
67 } tmp;
68 tmp.i = i;
69 return tmp.f;
70 }
71
xfabs(double x)72 __inline __attribute__((always_inline)) double xfabs(double x) {
73 return longBitsToDouble(0x7fffffffffffffffLL & doubleToRawLongBits(x));
74 }
75
mulsign(double x,double y)76 __inline __attribute__((always_inline)) double mulsign(double x, double y) {
77 return longBitsToDouble(doubleToRawLongBits(x) ^ (doubleToRawLongBits(y) & (1LL << 63)));
78 }
79
sign(double d)80 __inline __attribute__((always_inline)) double sign(double d) { return mulsign(1, d); }
mla(double x,double y,double z)81 __inline __attribute__((always_inline)) double mla(double x, double y, double z) { return x * y + z; }
xrint(double x)82 __inline __attribute__((always_inline)) double xrint(double x) { return x < 0 ? (int)(x - 0.5) : (int)(x + 0.5); }
83
xisnan(double x)84 __inline __attribute__((always_inline)) int xisnan(double x) { return x != x; }
xisinf(double x)85 __inline __attribute__((always_inline)) int xisinf(double x) { return x == RT_INFINITY || x == -RT_INFINITY; }
xisminf(double x)86 __inline __attribute__((always_inline)) int xisminf(double x) { return x == -RT_INFINITY; }
xispinf(double x)87 __inline __attribute__((always_inline)) int xispinf(double x) { return x == RT_INFINITY; }
88
ldexpk(double x,int q)89 __inline __attribute__((always_inline)) double ldexpk(double x, int q) {
90 double u;
91 int m;
92 m = q >> 31;
93 m = (((m + q) >> 9) - m) << 7;
94 q = q - (m << 2);
95 u = longBitsToDouble(((int64_t)(m + 0x3ff)) << 52);
96 double u2 = u*u;
97 u2 = u2 * u2;
98 x = x * u2;
99 u = longBitsToDouble(((int64_t)(q + 0x3ff)) << 52);
100 return x * u;
101 }
102
xldexp(double x,int q)103 __inline __attribute__((always_inline)) double xldexp(double x, int q) { return ldexpk(x, q); }
104
ilogbp1(double d)105 __inline __attribute__((always_inline)) int ilogbp1(double d) {
106 int m = d < 4.9090934652977266E-91;
107 d = m ? 2.037035976334486E90 * d : d;
108 int q = (doubleToRawLongBits(d) >> 52) & 0x7ff;
109 q = m ? q - (300 + 0x03fe) : q - 0x03fe;
110 return q;
111 }
112
xilogb(double d)113 __inline __attribute__((always_inline)) int xilogb(double d) {
114 int e = ilogbp1(xfabs(d)) - 1;
115 e = d == 0 ? (-2147483647 - 1) : e;
116 e = d == RT_INFINITY || d == -RT_INFINITY ? 2147483647 : e;
117 return e;
118 }
119
upper(double d)120 __inline __attribute__((always_inline)) double upper(double d) {
121 return longBitsToDouble(doubleToRawLongBits(d) & 0xfffffffff8000000LL);
122 }
123
124 typedef struct {
125 double x, y;
126 } double2;
127
128 typedef struct {
129 float x, y;
130 } float2;
131
dd(double h,double l)132 __inline __attribute__((always_inline)) double2 dd(double h, double l) {
133 double2 ret;
134 ret.x = h; ret.y = l;
135 return ret;
136 }
137
normalize_d(double2 t)138 __inline __attribute__((always_inline)) double2 normalize_d(double2 t) {
139 double2 s;
140
141 s.x = t.x + t.y;
142 s.y = t.x - s.x + t.y;
143
144 return s;
145 }
146
scale_d(double2 d,double s)147 __inline __attribute__((always_inline)) double2 scale_d(double2 d, double s) {
148 double2 r;
149
150 r.x = d.x * s;
151 r.y = d.y * s;
152
153 return r;
154 }
155
add2_ss(double x,double y)156 __inline __attribute__((always_inline)) double2 add2_ss(double x, double y) {
157 double2 r;
158
159 r.x = x + y;
160 double v = r.x - x;
161 r.y = (x - (r.x - v)) + (y - v);
162
163 return r;
164 }
165
add_ds(double2 x,double y)166 __inline __attribute__((always_inline)) double2 add_ds(double2 x, double y) {
167 // |x| >= |y|
168
169 double2 r;
170
171 assert(xisnan(x.x) || xisnan(y) || xfabs(x.x) >= xfabs(y));
172
173 r.x = x.x + y;
174 r.y = x.x - r.x + y + x.y;
175
176 return r;
177 }
178
add2_ds(double2 x,double y)179 __inline __attribute__((always_inline)) double2 add2_ds(double2 x, double y) {
180 // |x| >= |y|
181
182 double2 r;
183
184 r.x = x.x + y;
185 double v = r.x - x.x;
186 r.y = (x.x - (r.x - v)) + (y - v);
187 r.y += x.y;
188
189 return r;
190 }
191
add_sd(double x,double2 y)192 __inline __attribute__((always_inline)) double2 add_sd(double x, double2 y) {
193 // |x| >= |y|
194
195 double2 r;
196
197 assert(xisnan(x) || xisnan(y.x) || xfabs(x) >= xfabs(y.x));
198
199 r.x = x + y.x;
200 r.y = x - r.x + y.x + y.y;
201
202 return r;
203 }
204
add_dd(double2 x,double2 y)205 __inline __attribute__((always_inline)) double2 add_dd(double2 x, double2 y) {
206 // |x| >= |y|
207
208 double2 r;
209
210 assert(xisnan(x.x) || xisnan(y.x) || xfabs(x.x) >= xfabs(y.x));
211
212 r.x = x.x + y.x;
213 r.y = x.x - r.x + y.x + x.y + y.y;
214
215 return r;
216 }
217
add2_dd(double2 x,double2 y)218 __inline __attribute__((always_inline)) double2 add2_dd(double2 x, double2 y) {
219 double2 r;
220
221 r.x = x.x + y.x;
222 double v = r.x - x.x;
223 r.y = (x.x - (r.x - v)) + (y.x - v);
224 r.y += x.y + y.y;
225
226 return r;
227 }
228
div_dd(double2 n,double2 d)229 __inline __attribute__((always_inline)) double2 div_dd(double2 n, double2 d) {
230 double t = 1.0 / d.x;
231 double dh = upper(d.x), dl = d.x - dh;
232 double th = upper(t ), tl = t - th;
233 double nhh = upper(n.x), nhl = n.x - nhh;
234
235 double2 q;
236
237 q.x = n.x * t;
238
239 double u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl +
240 q.x * (1 - dh * th - dh * tl - dl * th - dl * tl);
241
242 q.y = t * (n.y - q.x * d.y) + u;
243
244 return q;
245 }
246
mul_ss(double x,double y)247 __inline __attribute__((always_inline)) double2 mul_ss(double x, double y) {
248 double xh = upper(x), xl = x - xh;
249 double yh = upper(y), yl = y - yh;
250 double2 r;
251
252 r.x = x * y;
253 r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl;
254
255 return r;
256 }
257
mul_ds(double2 x,double y)258 __inline __attribute__((always_inline)) double2 mul_ds(double2 x, double y) {
259 double xh = upper(x.x), xl = x.x - xh;
260 double yh = upper(y ), yl = y - yh;
261 double2 r;
262
263 r.x = x.x * y;
264 r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y;
265
266 return r;
267 }
268
mul_dd(double2 x,double2 y)269 __inline __attribute__((always_inline)) double2 mul_dd(double2 x, double2 y) {
270 double xh = upper(x.x), xl = x.x - xh;
271 double yh = upper(y.x), yl = y.x - yh;
272 double2 r;
273
274 r.x = x.x * y.x;
275 r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x;
276
277 return r;
278 }
279
squ_d(double2 x)280 __inline __attribute__((always_inline)) double2 squ_d(double2 x) {
281 double xh = upper(x.x), xl = x.x - xh;
282 double2 r;
283
284 r.x = x.x * x.x;
285 r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y);
286
287 return r;
288 }
289
rec_s(double d)290 __inline __attribute__((always_inline)) double2 rec_s(double d) {
291 double t = 1.0 / d;
292 double dh = upper(d), dl = d - dh;
293 double th = upper(t), tl = t - th;
294 double2 q;
295
296 q.x = t;
297 q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl);
298
299 return q;
300 }
301
sqrt_d(double2 d)302 __inline __attribute__((always_inline)) double2 sqrt_d(double2 d) {
303 double t = sqrt(d.x + d.y);
304 return scale_d(mul_dd(add2_dd(d, mul_ss(t, t)), rec_s(t)), 0.5);
305 }
306
atan2k(double y,double x)307 __inline __attribute__((always_inline)) double atan2k(double y, double x) {
308 double s, t, u;
309 int q = 0;
310
311 if (x < 0) { x = -x; q = -2; }
312 if (y > x) { t = x; x = y; y = -t; q += 1; }
313
314 s = y / x;
315 t = s * s;
316
317 u = -1.88796008463073496563746e-05;
318 u = u * t + (0.000209850076645816976906797);
319 u = u * t + (-0.00110611831486672482563471);
320 u = u * t + (0.00370026744188713119232403);
321 u = u * t + (-0.00889896195887655491740809);
322 u = u * t + (0.016599329773529201970117);
323 u = u * t + (-0.0254517624932312641616861);
324 u = u * t + (0.0337852580001353069993897);
325 u = u * t + (-0.0407629191276836500001934);
326 u = u * t + (0.0466667150077840625632675);
327 u = u * t + (-0.0523674852303482457616113);
328 u = u * t + (0.0587666392926673580854313);
329 u = u * t + (-0.0666573579361080525984562);
330 u = u * t + (0.0769219538311769618355029);
331 u = u * t + (-0.090908995008245008229153);
332 u = u * t + (0.111111105648261418443745);
333 u = u * t + (-0.14285714266771329383765);
334 u = u * t + (0.199999999996591265594148);
335 u = u * t + (-0.333333333333311110369124);
336
337 t = u * t * s + s;
338 t = q * (RT_PI_2) + t;
339
340 return t;
341 }
342
xatan2(double y,double x)343 __inline __attribute__((always_inline)) double xatan2(double y, double x) {
344 double r = atan2k(xfabs(y), x);
345
346 r = mulsign(r, x);
347 if (xisinf(x) || x == 0) r = RT_PI_2 - (xisinf(x) ? (sign(x) * (RT_PI_2)) : 0);
348 if (xisinf(y) ) r = RT_PI_2 - (xisinf(x) ? (sign(x) * (RT_PI*1/4)) : 0);
349 if ( y == 0) r = (sign(x) == -1 ? RT_PI : 0);
350
351 return xisnan(x) || xisnan(y) ? RT_NAN : mulsign(r, y);
352 }
353
xasin(double d)354 __inline __attribute__((always_inline)) double xasin(double d) {
355 return mulsign(atan2k(xfabs(d), sqrt((1+d)*(1-d))), d);
356 }
357
xacos(double d)358 __inline __attribute__((always_inline)) double xacos(double d) {
359 return mulsign(atan2k(sqrt((1+d)*(1-d)), xfabs(d)), d) + (d < 0 ? RT_PI : 0);
360 }
361
xatan(double s)362 __inline __attribute__((always_inline)) double xatan(double s) {
363 double t, u;
364 int q = 0;
365
366 if (s < 0) { s = -s; q = 2; }
367 if (s > 1) { s = 1.0 / s; q |= 1; }
368
369 t = s * s;
370
371 u = -1.88796008463073496563746e-05;
372 u = u * t + (0.000209850076645816976906797);
373 u = u * t + (-0.00110611831486672482563471);
374 u = u * t + (0.00370026744188713119232403);
375 u = u * t + (-0.00889896195887655491740809);
376 u = u * t + (0.016599329773529201970117);
377 u = u * t + (-0.0254517624932312641616861);
378 u = u * t + (0.0337852580001353069993897);
379 u = u * t + (-0.0407629191276836500001934);
380 u = u * t + (0.0466667150077840625632675);
381 u = u * t + (-0.0523674852303482457616113);
382 u = u * t + (0.0587666392926673580854313);
383 u = u * t + (-0.0666573579361080525984562);
384 u = u * t + (0.0769219538311769618355029);
385 u = u * t + (-0.090908995008245008229153);
386 u = u * t + (0.111111105648261418443745);
387 u = u * t + (-0.14285714266771329383765);
388 u = u * t + (0.199999999996591265594148);
389 u = u * t + (-0.333333333333311110369124);
390
391 t = s + s * (t * u);
392
393 if ((q & 1) != 0) t = 1.570796326794896557998982 - t;
394 if ((q & 2) != 0) t = -t;
395
396 return t;
397 }
398
xsin(double d)399 __inline __attribute__((always_inline)) double xsin(double d) {
400 int q;
401 double u, s;
402
403 q = (int)xrint(d * RT_1_PI);
404
405 d = mla(q, -PI4_A*4, d);
406 d = mla(q, -PI4_B*4, d);
407 d = mla(q, -PI4_C*4, d);
408
409 s = d * d;
410
411 if ((q & 1) != 0) d = -d;
412
413 u = -7.97255955009037868891952e-18;
414 u = mla(u, s, 2.81009972710863200091251e-15);
415 u = mla(u, s, -7.64712219118158833288484e-13);
416 u = mla(u, s, 1.60590430605664501629054e-10);
417 u = mla(u, s, -2.50521083763502045810755e-08);
418 u = mla(u, s, 2.75573192239198747630416e-06);
419 u = mla(u, s, -0.000198412698412696162806809);
420 u = mla(u, s, 0.00833333333333332974823815);
421 u = mla(u, s, -0.166666666666666657414808);
422
423 u = mla(s, u * d, d);
424
425 return u;
426 }
427
xcos(double d)428 __inline __attribute__((always_inline)) double xcos(double d) {
429 int q;
430 double u, s;
431
432 q = 1 + 2*(int)xrint(d * RT_1_PI - 0.5);
433
434 d = mla(q, -PI4_A*2, d);
435 d = mla(q, -PI4_B*2, d);
436 d = mla(q, -PI4_C*2, d);
437
438 s = d * d;
439
440 if ((q & 2) == 0) d = -d;
441
442 u = -7.97255955009037868891952e-18;
443 u = mla(u, s, 2.81009972710863200091251e-15);
444 u = mla(u, s, -7.64712219118158833288484e-13);
445 u = mla(u, s, 1.60590430605664501629054e-10);
446 u = mla(u, s, -2.50521083763502045810755e-08);
447 u = mla(u, s, 2.75573192239198747630416e-06);
448 u = mla(u, s, -0.000198412698412696162806809);
449 u = mla(u, s, 0.00833333333333332974823815);
450 u = mla(u, s, -0.166666666666666657414808);
451
452 u = mla(s, u * d, d);
453
454 return u;
455 }
456
xsincos(double d)457 __inline __attribute__((always_inline)) double2 xsincos(double d) {
458 int q;
459 double u, s, t;
460 double2 r;
461
462 q = (int)xrint(d * (2 * RT_1_PI));
463
464 s = d;
465
466 s = mla(-q, PI4_A*2, s);
467 s = mla(-q, PI4_B*2, s);
468 s = mla(-q, PI4_C*2, s);
469
470 t = s;
471
472 s = s * s;
473
474 u = 1.58938307283228937328511e-10;
475 u = mla(u, s, -2.50506943502539773349318e-08);
476 u = mla(u, s, 2.75573131776846360512547e-06);
477 u = mla(u, s, -0.000198412698278911770864914);
478 u = mla(u, s, 0.0083333333333191845961746);
479 u = mla(u, s, -0.166666666666666130709393);
480 u = u * s * t;
481
482 r.x = t + u;
483
484 u = -1.13615350239097429531523e-11;
485 u = mla(u, s, 2.08757471207040055479366e-09);
486 u = mla(u, s, -2.75573144028847567498567e-07);
487 u = mla(u, s, 2.48015872890001867311915e-05);
488 u = mla(u, s, -0.00138888888888714019282329);
489 u = mla(u, s, 0.0416666666666665519592062);
490 u = mla(u, s, -0.5);
491
492 r.y = u * s + 1;
493
494 if ((q & 1) != 0) { s = r.y; r.y = r.x; r.x = s; }
495 if ((q & 2) != 0) { r.x = -r.x; }
496 if (((q+1) & 2) != 0) { r.y = -r.y; }
497
498 if (xisinf(d)) { r.x = r.y = RT_NAN; }
499
500 return r;
501 }
502
xtan(double d)503 __inline __attribute__((always_inline)) double xtan(double d) {
504 int q;
505 double u, s, x;
506
507 q = (int)xrint(d * (2 * RT_1_PI));
508
509 x = mla(q, -PI4_A*2, d);
510 x = mla(q, -PI4_B*2, x);
511 x = mla(q, -PI4_C*2, x);
512
513 s = x * x;
514
515 if ((q & 1) != 0) x = -x;
516
517 u = 1.01419718511083373224408e-05;
518 u = mla(u, s, -2.59519791585924697698614e-05);
519 u = mla(u, s, 5.23388081915899855325186e-05);
520 u = mla(u, s, -3.05033014433946488225616e-05);
521 u = mla(u, s, 7.14707504084242744267497e-05);
522 u = mla(u, s, 8.09674518280159187045078e-05);
523 u = mla(u, s, 0.000244884931879331847054404);
524 u = mla(u, s, 0.000588505168743587154904506);
525 u = mla(u, s, 0.00145612788922812427978848);
526 u = mla(u, s, 0.00359208743836906619142924);
527 u = mla(u, s, 0.00886323944362401618113356);
528 u = mla(u, s, 0.0218694882853846389592078);
529 u = mla(u, s, 0.0539682539781298417636002);
530 u = mla(u, s, 0.133333333333125941821962);
531 u = mla(u, s, 0.333333333333334980164153);
532
533 u = mla(s, u * x, x);
534
535 if ((q & 1) != 0) u = 1.0 / u;
536
537 if (xisinf(d)) u = RT_NAN;
538
539 return u;
540 }
541
xlog(double d)542 __inline __attribute__((always_inline)) double xlog(double d) {
543 double x, x2, t, m;
544 int e;
545
546 e = ilogbp1(d * 0.7071);
547 m = ldexpk(d, -e);
548
549 x = (m-1) / (m+1);
550 x2 = x * x;
551
552 t = 0.148197055177935105296783;
553 t = mla(t, x2, 0.153108178020442575739679);
554 t = mla(t, x2, 0.181837339521549679055568);
555 t = mla(t, x2, 0.22222194152736701733275);
556 t = mla(t, x2, 0.285714288030134544449368);
557 t = mla(t, x2, 0.399999999989941956712869);
558 t = mla(t, x2, 0.666666666666685503450651);
559 t = mla(t, x2, 2);
560
561 x = x * t + 0.693147180559945286226764 * e;
562
563 if (xispinf(d)) x = RT_INFINITY;
564 if (d < 0) x = RT_NAN;
565 if (d == 0) x = -RT_INFINITY;
566
567 return x;
568 }
569
xexp(double d)570 __inline __attribute__((always_inline)) double xexp(double d) {
571 int q = (int)xrint(d * R_LN2);
572 double s, u;
573
574 s = mla(q, -L2U, d);
575 s = mla(q, -L2L, s);
576
577 u = 2.08860621107283687536341e-09;
578 u = mla(u, s, 2.51112930892876518610661e-08);
579 u = mla(u, s, 2.75573911234900471893338e-07);
580 u = mla(u, s, 2.75572362911928827629423e-06);
581 u = mla(u, s, 2.4801587159235472998791e-05);
582 u = mla(u, s, 0.000198412698960509205564975);
583 u = mla(u, s, 0.00138888888889774492207962);
584 u = mla(u, s, 0.00833333333331652721664984);
585 u = mla(u, s, 0.0416666666666665047591422);
586 u = mla(u, s, 0.166666666666666851703837);
587 u = mla(u, s, 0.5);
588
589 u = s * s * u + s + 1;
590 u = ldexpk(u, q);
591
592 if (xisminf(d)) u = 0;
593
594 return u;
595 }
596
logk(double d)597 __inline __attribute__((always_inline)) double2 logk(double d) {
598 double2 x, x2;
599 double m, t;
600 int e;
601
602 e = ilogbp1(d * 0.7071);
603 m = ldexpk(d, -e);
604
605 x = div_dd(add2_ss(-1, m), add2_ss(1, m));
606 x2 = squ_d(x);
607
608 t = 0.134601987501262130076155;
609 t = mla(t, x2.x, 0.132248509032032670243288);
610 t = mla(t, x2.x, 0.153883458318096079652524);
611 t = mla(t, x2.x, 0.181817427573705403298686);
612 t = mla(t, x2.x, 0.222222231326187414840781);
613 t = mla(t, x2.x, 0.285714285651261412873718);
614 t = mla(t, x2.x, 0.400000000000222439910458);
615 t = mla(t, x2.x, 0.666666666666666371239645);
616
617 return add2_dd(mul_ds(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), e),
618 add2_dd(scale_d(x, 2), mul_ds(mul_dd(x2, x), t)));
619 }
620
expk(double2 d)621 __inline __attribute__((always_inline)) double expk(double2 d) {
622 int q = (int)rint((d.x + d.y) * R_LN2);
623 double2 s, t;
624 double u;
625
626 s = add2_ds(d, q * -L2U);
627 s = add2_ds(s, q * -L2L);
628
629 s = normalize_d(s);
630
631 u = 2.51069683420950419527139e-08;
632 u = mla(u, s.x, 2.76286166770270649116855e-07);
633 u = mla(u, s.x, 2.75572496725023574143864e-06);
634 u = mla(u, s.x, 2.48014973989819794114153e-05);
635 u = mla(u, s.x, 0.000198412698809069797676111);
636 u = mla(u, s.x, 0.0013888888939977128960529);
637 u = mla(u, s.x, 0.00833333333332371417601081);
638 u = mla(u, s.x, 0.0416666666665409524128449);
639 u = mla(u, s.x, 0.166666666666666740681535);
640 u = mla(u, s.x, 0.500000000000000999200722);
641
642 t = add_dd(s, mul_ds(squ_d(s), u));
643
644 t = add_sd(1, t);
645 return ldexpk(t.x + t.y, q);
646 }
647
xpow(double x,double y)648 __inline __attribute__((always_inline)) double xpow(double x, double y) {
649 int yisint = (int)y == y;
650 int yisodd = (1 & (int)y) != 0 && yisint;
651
652 double result = expk(mul_ds(logk(xfabs(x)), y));
653
654 result = xisnan(result) ? RT_INFINITY : result;
655 result *= (x >= 0 ? 1 : (!yisint ? RT_NAN : (yisodd ? -1 : 1)));
656
657 double efx = mulsign(xfabs(x) - 1, y);
658 if (xisinf(y)) result = efx < 0 ? 0.0 : (efx == 0 ? 1.0 : RT_INFINITY);
659 if (xisinf(x) || x == 0) result = (yisodd ? sign(x) : 1) * ((x == 0 ? -y : y) < 0 ? 0 : RT_INFINITY);
660 if (xisnan(x) || xisnan(y)) result = RT_NAN;
661 if (y == 0 || x == 1) result = 1;
662
663 return result;
664 }
665
expk2(double2 d)666 __inline __attribute__((always_inline)) double2 expk2(double2 d) {
667 int q = (int)rint((d.x + d.y) * R_LN2);
668 double2 s, t;
669 double u;
670
671 s = add2_ds(d, q * -L2U);
672 s = add2_ds(s, q * -L2L);
673
674 s = normalize_d(s);
675
676 u = 2.51069683420950419527139e-08;
677 u = mla(u, s.x, 2.76286166770270649116855e-07);
678 u = mla(u, s.x, 2.75572496725023574143864e-06);
679 u = mla(u, s.x, 2.48014973989819794114153e-05);
680 u = mla(u, s.x, 0.000198412698809069797676111);
681 u = mla(u, s.x, 0.0013888888939977128960529);
682 u = mla(u, s.x, 0.00833333333332371417601081);
683 u = mla(u, s.x, 0.0416666666665409524128449);
684 u = mla(u, s.x, 0.166666666666666740681535);
685 u = mla(u, s.x, 0.500000000000000999200722);
686
687 t = add_dd(s, mul_ds(squ_d(s), u));
688
689 t = add_sd(1, t);
690 return dd(ldexpk(t.x, q), ldexpk(t.y, q));
691 }
692
xsinh(double x)693 __inline __attribute__((always_inline)) double xsinh(double x) {
694 double y = xfabs(x);
695 double2 d = expk2(dd(y, 0));
696 d = add2_dd(d, div_dd(dd(-1, 0), d));
697 y = (d.x + d.y) * 0.5;
698
699 y = xisinf(x) || xisnan(y) ? RT_INFINITY : y;
700 y = mulsign(y, x);
701 y = xisnan(x) ? RT_NAN : y;
702
703 return y;
704 }
705
xcosh(double x)706 __inline __attribute__((always_inline)) double xcosh(double x) {
707 double2 d = expk2(dd(x, 0));
708 d = add2_dd(d, div_dd(dd(1, 0), d));
709 double y = (d.x + d.y) * 0.5;
710
711 y = xisinf(x) || xisnan(y) ? RT_INFINITY : y;
712 y = xisnan(x) ? RT_NAN : y;
713
714 return y;
715 }
716
xtanh(double x)717 __inline __attribute__((always_inline)) double xtanh(double x) {
718 double y = xfabs(x);
719 double2 d = expk2(dd(y, 0));
720 double2 e = div_dd(dd(1, 0), d);
721 d = div_dd(add2_dd(d, scale_d(e, -1)), add2_dd(d, e));
722 y = d.x + d.y;
723
724 y = xisinf(x) || xisnan(y) ? 1.0 : y;
725 y = mulsign(y, x);
726 y = xisnan(x) ? RT_NAN : y;
727
728 return y;
729 }
730
logk2(double2 d)731 __inline __attribute__((always_inline)) double2 logk2(double2 d) {
732 double2 x, x2, m;
733 double t;
734 int e;
735
736 d = normalize_d(d);
737 e = ilogbp1(d.x * 0.7071);
738 m = scale_d(d, ldexpk(1, -e));
739
740 x = div_dd(add2_ds(m, -1), add2_ds(m, 1));
741 x2 = squ_d(x);
742
743 t = 0.134601987501262130076155;
744 t = mla(t, x2.x, 0.132248509032032670243288);
745 t = mla(t, x2.x, 0.153883458318096079652524);
746 t = mla(t, x2.x, 0.181817427573705403298686);
747 t = mla(t, x2.x, 0.222222231326187414840781);
748 t = mla(t, x2.x, 0.285714285651261412873718);
749 t = mla(t, x2.x, 0.400000000000222439910458);
750 t = mla(t, x2.x, 0.666666666666666371239645);
751
752 return add2_dd(mul_ds(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), e),
753 add2_dd(scale_d(x, 2), mul_ds(mul_dd(x2, x), t)));
754 }
755
xasinh(double x)756 __inline __attribute__((always_inline)) double xasinh(double x) {
757 double y = xfabs(x);
758 double2 d = logk2(add2_ds(sqrt_d(add2_ds(mul_ss(y, y), 1)), y));
759 y = d.x + d.y;
760
761 y = xisinf(x) || xisnan(y) ? RT_INFINITY : y;
762 y = mulsign(y, x);
763 y = xisnan(x) ? RT_NAN : y;
764
765 return y;
766 }
767
xacosh(double x)768 __inline __attribute__((always_inline)) double xacosh(double x) {
769 double2 d = logk2(add2_ds(sqrt_d(add2_ds(mul_ss(x, x), -1)), x));
770 double y = d.x + d.y;
771
772 y = xisinf(x) || xisnan(y) ? RT_INFINITY : y;
773 y = x == 1.0 ? 0.0 : y;
774 y = x < 1.0 ? RT_NAN : y;
775 y = xisnan(x) ? RT_NAN : y;
776
777 return y;
778 }
779
xatanh(double x)780 __inline __attribute__((always_inline)) double xatanh(double x) {
781 double y = xfabs(x);
782 double2 d = logk2(div_dd(add2_ss(1, y), add2_ss(1, -y)));
783 y = y > 1.0 ? RT_NAN : (y == 1.0 ? RT_INFINITY : (d.x + d.y) * 0.5);
784
785 y = xisinf(x) || xisnan(y) ? RT_NAN : y;
786 y = mulsign(y, x);
787 y = xisnan(x) ? RT_NAN : y;
788
789 return y;
790 }
791
792 //
793
xfma(double x,double y,double z)794 __inline __attribute__((always_inline)) double xfma(double x, double y, double z) {
795 union {
796 double f;
797 long long int i;
798 } tmp;
799
800 tmp.f = x;
801 tmp.i = (tmp.i + 0x4000000) & 0xfffffffff8000000LL;
802 double xh = tmp.f, xl = x - xh;
803
804 tmp.f = y;
805 tmp.i = (tmp.i + 0x4000000) & 0xfffffffff8000000LL;
806 double yh = tmp.f, yl = y - yh;
807
808 double h = x * y;
809 double l = xh * yh - h + xl * yh + xh * yl + xl * yl;
810
811 double h2, l2, v;
812
813 h2 = h + z;
814 v = h2 - h;
815 l2 = (h - (h2 - v)) + (z - v) + l;
816
817 return h2 + l2;
818 }
819
xsqrt(double d)820 __inline __attribute__((always_inline)) double xsqrt(double d) { // max error : 0.5 ulp
821 double q = 1;
822
823 if (d < 8.636168555094445E-78) {
824 d *= 1.157920892373162E77;
825 q = 2.9387358770557188E-39;
826 }
827
828 // http://en.wikipedia.org/wiki/Fast_inverse_square_root
829 double x = longBitsToDouble(0x5fe6ec85e7de30da - (doubleToRawLongBits(d + 1e-320) >> 1));
830
831 x = x * (1.5 - 0.5 * d * x * x);
832 x = x * (1.5 - 0.5 * d * x * x);
833 x = x * (1.5 - 0.5 * d * x * x);
834
835 // You can change xfma to fma if fma is correctly implemented
836 x = xfma(d * x, d * x, -d) * (x * -0.5) + d * x;
837
838 return d == RT_INFINITY ? RT_INFINITY : x * q;
839 }
840
xcbrt(double d)841 __inline __attribute__((always_inline)) double xcbrt(double d) { // max error : 2 ulps
842 double x, y, q = 1.0;
843 int e, r;
844
845 e = ilogbp1(d);
846 d = ldexpk(d, -e);
847 r = (e + 6144) % 3;
848 q = (r == 1) ? 1.2599210498948731647672106 : q;
849 q = (r == 2) ? 1.5874010519681994747517056 : q;
850 q = ldexpk(q, (e + 6144) / 3 - 2048);
851
852 q = mulsign(q, d);
853 d = xfabs(d);
854
855 x = -0.640245898480692909870982;
856 x = x * d + 2.96155103020039511818595;
857 x = x * d + -5.73353060922947843636166;
858 x = x * d + 6.03990368989458747961407;
859 x = x * d + -3.85841935510444988821632;
860 x = x * d + 2.2307275302496609725722;
861
862 y = x * x; y = y * y; x -= (d * y - x) * (1.0 / 3.0);
863 y = d * x * x;
864 y = (y - (2.0 / 3.0) * y * (y * x - 1)) * q;
865
866 return y;
867 }
868
xexp2(double a)869 __inline __attribute__((always_inline)) double xexp2(double a) {
870 double u = expk(mul_ds(dd(0.69314718055994528623, 2.3190468138462995584e-17), a));
871 if (xispinf(a)) u = RT_INFINITY;
872 if (xisminf(a)) u = 0;
873 return u;
874 }
875
xexp10(double a)876 __inline __attribute__((always_inline)) double xexp10(double a) {
877 double u = expk(mul_ds(dd(2.3025850929940459011, -2.1707562233822493508e-16), a));
878 if (xispinf(a)) u = RT_INFINITY;
879 if (xisminf(a)) u = 0;
880 return u;
881 }
882
xexpm1(double a)883 __inline __attribute__((always_inline)) double xexpm1(double a) {
884 double2 d = add2_ds(expk2(dd(a, 0)), -1.0);
885 double x = d.x + d.y;
886 if (xispinf(a)) x = RT_INFINITY;
887 if (xisminf(a)) x = -1;
888 return x;
889 }
890
xlog10(double a)891 __inline __attribute__((always_inline)) double xlog10(double a) {
892 double2 d = mul_dd(logk(a), dd(0.43429448190325176116, 6.6494347733425473126e-17));
893 double x = d.x + d.y;
894
895 if (xispinf(a)) x = RT_INFINITY;
896 if (a < 0) x = RT_NAN;
897 if (a == 0) x = -RT_INFINITY;
898
899 return x;
900 }
901
xlog1p(double a)902 __inline __attribute__((always_inline)) double xlog1p(double a) {
903 double2 d = logk2(add2_ss(a, 1));
904 double x = d.x + d.y;
905
906 if (xispinf(a)) x = RT_INFINITY;
907 if (a < -1) x = RT_NAN;
908 if (a == -1) x = -RT_INFINITY;
909
910 return x;
911 }
912
913 ///////////////////////////////////////////
914
915 #define PI4_Af 0.78515625f
916 #define PI4_Bf 0.00024127960205078125f
917 #define PI4_Cf 6.3329935073852539062e-07f
918 #define PI4_Df 4.9604681473525147339e-10f
919
920 #define L2Uf 0.693145751953125f
921 #define L2Lf 1.428606765330187045e-06f
922
923 #define R_LN2f 1.442695040888963407359924681001892137426645954152985934135449406931f
924
925 #ifdef __SSE299__
xrintf(float x)926 __inline __attribute__((always_inline)) int xrintf(float x) {
927 return _mm_cvt_ss2si(_mm_set_ss(x));
928 }
929 #else
xrintf(float x)930 __inline __attribute__((always_inline)) int xrintf(float x) {
931 return x + (x < 0 ? -0.5f : 0.5f);
932 }
933 #endif
floatToRawIntBits(float d)934 __inline __attribute__((always_inline)) int32_t floatToRawIntBits(float d) {
935 union {
936 float f;
937 int32_t i;
938 } tmp;
939 tmp.f = d;
940 return tmp.i;
941 }
942
intBitsToFloat(int32_t i)943 __inline __attribute__((always_inline)) float intBitsToFloat(int32_t i) {
944 union {
945 float f;
946 int32_t i;
947 } tmp;
948 tmp.i = i;
949 return tmp.f;
950 }
951
xfabsf(float x)952 __inline __attribute__((always_inline)) float xfabsf(float x) {
953 return intBitsToFloat(0x7fffffffL & floatToRawIntBits(x));
954 }
955
mulsignf(float x,float y)956 __inline __attribute__((always_inline)) float mulsignf(float x, float y) {
957 return intBitsToFloat(floatToRawIntBits(x) ^ (floatToRawIntBits(y) & (1 << 31)));
958 }
959
signf(float d)960 __inline __attribute__((always_inline)) float signf(float d) { return copysign(1.f, d); }
mlaf(float x,float y,float z)961 __inline __attribute__((always_inline)) float mlaf(float x, float y, float z) { return x * y + z; }
962
xisnanf(float x)963 __inline __attribute__((always_inline)) int xisnanf(float x) { return x != x; }
xisinff(float x)964 __inline __attribute__((always_inline)) int xisinff(float x) { return x == RT_INFINITY_F || x == -RT_INFINITY_F; }
xisminff(float x)965 __inline __attribute__((always_inline)) int xisminff(float x) { return x == -RT_INFINITY_F; }
xispinff(float x)966 __inline __attribute__((always_inline)) int xispinff(float x) { return x == RT_INFINITY_F; }
967
ilogbp1f(float d)968 __inline __attribute__((always_inline)) int ilogbp1f(float d) {
969 int m = d < 5.421010862427522E-20f;
970 d = m ? 1.8446744073709552E19f * d : d;
971 int q = (floatToRawIntBits(d) >> 23) & 0xff;
972 q = m ? q - (64 + 0x7e) : q - 0x7e;
973 return q;
974 }
975
ldexpkf(float x,int q)976 __inline __attribute__((always_inline)) float ldexpkf(float x, int q) {
977 float u;
978 int m;
979 m = q >> 31;
980 m = (((m + q) >> 6) - m) << 4;
981 q = q - (m << 2);
982 u = intBitsToFloat(((int32_t)(m + 0x7f)) << 23);
983 u = u * u;
984 x = x * u * u;
985 u = intBitsToFloat(((int32_t)(q + 0x7f)) << 23);
986 return x * u;
987 }
988
xcbrtf(float d)989 __inline __attribute__((always_inline)) float xcbrtf(float d) { // max error : 2 ulps
990 float x, y, q = 1.0f;
991 int e, r;
992
993 e = ilogbp1f(d);
994 d = ldexpkf(d, -e);
995 r = (e + 6144) % 3;
996 q = (r == 1) ? 1.2599210498948731647672106f : q;
997 q = (r == 2) ? 1.5874010519681994747517056f : q;
998 q = ldexpkf(q, (e + 6144) / 3 - 2048);
999
1000 q = mulsignf(q, d);
1001 d = xfabsf(d);
1002
1003 x = -0.601564466953277587890625f;
1004 x = mlaf(x, d, 2.8208892345428466796875f);
1005 x = mlaf(x, d, -5.532182216644287109375f);
1006 x = mlaf(x, d, 5.898262500762939453125f);
1007 x = mlaf(x, d, -3.8095417022705078125f);
1008 x = mlaf(x, d, 2.2241256237030029296875f);
1009
1010 y = d * x * x;
1011 y = (y - (2.0f / 3.0f) * y * (y * x - 1.0f)) * q;
1012
1013 return y;
1014 }
1015
xsinf(float d)1016 __inline __attribute__((always_inline)) float xsinf(float d) {
1017 int q;
1018 float u, s;
1019
1020 q = xrintf(d * RT_1_PI_F);
1021
1022 d = mlaf(q, -PI4_Af*4, d);
1023 d = mlaf(q, -PI4_Bf*4, d);
1024 d = mlaf(q, -PI4_Cf*4, d);
1025 d = mlaf(q, -PI4_Df*4, d);
1026
1027 s = d * d;
1028
1029 if ((q & 1) != 0) d = -d;
1030
1031 u = 2.6083159809786593541503e-06f;
1032 u = mlaf(u, s, -0.0001981069071916863322258f);
1033 u = mlaf(u, s, 0.00833307858556509017944336f);
1034 u = mlaf(u, s, -0.166666597127914428710938f);
1035
1036 u = mlaf(s, u * d, d);
1037
1038 return u;
1039 }
1040
xcosf(float d)1041 __inline __attribute__((always_inline)) float xcosf(float d) {
1042 #ifdef __SSE299__
1043 // faster than scalar version
1044 return xcosf(_mm_set_ss(d))[0];
1045 #else
1046 int q;
1047 float u, s;
1048
1049 q = 1 + 2*xrintf(d * RT_1_PI_F - 0.5f);
1050
1051 d = mlaf(q, -PI4_Af*2, d);
1052 d = mlaf(q, -PI4_Bf*2, d);
1053 d = mlaf(q, -PI4_Cf*2, d);
1054 d = mlaf(q, -PI4_Df*2, d);
1055
1056 s = d * d;
1057
1058 if ((q & 2) == 0) d = -d;
1059
1060 u = 2.6083159809786593541503e-06f;
1061 u = mlaf(u, s, -0.0001981069071916863322258f);
1062 u = mlaf(u, s, 0.00833307858556509017944336f);
1063 u = mlaf(u, s, -0.166666597127914428710938f);
1064
1065 u = mlaf(s, u * d, d);
1066
1067 return u;
1068 #endif
1069 }
1070
xsincosf(float d)1071 __inline __attribute__((always_inline)) float2 xsincosf(float d) {
1072 #ifdef __SSE299__
1073 // faster than scalar version
1074 vfloat2 res = xsincosf(_mm_set_ss(d));
1075 return {res.x[0], res.y[0]};
1076 #else
1077 int q;
1078 float u, s, t;
1079 float2 r;
1080
1081 q = xrintf(d * RT_2_PI_F);
1082
1083 s = d;
1084
1085 s = mlaf(q, -PI4_Af*2, s);
1086 s = mlaf(q, -PI4_Bf*2, s);
1087 s = mlaf(q, -PI4_Cf*2, s);
1088 s = mlaf(q, -PI4_Df*2, s);
1089
1090 t = s;
1091
1092 s = s * s;
1093
1094 u = -0.000195169282960705459117889f;
1095 u = mlaf(u, s, 0.00833215750753879547119141f);
1096 u = mlaf(u, s, -0.166666537523269653320312f);
1097 u = u * s * t;
1098
1099 r.x = t + u;
1100
1101 u = -2.71811842367242206819355e-07f;
1102 u = mlaf(u, s, 2.47990446951007470488548e-05f);
1103 u = mlaf(u, s, -0.00138888787478208541870117f);
1104 u = mlaf(u, s, 0.0416666641831398010253906f);
1105 u = mlaf(u, s, -0.5f);
1106
1107 r.y = u * s + 1;
1108
1109 if ((q & 1) != 0) { s = r.y; r.y = r.x; r.x = s; }
1110 if ((q & 2) != 0) { r.x = -r.x; }
1111 if (((q+1) & 2) != 0) { r.y = -r.y; }
1112
1113 if (xisinff(d)) { r.x = r.y = RT_NAN_F; }
1114
1115 return r;
1116 #endif
1117 }
1118
xtanf(float d)1119 __inline __attribute__((always_inline)) float xtanf(float d) {
1120 int q;
1121 float u, s, x;
1122
1123 q = xrintf(d * (float)(2 * RT_1_PI));
1124
1125 x = d;
1126
1127 x = mlaf(q, -PI4_Af*2, x);
1128 x = mlaf(q, -PI4_Bf*2, x);
1129 x = mlaf(q, -PI4_Cf*2, x);
1130 x = mlaf(q, -PI4_Df*2, x);
1131
1132 s = x * x;
1133
1134 if ((q & 1) != 0) x = -x;
1135
1136 u = 0.00927245803177356719970703f;
1137 u = mlaf(u, s, 0.00331984995864331722259521f);
1138 u = mlaf(u, s, 0.0242998078465461730957031f);
1139 u = mlaf(u, s, 0.0534495301544666290283203f);
1140 u = mlaf(u, s, 0.133383005857467651367188f);
1141 u = mlaf(u, s, 0.333331853151321411132812f);
1142
1143 u = mlaf(s, u * x, x);
1144
1145 if ((q & 1) != 0) u = 1.0f / u;
1146
1147 if (xisinff(d)) u = RT_NAN_F;
1148
1149 return u;
1150 }
1151
xatanf(float s)1152 __inline __attribute__((always_inline)) float xatanf(float s) {
1153 float t, u;
1154 int q = 0;
1155
1156 if (s < 0) { s = -s; q = 2; }
1157 if (s > 1) { s = 1.0f / s; q |= 1; }
1158
1159 t = s * s;
1160
1161 u = 0.00282363896258175373077393f;
1162 u = mlaf(u, t, -0.0159569028764963150024414f);
1163 u = mlaf(u, t, 0.0425049886107444763183594f);
1164 u = mlaf(u, t, -0.0748900920152664184570312f);
1165 u = mlaf(u, t, 0.106347933411598205566406f);
1166 u = mlaf(u, t, -0.142027363181114196777344f);
1167 u = mlaf(u, t, 0.199926957488059997558594f);
1168 u = mlaf(u, t, -0.333331018686294555664062f);
1169
1170 t = s + s * (t * u);
1171
1172 if ((q & 1) != 0) t = 1.570796326794896557998982f - t;
1173 if ((q & 2) != 0) t = -t;
1174
1175 return t;
1176 }
1177
atan2kf(float y,float x)1178 __inline __attribute__((always_inline)) float atan2kf(float y, float x) {
1179 float s, t, u;
1180 float q = 0.f;
1181
1182 if (x < 0) { x = -x; q = -2.f; }
1183 if (y > x) { t = x; x = y; y = -t; q += 1.f; }
1184
1185 s = y / x;
1186 t = s * s;
1187
1188 u = 0.00282363896258175373077393f;
1189 u = mlaf(u, t, -0.0159569028764963150024414f);
1190 u = mlaf(u, t, 0.0425049886107444763183594f);
1191 u = mlaf(u, t, -0.0748900920152664184570312f);
1192 u = mlaf(u, t, 0.106347933411598205566406f);
1193 u = mlaf(u, t, -0.142027363181114196777344f);
1194 u = mlaf(u, t, 0.199926957488059997558594f);
1195 u = mlaf(u, t, -0.333331018686294555664062f);
1196
1197 t = u * t;
1198 t = mlaf(t,s,s);
1199 return mlaf(q,(float)(RT_PI_F_2),t);
1200 }
1201
xatan2f(float y,float x)1202 __inline __attribute__((always_inline)) float xatan2f(float y, float x) {
1203 float r = atan2kf(xfabsf(y), x);
1204
1205 r = mulsignf(r, x);
1206 if (xisinff(x) || x == 0) r = RT_PI_F/2 - (xisinff(x) ? (signf(x) * (float)(RT_PI_F*.5f)) : 0);
1207 if (xisinff(y) ) r = RT_PI_F/2 - (xisinff(x) ? (signf(x) * (float)(RT_PI_F*.25f)) : 0);
1208 if ( y == 0) r = (signf(x) == -1 ? RT_PI_F : 0);
1209
1210 return xisnanf(x) || xisnanf(y) ? RT_NAN_F : mulsignf(r, y);
1211 }
1212
xasinf(float d)1213 __inline __attribute__((always_inline)) float xasinf(float d) {
1214 return mulsignf(atan2kf(fabsf(d), sqrtf((1.0f+d)*(1.0f-d))), d);
1215 }
1216
xacosf(float d)1217 __inline __attribute__((always_inline)) float xacosf(float d) {
1218 return mulsignf(atan2kf(sqrtf((1.0f+d)*(1.0f-d)), fabsf(d)), d) + (d < 0 ? (float)RT_PI : 0.0f);
1219 }
1220
xlogf(float d)1221 __inline __attribute__((always_inline)) float xlogf(float d) {
1222 float x, x2, t, m;
1223 int e;
1224
1225 e = ilogbp1f(d * 0.7071f);
1226 m = ldexpkf(d, -e);
1227
1228 x = (m-1.0f) / (m+1.0f);
1229 x2 = x * x;
1230
1231 t = 0.2371599674224853515625f;
1232 t = mlaf(t, x2, 0.285279005765914916992188f);
1233 t = mlaf(t, x2, 0.400005519390106201171875f);
1234 t = mlaf(t, x2, 0.666666567325592041015625f);
1235 t = mlaf(t, x2, 2.0f);
1236
1237 x = x * t + 0.693147180559945286226764f * e;
1238
1239 if (xispinff(d)) x = RT_INFINITY_F;
1240 if (d < 0) x = RT_NAN_F;
1241 if (d == 0) x = -RT_INFINITY_F;
1242
1243 return x;
1244 }
1245
xlogf1(float d)1246 __inline __attribute__((always_inline)) float xlogf1(float d) { // does xlogf(vmaxf(d, 1.f)) but faster
1247 float x, x2, t, m;
1248 int e;
1249
1250 e = ilogbp1f(d * 0.7071f);
1251 m = ldexpkf(d, -e);
1252
1253 x = (m-1.0f) / (m+1.0f);
1254 x2 = x * x;
1255
1256 t = 0.2371599674224853515625f;
1257 t = mlaf(t, x2, 0.285279005765914916992188f);
1258 t = mlaf(t, x2, 0.400005519390106201171875f);
1259 t = mlaf(t, x2, 0.666666567325592041015625f);
1260 t = mlaf(t, x2, 2.0f);
1261
1262 x = x * t + 0.693147180559945286226764f * e;
1263
1264 if (xispinff(d)) x = RT_INFINITY_F;
1265 if (d <= 1.f) x = 0;
1266
1267 return x;
1268 }
1269
xexpf(float d)1270 __inline __attribute__((always_inline)) float xexpf(float d) {
1271 if(d<=-104.0f) return 0.0f;
1272
1273 int q = xrintf(d * R_LN2f);
1274 float s, u;
1275
1276 s = mlaf(q, -L2Uf, d);
1277 s = mlaf(q, -L2Lf, s);
1278
1279 u = 0.00136324646882712841033936f;
1280 u = mlaf(u, s, 0.00836596917361021041870117f);
1281 u = mlaf(u, s, 0.0416710823774337768554688f);
1282 u = mlaf(u, s, 0.166665524244308471679688f);
1283 u = mlaf(u, s, 0.499999850988388061523438f);
1284
1285 u = mlaf( s, mlaf(s,u,1.f),1.f);
1286 return ldexpkf(u, q);
1287
1288 }
1289
xmul2f(float d)1290 __inline __attribute__((always_inline)) float xmul2f(float d) {
1291 union {
1292 float floatval;
1293 int intval;
1294 } uflint;
1295 uflint.floatval = d;
1296 if (uflint.intval & 0x7FFFFFFF) { // if f==0 do nothing
1297 uflint.intval += 1 << 23; // add 1 to the exponent
1298 }
1299 return uflint.floatval;
1300 }
1301
xdiv2f(float d)1302 __inline __attribute__((always_inline)) float xdiv2f(float d) {
1303 union {
1304 float floatval;
1305 int intval;
1306 } uflint;
1307 uflint.floatval = d;
1308 if (uflint.intval & 0x7FFFFFFF) { // if f==0 do nothing
1309 uflint.intval -= 1 << 23; // sub 1 from the exponent
1310 }
1311 return uflint.floatval;
1312 }
1313
xdivf(float d,int n)1314 __inline __attribute__((always_inline)) float xdivf( float d, int n){
1315 union {
1316 float floatval;
1317 int intval;
1318 } uflint;
1319 uflint.floatval = d;
1320 if (uflint.intval & 0x7FFFFFFF) { // if f==0 do nothing
1321 uflint.intval -= n << 23; // add n to the exponent
1322 }
1323 return uflint.floatval;
1324 }
1325
1326