1 /*
2 * Correctly rounded expm1 = e^x - 1
3 *
4 * Author : Christoph Lauter (ENS Lyon)
5 *
6 * This file is part of the crlibm library developed by the Arenaire
7 * project at Ecole Normale Superieure de Lyon
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 */
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include "crlibm.h"
27 #include "crlibm_private.h"
28 #include "triple-double.h"
29 #include "expm1.h"
30 #ifdef BUILD_INTERVAL_FUNCTIONS
31 #include "interval.h"
32 #endif
33
34
35
36 #define DEBUG 0
37
38
/* expm1_direct_td -- accurate (triple-double) phase of expm1 for the
   direct-approximation path (small arguments, |x| roughly <= 1/4).

   Writes into *expm1h + *expm1m + *expm1l a triple-double approximation
   of expm1 of the caller's original argument.  The caller may already
   have halved that argument expoX+1 times (when expoX >= 0) so that the
   x received here satisfies |x| < 1/32; in that case this function
   undoes the reduction with

       expm1(2y) = (expm1(y) + 2) * expm1(y)

   applied once per reconstruction step (expoX is at most 2).

   Parameters:
     expm1h, expm1m, expm1l : out, renormalized triple-double result
     x                      : (possibly pre-scaled) argument
     xSqHalfh + xSqHalfl    : double-double x^2/2, computed by the caller
     xSqh + xSql            : double-double x^2, computed by the caller
     expoX                  : number of halvings minus 1; negative means
                              no range reduction was performed

   The "a - b/53" annotations follow the crlibm convention: relative
   error bound 2^(-a) of the step, overlap bound 2^(-b) between adjacent
   components of the triple-double. */
void expm1_direct_td(double *expm1h, double *expm1m, double *expm1l,
		     double x, double xSqHalfh, double xSqHalfl, double xSqh, double xSql, int expoX) {
  double highPoly, tt1h, t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
  double tt6h, tt6m, tt6l, t7h, t7m, t7l, lowPolyh, lowPolym, lowPolyl;
  double fullHighPolyh, fullHighPolym, fullHighPolyl, polyh, polym, polyl;
  double xCubeh, xCubem, xCubel, tt7h, tt7m, tt7l, t8h, t8m, t8l;
  double expm1hover, expm1mover, expm1lover;
  double r1h, r1m, r1l, r2h, r2m, r2l, r3h, r3m, r3l;
  double rr1h, rr1m, rr1l, rr2h, rr2m, rr2l, rr3h, rr3m, rr3l;
  double fullHighPolyhover, fullHighPolymover, fullHighPolylover;

  /* Double precision evaluation steps: the highest-degree coefficients
     (C11..C15) of the accurate polynomial need only double precision */
#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
  highPoly = FMA(FMA(FMA(FMA(accuDirectpolyC15h ,x,accuDirectpolyC14h),x,accuDirectpolyC13h),x,
                 accuDirectpolyC12h),x,accuDirectpolyC11h);
#else
  highPoly = accuDirectpolyC11h + x * (accuDirectpolyC12h + x * (accuDirectpolyC13h + x * (
             accuDirectpolyC14h + x * accuDirectpolyC15h)));
#endif

  tt1h = x * highPoly;

  /* Triple-double steps for x + x^2/2 and x^3 */

  Add123(&lowPolyh,&lowPolym,&lowPolyl,x,xSqHalfh,xSqHalfl);  /* infty - 52/53 */
  Mul123(&xCubeh,&xCubem,&xCubel,x,xSqh,xSql);                /* 154 - 47/53 */


  /* Double-double evaluation steps for coefficients C5..C10 */

  Add12(t1h,t1l,accuDirectpolyC10h,tt1h);

  MulAdd212(&t2h,&t2l,accuDirectpolyC9h,accuDirectpolyC9m,x,t1h,t1l);
  MulAdd212(&t3h,&t3l,accuDirectpolyC8h,accuDirectpolyC8m,x,t2h,t2l);
  MulAdd212(&t4h,&t4l,accuDirectpolyC7h,accuDirectpolyC7m,x,t3h,t3l);
  MulAdd212(&t5h,&t5l,accuDirectpolyC6h,accuDirectpolyC6m,x,t4h,t4l);
  MulAdd212(&t6h,&t6l,accuDirectpolyC5h,accuDirectpolyC5m,x,t5h,t5l);

  /* Triple-double evaluation steps for coefficients C3 and C4 */

  Mul123(&tt6h,&tt6m,&tt6l,x,t6h,t6l);                                        /* 154 - 47/53 */
  Add233(&t7h,&t7m,&t7l,accuDirectpolyC4h,accuDirectpolyC4m,tt6h,tt6m,tt6l);  /* 150 - 43/53 */

  Mul133(&tt7h,&tt7m,&tt7l,x,t7h,t7m,t7l);                                    /* 143 - 38/53 */
  Add33(&t8h,&t8m,&t8l,accuDirectpolyC3h,accuDirectpolyC3m,accuDirectpolyC3l,tt7h,tt7m,tt7l); /* 135 - 33/53 */

  Mul33(&fullHighPolyhover,&fullHighPolymover,&fullHighPolylover,xCubeh,xCubem,xCubel,t8h,t8m,t8l); /* 130 - 29/53 */

  /* Renormalize to remove the accumulated overlap before the final sum */
  Renormalize3(&fullHighPolyh,&fullHighPolym,&fullHighPolyl,
               fullHighPolyhover,fullHighPolymover,fullHighPolylover);        /* infty - 52/53 */

  /* poly = (x + x^2/2) + x^3 * (C3 + C4*x + ... ) */
  Add33(&polyh,&polym,&polyl,lowPolyh,lowPolym,lowPolyl,fullHighPolyh,fullHighPolym,fullHighPolyl);
                                                                              /* 149 - 47/53 */

  /* Reconstruction steps */

  /* If we have not performed any range reduction, we have no reconstruction to do */
  if (expoX >= 0) {
    /* If we are here, we must perform reconstruction */

    /* First reconstruction step: expm1(2y) = (expm1(y) + 2) * expm1(y) */

    Add133(&r1h,&r1m,&r1l,2,polyh,polym,polyl);
    Mul33(&rr1h,&rr1m,&rr1l,r1h,r1m,r1l,polyh,polym,polyl);
    if (expoX >= 1) {

      /* Second reconstruction step */
      Add133(&r2h,&r2m,&r2l,2,rr1h,rr1m,rr1l);
      Mul33(&rr2h,&rr2m,&rr2l,r2h,r2m,r2l,rr1h,rr1m,rr1l);

      if (expoX >= 2) {

	/* Third reconstruction step */
	Add133(&r3h,&r3m,&r3l,2,rr2h,rr2m,rr2l);
	Mul33(&rr3h,&rr3m,&rr3l,r3h,r3m,r3l,rr2h,rr2m,rr2l);

	/* expoX may be maximally 2 */

	expm1hover = rr3h;
	expm1mover = rr3m;
	expm1lover = rr3l;

      } else {
	expm1hover = rr2h;
	expm1mover = rr2m;
	expm1lover = rr2l;
      }

    } else {
      expm1hover = rr1h;
      expm1mover = rr1m;
      expm1lover = rr1l;
    }

  } else {
    expm1hover = polyh;
    expm1mover = polym;
    expm1lover = polyl;
  }

  /* Renormalize before returning so that the components of the result
     do not overlap */

  Renormalize3(expm1h,expm1m,expm1l,expm1hover,expm1mover,expm1lover);
}
143
/* expm1_common_td -- accurate (triple-double) phase of expm1 on the
   common path, where expm1(x) is computed as exp(x) - 1.

   The caller has performed the table-based argument reduction:
   rh + rm + rl is the triple-double reduced argument, tbl1h/m/l and
   tbl2h/m/l are the triple-double table values for the two indexes of
   the reduction (powers of two roots of 2 -- see expm1.h tables), and
   M is the remaining binade shift, applied here as 2^M.

   This function evaluates a polynomial approximating exp(r) - 1,
   reconstructs

       exp(x) = 2^M * tbl1 * tbl2 * (1 + p(r))

   and finally subtracts 1, leaving a renormalized triple-double in
   *expm1h + *expm1m + *expm1l. */
void expm1_common_td(double *expm1h, double *expm1m, double *expm1l,
		     double rh, double rm, double rl,
		     double tbl1h, double tbl1m, double tbl1l,
		     double tbl2h, double tbl2m, double tbl2l,
		     int M) {
  double highPoly, highPolyMulth, highPolyMultm, highPolyMultl;
  double rhSquareh, rhSquarel, rhSquareHalfh, rhSquareHalfl;
  double rhCubeh, rhCubem, rhCubel;
  double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5, t6;
  double lowPolyh, lowPolym, lowPolyl;
  double ph, pm, pl, phnorm, pmnorm, rmlMultPh, rmlMultPl;
  double qh, ql, fullPolyh, fullPolym, fullPolyl;
  double polyWithTbl1h, polyWithTbl1m, polyWithTbl1l;
  double polyAddOneh,polyAddOnem,polyAddOnel;
  double polyWithTablesh, polyWithTablesm, polyWithTablesl;
  double exph, expm, expl, expm1hover, expm1mover, expm1lover;
  db_number polyWithTableshdb, polyWithTablesmdb, polyWithTablesldb;

  /* Polynomial approximation - double precision steps (degrees 5..7) */

#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
  highPoly = FMA(FMA(accuCommonpolyC7h,rh,accuCommonpolyC6h),rh,accuCommonpolyC5h);
#else
  highPoly = accuCommonpolyC5h + rh * (accuCommonpolyC6h + rh * accuCommonpolyC7h);
#endif

  /* Polynomial approximation - double-double precision steps (degrees 3..4) */

  Mul12(&t1h,&t1l,rh,highPoly);
  Add22(&t2h,&t2l,accuCommonpolyC4h,accuCommonpolyC4m,t1h,t1l);
  Mul122(&t3h,&t3l,rh,t2h,t2l);
  Add22(&t4h,&t4l,accuCommonpolyC3h,accuCommonpolyC3m,t3h,t3l);

  Mul12(&rhSquareh,&rhSquarel,rh,rh);
  Mul123(&rhCubeh,&rhCubem,&rhCubel,rh,rhSquareh,rhSquarel);

  /* Multiplication by 0.5 is exact */
  rhSquareHalfh = 0.5 * rhSquareh;
  rhSquareHalfl = 0.5 * rhSquarel;

  /* Polynomial approximation - triple-double precision steps */

  /* lowPoly = rh + rh^2/2 as a renormalized triple-double */
  Renormalize3(&lowPolyh,&lowPolym,&lowPolyl,rh,rhSquareHalfh,rhSquareHalfl);

  /* highPolyMult = rh^3 * (C3 + C4*rh + ...) */
  Mul233(&highPolyMulth,&highPolyMultm,&highPolyMultl,t4h,t4l,rhCubeh,rhCubem,rhCubel);

  /* p = exp(rh) - 1 (approximately) */
  Add33(&ph,&pm,&pl,lowPolyh,lowPolym,lowPolyl,highPolyMulth,highPolyMultm,highPolyMultl);

  /* Reconstruction: take the lower parts rm + rl of the reduced
     argument into account via q = (rm + rl) * (1 + p) */

  Add12(phnorm,pmnorm,ph,pm);
  Mul22(&rmlMultPh,&rmlMultPl,rm,rl,phnorm,pmnorm);
  Add22(&qh,&ql,rm,rl,rmlMultPh,rmlMultPl);

  Add233Cond(&fullPolyh,&fullPolym,&fullPolyl,qh,ql,ph,pm,pl);
  /* polyAddOne = 1 + fullPoly ~ exp(r) */
  Add12(polyAddOneh,t5,1,fullPolyh);
  Add12Cond(polyAddOnem,t6,t5,fullPolym);
  polyAddOnel = t6 + fullPolyl;
  /* Multiply in the two table values */
  Mul33(&polyWithTbl1h,&polyWithTbl1m,&polyWithTbl1l,tbl1h,tbl1m,tbl1l,polyAddOneh,polyAddOnem,polyAddOnel);
  Mul33(&polyWithTablesh,&polyWithTablesm,&polyWithTablesl,
        tbl2h,tbl2m,tbl2l,
        polyWithTbl1h,polyWithTbl1m,polyWithTbl1l);

  /* Multiplication by 2^(M)

     We perform it in integer to overcome the non-representability of 2^(1024)
     This case is possible for M = 1024 and polyWithTablesh < 1

     The overlap in the triple-double polyWithTables[hml] stays unchanged.

  */

  polyWithTableshdb.d = polyWithTablesh;
  polyWithTablesmdb.d = polyWithTablesm;
  polyWithTablesldb.d = polyWithTablesl;

  /* Guard against 0: adding M to the exponent field of +/-0.0 would
     produce a spurious non-zero value.
     TODO FIXME probably at least the first of these tests is useless,
     but I leave this to Christoph to check it. Let us be
     conservative. Florent */
  if(polyWithTableshdb.d!=0)
    polyWithTableshdb.i[HI] += M << 20;
  if(polyWithTablesmdb.d!=0)
    polyWithTablesmdb.i[HI] += M << 20;
  if(polyWithTablesldb.d!=0)
    polyWithTablesldb.i[HI] += M << 20;

  exph = polyWithTableshdb.d;
  expm = polyWithTablesmdb.d;
  expl = polyWithTablesldb.d;

  /* Subtraction of 1 (i.e. addition of -1)

     We use a conditional Add133
  */

  Add133Cond(&expm1hover,&expm1mover,&expm1lover,-1,exph,expm,expl);

  /* Renormalization */

  Renormalize3(expm1h,expm1m,expm1l,expm1hover,expm1mover,expm1lover);

}
245
246
/* expm1_rn -- correctly rounded expm1(x) = e^x - 1, round to nearest.

   Two-phase scheme: a fast double-double "quick" phase whose result is
   validated by a rounding test; if the test fails, a slow triple-double
   "accurate" phase (expm1_direct_td / expm1_common_td) is launched and
   its result is rounded with the ReturnRoundToNearest3 sequence.

   Argument classification (bounds defined in expm1.h):
     - |x| < RETURNXBOUND        : x itself is the correctly rounded result
     - NaN / +inf                : return x+x (propagates NaN, +inf)
     - -inf or x < MINUSONEBOUND : result rounds to -1.0
     - x > OVERFLOWBOUND         : overflow, return +inf and raise inexact
     - |x| < DIRECTINTERVALBOUND : direct polynomial path, after halving x
                                   until |x| < 1/32
     - otherwise                 : expm1(x) = exp(x) - 1 via table-based
                                   reduction x = k*log(2)/2^12 + r */
double expm1_rn(double x) {
  db_number xdb, shiftedXMultdb, polyTblhdb, polyTblmdb;
  int xIntHi, expoX, k, M, index1, index2;
  double highPoly, tt1h, t1h, t1l, xSqh, xSql, xSqHalfh, xSqHalfl, xCubeh, xCubel, t2h, t2l, templ, tt3h, tt3l;
  double polyh, polyl, expm1h, expm1m, expm1l;
  double r1h, r1l, r1t, rr1h, rr1l;
  double r2h, r2l, r2t, rr2h, rr2l;
  double r3h, r3l, r3t, rr3h, rr3l;
  double xMultLog2InvMult2L, shiftedXMult, kd, s1, s2, s3, s4, s5, rh, rm, rl;
  double rhSquare, rhC3, rhSquareHalf, monomialCube, rhFour, monomialFour;
  double tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l;
  double highPolyWithSquare, tablesh, tablesl, t8, t9, t10, t11, t12, t13;
  double exph, expm, t1, t2, t3;
  double msLog2Div2LMultKh, msLog2Div2LMultKm, msLog2Div2LMultKl;
  double middlePoly, doublePoly;


  xdb.d = x;

  /* Strip off the sign of x for the following tests */

  xIntHi = xdb.i[HI] & 0x7fffffff;

  /* Test if we are so small that we can return (a corrected) x as correct rounding */
  if (xIntHi < RETURNXBOUND) {
    return x;
  }


  /* Filter out special cases like overflow, -1 in result, infinities and NaNs
     The filters are not sharp, we have positive arguments that flow through
  */
  if (xIntHi >= SIMPLEOVERFLOWBOUND) {
    /* Test if we are +/-inf or NaN */
    if (xIntHi >= 0x7ff00000) {
      /* Test if NaN */
      if (((xIntHi & 0x000fffff) | xdb.i[LO]) != 0) {
	/* NaN */
	return x+x;    /* return NaN */
      }
      /* Test if +inf or -inf */
      if (xdb.i[HI] > 0) {
	/* +inf */
	return x+x;    /* return +inf */
      }

      /* If we are here, we are -inf */
      return -1.0;
    }

    /* If we are here, we are overflowed or a common case that flows through */

    /* Test if we are actually overflowed */
    if (x > OVERFLOWBOUND) {
      return LARGEST * LARGEST;  /* return +inf and set flag */
    }
  }

  /* Test if we know already that we are -1.0 (+ correction depending on rounding mode) in result */
  if (x < MINUSONEBOUND) {
    return -1.0;
  }

  /* Test if we have |x| <= 1/4-1/2ulp(1/4) for knowing if we use exp(x) or approximate directly */

  if (xIntHi < DIRECTINTERVALBOUND) {
    /* We approximate expm1 directly after a range reduction as follows

       expm1(x) = (expm1(x/2) + 2) * expm1(x/2)

       We perform the range reduction in such a way that finally |x| < 1/32
    */

    /* Extract the exponent of |x| and add 5 (2^5 = 32) */
    expoX = ((xIntHi & 0x7ff00000) >> 20) - (1023 - 5);

    /* If this particularly biased exponent expoX is negative, we are already less than 1/32 */
    if (expoX >= 0) {
      /* If we are here, we must perform range reduction */


      /* We multiply x by 2^(-expoX-1) by bit manipulation
	 x cannot be denormalized so there is no danger
      */
      xdb.i[HI] += (-expoX-1) << 20;

      /* We reassign the new x and maintain xIntHi */

      xIntHi = xdb.i[HI] & 0x7fffffff;
      x = xdb.d;
    }

    /* Here, we have always |x| < 1/32 */


    /* Double precision evaluation steps and one double-double step */

    Mul12(&xSqh,&xSql,x,x);

#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
    middlePoly = FMA(quickDirectpolyC5h,x,quickDirectpolyC4h);
#else
    middlePoly = quickDirectpolyC4h + x * quickDirectpolyC5h;
#endif

    doublePoly = middlePoly;

    /* Special path: for small |x| we can truncate the polynomial */

    if (xIntHi > SPECIALINTERVALBOUND) {

#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
      highPoly = FMA(FMA(FMA(quickDirectpolyC9h ,x,quickDirectpolyC8h),x,
		     quickDirectpolyC7h),x,quickDirectpolyC6h);
#else
      highPoly = quickDirectpolyC6h + x * (quickDirectpolyC7h + x * (
		 quickDirectpolyC8h + x * quickDirectpolyC9h));
#endif

      highPolyWithSquare = xSqh * highPoly;

      doublePoly = middlePoly + highPolyWithSquare;

    }

    /* Double-double evaluation steps */
    tt1h = x * doublePoly;

    /* Multiplication by 0.5 is exact */
    xSqHalfh = 0.5 * xSqh;
    xSqHalfl = 0.5 * xSql;
    /* t2 = x + x^2/2 as a double-double */
    Add12(t2h,templ,x,xSqHalfh);
    t2l = templ + xSqHalfl;

    /* poly = (x + x^2/2) + x^3 * (C3 + x * doublePoly) */
    Add12(t1h,t1l,quickDirectpolyC3h,tt1h);
    Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
    Mul22(&tt3h,&tt3l,xCubeh,xCubel,t1h,t1l);

    Add22(&polyh,&polyl,t2h,t2l,tt3h,tt3l);

    /* Reconstruction */

    /* If we have not performed any range reduction, we have no reconstruction to do */
    if (expoX >= 0) {
      /* If we are here, we must perform reconstruction */

      /* First reconstruction step: expm1(2y) = (expm1(y) + 2) * expm1(y) */
      Add12(r1h,r1t,2,polyh);
      r1l = r1t + polyl;
      Mul22(&rr1h,&rr1l,r1h,r1l,polyh,polyl);

      if (expoX >= 1) {

	/* Second reconstruction step */
	Add12(r2h,r2t,2,rr1h);
	r2l = r2t + rr1l;
	Mul22(&rr2h,&rr2l,r2h,r2l,rr1h,rr1l);

	if (expoX >= 2) {

	  /* Third reconstruction step */
	  Add12(r3h,r3t,2,rr2h);
	  r3l = r3t + rr2l;
	  Mul22(&rr3h,&rr3l,r3h,r3l,rr2h,rr2l);

	  /* expoX may be maximally 2 */

	  expm1h = rr3h;
	  expm1m = rr3l;

	} else {
	  expm1h = rr2h;
	  expm1m = rr2l;
	}

      } else {
	expm1h = rr1h;
	expm1m = rr1l;
      }

    } else {
      expm1h = polyh;
      expm1m = polyl;
    }

    /* Rounding test: if the quick-phase result is provably correctly
       roundable, return it; otherwise launch the accurate phase */
    if(expm1h == (expm1h + (expm1m * ROUNDCSTDIRECTRN)))
      return expm1h;
    else
      {

#if DEBUG
	printf("Launch accurate phase (direct interval)\n");
#endif

	expm1_direct_td(&expm1h, &expm1m, &expm1l, x, xSqHalfh, xSqHalfl, xSqh, xSql, expoX);

	ReturnRoundToNearest3(expm1h, expm1m, expm1l);

      } /* Accurate phase launched */

    /* We cannot be here, since we return in all cases before */
  }

  /* If we are here, we can use expm1(x) = exp(x) - 1 */

  /* Range reduction - exact part: compute k as double and as int
     (shiftConst trick: round x * 2^12/log(2) to an integer kd) */

  xMultLog2InvMult2L = x * log2InvMult2L;
  shiftedXMult = xMultLog2InvMult2L + shiftConst;
  kd = shiftedXMult - shiftConst;
  shiftedXMultdb.d = shiftedXMult;
  k = shiftedXMultdb.i[LO];
  M = k >> 12;
  index1 = k & INDEXMASK1;
  index2 = (k & INDEXMASK2) >> 6;

  /* Range reduction - part affected by error - must be redone in accurate phase */
  Mul12(&s1,&s2,msLog2Div2Lh,kd);
  s3 = kd * msLog2Div2Lm;
  s4 = s2 + s3;
  s5 = x + s1;
  Add12Cond(rh,rm,s5,s4);

  /* Table reads - read only two double-doubles by now */
  tbl1h = twoPowerIndex1[index1].hi;
  tbl1m = twoPowerIndex1[index1].mi;
  tbl2h = twoPowerIndex2[index2].hi;
  tbl2m = twoPowerIndex2[index2].mi;

  /* Quick phase starts here: polynomial for exp(rh) - 1 - rh */

  rhSquare = rh * rh;
  rhC3 = quickCommonpolyC3h * rh;

  rhSquareHalf = 0.5 * rhSquare;
  monomialCube = rhC3 * rhSquare;
  rhFour = rhSquare * rhSquare;

  monomialFour = quickCommonpolyC4h * rhFour;

  highPoly = monomialCube + monomialFour;

  highPolyWithSquare = rhSquareHalf + highPoly;

  /* Reconstruction: integration of table values */

  Mul22(&tablesh,&tablesl,tbl1h,tbl1m,tbl2h,tbl2m);

  t8 = rm + highPolyWithSquare;
  t9 = rh + t8;

  t10 = tablesh * t9;

  Add12(t11,t12,tablesh,t10);
  t13 = t12 + tablesl;
  Add12(polyTblhdb.d,polyTblmdb.d,t11,t13);

  /* Reconstruction: multiplication by 2^M */

  /* Implement the multiplication by manipulation of the exponent field
     to overcome the problem of the non-representability of 2^1024 (M = 1024)
     This case is possible if polyTblhdb.d < 1
  */

  polyTblhdb.i[HI] += M << 20;
  polyTblmdb.i[HI] += M << 20;

  exph = polyTblhdb.d;
  expm = polyTblmdb.d;

  /* Subtraction of 1

     Testing if the operation is necessary is more expensive than
     performing it in any case.

     We may cancel at most 2 bits in the subtraction for
     arguments 1/4 <= x <= ln(2) (0.25 <= x <= 0.69)
     We must therefore use conditional Add12s

     Since we perform a subtraction, we may not have addition overflow towards +inf

  */

  Add12Cond(t1,t2,-1,exph);
  t3 = t2 + expm;
  Add12Cond(expm1h,expm1m,t1,t3);


  /* Rounding test */
  if(expm1h == (expm1h + (expm1m * ROUNDCSTCOMMONRN))) {
    return expm1h;
  } else {
    /* Rest of argument reduction for accurate phase */

    Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
    t1 = x + msLog2Div2LMultKh;
    Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
    Add12Cond(rm,rl,t2,msLog2Div2LMultKl);

    /* Table reads for accurate phase */
    tbl1l = twoPowerIndex1[index1].lo;
    tbl2l = twoPowerIndex2[index2].lo;

#if DEBUG
    printf("Launch accurate phase (common interval)\n");
#endif

    /* Call accurate phase */
    expm1_common_td(&expm1h, &expm1m, &expm1l, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l, M);

    /* Final rounding */

    ReturnRoundToNearest3(expm1h, expm1m, expm1l);
  } /* Accurate phase launched */

  /* We cannot be here since we return before in any case */
}
564
/* expm1_rd -- correctly rounded expm1(x) = e^x - 1, round towards -inf.

   Same structure as expm1_rn (quick double-double phase, rounding test,
   triple-double accurate phase); only the directed-rounding special
   cases and the final rounding sequences differ:
     - tiny |x|   : return x unchanged (the neglected tail x^2/2 + ...
                    is positive and below 1 ulp, so x is the round-down)
     - overflow   : return the largest normal (with inexact raised), not
                    +inf, since we round downwards
     - x below MINUSONEBOUND : return exactly -1.0
   Rounding tests use TEST_AND_RETURN_RD and ReturnRoundDownwards3. */
double expm1_rd(double x) {
  db_number xdb, shiftedXMultdb, polyTblhdb, polyTblmdb;
  int xIntHi, expoX, k, M, index1, index2;
  double highPoly, tt1h, t1h, t1l, xSqh, xSql, xSqHalfh, xSqHalfl, xCubeh, xCubel, t2h, t2l, templ, tt3h, tt3l;
  double polyh, polyl, expm1h, expm1m, expm1l;
  double r1h, r1l, r1t, rr1h, rr1l;
  double r2h, r2l, r2t, rr2h, rr2l;
  double r3h, r3l, r3t, rr3h, rr3l;
  double xMultLog2InvMult2L, shiftedXMult, kd, s1, s2, s3, s4, s5, rh, rm, rl;
  double rhSquare, rhC3, rhSquareHalf, monomialCube, rhFour, monomialFour;
  double tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l;
  double highPolyWithSquare, tablesh, tablesl, t8, t9, t10, t11, t12, t13;
  double exph, expm, t1, t2, t3;
  double msLog2Div2LMultKh, msLog2Div2LMultKm, msLog2Div2LMultKl;
  double middlePoly, doublePoly;

  xdb.d = x;

  /* Strip off the sign of x for the following tests */

  xIntHi = xdb.i[HI] & 0x7fffffff;

  /* Test if we are so small that we can return (a corrected) x as correct rounding */
  if (xIntHi < RETURNXBOUND) {
    /* The only algebraic result is 0 for x = +/- 0; in this case, we can return x = +/- 0
       The truncation rest x^2/2 + x^3/6 + ... is always positive
       but less than 1 ulp in this case, so we round down by returning x
    */
    return x;
  }


  /* Filter out special cases like overflow, -1 in result, infinities and NaNs
     The filters are not sharp, we have positive arguments that flow through
  */
  if (xIntHi >= SIMPLEOVERFLOWBOUND) {
    /* Test if we are +/-inf or NaN */
    if (xIntHi >= 0x7ff00000) {
      /* Test if NaN */
      if (((xIntHi & 0x000fffff) | xdb.i[LO]) != 0) {
	/* NaN */
	return x+x;    /* return NaN */
      }
      /* Test if +inf or -inf */
      if (xdb.i[HI] > 0) {
	/* +inf */
	return x+x;    /* return +inf */
      }

      /* If we are here, we are -inf */
      return -1.0;
    }

    /* If we are here, we are overflowed or a common case that flows through */

    /* Test if we are actually overflowed */
    if (x > OVERFLOWBOUND) {
      /* We would be overflowed but as we are rounding downwards
	 the nearest number lesser than the exact result is the greatest
	 normal. In any case, we must raise the inexact flag.
      */
      return LARGEST * (1.0 + SMALLEST);
    }
  }

  /* Test if we know already that we are -1.0 (+ correction depending on rounding mode) in result */
  if (x < MINUSONEBOUND) {
    /* We round down, so we are -1.0 */
    return -1.0;
  }

  /* Test if we have |x| <= 1/4-1/2ulp(1/4) for knowing if we use exp(x) or approximate directly */

  if (xIntHi < DIRECTINTERVALBOUND) {
    /* We approximate expm1 directly after a range reduction as follows

       expm1(x) = (expm1(x/2) + 2) * expm1(x/2)

       We perform the range reduction in such a way that finally |x| < 1/32
    */

    /* Extract the exponent of |x| and add 5 (2^5 = 32) */
    expoX = ((xIntHi & 0x7ff00000) >> 20) - (1023 - 5);

    /* If this particularly biased exponent expoX is negative, we are already less than 1/32 */
    if (expoX >= 0) {
      /* If we are here, we must perform range reduction */


      /* We multiply x by 2^(-expoX-1) by bit manipulation
	 x cannot be denormalized so there is no danger
      */
      xdb.i[HI] += (-expoX-1) << 20;

      /* We reassign the new x and maintain xIntHi */

      xIntHi = xdb.i[HI] & 0x7fffffff;
      x = xdb.d;
    }

    /* Here, we have always |x| < 1/32 */


    /* Double precision evaluation steps and one double-double step */

    Mul12(&xSqh,&xSql,x,x);

#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
    middlePoly = FMA(quickDirectpolyC5h,x,quickDirectpolyC4h);
#else
    middlePoly = quickDirectpolyC4h + x * quickDirectpolyC5h;
#endif

    doublePoly = middlePoly;

    /* Special path: for small |x| we can truncate the polynomial */

    if (xIntHi > SPECIALINTERVALBOUND) {

#if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
      highPoly = FMA(FMA(FMA(quickDirectpolyC9h ,x,quickDirectpolyC8h),x,
		     quickDirectpolyC7h),x,quickDirectpolyC6h);
#else
      highPoly = quickDirectpolyC6h + x * (quickDirectpolyC7h + x * (
		 quickDirectpolyC8h + x * quickDirectpolyC9h));
#endif

      highPolyWithSquare = xSqh * highPoly;

      doublePoly = middlePoly + highPolyWithSquare;

    }

    /* Double-double evaluation steps */
    tt1h = x * doublePoly;

    /* Multiplication by 0.5 is exact */
    xSqHalfh = 0.5 * xSqh;
    xSqHalfl = 0.5 * xSql;
    /* t2 = x + x^2/2 as a double-double */
    Add12(t2h,templ,x,xSqHalfh);
    t2l = templ + xSqHalfl;

    /* poly = (x + x^2/2) + x^3 * (C3 + x * doublePoly) */
    Add12(t1h,t1l,quickDirectpolyC3h,tt1h);
    Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
    Mul22(&tt3h,&tt3l,xCubeh,xCubel,t1h,t1l);

    Add22(&polyh,&polyl,t2h,t2l,tt3h,tt3l);

    /* Reconstruction */

    /* If we have not performed any range reduction, we have no reconstruction to do */
    if (expoX >= 0) {
      /* If we are here, we must perform reconstruction */

      /* First reconstruction step: expm1(2y) = (expm1(y) + 2) * expm1(y) */
      Add12(r1h,r1t,2,polyh);
      r1l = r1t + polyl;
      Mul22(&rr1h,&rr1l,r1h,r1l,polyh,polyl);

      if (expoX >= 1) {

	/* Second reconstruction step */
	Add12(r2h,r2t,2,rr1h);
	r2l = r2t + rr1l;
	Mul22(&rr2h,&rr2l,r2h,r2l,rr1h,rr1l);

	if (expoX >= 2) {

	  /* Third reconstruction step */
	  Add12(r3h,r3t,2,rr2h);
	  r3l = r3t + rr2l;
	  Mul22(&rr3h,&rr3l,r3h,r3l,rr2h,rr2l);

	  /* expoX may be maximally 2 */

	  expm1h = rr3h;
	  expm1m = rr3l;

	} else {
	  expm1h = rr2h;
	  expm1m = rr2l;
	}

      } else {
	expm1h = rr1h;
	expm1m = rr1l;
      }

    } else {
      expm1h = polyh;
      expm1m = polyl;
    }

    /* Rounding test; returns from within the macro if the quick-phase
       result is provably correctly roundable downwards */
    TEST_AND_RETURN_RD(expm1h, expm1m, ROUNDCSTDIRECTRD);
    {
      expm1_direct_td(&expm1h, &expm1m, &expm1l, x, xSqHalfh, xSqHalfl, xSqh, xSql, expoX);

      ReturnRoundDownwards3(expm1h, expm1m, expm1l);

    } /* Accurate phase launched */

    /* We cannot be here, since we return in all cases before */
  }

  /* If we are here, we can use expm1(x) = exp(x) - 1 */

  /* Range reduction - exact part: compute k as double and as int
     (shiftConst trick: round x * 2^12/log(2) to an integer kd) */

  xMultLog2InvMult2L = x * log2InvMult2L;
  shiftedXMult = xMultLog2InvMult2L + shiftConst;
  kd = shiftedXMult - shiftConst;
  shiftedXMultdb.d = shiftedXMult;
  k = shiftedXMultdb.i[LO];
  M = k >> 12;
  index1 = k & INDEXMASK1;
  index2 = (k & INDEXMASK2) >> 6;

  /* Range reduction - part affected by error - must be redone in accurate phase */
  Mul12(&s1,&s2,msLog2Div2Lh,kd);
  s3 = kd * msLog2Div2Lm;
  s4 = s2 + s3;
  s5 = x + s1;
  Add12Cond(rh,rm,s5,s4);

  /* Table reads - read only two double-doubles by now */
  tbl1h = twoPowerIndex1[index1].hi;
  tbl1m = twoPowerIndex1[index1].mi;
  tbl2h = twoPowerIndex2[index2].hi;
  tbl2m = twoPowerIndex2[index2].mi;

  /* Quick phase starts here: polynomial for exp(rh) - 1 - rh */

  rhSquare = rh * rh;
  rhC3 = quickCommonpolyC3h * rh;

  rhSquareHalf = 0.5 * rhSquare;
  monomialCube = rhC3 * rhSquare;
  rhFour = rhSquare * rhSquare;

  monomialFour = quickCommonpolyC4h * rhFour;

  highPoly = monomialCube + monomialFour;

  highPolyWithSquare = rhSquareHalf + highPoly;

  /* Reconstruction: integration of table values */

  Mul22(&tablesh,&tablesl,tbl1h,tbl1m,tbl2h,tbl2m);

  t8 = rm + highPolyWithSquare;
  t9 = rh + t8;

  t10 = tablesh * t9;

  Add12(t11,t12,tablesh,t10);
  t13 = t12 + tablesl;
  Add12(polyTblhdb.d,polyTblmdb.d,t11,t13);

  /* Reconstruction: multiplication by 2^M */

  /* Implement the multiplication by manipulation of the exponent field
     to overcome the problem of the non-representability of 2^1024 (M = 1024)
     This case is possible if polyTblhdb.d < 1
  */

  polyTblhdb.i[HI] += M << 20;
  polyTblmdb.i[HI] += M << 20;

  exph = polyTblhdb.d;
  expm = polyTblmdb.d;

  /* Subtraction of 1

     Testing if the operation is necessary is more expensive than
     performing it in any case.

     We may cancel at most 2 bits in the subtraction for
     arguments 1/4 <= x <= ln(2) (0.25 <= x <= 0.69)
     We must therefore use conditional Add12s

     Since we perform a subtraction, we may not have addition overflow towards +inf

  */

  Add12Cond(t1,t2,-1,exph);
  t3 = t2 + expm;
  Add12Cond(expm1h,expm1m,t1,t3);


  /* Rounding test; returns from within the macro on success */
  TEST_AND_RETURN_RD(expm1h, expm1m, ROUNDCSTCOMMONRD);
  {
    /* Rest of argument reduction for accurate phase */

    Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
    t1 = x + msLog2Div2LMultKh;
    Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
    Add12Cond(rm,rl,t2,msLog2Div2LMultKl);

    /* Table reads for accurate phase */
    tbl1l = twoPowerIndex1[index1].lo;
    tbl2l = twoPowerIndex2[index2].lo;

    /* Call accurate phase */
    expm1_common_td(&expm1h, &expm1m, &expm1l, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l, M);

    /* Final rounding */

    ReturnRoundDownwards3(expm1h, expm1m, expm1l);
  } /* Accurate phase launched */

  /* We cannot be here since we return before in any case */
}
878
expm1_ru(double x)879 double expm1_ru(double x) {
880 db_number xdb, shiftedXMultdb, polyTblhdb, polyTblmdb;
881 int xIntHi, expoX, k, M, index1, index2;
882 double highPoly, tt1h, t1h, t1l, xSqh, xSql, xSqHalfh, xSqHalfl, xCubeh, xCubel, t2h, t2l, templ, tt3h, tt3l;
883 double polyh, polyl, expm1h, expm1m, expm1l;
884 double r1h, r1l, r1t, rr1h, rr1l;
885 double r2h, r2l, r2t, rr2h, rr2l;
886 double r3h, r3l, r3t, rr3h, rr3l;
887 double xMultLog2InvMult2L, shiftedXMult, kd, s1, s2, s3, s4, s5, rh, rm, rl;
888 double rhSquare, rhC3, rhSquareHalf, monomialCube, rhFour, monomialFour;
889 double tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l;
890 double highPolyWithSquare, tablesh, tablesl, t8, t9, t10, t11, t12, t13;
891 double exph, expm, t1, t2, t3;
892 double msLog2Div2LMultKh, msLog2Div2LMultKm, msLog2Div2LMultKl;
893 double middlePoly, doublePoly;
894
895 xdb.d = x;
896
897 /* Strip off the sign of x for the following tests */
898
899 xIntHi = xdb.i[HI] & 0x7fffffff;
900
901 /* Test if we are so small that we can return (a corrected) x as correct rounding */
902 if (xIntHi < RETURNXBOUND) {
903 /* The only algebraic result is 0 for x = +/-0; in this case, we return x = +/-0
904 The truncation rest x^2/2 + x^3/6 + ... is always positive
905 but less than 1 ulp in this case, so we round by adding 1 ulp
906 */
907
908 if (x == 0.0) return x;
909
910 if (xdb.i[HI] & 0x80000000) {
911 /* x is negative
912 We add 1 ulp by subtracting 1 in long
913 */
914 xdb.l--;
915 } else {
916 /* x is positive
917 We add 1 ulp by adding 1 in long
918 */
919 xdb.l++;
920 }
921 return xdb.d;
922 }
923
924
925 /* Filter out special cases like overflow, -1 in result, infinities and NaNs
926 The filters are not sharp, we have positive arguments that flow through
927 */
928 if (xIntHi >= SIMPLEOVERFLOWBOUND) {
929 /* Test if we are +/-inf or NaN */
930 if (xIntHi >= 0x7ff00000) {
931 /* Test if NaN */
932 if (((xIntHi & 0x000fffff) | xdb.i[LO]) != 0) {
933 /* NaN */
934 return x+x; /* return NaN */
935 }
936 /* Test if +inf or -inf */
937 if (xdb.i[HI] > 0) {
938 /* +inf */
939 return x+x; /* return +inf */
940 }
941
942 /* If we are here, we are -inf */
943 return -1.0;
944 }
945
946 /* If we are here, we are overflowed or a common case that flows through */
947
948 /* Test if we are actually overflowed */
949 if (x > OVERFLOWBOUND) {
950 return LARGEST * LARGEST; /* return +inf and set flag */
951 }
952 }
953
954 /* Test if we know already that we are -1.0 (+ correction depending on rounding mode) in result */
955 if (x < MINUSONEBOUND) {
956 /* Round up so we are -1.0 + 1ulp */
957 return MINUSONEPLUSONEULP;
958 }
959
960 /* Test if we have |x| <= 1/4-1/2ulp(1/4) for knowing if we use exp(x) or approximate directly */
961
962 if (xIntHi < DIRECTINTERVALBOUND) {
963 /* We approximate expm1 directly after a range reduction as follows
964
965 expm1(x) = (expm1(x/2) + 2) * expm1(x/2)
966
967 We perform the range reduction in such a way that finally |x| < 1/32
968 */
969
970 /* Extract the exponent of |x| and add 5 (2^5 = 32) */
971 expoX = ((xIntHi & 0x7ff00000) >> 20) - (1023 - 5);
972
973 /* If this particularily biased exponent expoX is negative, we are already less than 1/32 */
974 if (expoX >= 0) {
975 /* If we are here, we must perform range reduction */
976
977
978 /* We multiply x by 2^(-expoX-1) by bit manipulation
979 x cannot be denormalized so there is no danger
980 */
981 xdb.i[HI] += (-expoX-1) << 20;
982
983 /* We reassign the new x and maintain xIntHi */
984
985 xIntHi = xdb.i[HI] & 0x7fffffff;
986 x = xdb.d;
987 }
988
989 /* Here, we have always |x| < 1/32 */
990
991
992 /* Double precision evaluation steps and one double-double step */
993
994 Mul12(&xSqh,&xSql,x,x);
995
996 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
997 middlePoly = FMA(quickDirectpolyC5h,x,quickDirectpolyC4h);
998 #else
999 middlePoly = quickDirectpolyC4h + x * quickDirectpolyC5h;
1000 #endif
1001
1002 doublePoly = middlePoly;
1003
1004 /* Special path: for small |x| we can truncate the polynomial */
1005
1006 if (xIntHi > SPECIALINTERVALBOUND) {
1007
1008 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1009 highPoly = FMA(FMA(FMA(quickDirectpolyC9h ,x,quickDirectpolyC8h),x,
1010 quickDirectpolyC7h),x,quickDirectpolyC6h);
1011 #else
1012 highPoly = quickDirectpolyC6h + x * (quickDirectpolyC7h + x * (
1013 quickDirectpolyC8h + x * quickDirectpolyC9h));
1014 #endif
1015
1016 highPolyWithSquare = xSqh * highPoly;
1017
1018 doublePoly = middlePoly + highPolyWithSquare;
1019
1020 }
1021
1022 /* Double-double evaluation steps */
1023 tt1h = x * doublePoly;
1024
1025 xSqHalfh = 0.5 * xSqh;
1026 xSqHalfl = 0.5 * xSql;
1027 Add12(t2h,templ,x,xSqHalfh);
1028 t2l = templ + xSqHalfl;
1029
1030 Add12(t1h,t1l,quickDirectpolyC3h,tt1h);
1031 Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
1032 Mul22(&tt3h,&tt3l,xCubeh,xCubel,t1h,t1l);
1033
1034 Add22(&polyh,&polyl,t2h,t2l,tt3h,tt3l);
1035
1036 /* Reconstruction */
1037
1038 /* If we have not performed any range reduction, we have no reconstruction to do */
1039 if (expoX >= 0) {
1040 /* If we are here, we must perform reconstruction */
1041
1042 /* First reconstruction step */
1043 Add12(r1h,r1t,2,polyh);
1044 r1l = r1t + polyl;
1045 Mul22(&rr1h,&rr1l,r1h,r1l,polyh,polyl);
1046
1047 if (expoX >= 1) {
1048
1049 /* Second reconstruction step */
1050 Add12(r2h,r2t,2,rr1h);
1051 r2l = r2t + rr1l;
1052 Mul22(&rr2h,&rr2l,r2h,r2l,rr1h,rr1l);
1053
1054 if (expoX >= 2) {
1055
1056 /* Third reconstruction step */
1057 Add12(r3h,r3t,2,rr2h);
1058 r3l = r3t + rr2l;
1059 Mul22(&rr3h,&rr3l,r3h,r3l,rr2h,rr2l);
1060
1061 /* expoX may be maximally 2 */
1062
1063 expm1h = rr3h;
1064 expm1m = rr3l;
1065
1066 } else {
1067 expm1h = rr2h;
1068 expm1m = rr2l;
1069 }
1070
1071 } else {
1072 expm1h = rr1h;
1073 expm1m = rr1l;
1074 }
1075
1076 } else {
1077 expm1h = polyh;
1078 expm1m = polyl;
1079 }
1080
1081 /* Rounding test */
1082 TEST_AND_RETURN_RU(expm1h, expm1m, ROUNDCSTDIRECTRD);
1083 {
1084 expm1_direct_td(&expm1h, &expm1m, &expm1l, x, xSqHalfh, xSqHalfl, xSqh, xSql, expoX);
1085
1086 ReturnRoundUpwards3(expm1h, expm1m, expm1l);
1087
1088 } /* Accurate phase launched */
1089
1090 /* We cannot be here, since we return in all cases before */
1091 }
1092
1093 /* If we are here, we can use expm1(x) = exp(x) - 1 */
1094
1095 /* Range reduction - exact part: compute k as double and as int */
1096
1097 xMultLog2InvMult2L = x * log2InvMult2L;
1098 shiftedXMult = xMultLog2InvMult2L + shiftConst;
1099 kd = shiftedXMult - shiftConst;
1100 shiftedXMultdb.d = shiftedXMult;
1101 k = shiftedXMultdb.i[LO];
1102 M = k >> 12;
1103 index1 = k & INDEXMASK1;
1104 index2 = (k & INDEXMASK2) >> 6;
1105
1106 /* Range reduction - part affected by error - must be redone in accurate phase */
1107 Mul12(&s1,&s2,msLog2Div2Lh,kd);
1108 s3 = kd * msLog2Div2Lm;
1109 s4 = s2 + s3;
1110 s5 = x + s1;
1111 Add12Cond(rh,rm,s5,s4);
1112
1113 /* Table reads - read only two double-doubles by now */
1114 tbl1h = twoPowerIndex1[index1].hi;
1115 tbl1m = twoPowerIndex1[index1].mi;
1116 tbl2h = twoPowerIndex2[index2].hi;
1117 tbl2m = twoPowerIndex2[index2].mi;
1118
1119 /* Quick phase starts here */
1120
1121 rhSquare = rh * rh;
1122 rhC3 = quickCommonpolyC3h * rh;
1123
1124 rhSquareHalf = 0.5 * rhSquare;
1125 monomialCube = rhC3 * rhSquare;
1126 rhFour = rhSquare * rhSquare;
1127
1128 monomialFour = quickCommonpolyC4h * rhFour;
1129
1130 highPoly = monomialCube + monomialFour;
1131
1132 highPolyWithSquare = rhSquareHalf + highPoly;
1133
1134 /* Reconstruction: integration of table values */
1135
1136 Mul22(&tablesh,&tablesl,tbl1h,tbl1m,tbl2h,tbl2m);
1137
1138 t8 = rm + highPolyWithSquare;
1139 t9 = rh + t8;
1140
1141 t10 = tablesh * t9;
1142
1143 Add12(t11,t12,tablesh,t10);
1144 t13 = t12 + tablesl;
1145 Add12(polyTblhdb.d,polyTblmdb.d,t11,t13);
1146
1147 /* Reconstruction: multiplication by 2^M */
1148
1149 /* Implement the multiplication by multiplication to overcome the
1150 problem of the non-representability of 2^1024 (M = 1024)
1151 This case is possible if polyTblhdb.d < 1
1152 */
1153
1154 polyTblhdb.i[HI] += M << 20;
1155 polyTblmdb.i[HI] += M << 20;
1156
1157 exph = polyTblhdb.d;
1158 expm = polyTblmdb.d;
1159
  /* Subtraction of 1

     Testing if the operation is necessary is more expensive than
     performing it in any case.

     We may cancel at most 2 bits in the subtraction for
     arguments 1/4 <= x <= ln(2) (0.25 <= x <= 0.69)
     We must therefore use conditional Add12s

     Since we perform a subtraction, we may not have addition overflow towards +inf

  */
1172
1173 Add12Cond(t1,t2,-1,exph);
1174 t3 = t2 + expm;
1175 Add12Cond(expm1h,expm1m,t1,t3);
1176
1177
1178 /* Rounding test */
1179 TEST_AND_RETURN_RU(expm1h, expm1m, ROUNDCSTCOMMONRD);
1180 {
1181 /* Rest of argument reduction for accurate phase */
1182
1183 Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
1184 t1 = x + msLog2Div2LMultKh;
1185 Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
1186 Add12Cond(rm,rl,t2,msLog2Div2LMultKl);
1187
1188 /* Table reads for accurate phase */
1189 tbl1l = twoPowerIndex1[index1].lo;
1190 tbl2l = twoPowerIndex2[index2].lo;
1191
1192 /* Call accurate phase */
1193 expm1_common_td(&expm1h, &expm1m, &expm1l, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l, M);
1194
1195 /* Final rounding */
1196
1197 ReturnRoundUpwards3(expm1h, expm1m, expm1l);
1198 } /* Accurate phase launched */
1199
1200 /* We cannot be here since we return before in any case */
1201 }
1202
expm1_rz(double x)1203 double expm1_rz(double x) {
1204 db_number xdb, shiftedXMultdb, polyTblhdb, polyTblmdb;
1205 int xIntHi, expoX, k, M, index1, index2;
1206 double highPoly, tt1h, t1h, t1l, xSqh, xSql, xSqHalfh, xSqHalfl, xCubeh, xCubel, t2h, t2l, templ, tt3h, tt3l;
1207 double polyh, polyl, expm1h, expm1m, expm1l;
1208 double r1h, r1l, r1t, rr1h, rr1l;
1209 double r2h, r2l, r2t, rr2h, rr2l;
1210 double r3h, r3l, r3t, rr3h, rr3l;
1211 double xMultLog2InvMult2L, shiftedXMult, kd, s1, s2, s3, s4, s5, rh, rm, rl;
1212 double rhSquare, rhC3, rhSquareHalf, monomialCube, rhFour, monomialFour;
1213 double tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l;
1214 double highPolyWithSquare, tablesh, tablesl, t8, t9, t10, t11, t12, t13;
1215 double exph, expm, t1, t2, t3;
1216 double msLog2Div2LMultKh, msLog2Div2LMultKm, msLog2Div2LMultKl;
1217 double middlePoly, doublePoly;
1218
1219 xdb.d = x;
1220
1221 /* Strip off the sign of x for the following tests */
1222
1223 xIntHi = xdb.i[HI] & 0x7fffffff;
1224
1225 /* Test if we are so small that we can return (a corrected) x as correct rounding */
1226 if (xIntHi < RETURNXBOUND) {
1227 /* The only algebraic result is 0 for x = +/- 0; in this case, we can return x = +/- 0
1228 expm1 is positive for positive x, negative for negative x
1229 The truncation rest x^2/2 + x^3/6 + ... is always positive
1230 but less than 1 ulp in this case, so we round as follows:
1231
1232 - x is positive => expm1 is positive => round downwards => truncate by returning x
1233 - x is negative => expm1 is negative => round upwards => add 1 ulp
1234 */
1235
1236 if (x == 0.0) return x;
1237
1238 if (xdb.i[HI] & 0x80000000) {
1239 /* x is negative
1240 We add 1 ulp by subtracting 1 in long
1241 */
1242 xdb.l--;
1243 return xdb.d;
1244 } else {
1245 /* x is positive
1246 We do nothing (see above)
1247 */
1248 return x;
1249 }
1250 }
1251
1252
1253 /* Filter out special cases like overflow, -1 in result, infinities and NaNs
1254 The filters are not sharp, we have positive arguments that flow through
1255 */
1256 if (xIntHi >= SIMPLEOVERFLOWBOUND) {
1257 /* Test if we are +/-inf or NaN */
1258 if (xIntHi >= 0x7ff00000) {
1259 /* Test if NaN */
1260 if (((xIntHi & 0x000fffff) | xdb.i[LO]) != 0) {
1261 /* NaN */
1262 return x+x; /* return NaN */
1263 }
1264 /* Test if +inf or -inf */
1265 if (xdb.i[HI] > 0) {
1266 /* +inf */
1267 return x+x; /* return +inf */
1268 }
1269
1270 /* If we are here, we are -inf */
1271 return -1.0;
1272 }
1273
1274 /* If we are here, we are overflowed or a common case that flows through */
1275
1276 /* Test if we are actually overflowed */
1277 if (x > OVERFLOWBOUND) {
1278 /* We would be overflowed but as we are rounding towards zero, i.e. downwards,
1279 the nearest number lesser than the exact result is the greatest
1280 normal. In any case, we must raise the inexact flag.
1281 */
1282 return LARGEST * (1.0 + SMALLEST);
1283 }
1284 }
1285
1286 /* Test if we know already that we are -1.0 (+ correction depending on rounding mode) in result */
1287 if (x < MINUSONEBOUND) {
1288 /* We round towards zero, i.e. upwards, so we return -1.0+1ulp */
1289 return MINUSONEPLUSONEULP;
1290 }
1291
1292 /* Test if we have |x| <= 1/4-1/2ulp(1/4) for knowing if we use exp(x) or approximate directly */
1293
1294 if (xIntHi < DIRECTINTERVALBOUND) {
1295
1296 /* We approximate expm1 directly after a range reduction as follows
1297
1298 expm1(x) = (expm1(x/2) + 2) * expm1(x/2)
1299
1300 We perform the range reduction in such a way that finally |x| < 1/32
1301 */
1302
1303 /* Extract the exponent of |x| and add 5 (2^5 = 32) */
1304 expoX = ((xIntHi & 0x7ff00000) >> 20) - (1023 - 5);
1305
1306 /* If this particularily biased exponent expoX is negative, we are already less than 1/32 */
1307 if (expoX >= 0) {
1308 /* If we are here, we must perform range reduction */
1309
1310
1311 /* We multiply x by 2^(-expoX-1) by bit manipulation
1312 x cannot be denormalized so there is no danger
1313 */
1314 xdb.i[HI] += (-expoX-1) << 20;
1315
1316 /* We reassign the new x and maintain xIntHi */
1317
1318 xIntHi = xdb.i[HI] & 0x7fffffff;
1319 x = xdb.d;
1320 }
1321 /* Here, we have always |x| < 1/32 */
1322
1323
1324 /* Double precision evaluation steps and one double-double step */
1325
1326 Mul12(&xSqh,&xSql,x,x);
1327
1328 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1329 middlePoly = FMA(quickDirectpolyC5h,x,quickDirectpolyC4h);
1330 #else
1331 middlePoly = quickDirectpolyC4h + x * quickDirectpolyC5h;
1332 #endif
1333
1334 doublePoly = middlePoly;
1335
1336 /* Special path: for small |x| we can truncate the polynomial */
1337
1338 if (xIntHi > SPECIALINTERVALBOUND) {
1339
1340 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1341 highPoly = FMA(FMA(FMA(quickDirectpolyC9h ,x,quickDirectpolyC8h),x,
1342 quickDirectpolyC7h),x,quickDirectpolyC6h);
1343 #else
1344 highPoly = quickDirectpolyC6h + x * (quickDirectpolyC7h + x * (
1345 quickDirectpolyC8h + x * quickDirectpolyC9h));
1346 #endif
1347
1348 highPolyWithSquare = xSqh * highPoly;
1349
1350 doublePoly = middlePoly + highPolyWithSquare;
1351
1352 }
1353
1354 /* Double-double evaluation steps */
1355 tt1h = x * doublePoly;
1356
1357 xSqHalfh = 0.5 * xSqh;
1358 xSqHalfl = 0.5 * xSql;
1359 Add12(t2h,templ,x,xSqHalfh);
1360 t2l = templ + xSqHalfl;
1361
1362 Add12(t1h,t1l,quickDirectpolyC3h,tt1h);
1363 Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
1364 Mul22(&tt3h,&tt3l,xCubeh,xCubel,t1h,t1l);
1365
1366 Add22(&polyh,&polyl,t2h,t2l,tt3h,tt3l);
1367
1368 /* Reconstruction */
1369
1370 /* If we have not performed any range reduction, we have no reconstruction to do */
1371 if (expoX >= 0) {
1372 /* If we are here, we must perform reconstruction */
1373
1374 /* First reconstruction step */
1375 Add12(r1h,r1t,2,polyh);
1376 r1l = r1t + polyl;
1377 Mul22(&rr1h,&rr1l,r1h,r1l,polyh,polyl);
1378
1379 if (expoX >= 1) {
1380
1381 /* Second reconstruction step */
1382 Add12(r2h,r2t,2,rr1h);
1383 r2l = r2t + rr1l;
1384 Mul22(&rr2h,&rr2l,r2h,r2l,rr1h,rr1l);
1385
1386 if (expoX >= 2) {
1387
1388 /* Third reconstruction step */
1389 Add12(r3h,r3t,2,rr2h);
1390 r3l = r3t + rr2l;
1391 Mul22(&rr3h,&rr3l,r3h,r3l,rr2h,rr2l);
1392
1393 /* expoX may be maximally 2 */
1394
1395 expm1h = rr3h;
1396 expm1m = rr3l;
1397
1398 } else {
1399 expm1h = rr2h;
1400 expm1m = rr2l;
1401 }
1402
1403 } else {
1404 expm1h = rr1h;
1405 expm1m = rr1l;
1406 }
1407
1408 } else {
1409 expm1h = polyh;
1410 expm1m = polyl;
1411 }
1412
1413 /* Rounding test */
1414 TEST_AND_RETURN_RZ(expm1h, expm1m, ROUNDCSTDIRECTRD);
1415 {
1416 expm1_direct_td(&expm1h, &expm1m, &expm1l, x, xSqHalfh, xSqHalfl, xSqh, xSql, expoX);
1417
1418 ReturnRoundTowardsZero3(expm1h, expm1m, expm1l);
1419
1420 } /* Accurate phase launched */
1421
1422 /* We cannot be here, since we return in all cases before */
1423 }
1424
1425 /* If we are here, we can use expm1(x) = exp(x) - 1 */
1426
1427 /* Range reduction - exact part: compute k as double and as int */
1428
1429 xMultLog2InvMult2L = x * log2InvMult2L;
1430 shiftedXMult = xMultLog2InvMult2L + shiftConst;
1431 kd = shiftedXMult - shiftConst;
1432 shiftedXMultdb.d = shiftedXMult;
1433 k = shiftedXMultdb.i[LO];
1434 M = k >> 12;
1435 index1 = k & INDEXMASK1;
1436 index2 = (k & INDEXMASK2) >> 6;
1437
1438 /* Range reduction - part affected by error - must be redone in accurate phase */
1439 Mul12(&s1,&s2,msLog2Div2Lh,kd);
1440 s3 = kd * msLog2Div2Lm;
1441 s4 = s2 + s3;
1442 s5 = x + s1;
1443 Add12Cond(rh,rm,s5,s4);
1444
1445 /* Table reads - read only two double-doubles by now */
1446 tbl1h = twoPowerIndex1[index1].hi;
1447 tbl1m = twoPowerIndex1[index1].mi;
1448 tbl2h = twoPowerIndex2[index2].hi;
1449 tbl2m = twoPowerIndex2[index2].mi;
1450
1451 /* Quick phase starts here */
1452
1453 rhSquare = rh * rh;
1454 rhC3 = quickCommonpolyC3h * rh;
1455
1456 rhSquareHalf = 0.5 * rhSquare;
1457 monomialCube = rhC3 * rhSquare;
1458 rhFour = rhSquare * rhSquare;
1459
1460 monomialFour = quickCommonpolyC4h * rhFour;
1461
1462 highPoly = monomialCube + monomialFour;
1463
1464 highPolyWithSquare = rhSquareHalf + highPoly;
1465
1466 /* Reconstruction: integration of table values */
1467
1468 Mul22(&tablesh,&tablesl,tbl1h,tbl1m,tbl2h,tbl2m);
1469
1470 t8 = rm + highPolyWithSquare;
1471 t9 = rh + t8;
1472
1473 t10 = tablesh * t9;
1474
1475 Add12(t11,t12,tablesh,t10);
1476 t13 = t12 + tablesl;
1477 Add12(polyTblhdb.d,polyTblmdb.d,t11,t13);
1478
1479 /* Reconstruction: multiplication by 2^M */
1480
1481 /* Implement the multiplication by multiplication to overcome the
1482 problem of the non-representability of 2^1024 (M = 1024)
1483 This case is possible if polyTblhdb.d < 1
1484 */
1485
1486 polyTblhdb.i[HI] += M << 20;
1487 polyTblmdb.i[HI] += M << 20;
1488
1489 exph = polyTblhdb.d;
1490 expm = polyTblmdb.d;
1491
1492 /* Substraction of 1
1493
1494 Testing if the operation is necessary is more expensive than
1495 performing it in any case.
1496
1497 We may cancellate at most 2 bits in the subtraction for
1498 arguments 1/4 <= x <= ln(2) (0.25 <= x <= 0.69)
1499 We must therefore use conditional Add12s
1500
1501 Since we perform a substraction, we may not have addition overflow towards +inf
1502
1503 */
1504
1505 Add12Cond(t1,t2,-1,exph);
1506 t3 = t2 + expm;
1507 Add12Cond(expm1h,expm1m,t1,t3);
1508
1509
1510 /* Rounding test */
1511 TEST_AND_RETURN_RZ(expm1h, expm1m, ROUNDCSTCOMMONRD);
1512 {
1513
1514 /* Rest of argument reduction for accurate phase */
1515
1516 Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
1517 t1 = x + msLog2Div2LMultKh;
1518 Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
1519 Add12Cond(rm,rl,t2,msLog2Div2LMultKl);
1520
1521 /* Table reads for accurate phase */
1522 tbl1l = twoPowerIndex1[index1].lo;
1523 tbl2l = twoPowerIndex2[index2].lo;
1524
1525 /* Call accurate phase */
1526 expm1_common_td(&expm1h, &expm1m, &expm1l, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l, M);
1527
1528 /* Final rounding */
1529
1530 ReturnRoundTowardsZero3(expm1h, expm1m, expm1l);
1531 } /* Accurate phase launched */
1532
1533 /* We cannot be here since we return before in any case */
1534 }
1535
1536 #ifdef BUILD_INTERVAL_FUNCTIONS
j_expm1(interval x)1537 interval j_expm1(interval x)
1538 {
1539 interval res;
1540 db_number xdb_inf, shiftedXMultdb_inf, polyTblhdb_inf, polyTblmdb_inf;
1541 int xIntHi_inf, expoX_inf, k_inf, M_inf, index1_inf, index2_inf;
1542 double x_inf, x_sup;
1543 double res_inf,res_sup;
1544 double restemp_inf, restemp_sup;
1545 double highPoly_inf, tt1h_inf, t1h_inf, t1l_inf, xSqh_inf, xSql_inf, xSqHalfh_inf, xSqHalfl_inf, xCubeh_inf, xCubel_inf, t2h_inf, t2l_inf, templ_inf, tt3h_inf, tt3l_inf;
1546 double polyh_inf, polyl_inf, expm1h_inf, expm1m_inf, expm1l_inf;
1547 double r1h_inf, r1l_inf, r1t_inf, rr1h_inf, rr1l_inf;
1548 double r2h_inf, r2l_inf, r2t_inf, rr2h_inf, rr2l_inf;
1549 double r3h_inf, r3l_inf, r3t_inf, rr3h_inf, rr3l_inf;
1550 double xMultLog2InvMult2L_inf, shiftedXMult_inf, kd_inf, s1_inf, s2_inf, s3_inf, s4_inf, s5_inf, rh_inf, rm_inf, rl_inf;
1551 double rhSquare_inf, rhC3_inf, rhSquareHalf_inf, monomialCube_inf, rhFour_inf, monomialFour_inf;
1552 double tbl1h_inf, tbl1m_inf, tbl1l_inf, tbl2h_inf, tbl2m_inf, tbl2l_inf;
1553 double highPolyWithSquare_inf, tablesh_inf, tablesl_inf, t8_inf, t9_inf, t10_inf, t11_inf, t12_inf, t13_inf;
1554 double exph_inf, expm_inf, t1_inf, t2_inf, t3_inf;
1555 double msLog2Div2LMultKh_inf, msLog2Div2LMultKm_inf, msLog2Div2LMultKl_inf;
1556 double middlePoly_inf, doublePoly_inf;
1557 int roundable;
1558 int infDone, supDone;
1559 infDone=0; supDone=0;
1560
1561 db_number xdb_sup, shiftedXMultdb_sup, polyTblhdb_sup, polyTblmdb_sup;
1562 int xIntHi_sup, expoX_sup, k_sup, M_sup, index1_sup, index2_sup;
1563 double highPoly_sup, tt1h_sup, t1h_sup, t1l_sup, xSqh_sup, xSql_sup, xSqHalfh_sup, xSqHalfl_sup, xCubeh_sup, xCubel_sup, t2h_sup, t2l_sup, templ_sup, tt3h_sup, tt3l_sup;
1564 double polyh_sup, polyl_sup, expm1h_sup, expm1m_sup, expm1l_sup;
1565 double r1h_sup, r1l_sup, r1t_sup, rr1h_sup, rr1l_sup;
1566 double r2h_sup, r2l_sup, r2t_sup, rr2h_sup, rr2l_sup;
1567 double r3h_sup, r3l_sup, r3t_sup, rr3h_sup, rr3l_sup;
1568 double xMultLog2InvMult2L_sup, shiftedXMult_sup, kd_sup, s1_sup, s2_sup, s3_sup, s4_sup, s5_sup, rh_sup, rm_sup, rl_sup;
1569 double rhSquare_sup, rhC3_sup, rhSquareHalf_sup, monomialCube_sup, rhFour_sup, monomialFour_sup;
1570 double tbl1h_sup, tbl1m_sup, tbl1l_sup, tbl2h_sup, tbl2m_sup, tbl2l_sup;
1571 double highPolyWithSquare_sup, tablesh_sup, tablesl_sup, t8_sup, t9_sup, t10_sup, t11_sup, t12_sup, t13_sup;
1572 double exph_sup, expm_sup, t1_sup, t2_sup, t3_sup;
1573 double msLog2Div2LMultKh_sup, msLog2Div2LMultKm_sup, msLog2Div2LMultKl_sup;
1574 double middlePoly_sup, doublePoly_sup;
1575
1576 x_inf=LOW(x);
1577 x_sup=UP(x);
1578
1579 xdb_inf.d = x_inf;
1580 xdb_sup.d = x_sup;
1581
1582
1583 /* Strip off the sign of x for the following tests */
1584
1585 xIntHi_inf = xdb_inf.i[HI] & 0x7fffffff;
1586 xIntHi_sup = xdb_sup.i[HI] & 0x7fffffff;
1587
1588 /* Test if we are so small that we can return (a corrected) x as correct rounding */
1589
1590 if ( __builtin_expect(
1591 (xIntHi_inf < RETURNXBOUND) ||
1592 ((xIntHi_inf >= SIMPLEOVERFLOWBOUND) && (xIntHi_inf >= 0x7ff00000)) ||
1593 ((xIntHi_inf >= SIMPLEOVERFLOWBOUND) && (xIntHi_inf >= 0x7ff00000) && (((xIntHi_inf & 0x000fffff) | xdb_inf.i[LO]) != 0)) ||
1594 ((xIntHi_inf >= SIMPLEOVERFLOWBOUND) && (xIntHi_inf >= 0x7ff00000) && (xdb_inf.i[HI] > 0)) ||
1595 ((xIntHi_inf >= SIMPLEOVERFLOWBOUND) && (x_inf > OVERFLOWBOUND)) ||
1596 (x_inf < MINUSONEBOUND) ||
1597 (xIntHi_sup < RETURNXBOUND) ||
1598 ((xIntHi_sup < RETURNXBOUND) && (x_sup == 0.0)) ||
1599 ((xIntHi_sup >= SIMPLEOVERFLOWBOUND) && (xIntHi_sup >= 0x7ff00000)) ||
1600 ((xIntHi_sup >= SIMPLEOVERFLOWBOUND) && (xIntHi_sup >= 0x7ff00000) && (((xIntHi_sup & 0x000fffff) | xdb_sup.i[LO]) != 0)) ||
1601 ((xIntHi_sup >= SIMPLEOVERFLOWBOUND) && (xIntHi_sup >= 0x7ff00000) && (xdb_sup.i[HI] > 0)) ||
1602 ((xIntHi_sup >= SIMPLEOVERFLOWBOUND) && (x_sup > OVERFLOWBOUND)) ||
1603 (x_sup < MINUSONEBOUND)
1604 ,FALSE))
1605 {
1606 ASSIGN_LOW(res,expm1_rd(LOW(x)));
1607 ASSIGN_UP(res,expm1_ru(UP(x)));
1608 return res;
1609 }
1610
1611 if (__builtin_expect((xIntHi_inf < DIRECTINTERVALBOUND) && (xIntHi_sup < DIRECTINTERVALBOUND),TRUE)) {
1612 /* We approximate expm1 directly after a range reduction as follows
1613
1614 expm1(x) = (expm1(x/2) + 2) * expm1(x/2)
1615
1616 We perform the range reduction in such a way that finally |x| < 1/32
1617 */
1618
1619 /* Extract the exponent of |x| and add 5 (2^5 = 32) */
1620 expoX_inf = ((xIntHi_inf & 0x7ff00000) >> 20) - (1023 - 5);
1621 expoX_sup = ((xIntHi_sup & 0x7ff00000) >> 20) - (1023 - 5);
1622
    /* If this particularly biased exponent expoX is negative, we are already less than 1/32 */
1624 if (expoX_inf >= 0) {
1625 /* If we are here, we must perform range reduction */
1626
1627
1628 /* We multiply x by 2^(-expoX-1) by bit manipulation
1629 x cannot be denormalized so there is no danger
1630 */
1631 xdb_inf.i[HI] += (-expoX_inf-1) << 20;
1632
1633 /* We reassign the new x and maintain xIntHi */
1634
1635 xIntHi_inf = xdb_inf.i[HI] & 0x7fffffff;
1636 x_inf = xdb_inf.d;
1637 }
1638
1639 if (expoX_sup >= 0) {
1640 /* If we are here, we must perform range reduction */
1641
1642
1643 /* We multiply x by 2^(-expoX-1) by bit manipulation
1644 x cannot be denormalized so there is no danger
1645 */
1646 xdb_sup.i[HI] += (-expoX_sup-1) << 20;
1647
1648 /* We reassign the new x and maintain xIntHi */
1649
1650 xIntHi_sup = xdb_sup.i[HI] & 0x7fffffff;
1651 x_sup = xdb_sup.d;
1652 }
1653
1654 /* Here, we have always |x| < 1/32 */
1655
1656
1657 /* Double precision evaluation steps and one double-double step */
1658
1659 Mul12(&xSqh_inf,&xSql_inf,x_inf,x_inf);
1660 Mul12(&xSqh_sup,&xSql_sup,x_sup,x_sup);
1661
1662 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1663 middlePoly_inf = FMA(quickDirectpolyC5h,x_inf,quickDirectpolyC4h);
1664 middlePoly_sup = FMA(quickDirectpolyC5h,x_sup,quickDirectpolyC4h);
1665 #else
1666 middlePoly_inf = quickDirectpolyC4h + x_inf * quickDirectpolyC5h;
1667 middlePoly_sup = quickDirectpolyC4h + x_sup * quickDirectpolyC5h;
1668 #endif
1669
1670 doublePoly_inf = middlePoly_inf;
1671 doublePoly_sup = middlePoly_sup;
1672
1673 /* Special path: for small |x| we can truncate the polynomial */
1674
1675 if (xIntHi_inf > SPECIALINTERVALBOUND) {
1676
1677 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1678 highPoly_inf = FMA(FMA(FMA(quickDirectpolyC9h ,x_inf,quickDirectpolyC8h),x_inf,
1679 quickDirectpolyC7h),x_inf,quickDirectpolyC6h);
1680 #else
1681 highPoly_inf = quickDirectpolyC6h + x_inf * (quickDirectpolyC7h + x_inf * (
1682 quickDirectpolyC8h + x_inf * quickDirectpolyC9h));
1683 #endif
1684
1685 highPolyWithSquare_inf = xSqh_inf * highPoly_inf;
1686
1687 doublePoly_inf = middlePoly_inf + highPolyWithSquare_inf;
1688
1689 }
1690 if (xIntHi_sup > SPECIALINTERVALBOUND) {
1691
1692 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1693 highPoly_sup = FMA(FMA(FMA(quickDirectpolyC9h ,x_sup,quickDirectpolyC8h),x_sup,
1694 quickDirectpolyC7h),x_sup,quickDirectpolyC6h);
1695 #else
1696 highPoly_sup = quickDirectpolyC6h + x_sup * (quickDirectpolyC7h + x_sup * (
1697 quickDirectpolyC8h + x_sup * quickDirectpolyC9h));
1698 #endif
1699
1700 highPolyWithSquare_sup = xSqh_sup * highPoly_sup;
1701
1702 doublePoly_sup = middlePoly_sup + highPolyWithSquare_sup;
1703
1704 }
1705
1706 /* Double-double evaluation steps */
1707 tt1h_inf = x_inf * doublePoly_inf;
1708 tt1h_sup = x_sup * doublePoly_sup;
1709
1710 xSqHalfh_inf = 0.5 * xSqh_inf;
1711 xSqHalfh_sup = 0.5 * xSqh_sup;
1712 xSqHalfl_inf = 0.5 * xSql_inf;
1713 xSqHalfl_sup = 0.5 * xSql_sup;
1714 Add12(t2h_inf,templ_inf,x_inf,xSqHalfh_inf);
1715 Add12(t2h_sup,templ_sup,x_sup,xSqHalfh_sup);
1716 t2l_inf = templ_inf + xSqHalfl_inf;
1717 t2l_sup = templ_sup + xSqHalfl_sup;
1718
1719 Add12(t1h_inf,t1l_inf,quickDirectpolyC3h,tt1h_inf);
1720 Add12(t1h_sup,t1l_sup,quickDirectpolyC3h,tt1h_sup);
1721 Mul122(&xCubeh_inf,&xCubel_inf,x_inf,xSqh_inf,xSql_inf);
1722 Mul122(&xCubeh_sup,&xCubel_sup,x_sup,xSqh_sup,xSql_sup);
1723 Mul22(&tt3h_inf,&tt3l_inf,xCubeh_inf,xCubel_inf,t1h_inf,t1l_inf);
1724 Mul22(&tt3h_sup,&tt3l_sup,xCubeh_sup,xCubel_sup,t1h_sup,t1l_sup);
1725
1726 Add22(&polyh_inf,&polyl_inf,t2h_inf,t2l_inf,tt3h_inf,tt3l_inf);
1727 Add22(&polyh_sup,&polyl_sup,t2h_sup,t2l_sup,tt3h_sup,tt3l_sup);
1728
1729 /* Reconstruction */
1730
1731 /* If we have not performed any range reduction, we have no reconstruction to do */
1732 if (expoX_inf >= 0) {
1733 /* If we are here, we must perform reconstruction */
1734
1735 /* First reconstruction step */
1736 Add12(r1h_inf,r1t_inf,2,polyh_inf);
1737 r1l_inf = r1t_inf + polyl_inf;
1738 Mul22(&rr1h_inf,&rr1l_inf,r1h_inf,r1l_inf,polyh_inf,polyl_inf);
1739
1740 if (expoX_inf >= 1) {
1741
1742 /* Second reconstruction step */
1743 Add12(r2h_inf,r2t_inf,2,rr1h_inf);
1744 r2l_inf = r2t_inf + rr1l_inf;
1745 Mul22(&rr2h_inf,&rr2l_inf,r2h_inf,r2l_inf,rr1h_inf,rr1l_inf);
1746
1747 if (expoX_inf >= 2) {
1748
1749 /* Third reconstruction step */
1750 Add12(r3h_inf,r3t_inf,2,rr2h_inf);
1751 r3l_inf = r3t_inf + rr2l_inf;
1752 Mul22(&rr3h_inf,&rr3l_inf,r3h_inf,r3l_inf,rr2h_inf,rr2l_inf);
1753
1754 /* expoX may be maximally 2 */
1755
1756 expm1h_inf = rr3h_inf;
1757 expm1m_inf = rr3l_inf;
1758
1759 } else {
1760 expm1h_inf = rr2h_inf;
1761 expm1m_inf = rr2l_inf;
1762 }
1763
1764 } else {
1765 expm1h_inf = rr1h_inf;
1766 expm1m_inf = rr1l_inf;
1767 }
1768
1769 } else {
1770 expm1h_inf = polyh_inf;
1771 expm1m_inf = polyl_inf;
1772 }
1773 if (expoX_sup >= 0) {
1774 /* If we are here, we must perform reconstruction */
1775
1776 /* First reconstruction step */
1777 Add12(r1h_sup,r1t_sup,2,polyh_sup);
1778 r1l_sup = r1t_sup + polyl_sup;
1779 Mul22(&rr1h_sup,&rr1l_sup,r1h_sup,r1l_sup,polyh_sup,polyl_sup);
1780
1781 if (expoX_sup >= 1) {
1782
1783 /* Second reconstruction step */
1784 Add12(r2h_sup,r2t_sup,2,rr1h_sup);
1785 r2l_sup = r2t_sup + rr1l_sup;
1786 Mul22(&rr2h_sup,&rr2l_sup,r2h_sup,r2l_sup,rr1h_sup,rr1l_sup);
1787
1788 if (expoX_sup >= 2) {
1789
1790 /* Third reconstruction step */
1791 Add12(r3h_sup,r3t_sup,2,rr2h_sup);
1792 r3l_sup = r3t_sup + rr2l_sup;
1793 Mul22(&rr3h_sup,&rr3l_sup,r3h_sup,r3l_sup,rr2h_sup,rr2l_sup);
1794
1795 /* expoX may be maximally 2 */
1796
1797 expm1h_sup = rr3h_sup;
1798 expm1m_sup = rr3l_sup;
1799
1800 } else {
1801 expm1h_sup = rr2h_sup;
1802 expm1m_sup = rr2l_sup;
1803 }
1804
1805 } else {
1806 expm1h_sup = rr1h_sup;
1807 expm1m_sup = rr1l_sup;
1808 }
1809
1810 } else {
1811 expm1h_sup = polyh_sup;
1812 expm1m_sup = polyl_sup;
1813 }
1814
1815
1816 /* Rounding test */
1817 TEST_AND_COPY_RDRU_EXPM1(roundable,restemp_inf,expm1h_inf, expm1m_inf, restemp_sup,expm1h_sup, expm1m_sup, ROUNDCSTDIRECTRD);
1818 if((roundable==2) || (roundable==0))
1819 {
1820 expm1_direct_td(&expm1h_inf, &expm1m_inf, &expm1l_inf, x_inf, xSqHalfh_inf, xSqHalfl_inf, xSqh_inf, xSql_inf, expoX_inf);
1821
1822 RoundDownwards3(&restemp_inf,expm1h_inf, expm1m_inf, expm1l_inf);
1823
1824 } /* Accurate phase launched */
1825 if((roundable==1) || (roundable==0))
1826 {
1827 expm1_direct_td(&expm1h_sup, &expm1m_sup, &expm1l_sup, x_sup, xSqHalfh_sup, xSqHalfl_sup, xSqh_sup, xSql_sup, expoX_sup);
1828
1829 RoundUpwards3(&restemp_sup,expm1h_sup, expm1m_sup, expm1l_sup);
1830
1831 } /* Accurate phase launched */
1832 ASSIGN_LOW(res,restemp_inf);
1833 ASSIGN_UP(res,restemp_sup);
1834 return res;
1835 }
1836
1837
1838 /* Test if we have |x| <= 1/4-1/2ulp(1/4) for knowing if we use exp(x) or approximate directly */
1839
1840 if (xIntHi_inf < DIRECTINTERVALBOUND) {
1841 /* We approximate expm1 directly after a range reduction as follows
1842
1843 expm1(x) = (expm1(x/2) + 2) * expm1(x/2)
1844
1845 We perform the range reduction in such a way that finally |x| < 1/32
1846 */
1847
1848 /* Extract the exponent of |x| and add 5 (2^5 = 32) */
1849 expoX_inf = ((xIntHi_inf & 0x7ff00000) >> 20) - (1023 - 5);
1850
    /* If this particularly biased exponent expoX is negative, we are already less than 1/32 */
1852 if (expoX_inf >= 0) {
1853 /* If we are here, we must perform range reduction */
1854
1855
1856 /* We multiply x by 2^(-expoX-1) by bit manipulation
1857 x cannot be denormalized so there is no danger
1858 */
1859 xdb_inf.i[HI] += (-expoX_inf-1) << 20;
1860
1861 /* We reassign the new x and maintain xIntHi */
1862
1863 xIntHi_inf = xdb_inf.i[HI] & 0x7fffffff;
1864 x_inf = xdb_inf.d;
1865 }
1866
1867 /* Here, we have always |x| < 1/32 */
1868
1869
1870 /* Double precision evaluation steps and one double-double step */
1871
1872 Mul12(&xSqh_inf,&xSql_inf,x_inf,x_inf);
1873
1874 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1875 middlePoly_inf = FMA(quickDirectpolyC5h,x_inf,quickDirectpolyC4h);
1876 #else
1877 middlePoly_inf = quickDirectpolyC4h + x_inf * quickDirectpolyC5h;
1878 #endif
1879
1880 doublePoly_inf = middlePoly_inf;
1881
1882 /* Special path: for small |x| we can truncate the polynomial */
1883
1884 if (xIntHi_inf > SPECIALINTERVALBOUND) {
1885
1886 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1887 highPoly_inf = FMA(FMA(FMA(quickDirectpolyC9h ,x_inf,quickDirectpolyC8h),x_inf,
1888 quickDirectpolyC7h),x_inf,quickDirectpolyC6h);
1889 #else
1890 highPoly_inf = quickDirectpolyC6h + x_inf * (quickDirectpolyC7h + x_inf * (
1891 quickDirectpolyC8h + x_inf * quickDirectpolyC9h));
1892 #endif
1893
1894 highPolyWithSquare_inf = xSqh_inf * highPoly_inf;
1895
1896 doublePoly_inf = middlePoly_inf + highPolyWithSquare_inf;
1897
1898 }
1899
1900 /* Double-double evaluation steps */
1901 tt1h_inf = x_inf * doublePoly_inf;
1902
1903 xSqHalfh_inf = 0.5 * xSqh_inf;
1904 xSqHalfl_inf = 0.5 * xSql_inf;
1905 Add12(t2h_inf,templ_inf,x_inf,xSqHalfh_inf);
1906 t2l_inf = templ_inf + xSqHalfl_inf;
1907
1908 Add12(t1h_inf,t1l_inf,quickDirectpolyC3h,tt1h_inf);
1909 Mul122(&xCubeh_inf,&xCubel_inf,x_inf,xSqh_inf,xSql_inf);
1910 Mul22(&tt3h_inf,&tt3l_inf,xCubeh_inf,xCubel_inf,t1h_inf,t1l_inf);
1911
1912 Add22(&polyh_inf,&polyl_inf,t2h_inf,t2l_inf,tt3h_inf,tt3l_inf);
1913
1914 /* Reconstruction */
1915
1916 /* If we have not performed any range reduction, we have no reconstruction to do */
1917 if (expoX_inf >= 0) {
1918 /* If we are here, we must perform reconstruction */
1919
1920 /* First reconstruction step */
1921 Add12(r1h_inf,r1t_inf,2,polyh_inf);
1922 r1l_inf = r1t_inf + polyl_inf;
1923 Mul22(&rr1h_inf,&rr1l_inf,r1h_inf,r1l_inf,polyh_inf,polyl_inf);
1924
1925 if (expoX_inf >= 1) {
1926
1927 /* Second reconstruction step */
1928 Add12(r2h_inf,r2t_inf,2,rr1h_inf);
1929 r2l_inf = r2t_inf + rr1l_inf;
1930 Mul22(&rr2h_inf,&rr2l_inf,r2h_inf,r2l_inf,rr1h_inf,rr1l_inf);
1931
1932 if (expoX_inf >= 2) {
1933
1934 /* Third reconstruction step */
1935 Add12(r3h_inf,r3t_inf,2,rr2h_inf);
1936 r3l_inf = r3t_inf + rr2l_inf;
1937 Mul22(&rr3h_inf,&rr3l_inf,r3h_inf,r3l_inf,rr2h_inf,rr2l_inf);
1938
1939 /* expoX may be maximally 2 */
1940
1941 expm1h_inf = rr3h_inf;
1942 expm1m_inf = rr3l_inf;
1943
1944 } else {
1945 expm1h_inf = rr2h_inf;
1946 expm1m_inf = rr2l_inf;
1947 }
1948
1949 } else {
1950 expm1h_inf = rr1h_inf;
1951 expm1m_inf = rr1l_inf;
1952 }
1953
1954 } else {
1955 expm1h_inf = polyh_inf;
1956 expm1m_inf = polyl_inf;
1957 }
1958
1959 /* Rounding test */
1960 infDone=1;
1961 TEST_AND_COPY_RD(roundable,restemp_inf,expm1h_inf, expm1m_inf, ROUNDCSTDIRECTRD);
1962 if(roundable==0)
1963 {
1964 expm1_direct_td(&expm1h_inf, &expm1m_inf, &expm1l_inf, x_inf, xSqHalfh_inf, xSqHalfl_inf, xSqh_inf, xSql_inf, expoX_inf);
1965
1966 RoundDownwards3(&restemp_inf,expm1h_inf, expm1m_inf, expm1l_inf);
1967
1968 } /* Accurate phase launched */
1969
1970 }
1971
1972 /* Test if we have |x| <= 1/4-1/2ulp(1/4) for knowing if we use exp(x) or approximate directly */
1973
1974 if (xIntHi_sup < DIRECTINTERVALBOUND) {
1975 /* We approximate expm1 directly after a range reduction as follows
1976
1977 expm1(x) = (expm1(x/2) + 2) * expm1(x/2)
1978
1979 We perform the range reduction in such a way that finally |x| < 1/32
1980 */
1981
1982 /* Extract the exponent of |x| and add 5 (2^5 = 32) */
1983 expoX_sup = ((xIntHi_sup & 0x7ff00000) >> 20) - (1023 - 5);
1984
    /* If this particularly biased exponent expoX is negative, we are already less than 1/32 */
1986 if (expoX_sup >= 0) {
1987 /* If we are here, we must perform range reduction */
1988
1989
1990 /* We multiply x by 2^(-expoX-1) by bit manipulation
1991 x cannot be denormalized so there is no danger
1992 */
1993 xdb_sup.i[HI] += (-expoX_sup-1) << 20;
1994
1995 /* We reassign the new x and maintain xIntHi */
1996
1997 xIntHi_sup = xdb_sup.i[HI] & 0x7fffffff;
1998 x_sup = xdb_sup.d;
1999 }
2000
2001 /* Here, we have always |x| < 1/32 */
2002
2003
2004 /* Double precision evaluation steps and one double-double step */
2005
2006 Mul12(&xSqh_sup,&xSql_sup,x_sup,x_sup);
2007
2008 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
2009 middlePoly_sup = FMA(quickDirectpolyC5h,x_sup,quickDirectpolyC4h);
2010 #else
2011 middlePoly_sup = quickDirectpolyC4h + x_sup * quickDirectpolyC5h;
2012 #endif
2013
2014 doublePoly_sup = middlePoly_sup;
2015
2016 /* Special path: for small |x| we can truncate the polynomial */
2017
2018 if (xIntHi_sup > SPECIALINTERVALBOUND) {
2019
2020 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
2021 highPoly_sup = FMA(FMA(FMA(quickDirectpolyC9h ,x_sup,quickDirectpolyC8h),x_sup,
2022 quickDirectpolyC7h),x_sup,quickDirectpolyC6h);
2023 #else
2024 highPoly_sup = quickDirectpolyC6h + x_sup * (quickDirectpolyC7h + x_sup * (
2025 quickDirectpolyC8h + x_sup * quickDirectpolyC9h));
2026 #endif
2027
2028 highPolyWithSquare_sup = xSqh_sup * highPoly_sup;
2029
2030 doublePoly_sup = middlePoly_sup + highPolyWithSquare_sup;
2031
2032 }
2033
2034 /* Double-double evaluation steps */
2035 tt1h_sup = x_sup * doublePoly_sup;
2036
2037 xSqHalfh_sup = 0.5 * xSqh_sup;
2038 xSqHalfl_sup = 0.5 * xSql_sup;
2039 Add12(t2h_sup,templ_sup,x_sup,xSqHalfh_sup);
2040 t2l_sup = templ_sup + xSqHalfl_sup;
2041
2042 Add12(t1h_sup,t1l_sup,quickDirectpolyC3h,tt1h_sup);
2043 Mul122(&xCubeh_sup,&xCubel_sup,x_sup,xSqh_sup,xSql_sup);
2044 Mul22(&tt3h_sup,&tt3l_sup,xCubeh_sup,xCubel_sup,t1h_sup,t1l_sup);
2045
2046 Add22(&polyh_sup,&polyl_sup,t2h_sup,t2l_sup,tt3h_sup,tt3l_sup);
2047
2048 /* Reconstruction */
2049
2050 /* If we have not performed any range reduction, we have no reconstruction to do */
2051 if (expoX_sup >= 0) {
2052 /* If we are here, we must perform reconstruction */
2053
2054 /* First reconstruction step */
2055 Add12(r1h_sup,r1t_sup,2,polyh_sup);
2056 r1l_sup = r1t_sup + polyl_sup;
2057 Mul22(&rr1h_sup,&rr1l_sup,r1h_sup,r1l_sup,polyh_sup,polyl_sup);
2058
2059 if (expoX_sup >= 1) {
2060
2061 /* Second reconstruction step */
2062 Add12(r2h_sup,r2t_sup,2,rr1h_sup);
2063 r2l_sup = r2t_sup + rr1l_sup;
2064 Mul22(&rr2h_sup,&rr2l_sup,r2h_sup,r2l_sup,rr1h_sup,rr1l_sup);
2065
2066 if (expoX_sup >= 2) {
2067
2068 /* Third reconstruction step */
2069 Add12(r3h_sup,r3t_sup,2,rr2h_sup);
2070 r3l_sup = r3t_sup + rr2l_sup;
2071 Mul22(&rr3h_sup,&rr3l_sup,r3h_sup,r3l_sup,rr2h_sup,rr2l_sup);
2072
2073 /* expoX may be maximally 2 */
2074
2075 expm1h_sup = rr3h_sup;
2076 expm1m_sup = rr3l_sup;
2077
2078 } else {
2079 expm1h_sup = rr2h_sup;
2080 expm1m_sup = rr2l_sup;
2081 }
2082
2083 } else {
2084 expm1h_sup = rr1h_sup;
2085 expm1m_sup = rr1l_sup;
2086 }
2087
2088 } else {
2089 expm1h_sup = polyh_sup;
2090 expm1m_sup = polyl_sup;
2091 }
2092
2093 /* Rounding test */
2094 supDone=1;
2095 TEST_AND_COPY_RU(roundable,restemp_sup,expm1h_sup, expm1m_sup, ROUNDCSTDIRECTRD);
2096 if(roundable==0)
2097 {
2098 expm1_direct_td(&expm1h_sup, &expm1m_sup, &expm1l_sup, x_sup, xSqHalfh_sup, xSqHalfl_sup, xSqh_sup, xSql_sup, expoX_sup);
2099
2100 RoundUpwards3(&restemp_sup,expm1h_sup, expm1m_sup, expm1l_sup);
2101
2102 } /* Accurate phase launched */
2103
2104 }
2105 if((infDone==0) && (supDone==0))
2106 {
2107 /* If we are here, we can use expm1(x) = exp(x) - 1 */
2108
2109 /* Range reduction - exact part: compute k as double and as int */
2110
2111 xMultLog2InvMult2L_inf = x_inf * log2InvMult2L;
2112 xMultLog2InvMult2L_sup = x_sup * log2InvMult2L;
2113 shiftedXMult_inf = xMultLog2InvMult2L_inf + shiftConst;
2114 shiftedXMult_sup = xMultLog2InvMult2L_sup + shiftConst;
2115 kd_inf = shiftedXMult_inf - shiftConst;
2116 kd_sup = shiftedXMult_sup - shiftConst;
2117 shiftedXMultdb_inf.d = shiftedXMult_inf;
2118 shiftedXMultdb_sup.d = shiftedXMult_sup;
2119 k_inf = shiftedXMultdb_inf.i[LO];
2120 k_sup = shiftedXMultdb_sup.i[LO];
2121 M_inf = k_inf >> 12;
2122 M_sup = k_sup >> 12;
2123 index1_inf = k_inf & INDEXMASK1;
2124 index1_sup = k_sup & INDEXMASK1;
2125 index2_inf = (k_inf & INDEXMASK2) >> 6;
2126 index2_sup = (k_sup & INDEXMASK2) >> 6;
2127
2128
2129 /* Range reduction - part affected by error - must be redone in accurate phase */
2130 Mul12(&s1_inf,&s2_inf,msLog2Div2Lh,kd_inf);
2131 Mul12(&s1_sup,&s2_sup,msLog2Div2Lh,kd_sup);
2132 s3_inf = kd_inf * msLog2Div2Lm;
2133 s3_sup = kd_sup * msLog2Div2Lm;
2134 s4_inf = s2_inf + s3_inf;
2135 s4_sup = s2_sup + s3_sup;
2136 s5_inf = x_inf + s1_inf;
2137 s5_sup = x_sup + s1_sup;
2138 Add12Cond(rh_inf,rm_inf,s5_inf,s4_inf);
2139 Add12Cond(rh_sup,rm_sup,s5_sup,s4_sup);
2140
2141
2142 /* Table reads - read only two double-doubles by now */
2143 tbl1h_inf = twoPowerIndex1[index1_inf].hi;
2144 tbl1h_sup = twoPowerIndex1[index1_sup].hi;
2145 tbl1m_inf = twoPowerIndex1[index1_inf].mi;
2146 tbl1m_sup = twoPowerIndex1[index1_sup].mi;
2147 tbl2h_inf = twoPowerIndex2[index2_inf].hi;
2148 tbl2h_sup = twoPowerIndex2[index2_sup].hi;
2149 tbl2m_inf = twoPowerIndex2[index2_inf].mi;
2150 tbl2m_sup = twoPowerIndex2[index2_sup].mi;
2151
2152 /* Quick phase starts here */
2153
2154 rhSquare_inf = rh_inf * rh_inf;
2155 rhSquare_sup = rh_sup * rh_sup;
2156 rhC3_inf = quickCommonpolyC3h * rh_inf;
2157 rhC3_sup = quickCommonpolyC3h * rh_sup;
2158
2159 rhSquareHalf_inf = 0.5 * rhSquare_inf;
2160 rhSquareHalf_sup = 0.5 * rhSquare_sup;
2161
2162 monomialCube_inf = rhC3_inf * rhSquare_inf;
2163 monomialCube_sup = rhC3_sup * rhSquare_sup;
2164 rhFour_inf = rhSquare_inf * rhSquare_inf;
2165 rhFour_sup = rhSquare_sup * rhSquare_sup;
2166
2167 monomialFour_inf = quickCommonpolyC4h * rhFour_inf;
2168 monomialFour_sup = quickCommonpolyC4h * rhFour_sup;
2169 highPoly_inf = monomialCube_inf + monomialFour_inf;
2170 highPoly_sup = monomialCube_sup + monomialFour_sup;
2171 highPolyWithSquare_inf = rhSquareHalf_inf + highPoly_inf;
2172 highPolyWithSquare_sup = rhSquareHalf_sup + highPoly_sup;
2173
2174 /* Reconstruction: integration of table values */
2175
2176 Mul22(&tablesh_inf,&tablesl_inf,tbl1h_inf,tbl1m_inf,tbl2h_inf,tbl2m_inf);
2177 Mul22(&tablesh_sup,&tablesl_sup,tbl1h_sup,tbl1m_sup,tbl2h_sup,tbl2m_sup);
2178
2179 t8_inf = rm_inf + highPolyWithSquare_inf;
2180 t8_sup = rm_sup + highPolyWithSquare_sup;
2181 t9_inf = rh_inf + t8_inf;
2182 t9_sup = rh_sup + t8_sup;
2183
2184 t10_inf = tablesh_inf * t9_inf;
2185 t10_sup = tablesh_sup * t9_sup;
2186
2187 Add12(t11_inf,t12_inf,tablesh_inf,t10_inf);
2188 Add12(t11_sup,t12_sup,tablesh_sup,t10_sup);
2189 t13_inf = t12_inf + tablesl_inf;
2190 t13_sup = t12_sup + tablesl_sup;
2191 Add12(polyTblhdb_inf.d,polyTblmdb_inf.d,t11_inf,t13_inf);
2192 Add12(polyTblhdb_sup.d,polyTblmdb_sup.d,t11_sup,t13_sup);
2193
2194 /* Reconstruction: multiplication by 2^M */
2195
2196 /* Implement the multiplication by multiplication to overcome the
2197 problem of the non-representability of 2^1024 (M = 1024)
2198 This case is possible if polyTblhdb.d < 1
2199 */
2200
2201 polyTblhdb_inf.i[HI] += M_inf << 20;
2202 polyTblhdb_sup.i[HI] += M_sup << 20;
2203 polyTblmdb_inf.i[HI] += M_inf << 20;
2204 polyTblmdb_sup.i[HI] += M_sup << 20;
2205
2206 exph_inf = polyTblhdb_inf.d;
2207 exph_sup = polyTblhdb_sup.d;
2208 expm_inf = polyTblmdb_inf.d;
2209 expm_sup = polyTblmdb_sup.d;
2210
2211 /* Subtraction of 1
2212
2213 Testing if the operation is necessary is more expensive than
2214 performing it in any case.
2215
2216 We may cancel at most 2 bits in the subtraction for
2217 arguments 1/4 <= x <= ln(2) (0.25 <= x <= 0.69)
2218 We must therefore use conditional Add12s
2219
2220 Since we perform a subtraction, we may not have addition overflow towards +inf
2221
2222 */
2223
2224 Add12Cond(t1_inf,t2_inf,-1,exph_inf);
2225 Add12Cond(t1_sup,t2_sup,-1,exph_sup);
2226 t3_inf = t2_inf + expm_inf;
2227 t3_sup = t2_sup + expm_sup;
2228 Add12Cond(expm1h_inf,expm1m_inf,t1_inf,t3_inf);
2229 Add12Cond(expm1h_sup,expm1m_sup,t1_sup,t3_sup);
2230
2231
2232 /* Rounding test */
2233 TEST_AND_COPY_RDRU_EXPM1(roundable,res_inf,expm1h_inf, expm1m_inf, res_sup,expm1h_sup, expm1m_sup, ROUNDCSTCOMMONRD);
2234 if((roundable==2) || (roundable==0))
2235 {
2236 /* Rest of argument reduction for accurate phase */
2237
2238 Mul133(&msLog2Div2LMultKh_inf,&msLog2Div2LMultKm_inf,&msLog2Div2LMultKl_inf,kd_inf,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
2239 t1_inf = x_inf + msLog2Div2LMultKh_inf;
2240 Add12Cond(rh_inf,t2_inf,t1_inf,msLog2Div2LMultKm_inf);
2241 Add12Cond(rm_inf,rl_inf,t2_inf,msLog2Div2LMultKl_inf);
2242
2243 /* Table reads for accurate phase */
2244 tbl1l_inf = twoPowerIndex1[index1_inf].lo;
2245 tbl2l_inf = twoPowerIndex2[index2_inf].lo;
2246
2247 /* Call accurate phase */
2248 expm1_common_td(&expm1h_inf, &expm1m_inf, &expm1l_inf, rh_inf, rm_inf, rl_inf, tbl1h_inf, tbl1m_inf, tbl1l_inf, tbl2h_inf, tbl2m_inf, tbl2l_inf, M_inf);
2249
2250 /* Final rounding */
2251
2252 RoundDownwards3(&res_inf,expm1h_inf, expm1m_inf, expm1l_inf);
2253 } /* Accurate phase launched */
2254 if((roundable==1) || (roundable==0))
2255 {
2256 /* Rest of argument reduction for accurate phase */
2257 Mul133(&msLog2Div2LMultKh_sup,&msLog2Div2LMultKm_sup,&msLog2Div2LMultKl_sup,kd_sup,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
2258 t1_sup = x_sup + msLog2Div2LMultKh_sup;
2259 Add12Cond(rh_sup,t2_sup,t1_sup,msLog2Div2LMultKm_sup);
2260 Add12Cond(rm_sup,rl_sup,t2_sup,msLog2Div2LMultKl_sup);
2261
2262 /* Table reads for accurate phase */
2263 tbl1l_sup = twoPowerIndex1[index1_sup].lo;
2264 tbl2l_sup = twoPowerIndex2[index2_sup].lo;
2265
2266 /* Call accurate phase */
2267 expm1_common_td(&expm1h_sup, &expm1m_sup, &expm1l_sup, rh_sup, rm_sup, rl_sup, tbl1h_sup, tbl1m_sup, tbl1l_sup, tbl2h_sup, tbl2m_sup, tbl2l_sup, M_sup);
2268
2269 /* Final rounding */
2270
2271 RoundUpwards3(&res_sup,expm1h_sup, expm1m_sup, expm1l_sup);
2272 } /* Accurate phase launched */
2273
2274 ASSIGN_LOW(res,res_inf);
2275 ASSIGN_UP(res,res_sup);
2276 return res;
2277 }
2278 if((supDone==1))
2279 {
2280 /* If we are here, we can use expm1(x) = exp(x) - 1 */
2281
2282 /* Range reduction - exact part: compute k as double and as int */
2283
2284 xMultLog2InvMult2L_inf = x_inf * log2InvMult2L;
2285 shiftedXMult_inf = xMultLog2InvMult2L_inf + shiftConst;
2286 kd_inf = shiftedXMult_inf - shiftConst;
2287 shiftedXMultdb_inf.d = shiftedXMult_inf;
2288 k_inf = shiftedXMultdb_inf.i[LO];
2289 M_inf = k_inf >> 12;
2290 index1_inf = k_inf & INDEXMASK1;
2291 index2_inf = (k_inf & INDEXMASK2) >> 6;
2292
2293
2294 /* Range reduction - part affected by error - must be redone in accurate phase */
2295 Mul12(&s1_inf,&s2_inf,msLog2Div2Lh,kd_inf);
2296 s3_inf = kd_inf * msLog2Div2Lm;
2297 s4_inf = s2_inf + s3_inf;
2298 s5_inf = x_inf + s1_inf;
2299 Add12Cond(rh_inf,rm_inf,s5_inf,s4_inf);
2300
2301
2302 /* Table reads - read only two double-doubles by now */
2303 tbl1h_inf = twoPowerIndex1[index1_inf].hi;
2304 tbl1m_inf = twoPowerIndex1[index1_inf].mi;
2305 tbl2h_inf = twoPowerIndex2[index2_inf].hi;
2306 tbl2m_inf = twoPowerIndex2[index2_inf].mi;
2307
2308 /* Quick phase starts here */
2309
2310 rhSquare_inf = rh_inf * rh_inf;
2311 rhC3_inf = quickCommonpolyC3h * rh_inf;
2312
2313 rhSquareHalf_inf = 0.5 * rhSquare_inf;
2314
2315 monomialCube_inf = rhC3_inf * rhSquare_inf;
2316 rhFour_inf = rhSquare_inf * rhSquare_inf;
2317
2318 monomialFour_inf = quickCommonpolyC4h * rhFour_inf;
2319 highPoly_inf = monomialCube_inf + monomialFour_inf;
2320 highPolyWithSquare_inf = rhSquareHalf_inf + highPoly_inf;
2321
2322 /* Reconstruction: integration of table values */
2323
2324 Mul22(&tablesh_inf,&tablesl_inf,tbl1h_inf,tbl1m_inf,tbl2h_inf,tbl2m_inf);
2325
2326 t8_inf = rm_inf + highPolyWithSquare_inf;
2327 t9_inf = rh_inf + t8_inf;
2328
2329 t10_inf = tablesh_inf * t9_inf;
2330
2331 Add12(t11_inf,t12_inf,tablesh_inf,t10_inf);
2332 t13_inf = t12_inf + tablesl_inf;
2333 Add12(polyTblhdb_inf.d,polyTblmdb_inf.d,t11_inf,t13_inf);
2334
2335 /* Reconstruction: multiplication by 2^M */
2336
2337 /* Implement the multiplication by multiplication to overcome the
2338 problem of the non-representability of 2^1024 (M = 1024)
2339 This case is possible if polyTblhdb.d < 1
2340 */
2341
2342 polyTblhdb_inf.i[HI] += M_inf << 20;
2343 polyTblmdb_inf.i[HI] += M_inf << 20;
2344
2345 exph_inf = polyTblhdb_inf.d;
2346 expm_inf = polyTblmdb_inf.d;
2347
2348 /* Subtraction of 1
2349
2350 Testing if the operation is necessary is more expensive than
2351 performing it in any case.
2352
2353 We may cancel at most 2 bits in the subtraction for
2354 arguments 1/4 <= x <= ln(2) (0.25 <= x <= 0.69)
2355 We must therefore use conditional Add12s
2356
2357 Since we perform a subtraction, we may not have addition overflow towards +inf
2358
2359 */
2360
2361 Add12Cond(t1_inf,t2_inf,-1,exph_inf);
2362 t3_inf = t2_inf + expm_inf;
2363 Add12Cond(expm1h_inf,expm1m_inf,t1_inf,t3_inf);
2364
2365
2366 /* Rounding test */
2367 TEST_AND_COPY_RD(roundable,res_inf,expm1h_inf, expm1m_inf, ROUNDCSTCOMMONRD);
2368 if(roundable==0)
2369 {
2370 /* Rest of argument reduction for accurate phase */
2371
2372 Mul133(&msLog2Div2LMultKh_inf,&msLog2Div2LMultKm_inf,&msLog2Div2LMultKl_inf,kd_inf,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
2373 t1_inf = x_inf + msLog2Div2LMultKh_inf;
2374 Add12Cond(rh_inf,t2_inf,t1_inf,msLog2Div2LMultKm_inf);
2375 Add12Cond(rm_inf,rl_inf,t2_inf,msLog2Div2LMultKl_inf);
2376
2377 /* Table reads for accurate phase */
2378 tbl1l_inf = twoPowerIndex1[index1_inf].lo;
2379 tbl2l_inf = twoPowerIndex2[index2_inf].lo;
2380
2381 /* Call accurate phase */
2382 expm1_common_td(&expm1h_inf, &expm1m_inf, &expm1l_inf, rh_inf, rm_inf, rl_inf, tbl1h_inf, tbl1m_inf, tbl1l_inf, tbl2h_inf, tbl2m_inf, tbl2l_inf, M_inf);
2383
2384 /* Final rounding */
2385
2386 RoundDownwards3(&res_inf,expm1h_inf, expm1m_inf, expm1l_inf);
2387 } /* Accurate phase launched */
2388
2389 }
2390 if(infDone==1)
2391 {
2392 /* If we are here, we can use expm1(x) = exp(x) - 1 */
2393
2394 /* Range reduction - exact part: compute k as double and as int */
2395
2396 xMultLog2InvMult2L_sup = x_sup * log2InvMult2L;
2397 shiftedXMult_sup = xMultLog2InvMult2L_sup + shiftConst;
2398 kd_sup = shiftedXMult_sup - shiftConst;
2399 shiftedXMultdb_sup.d = shiftedXMult_sup;
2400 k_sup = shiftedXMultdb_sup.i[LO];
2401 M_sup = k_sup >> 12;
2402 index1_sup = k_sup & INDEXMASK1;
2403 index2_sup = (k_sup & INDEXMASK2) >> 6;
2404
2405
2406 /* Range reduction - part affected by error - must be redone in accurate phase */
2407 Mul12(&s1_sup,&s2_sup,msLog2Div2Lh,kd_sup);
2408 s3_sup = kd_sup * msLog2Div2Lm;
2409 s4_sup = s2_sup + s3_sup;
2410 s5_sup = x_sup + s1_sup;
2411 Add12Cond(rh_sup,rm_sup,s5_sup,s4_sup);
2412
2413
2414 /* Table reads - read only two double-doubles by now */
2415 tbl1h_sup = twoPowerIndex1[index1_sup].hi;
2416 tbl1m_sup = twoPowerIndex1[index1_sup].mi;
2417 tbl2h_sup = twoPowerIndex2[index2_sup].hi;
2418 tbl2m_sup = twoPowerIndex2[index2_sup].mi;
2419
2420 /* Quick phase starts here */
2421
2422 rhSquare_sup = rh_sup * rh_sup;
2423 rhC3_sup = quickCommonpolyC3h * rh_sup;
2424
2425 rhSquareHalf_sup = 0.5 * rhSquare_sup;
2426
2427 monomialCube_sup = rhC3_sup * rhSquare_sup;
2428 rhFour_sup = rhSquare_sup * rhSquare_sup;
2429
2430 monomialFour_sup = quickCommonpolyC4h * rhFour_sup;
2431 highPoly_sup = monomialCube_sup + monomialFour_sup;
2432 highPolyWithSquare_sup = rhSquareHalf_sup + highPoly_sup;
2433
2434 /* Reconstruction: integration of table values */
2435
2436 Mul22(&tablesh_sup,&tablesl_sup,tbl1h_sup,tbl1m_sup,tbl2h_sup,tbl2m_sup);
2437
2438 t8_sup = rm_sup + highPolyWithSquare_sup;
2439 t9_sup = rh_sup + t8_sup;
2440
2441 t10_sup = tablesh_sup * t9_sup;
2442
2443 Add12(t11_sup,t12_sup,tablesh_sup,t10_sup);
2444 t13_sup = t12_sup + tablesl_sup;
2445 Add12(polyTblhdb_sup.d,polyTblmdb_sup.d,t11_sup,t13_sup);
2446
2447 /* Reconstruction: multiplication by 2^M */
2448
2449 /* Implement the multiplication by multiplication to overcome the
2450 problem of the non-representability of 2^1024 (M = 1024)
2451 This case is possible if polyTblhdb.d < 1
2452 */
2453
2454 polyTblhdb_sup.i[HI] += M_sup << 20;
2455 polyTblmdb_sup.i[HI] += M_sup << 20;
2456
2457 exph_sup = polyTblhdb_sup.d;
2458 expm_sup = polyTblmdb_sup.d;
2459
2460 /* Subtraction of 1
2461
2462 Testing if the operation is necessary is more expensive than
2463 performing it in any case.
2464
2465 We may cancel at most 2 bits in the subtraction for
2466 arguments 1/4 <= x <= ln(2) (0.25 <= x <= 0.69)
2467 We must therefore use conditional Add12s
2468
2469 Since we perform a subtraction, we may not have addition overflow towards +inf
2470
2471 */
2472
2473 Add12Cond(t1_sup,t2_sup,-1,exph_sup);
2474 t3_sup = t2_sup + expm_sup;
2475 Add12Cond(expm1h_sup,expm1m_sup,t1_sup,t3_sup);
2476
2477 /* Rounding test */
2478 TEST_AND_COPY_RU(roundable,res_sup,expm1h_sup, expm1m_sup, ROUNDCSTCOMMONRD);
2479 if(roundable==0)
2480 {
2481 /* Rest of argument reduction for accurate phase */
2482 Mul133(&msLog2Div2LMultKh_sup,&msLog2Div2LMultKm_sup,&msLog2Div2LMultKl_sup,kd_sup,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
2483 t1_sup = x_sup + msLog2Div2LMultKh_sup;
2484 Add12Cond(rh_sup,t2_sup,t1_sup,msLog2Div2LMultKm_sup);
2485 Add12Cond(rm_sup,rl_sup,t2_sup,msLog2Div2LMultKl_sup);
2486
2487 /* Table reads for accurate phase */
2488 tbl1l_sup = twoPowerIndex1[index1_sup].lo;
2489 tbl2l_sup = twoPowerIndex2[index2_sup].lo;
2490
2491 /* Call accurate phase */
2492 expm1_common_td(&expm1h_sup, &expm1m_sup, &expm1l_sup, rh_sup, rm_sup, rl_sup, tbl1h_sup, tbl1m_sup, tbl1l_sup, tbl2h_sup, tbl2m_sup, tbl2l_sup, M_sup);
2493
2494 /* Final rounding */
2495
2496 RoundUpwards3(&res_sup,expm1h_sup, expm1m_sup, expm1l_sup);
2497 } /* Accurate phase launched */
2498 }
2499
2500 if (infDone==1) res_inf=restemp_inf;
2501 if (supDone==1) res_sup=restemp_sup;
2502 ASSIGN_LOW(res,res_inf);
2503 ASSIGN_UP(res,res_sup);
2504 return res;
2505 }
2506 #endif
2507