1 /*
2  * Correctly rounded expm1 = e^x - 1
3  *
4  * Author : Christoph Lauter (ENS Lyon)
5  *
6  * This file is part of the crlibm library developed by the Arenaire
7  * project at Ecole Normale Superieure de Lyon
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 */
23 
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include "crlibm.h"
27 #include "crlibm_private.h"
28 #include "triple-double.h"
29 #include "expm1.h"
30 #ifdef BUILD_INTERVAL_FUNCTIONS
31 #include "interval.h"
32 #endif
33 
34 
35 
36 #define DEBUG 0
37 
38 
expm1_direct_td(double * expm1h,double * expm1m,double * expm1l,double x,double xSqHalfh,double xSqHalfl,double xSqh,double xSql,int expoX)39 void expm1_direct_td(double *expm1h, double *expm1m, double *expm1l,
40 		     double x, double xSqHalfh, double xSqHalfl, double xSqh, double xSql, int expoX) {
41   double highPoly, tt1h, t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
42   double tt6h, tt6m, tt6l, t7h, t7m, t7l, lowPolyh, lowPolym, lowPolyl;
43   double fullHighPolyh, fullHighPolym, fullHighPolyl, polyh, polym, polyl;
44   double xCubeh, xCubem, xCubel, tt7h, tt7m, tt7l, t8h, t8m, t8l;
45   double expm1hover, expm1mover, expm1lover;
46   double r1h, r1m, r1l, r2h, r2m, r2l, r3h, r3m, r3l;
47   double rr1h, rr1m, rr1l, rr2h, rr2m, rr2l, rr3h, rr3m, rr3l;
48   double fullHighPolyhover, fullHighPolymover, fullHighPolylover;
49 
50   /* Double precision evaluation steps */
51 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
52     highPoly = FMA(FMA(FMA(FMA(accuDirectpolyC15h ,x,accuDirectpolyC14h),x,accuDirectpolyC13h),x,
53                                accuDirectpolyC12h),x,accuDirectpolyC11h);
54 #else
55     highPoly = accuDirectpolyC11h + x * (accuDirectpolyC12h + x * (accuDirectpolyC13h + x * (
56 	       accuDirectpolyC14h + x *  accuDirectpolyC15h)));
57 #endif
58 
59     tt1h = x * highPoly;
60 
61     /* Triple-double steps for x + x^2/2 and x^3*/
62 
63     Add123(&lowPolyh,&lowPolym,&lowPolyl,x,xSqHalfh,xSqHalfl);                     /* infty - 52/53 */
64     Mul123(&xCubeh,&xCubem,&xCubel,x,xSqh,xSql);                                   /* 154 - 47/53 */
65 
66 
67     /* Double-double evaluation steps */
68 
69     Add12(t1h,t1l,accuDirectpolyC10h,tt1h);
70 
71     MulAdd212(&t2h,&t2l,accuDirectpolyC9h,accuDirectpolyC9m,x,t1h,t1l);
72     MulAdd212(&t3h,&t3l,accuDirectpolyC8h,accuDirectpolyC8m,x,t2h,t2l);
73     MulAdd212(&t4h,&t4l,accuDirectpolyC7h,accuDirectpolyC7m,x,t3h,t3l);
74     MulAdd212(&t5h,&t5l,accuDirectpolyC6h,accuDirectpolyC6m,x,t4h,t4l);
75     MulAdd212(&t6h,&t6l,accuDirectpolyC5h,accuDirectpolyC5m,x,t5h,t5l);
76 
77     /* Triple-double evaluation steps */
78 
79     Mul123(&tt6h,&tt6m,&tt6l,x,t6h,t6l);                                          /* 154 - 47/53 */
80     Add233(&t7h,&t7m,&t7l,accuDirectpolyC4h,accuDirectpolyC4m,tt6h,tt6m,tt6l);    /* 150 - 43/53 */
81 
82     Mul133(&tt7h,&tt7m,&tt7l,x,t7h,t7m,t7l);                                      /* 143 - 38/53 */
83     Add33(&t8h,&t8m,&t8l,accuDirectpolyC3h,accuDirectpolyC3m,accuDirectpolyC3l,tt7h,tt7m,tt7l); /* 135 - 33/53 */
84 
85     Mul33(&fullHighPolyhover,&fullHighPolymover,&fullHighPolylover,xCubeh,xCubem,xCubel,t8h,t8m,t8l); /* 130 - 29/53 */
86 
87     Renormalize3(&fullHighPolyh,&fullHighPolym,&fullHighPolyl,
88 		 fullHighPolyhover,fullHighPolymover,fullHighPolylover);                     /* infty - 52/53 */
89 
90     Add33(&polyh,&polym,&polyl,lowPolyh,lowPolym,lowPolyl,fullHighPolyh,fullHighPolym,fullHighPolyl);
91                                                                                              /* 149 - 47/53 */
92 
93     /* Reconstruction steps */
94 
95     /* If we have not performed any range reduction, we have no reconstruction to do */
96     if (expoX >= 0) {
97       /* If we are here, we must perform reconstruction */
98 
99       /* First reconstruction step */
100 
101       Add133(&r1h,&r1m,&r1l,2,polyh,polym,polyl);
102       Mul33(&rr1h,&rr1m,&rr1l,r1h,r1m,r1l,polyh,polym,polyl);
103       if (expoX >= 1) {
104 
105 	/* Second reconstruction step */
106 	Add133(&r2h,&r2m,&r2l,2,rr1h,rr1m,rr1l);
107 	Mul33(&rr2h,&rr2m,&rr2l,r2h,r2m,r2l,rr1h,rr1m,rr1l);
108 
109 	if (expoX >= 2) {
110 
111 	  /* Third reconstruction step */
112 	  Add133(&r3h,&r3m,&r3l,2,rr2h,rr2m,rr2l);
113 	  Mul33(&rr3h,&rr3m,&rr3l,r3h,r3m,r3l,rr2h,rr2m,rr2l);
114 
115 	  /* expoX may be maximally 2 */
116 
117 	  expm1hover = rr3h;
118 	  expm1mover = rr3m;
119 	  expm1lover = rr3l;
120 
121 	} else {
122 	  expm1hover = rr2h;
123 	  expm1mover = rr2m;
124 	  expm1lover = rr2l;
125 	}
126 
127       } else {
128 	expm1hover = rr1h;
129 	expm1mover = rr1m;
130 	expm1lover = rr1l;
131       }
132 
133     } else {
134       expm1hover = polyh;
135       expm1mover = polym;
136       expm1lover = polyl;
137     }
138 
139     /* Renormalize before returning */
140 
141     Renormalize3(expm1h,expm1m,expm1l,expm1hover,expm1mover,expm1lover);
142 }
143 
expm1_common_td(double * expm1h,double * expm1m,double * expm1l,double rh,double rm,double rl,double tbl1h,double tbl1m,double tbl1l,double tbl2h,double tbl2m,double tbl2l,int M)144 void expm1_common_td(double *expm1h, double *expm1m, double *expm1l,
145 		     double rh, double rm, double rl,
146 		     double tbl1h, double tbl1m, double tbl1l,
147 		     double tbl2h, double tbl2m, double tbl2l,
148 		     int M) {
149   double highPoly, highPolyMulth, highPolyMultm, highPolyMultl;
150   double rhSquareh, rhSquarel, rhSquareHalfh, rhSquareHalfl;
151   double rhCubeh, rhCubem, rhCubel;
152   double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5, t6;
153   double lowPolyh, lowPolym, lowPolyl;
154   double ph, pm, pl, phnorm, pmnorm, rmlMultPh, rmlMultPl;
155   double qh, ql, fullPolyh, fullPolym, fullPolyl;
156   double polyWithTbl1h, polyWithTbl1m, polyWithTbl1l;
157   double polyAddOneh,polyAddOnem,polyAddOnel;
158   double polyWithTablesh, polyWithTablesm, polyWithTablesl;
159   double exph, expm, expl, expm1hover, expm1mover, expm1lover;
160   db_number polyWithTableshdb, polyWithTablesmdb, polyWithTablesldb;
161 
162   /* Polynomial approximation - double precision steps */
163 
164 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
165   highPoly = FMA(FMA(accuCommonpolyC7h,rh,accuCommonpolyC6h),rh,accuCommonpolyC5h);
166 #else
167   highPoly = accuCommonpolyC5h + rh * (accuCommonpolyC6h + rh * accuCommonpolyC7h);
168 #endif
169 
170   /* Polynomial approximation - double-double precision steps */
171 
172   Mul12(&t1h,&t1l,rh,highPoly);
173   Add22(&t2h,&t2l,accuCommonpolyC4h,accuCommonpolyC4m,t1h,t1l);
174   Mul122(&t3h,&t3l,rh,t2h,t2l);
175   Add22(&t4h,&t4l,accuCommonpolyC3h,accuCommonpolyC3m,t3h,t3l);
176 
177   Mul12(&rhSquareh,&rhSquarel,rh,rh);
178   Mul123(&rhCubeh,&rhCubem,&rhCubel,rh,rhSquareh,rhSquarel);
179 
180   rhSquareHalfh = 0.5 * rhSquareh;
181   rhSquareHalfl = 0.5 * rhSquarel;
182 
183   /* Polynomial approximation - triple-double precision steps */
184 
185   Renormalize3(&lowPolyh,&lowPolym,&lowPolyl,rh,rhSquareHalfh,rhSquareHalfl);
186 
187   Mul233(&highPolyMulth,&highPolyMultm,&highPolyMultl,t4h,t4l,rhCubeh,rhCubem,rhCubel);
188 
189   Add33(&ph,&pm,&pl,lowPolyh,lowPolym,lowPolyl,highPolyMulth,highPolyMultm,highPolyMultl);
190 
191   /* Reconstruction */
192 
193   Add12(phnorm,pmnorm,ph,pm);
194   Mul22(&rmlMultPh,&rmlMultPl,rm,rl,phnorm,pmnorm);
195   Add22(&qh,&ql,rm,rl,rmlMultPh,rmlMultPl);
196 
197   Add233Cond(&fullPolyh,&fullPolym,&fullPolyl,qh,ql,ph,pm,pl);
198   Add12(polyAddOneh,t5,1,fullPolyh);
199   Add12Cond(polyAddOnem,t6,t5,fullPolym);
200   polyAddOnel = t6 + fullPolyl;
201   Mul33(&polyWithTbl1h,&polyWithTbl1m,&polyWithTbl1l,tbl1h,tbl1m,tbl1l,polyAddOneh,polyAddOnem,polyAddOnel);
202   Mul33(&polyWithTablesh,&polyWithTablesm,&polyWithTablesl,
203 	tbl2h,tbl2m,tbl2l,
204 	polyWithTbl1h,polyWithTbl1m,polyWithTbl1l);
205 
206   /* Multiplication by 2^(M)
207 
208      We perform it in integer to overcome the non-representability of 2^(1024)
209      This case is possible for M = 1024 and polyWithTablesh < 1
210 
211      The overlap in the triple-double polyWithTables[hml] stays unchanged.
212 
213   */
214 
215   polyWithTableshdb.d = polyWithTablesh;
216   polyWithTablesmdb.d = polyWithTablesm;
217   polyWithTablesldb.d = polyWithTablesl;
218 
219   /* TODO FIXME probably at least the first of these tests is useless,
220      but I leave this to Christoph to check it. Let us be
221      conservative. Florent  */
222   if(polyWithTableshdb.d!=0)
223     polyWithTableshdb.i[HI] += M << 20;
224   if(polyWithTablesmdb.d!=0)
225     polyWithTablesmdb.i[HI] += M << 20;
226   if(polyWithTablesldb.d!=0)
227     polyWithTablesldb.i[HI] += M << 20;
228 
229   exph = polyWithTableshdb.d;
230   expm = polyWithTablesmdb.d;
231   expl = polyWithTablesldb.d;
232 
233   /* Substraction of -1
234 
235      We use a conditional Add133
236   */
237 
238   Add133Cond(&expm1hover,&expm1mover,&expm1lover,-1,exph,expm,expl);
239 
240   /* Renormalization */
241 
242   Renormalize3(expm1h,expm1m,expm1l,expm1hover,expm1mover,expm1lover);
243 
244 }
245 
246 
expm1_rn(double x)247 double expm1_rn(double x) {
248   db_number xdb, shiftedXMultdb, polyTblhdb, polyTblmdb;
249   int xIntHi, expoX, k, M, index1, index2;
250   double highPoly, tt1h, t1h, t1l, xSqh, xSql, xSqHalfh, xSqHalfl, xCubeh, xCubel, t2h, t2l, templ, tt3h, tt3l;
251   double polyh, polyl, expm1h, expm1m, expm1l;
252   double r1h, r1l, r1t, rr1h, rr1l;
253   double r2h, r2l, r2t, rr2h, rr2l;
254   double r3h, r3l, r3t, rr3h, rr3l;
255   double xMultLog2InvMult2L, shiftedXMult, kd, s1, s2, s3, s4, s5, rh, rm, rl;
256   double rhSquare, rhC3, rhSquareHalf, monomialCube, rhFour, monomialFour;
257   double tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l;
258   double highPolyWithSquare, tablesh, tablesl, t8, t9, t10, t11, t12, t13;
259   double exph, expm, t1, t2, t3;
260   double msLog2Div2LMultKh, msLog2Div2LMultKm, msLog2Div2LMultKl;
261   double middlePoly, doublePoly;
262 
263 
264   xdb.d = x;
265 
266   /* Strip off the sign of x for the following tests */
267 
268   xIntHi = xdb.i[HI] & 0x7fffffff;
269 
270   /* Test if we are so small that we can return (a corrected) x as correct rounding */
271   if (xIntHi < RETURNXBOUND) {
272     return x;
273   }
274 
275 
276   /* Filter out special cases like overflow, -1 in result, infinities and NaNs
277      The filters are not sharp, we have positive arguments that flow through
278   */
279   if (xIntHi >= SIMPLEOVERFLOWBOUND) {
280     /* Test if we are +/-inf or NaN */
281     if (xIntHi >= 0x7ff00000) {
282       /* Test if NaN */
283       if (((xIntHi & 0x000fffff) | xdb.i[LO]) != 0) {
284 	/* NaN */
285 	return x+x;  /* return NaN */
286       }
287       /* Test if +inf or -inf */
288       if (xdb.i[HI] > 0) {
289 	/* +inf */
290 	return x+x;  /* return +inf */
291       }
292 
293       /* If we are here, we are -inf */
294       return -1.0;
295     }
296 
297     /* If we are here, we are overflowed or a common case that flows through */
298 
299     /* Test if we are actually overflowed */
300     if (x > OVERFLOWBOUND) {
301       return LARGEST * LARGEST;  /* return +inf and set flag */
302     }
303   }
304 
305   /* Test if we know already that we are -1.0 (+ correction depending on rounding mode) in result */
306   if (x < MINUSONEBOUND) {
307     return -1.0;
308   }
309 
310   /* Test if we have |x| <= 1/4-1/2ulp(1/4) for knowing if we use exp(x) or approximate directly */
311 
312   if (xIntHi < DIRECTINTERVALBOUND) {
313     /* We approximate expm1 directly after a range reduction as follows
314 
315        expm1(x) = (expm1(x/2) + 2) * expm1(x/2)
316 
317        We perform the range reduction in such a way that finally |x| < 1/32
318     */
319 
320     /* Extract the exponent of |x| and add 5 (2^5 = 32) */
321     expoX = ((xIntHi & 0x7ff00000) >> 20) - (1023 - 5);
322 
323     /* If this particularily biased exponent expoX is negative, we are already less than 1/32 */
324     if (expoX >= 0) {
325       /* If we are here, we must perform range reduction */
326 
327 
328       /* We multiply x by 2^(-expoX-1) by bit manipulation
329 	 x cannot be denormalized so there is no danger
330       */
331       xdb.i[HI] += (-expoX-1) << 20;
332 
333       /* We reassign the new x and maintain xIntHi */
334 
335       xIntHi = xdb.i[HI] & 0x7fffffff;
336       x = xdb.d;
337     }
338 
339     /* Here, we have always |x| < 1/32 */
340 
341 
342     /* Double precision evaluation steps and one double-double step */
343 
344     Mul12(&xSqh,&xSql,x,x);
345 
346 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
347     middlePoly = FMA(quickDirectpolyC5h,x,quickDirectpolyC4h);
348 #else
349     middlePoly = quickDirectpolyC4h + x * quickDirectpolyC5h;
350 #endif
351 
352     doublePoly = middlePoly;
353 
354     /* Special path: for small |x| we can truncate the polynomial */
355 
356     if (xIntHi > SPECIALINTERVALBOUND) {
357 
358 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
359       highPoly = FMA(FMA(FMA(quickDirectpolyC9h ,x,quickDirectpolyC8h),x,
360                              quickDirectpolyC7h),x,quickDirectpolyC6h);
361 #else
362       highPoly = quickDirectpolyC6h + x * (quickDirectpolyC7h + x * (
363 	         quickDirectpolyC8h + x *  quickDirectpolyC9h));
364 #endif
365 
366       highPolyWithSquare = xSqh * highPoly;
367 
368       doublePoly = middlePoly + highPolyWithSquare;
369 
370     }
371 
372     /* Double-double evaluation steps */
373     tt1h = x * doublePoly;
374 
375     xSqHalfh = 0.5 * xSqh;
376     xSqHalfl = 0.5 * xSql;
377     Add12(t2h,templ,x,xSqHalfh);
378     t2l = templ + xSqHalfl;
379 
380     Add12(t1h,t1l,quickDirectpolyC3h,tt1h);
381     Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
382     Mul22(&tt3h,&tt3l,xCubeh,xCubel,t1h,t1l);
383 
384     Add22(&polyh,&polyl,t2h,t2l,tt3h,tt3l);
385 
386     /* Reconstruction */
387 
388     /* If we have not performed any range reduction, we have no reconstruction to do */
389     if (expoX >= 0) {
390       /* If we are here, we must perform reconstruction */
391 
392       /* First reconstruction step */
393       Add12(r1h,r1t,2,polyh);
394       r1l = r1t + polyl;
395       Mul22(&rr1h,&rr1l,r1h,r1l,polyh,polyl);
396 
397       if (expoX >= 1) {
398 
399 	/* Second reconstruction step */
400 	Add12(r2h,r2t,2,rr1h);
401 	r2l = r2t + rr1l;
402 	Mul22(&rr2h,&rr2l,r2h,r2l,rr1h,rr1l);
403 
404 	if (expoX >= 2) {
405 
406 	  /* Third reconstruction step */
407 	  Add12(r3h,r3t,2,rr2h);
408 	  r3l = r3t + rr2l;
409 	  Mul22(&rr3h,&rr3l,r3h,r3l,rr2h,rr2l);
410 
411 	  /* expoX may be maximally 2 */
412 
413 	  expm1h = rr3h;
414 	  expm1m = rr3l;
415 
416 	} else {
417 	  expm1h = rr2h;
418 	  expm1m = rr2l;
419 	}
420 
421       } else {
422 	expm1h = rr1h;
423 	expm1m = rr1l;
424       }
425 
426     } else {
427       expm1h = polyh;
428       expm1m = polyl;
429     }
430 
431     /* Rounding test */
432     if(expm1h == (expm1h + (expm1m * ROUNDCSTDIRECTRN)))
433      return expm1h;
434    else
435      {
436 
437 #if DEBUG
438        printf("Launch accurate phase (direct interval)\n");
439 #endif
440 
441        expm1_direct_td(&expm1h, &expm1m, &expm1l, x, xSqHalfh, xSqHalfl, xSqh, xSql, expoX);
442 
443        ReturnRoundToNearest3(expm1h, expm1m, expm1l);
444 
445      } /* Accurate phase launched */
446 
447     /* We cannot be here, since we return in all cases before */
448   }
449 
450   /* If we are here, we can use expm1(x) = exp(x) - 1 */
451 
452   /* Range reduction - exact part: compute k as double and as int */
453 
454   xMultLog2InvMult2L = x * log2InvMult2L;
455   shiftedXMult = xMultLog2InvMult2L + shiftConst;
456   kd = shiftedXMult - shiftConst;
457   shiftedXMultdb.d = shiftedXMult;
458   k = shiftedXMultdb.i[LO];
459   M = k >> 12;
460   index1 = k & INDEXMASK1;
461   index2 = (k & INDEXMASK2) >> 6;
462 
463   /* Range reduction - part affected by error - must be redone in accurate phase */
464   Mul12(&s1,&s2,msLog2Div2Lh,kd);
465   s3 = kd * msLog2Div2Lm;
466   s4 = s2 + s3;
467   s5 = x + s1;
468   Add12Cond(rh,rm,s5,s4);
469 
470   /* Table reads - read only two double-doubles by now */
471   tbl1h = twoPowerIndex1[index1].hi;
472   tbl1m = twoPowerIndex1[index1].mi;
473   tbl2h = twoPowerIndex2[index2].hi;
474   tbl2m = twoPowerIndex2[index2].mi;
475 
476   /* Quick phase starts here */
477 
478   rhSquare = rh * rh;
479   rhC3 = quickCommonpolyC3h * rh;
480 
481   rhSquareHalf = 0.5 * rhSquare;
482   monomialCube = rhC3 * rhSquare;
483   rhFour = rhSquare * rhSquare;
484 
485   monomialFour = quickCommonpolyC4h * rhFour;
486 
487   highPoly = monomialCube + monomialFour;
488 
489   highPolyWithSquare = rhSquareHalf + highPoly;
490 
491   /* Reconstruction: integration of table values */
492 
493   Mul22(&tablesh,&tablesl,tbl1h,tbl1m,tbl2h,tbl2m);
494 
495   t8 = rm + highPolyWithSquare;
496   t9 = rh + t8;
497 
498   t10 = tablesh * t9;
499 
500   Add12(t11,t12,tablesh,t10);
501   t13 = t12 + tablesl;
502   Add12(polyTblhdb.d,polyTblmdb.d,t11,t13);
503 
504   /* Reconstruction: multiplication by 2^M */
505 
506   /* Implement the multiplication by multiplication to overcome the
507      problem of the non-representability of 2^1024 (M = 1024)
508      This case is possible if polyTblhdb.d < 1
509   */
510 
511   polyTblhdb.i[HI] += M << 20;
512   polyTblmdb.i[HI] += M << 20;
513 
514   exph = polyTblhdb.d;
515   expm = polyTblmdb.d;
516 
517   /* Substraction of 1
518 
519      Testing if the operation is necessary is more expensive than
520      performing it in any case.
521 
522      We may cancellate at most 2 bits in the subtraction for
523      arguments 1/4 <= x <= ln(2) (0.25 <= x <= 0.69)
524      We must therefore use conditional Add12s
525 
526      Since we perform a substraction, we may not have addition overflow towards +inf
527 
528   */
529 
530   Add12Cond(t1,t2,-1,exph);
531   t3 = t2 + expm;
532   Add12Cond(expm1h,expm1m,t1,t3);
533 
534 
535   /* Rounding test */
536   if(expm1h == (expm1h + (expm1m * ROUNDCSTCOMMONRN))) {
537     return expm1h;
538   } else {
539     /* Rest of argument reduction for accurate phase */
540 
541     Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
542     t1 = x + msLog2Div2LMultKh;
543     Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
544     Add12Cond(rm,rl,t2,msLog2Div2LMultKl);
545 
546     /* Table reads for accurate phase */
547     tbl1l = twoPowerIndex1[index1].lo;
548     tbl2l = twoPowerIndex2[index2].lo;
549 
550 #if DEBUG
551        printf("Launch accurate phase (common interval)\n");
552 #endif
553 
554     /* Call accurate phase */
555     expm1_common_td(&expm1h, &expm1m, &expm1l, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l, M);
556 
557     /* Final rounding */
558 
559     ReturnRoundToNearest3(expm1h, expm1m, expm1l);
560   } /* Accurate phase launched */
561 
562   /* We cannot be here since we return before in any case */
563 }
564 
expm1_rd(double x)565 double expm1_rd(double x) {
566   db_number xdb, shiftedXMultdb, polyTblhdb, polyTblmdb;
567   int xIntHi, expoX, k, M, index1, index2;
568   double highPoly, tt1h, t1h, t1l, xSqh, xSql, xSqHalfh, xSqHalfl, xCubeh, xCubel, t2h, t2l, templ, tt3h, tt3l;
569   double polyh, polyl, expm1h, expm1m, expm1l;
570   double r1h, r1l, r1t, rr1h, rr1l;
571   double r2h, r2l, r2t, rr2h, rr2l;
572   double r3h, r3l, r3t, rr3h, rr3l;
573   double xMultLog2InvMult2L, shiftedXMult, kd, s1, s2, s3, s4, s5, rh, rm, rl;
574   double rhSquare, rhC3, rhSquareHalf, monomialCube, rhFour, monomialFour;
575   double tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l;
576   double highPolyWithSquare, tablesh, tablesl, t8, t9, t10, t11, t12, t13;
577   double exph, expm, t1, t2, t3;
578   double msLog2Div2LMultKh, msLog2Div2LMultKm, msLog2Div2LMultKl;
579   double middlePoly, doublePoly;
580 
581   xdb.d = x;
582 
583   /* Strip off the sign of x for the following tests */
584 
585   xIntHi = xdb.i[HI] & 0x7fffffff;
586 
587   /* Test if we are so small that we can return (a corrected) x as correct rounding */
588   if (xIntHi < RETURNXBOUND) {
589     /* The only algebraic result is 0 for x = +/- 0; in this case, we can return x = +/- 0
590        The truncation rest x^2/2 + x^3/6 + ... is always positive
591        but less than 1 ulp in this case, so we round down by returning x
592     */
593     return x;
594   }
595 
596 
597   /* Filter out special cases like overflow, -1 in result, infinities and NaNs
598      The filters are not sharp, we have positive arguments that flow through
599   */
600   if (xIntHi >= SIMPLEOVERFLOWBOUND) {
601     /* Test if we are +/-inf or NaN */
602     if (xIntHi >= 0x7ff00000) {
603       /* Test if NaN */
604       if (((xIntHi & 0x000fffff) | xdb.i[LO]) != 0) {
605 	/* NaN */
606 	return x+x;  /* return NaN */
607       }
608       /* Test if +inf or -inf */
609       if (xdb.i[HI] > 0) {
610 	/* +inf */
611 	return x+x;  /* return +inf */
612       }
613 
614       /* If we are here, we are -inf */
615       return -1.0;
616     }
617 
618     /* If we are here, we are overflowed or a common case that flows through */
619 
620     /* Test if we are actually overflowed */
621     if (x > OVERFLOWBOUND) {
622       /* We would be overflowed but as we are rounding downwards
623 	 the nearest number lesser than the exact result is the greatest
624 	 normal. In any case, we must raise the inexact flag.
625       */
626       return LARGEST * (1.0 + SMALLEST);
627     }
628   }
629 
630   /* Test if we know already that we are -1.0 (+ correction depending on rounding mode) in result */
631   if (x < MINUSONEBOUND) {
632     /* We round down, so we are -1.0 */
633     return -1.0;
634   }
635 
636   /* Test if we have |x| <= 1/4-1/2ulp(1/4) for knowing if we use exp(x) or approximate directly */
637 
638   if (xIntHi < DIRECTINTERVALBOUND) {
639     /* We approximate expm1 directly after a range reduction as follows
640 
641        expm1(x) = (expm1(x/2) + 2) * expm1(x/2)
642 
643        We perform the range reduction in such a way that finally |x| < 1/32
644     */
645 
646     /* Extract the exponent of |x| and add 5 (2^5 = 32) */
647     expoX = ((xIntHi & 0x7ff00000) >> 20) - (1023 - 5);
648 
649     /* If this particularily biased exponent expoX is negative, we are already less than 1/32 */
650     if (expoX >= 0) {
651       /* If we are here, we must perform range reduction */
652 
653 
654       /* We multiply x by 2^(-expoX-1) by bit manipulation
655 	 x cannot be denormalized so there is no danger
656       */
657       xdb.i[HI] += (-expoX-1) << 20;
658 
659       /* We reassign the new x and maintain xIntHi */
660 
661       xIntHi = xdb.i[HI] & 0x7fffffff;
662       x = xdb.d;
663     }
664 
665     /* Here, we have always |x| < 1/32 */
666 
667 
668     /* Double precision evaluation steps and one double-double step */
669 
670     Mul12(&xSqh,&xSql,x,x);
671 
672 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
673     middlePoly = FMA(quickDirectpolyC5h,x,quickDirectpolyC4h);
674 #else
675     middlePoly = quickDirectpolyC4h + x * quickDirectpolyC5h;
676 #endif
677 
678     doublePoly = middlePoly;
679 
680     /* Special path: for small |x| we can truncate the polynomial */
681 
682     if (xIntHi > SPECIALINTERVALBOUND) {
683 
684 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
685       highPoly = FMA(FMA(FMA(quickDirectpolyC9h ,x,quickDirectpolyC8h),x,
686                              quickDirectpolyC7h),x,quickDirectpolyC6h);
687 #else
688       highPoly = quickDirectpolyC6h + x * (quickDirectpolyC7h + x * (
689 	         quickDirectpolyC8h + x *  quickDirectpolyC9h));
690 #endif
691 
692       highPolyWithSquare = xSqh * highPoly;
693 
694       doublePoly = middlePoly + highPolyWithSquare;
695 
696     }
697 
698     /* Double-double evaluation steps */
699     tt1h = x * doublePoly;
700 
701     xSqHalfh = 0.5 * xSqh;
702     xSqHalfl = 0.5 * xSql;
703     Add12(t2h,templ,x,xSqHalfh);
704     t2l = templ + xSqHalfl;
705 
706     Add12(t1h,t1l,quickDirectpolyC3h,tt1h);
707     Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
708     Mul22(&tt3h,&tt3l,xCubeh,xCubel,t1h,t1l);
709 
710     Add22(&polyh,&polyl,t2h,t2l,tt3h,tt3l);
711 
712     /* Reconstruction */
713 
714     /* If we have not performed any range reduction, we have no reconstruction to do */
715     if (expoX >= 0) {
716       /* If we are here, we must perform reconstruction */
717 
718       /* First reconstruction step */
719       Add12(r1h,r1t,2,polyh);
720       r1l = r1t + polyl;
721       Mul22(&rr1h,&rr1l,r1h,r1l,polyh,polyl);
722 
723       if (expoX >= 1) {
724 
725 	/* Second reconstruction step */
726 	Add12(r2h,r2t,2,rr1h);
727 	r2l = r2t + rr1l;
728 	Mul22(&rr2h,&rr2l,r2h,r2l,rr1h,rr1l);
729 
730 	if (expoX >= 2) {
731 
732 	  /* Third reconstruction step */
733 	  Add12(r3h,r3t,2,rr2h);
734 	  r3l = r3t + rr2l;
735 	  Mul22(&rr3h,&rr3l,r3h,r3l,rr2h,rr2l);
736 
737 	  /* expoX may be maximally 2 */
738 
739 	  expm1h = rr3h;
740 	  expm1m = rr3l;
741 
742 	} else {
743 	  expm1h = rr2h;
744 	  expm1m = rr2l;
745 	}
746 
747       } else {
748 	expm1h = rr1h;
749 	expm1m = rr1l;
750       }
751 
752     } else {
753       expm1h = polyh;
754       expm1m = polyl;
755     }
756 
757     /* Rounding test */
758     TEST_AND_RETURN_RD(expm1h, expm1m, ROUNDCSTDIRECTRD);
759     {
760       expm1_direct_td(&expm1h, &expm1m, &expm1l, x, xSqHalfh, xSqHalfl, xSqh, xSql, expoX);
761 
762       ReturnRoundDownwards3(expm1h, expm1m, expm1l);
763 
764     } /* Accurate phase launched */
765 
766     /* We cannot be here, since we return in all cases before */
767   }
768 
769   /* If we are here, we can use expm1(x) = exp(x) - 1 */
770 
771   /* Range reduction - exact part: compute k as double and as int */
772 
773   xMultLog2InvMult2L = x * log2InvMult2L;
774   shiftedXMult = xMultLog2InvMult2L + shiftConst;
775   kd = shiftedXMult - shiftConst;
776   shiftedXMultdb.d = shiftedXMult;
777   k = shiftedXMultdb.i[LO];
778   M = k >> 12;
779   index1 = k & INDEXMASK1;
780   index2 = (k & INDEXMASK2) >> 6;
781 
782   /* Range reduction - part affected by error - must be redone in accurate phase */
783   Mul12(&s1,&s2,msLog2Div2Lh,kd);
784   s3 = kd * msLog2Div2Lm;
785   s4 = s2 + s3;
786   s5 = x + s1;
787   Add12Cond(rh,rm,s5,s4);
788 
789   /* Table reads - read only two double-doubles by now */
790   tbl1h = twoPowerIndex1[index1].hi;
791   tbl1m = twoPowerIndex1[index1].mi;
792   tbl2h = twoPowerIndex2[index2].hi;
793   tbl2m = twoPowerIndex2[index2].mi;
794 
795   /* Quick phase starts here */
796 
797   rhSquare = rh * rh;
798   rhC3 = quickCommonpolyC3h * rh;
799 
800   rhSquareHalf = 0.5 * rhSquare;
801   monomialCube = rhC3 * rhSquare;
802   rhFour = rhSquare * rhSquare;
803 
804   monomialFour = quickCommonpolyC4h * rhFour;
805 
806   highPoly = monomialCube + monomialFour;
807 
808   highPolyWithSquare = rhSquareHalf + highPoly;
809 
810   /* Reconstruction: integration of table values */
811 
812   Mul22(&tablesh,&tablesl,tbl1h,tbl1m,tbl2h,tbl2m);
813 
814   t8 = rm + highPolyWithSquare;
815   t9 = rh + t8;
816 
817   t10 = tablesh * t9;
818 
819   Add12(t11,t12,tablesh,t10);
820   t13 = t12 + tablesl;
821   Add12(polyTblhdb.d,polyTblmdb.d,t11,t13);
822 
823   /* Reconstruction: multiplication by 2^M */
824 
825   /* Implement the multiplication by multiplication to overcome the
826      problem of the non-representability of 2^1024 (M = 1024)
827      This case is possible if polyTblhdb.d < 1
828   */
829 
830   polyTblhdb.i[HI] += M << 20;
831   polyTblmdb.i[HI] += M << 20;
832 
833   exph = polyTblhdb.d;
834   expm = polyTblmdb.d;
835 
836   /* Substraction of 1
837 
838      Testing if the operation is necessary is more expensive than
839      performing it in any case.
840 
841      We may cancellate at most 2 bits in the subtraction for
842      arguments 1/4 <= x <= ln(2) (0.25 <= x <= 0.69)
843      We must therefore use conditional Add12s
844 
845      Since we perform a substraction, we may not have addition overflow towards +inf
846 
847   */
848 
849   Add12Cond(t1,t2,-1,exph);
850   t3 = t2 + expm;
851   Add12Cond(expm1h,expm1m,t1,t3);
852 
853 
854   /* Rounding test */
855   TEST_AND_RETURN_RD(expm1h, expm1m, ROUNDCSTCOMMONRD);
856   {
857     /* Rest of argument reduction for accurate phase */
858 
859     Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
860     t1 = x + msLog2Div2LMultKh;
861     Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
862     Add12Cond(rm,rl,t2,msLog2Div2LMultKl);
863 
864     /* Table reads for accurate phase */
865     tbl1l = twoPowerIndex1[index1].lo;
866     tbl2l = twoPowerIndex2[index2].lo;
867 
868     /* Call accurate phase */
869     expm1_common_td(&expm1h, &expm1m, &expm1l, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l, M);
870 
871     /* Final rounding */
872 
873     ReturnRoundDownwards3(expm1h, expm1m, expm1l);
874   } /* Accurate phase launched */
875 
876   /* We cannot be here since we return before in any case */
877 }
878 
expm1_ru(double x)879 double expm1_ru(double x) {
880   db_number xdb, shiftedXMultdb, polyTblhdb, polyTblmdb;
881   int xIntHi, expoX, k, M, index1, index2;
882   double highPoly, tt1h, t1h, t1l, xSqh, xSql, xSqHalfh, xSqHalfl, xCubeh, xCubel, t2h, t2l, templ, tt3h, tt3l;
883   double polyh, polyl, expm1h, expm1m, expm1l;
884   double r1h, r1l, r1t, rr1h, rr1l;
885   double r2h, r2l, r2t, rr2h, rr2l;
886   double r3h, r3l, r3t, rr3h, rr3l;
887   double xMultLog2InvMult2L, shiftedXMult, kd, s1, s2, s3, s4, s5, rh, rm, rl;
888   double rhSquare, rhC3, rhSquareHalf, monomialCube, rhFour, monomialFour;
889   double tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l;
890   double highPolyWithSquare, tablesh, tablesl, t8, t9, t10, t11, t12, t13;
891   double exph, expm, t1, t2, t3;
892   double msLog2Div2LMultKh, msLog2Div2LMultKm, msLog2Div2LMultKl;
893   double middlePoly, doublePoly;
894 
895   xdb.d = x;
896 
897   /* Strip off the sign of x for the following tests */
898 
899   xIntHi = xdb.i[HI] & 0x7fffffff;
900 
901   /* Test if we are so small that we can return (a corrected) x as correct rounding */
902   if (xIntHi < RETURNXBOUND) {
903     /* The only algebraic result is 0 for x = +/-0; in this case, we return x = +/-0
904        The truncation rest x^2/2 + x^3/6 + ... is always positive
905        but less than 1 ulp in this case, so we round by adding 1 ulp
906     */
907 
908     if (x == 0.0) return x;
909 
910     if (xdb.i[HI] & 0x80000000) {
911       /* x is negative
912 	 We add 1 ulp by subtracting 1 in long
913       */
914       xdb.l--;
915     } else {
916       /* x is positive
917 	 We add 1 ulp by adding 1 in long
918       */
919       xdb.l++;
920     }
921     return xdb.d;
922   }
923 
924 
925   /* Filter out special cases like overflow, -1 in result, infinities and NaNs
926      The filters are not sharp, we have positive arguments that flow through
927   */
928   if (xIntHi >= SIMPLEOVERFLOWBOUND) {
929     /* Test if we are +/-inf or NaN */
930     if (xIntHi >= 0x7ff00000) {
931       /* Test if NaN */
932       if (((xIntHi & 0x000fffff) | xdb.i[LO]) != 0) {
933 	/* NaN */
934 	return x+x;  /* return NaN */
935       }
936       /* Test if +inf or -inf */
937       if (xdb.i[HI] > 0) {
938 	/* +inf */
939 	return x+x;  /* return +inf */
940       }
941 
942       /* If we are here, we are -inf */
943       return -1.0;
944     }
945 
946     /* If we are here, we are overflowed or a common case that flows through */
947 
948     /* Test if we are actually overflowed */
949     if (x > OVERFLOWBOUND) {
950       return LARGEST * LARGEST;  /* return +inf and set flag */
951     }
952   }
953 
954   /* Test if we know already that we are -1.0 (+ correction depending on rounding mode) in result */
955   if (x < MINUSONEBOUND) {
956     /* Round up so we are -1.0 + 1ulp */
957     return MINUSONEPLUSONEULP;
958   }
959 
960   /* Test if we have |x| <= 1/4-1/2ulp(1/4) for knowing if we use exp(x) or approximate directly */
961 
962   if (xIntHi < DIRECTINTERVALBOUND) {
963     /* We approximate expm1 directly after a range reduction as follows
964 
965        expm1(x) = (expm1(x/2) + 2) * expm1(x/2)
966 
967        We perform the range reduction in such a way that finally |x| < 1/32
968     */
969 
970     /* Extract the exponent of |x| and add 5 (2^5 = 32) */
971     expoX = ((xIntHi & 0x7ff00000) >> 20) - (1023 - 5);
972 
973     /* If this particularily biased exponent expoX is negative, we are already less than 1/32 */
974     if (expoX >= 0) {
975       /* If we are here, we must perform range reduction */
976 
977 
978       /* We multiply x by 2^(-expoX-1) by bit manipulation
979 	 x cannot be denormalized so there is no danger
980       */
981       xdb.i[HI] += (-expoX-1) << 20;
982 
983       /* We reassign the new x and maintain xIntHi */
984 
985       xIntHi = xdb.i[HI] & 0x7fffffff;
986       x = xdb.d;
987     }
988 
989     /* Here, we have always |x| < 1/32 */
990 
991 
992     /* Double precision evaluation steps and one double-double step */
993 
994     Mul12(&xSqh,&xSql,x,x);
995 
996 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
997     middlePoly = FMA(quickDirectpolyC5h,x,quickDirectpolyC4h);
998 #else
999     middlePoly = quickDirectpolyC4h + x * quickDirectpolyC5h;
1000 #endif
1001 
1002     doublePoly = middlePoly;
1003 
1004     /* Special path: for small |x| we can truncate the polynomial */
1005 
1006     if (xIntHi > SPECIALINTERVALBOUND) {
1007 
1008 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1009       highPoly = FMA(FMA(FMA(quickDirectpolyC9h ,x,quickDirectpolyC8h),x,
1010                              quickDirectpolyC7h),x,quickDirectpolyC6h);
1011 #else
1012       highPoly = quickDirectpolyC6h + x * (quickDirectpolyC7h + x * (
1013 	         quickDirectpolyC8h + x *  quickDirectpolyC9h));
1014 #endif
1015 
1016       highPolyWithSquare = xSqh * highPoly;
1017 
1018       doublePoly = middlePoly + highPolyWithSquare;
1019 
1020     }
1021 
1022     /* Double-double evaluation steps */
1023     tt1h = x * doublePoly;
1024 
1025     xSqHalfh = 0.5 * xSqh;
1026     xSqHalfl = 0.5 * xSql;
1027     Add12(t2h,templ,x,xSqHalfh);
1028     t2l = templ + xSqHalfl;
1029 
1030     Add12(t1h,t1l,quickDirectpolyC3h,tt1h);
1031     Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
1032     Mul22(&tt3h,&tt3l,xCubeh,xCubel,t1h,t1l);
1033 
1034     Add22(&polyh,&polyl,t2h,t2l,tt3h,tt3l);
1035 
1036     /* Reconstruction */
1037 
1038     /* If we have not performed any range reduction, we have no reconstruction to do */
1039     if (expoX >= 0) {
1040       /* If we are here, we must perform reconstruction */
1041 
1042       /* First reconstruction step */
1043       Add12(r1h,r1t,2,polyh);
1044       r1l = r1t + polyl;
1045       Mul22(&rr1h,&rr1l,r1h,r1l,polyh,polyl);
1046 
1047       if (expoX >= 1) {
1048 
1049 	/* Second reconstruction step */
1050 	Add12(r2h,r2t,2,rr1h);
1051 	r2l = r2t + rr1l;
1052 	Mul22(&rr2h,&rr2l,r2h,r2l,rr1h,rr1l);
1053 
1054 	if (expoX >= 2) {
1055 
1056 	  /* Third reconstruction step */
1057 	  Add12(r3h,r3t,2,rr2h);
1058 	  r3l = r3t + rr2l;
1059 	  Mul22(&rr3h,&rr3l,r3h,r3l,rr2h,rr2l);
1060 
1061 	  /* expoX may be maximally 2 */
1062 
1063 	  expm1h = rr3h;
1064 	  expm1m = rr3l;
1065 
1066 	} else {
1067 	  expm1h = rr2h;
1068 	  expm1m = rr2l;
1069 	}
1070 
1071       } else {
1072 	expm1h = rr1h;
1073 	expm1m = rr1l;
1074       }
1075 
1076     } else {
1077       expm1h = polyh;
1078       expm1m = polyl;
1079     }
1080 
1081     /* Rounding test */
1082     TEST_AND_RETURN_RU(expm1h, expm1m, ROUNDCSTDIRECTRD);
1083     {
1084       expm1_direct_td(&expm1h, &expm1m, &expm1l, x, xSqHalfh, xSqHalfl, xSqh, xSql, expoX);
1085 
1086       ReturnRoundUpwards3(expm1h, expm1m, expm1l);
1087 
1088     } /* Accurate phase launched */
1089 
1090     /* We cannot be here, since we return in all cases before */
1091   }
1092 
1093   /* If we are here, we can use expm1(x) = exp(x) - 1 */
1094 
1095   /* Range reduction - exact part: compute k as double and as int */
1096 
1097   xMultLog2InvMult2L = x * log2InvMult2L;
1098   shiftedXMult = xMultLog2InvMult2L + shiftConst;
1099   kd = shiftedXMult - shiftConst;
1100   shiftedXMultdb.d = shiftedXMult;
1101   k = shiftedXMultdb.i[LO];
1102   M = k >> 12;
1103   index1 = k & INDEXMASK1;
1104   index2 = (k & INDEXMASK2) >> 6;
1105 
1106   /* Range reduction - part affected by error - must be redone in accurate phase */
1107   Mul12(&s1,&s2,msLog2Div2Lh,kd);
1108   s3 = kd * msLog2Div2Lm;
1109   s4 = s2 + s3;
1110   s5 = x + s1;
1111   Add12Cond(rh,rm,s5,s4);
1112 
1113   /* Table reads - read only two double-doubles by now */
1114   tbl1h = twoPowerIndex1[index1].hi;
1115   tbl1m = twoPowerIndex1[index1].mi;
1116   tbl2h = twoPowerIndex2[index2].hi;
1117   tbl2m = twoPowerIndex2[index2].mi;
1118 
1119   /* Quick phase starts here */
1120 
1121   rhSquare = rh * rh;
1122   rhC3 = quickCommonpolyC3h * rh;
1123 
1124   rhSquareHalf = 0.5 * rhSquare;
1125   monomialCube = rhC3 * rhSquare;
1126   rhFour = rhSquare * rhSquare;
1127 
1128   monomialFour = quickCommonpolyC4h * rhFour;
1129 
1130   highPoly = monomialCube + monomialFour;
1131 
1132   highPolyWithSquare = rhSquareHalf + highPoly;
1133 
1134   /* Reconstruction: integration of table values */
1135 
1136   Mul22(&tablesh,&tablesl,tbl1h,tbl1m,tbl2h,tbl2m);
1137 
1138   t8 = rm + highPolyWithSquare;
1139   t9 = rh + t8;
1140 
1141   t10 = tablesh * t9;
1142 
1143   Add12(t11,t12,tablesh,t10);
1144   t13 = t12 + tablesl;
1145   Add12(polyTblhdb.d,polyTblmdb.d,t11,t13);
1146 
1147   /* Reconstruction: multiplication by 2^M */
1148 
1149   /* Implement the multiplication by multiplication to overcome the
1150      problem of the non-representability of 2^1024 (M = 1024)
1151      This case is possible if polyTblhdb.d < 1
1152   */
1153 
1154   polyTblhdb.i[HI] += M << 20;
1155   polyTblmdb.i[HI] += M << 20;
1156 
1157   exph = polyTblhdb.d;
1158   expm = polyTblmdb.d;
1159 
1160   /* Substraction of 1
1161 
1162      Testing if the operation is necessary is more expensive than
1163      performing it in any case.
1164 
1165      We may cancellate at most 2 bits in the subtraction for
1166      arguments 1/4 <= x <= ln(2) (0.25 <= x <= 0.69)
1167      We must therefore use conditional Add12s
1168 
1169      Since we perform a substraction, we may not have addition overflow towards +inf
1170 
1171   */
1172 
1173   Add12Cond(t1,t2,-1,exph);
1174   t3 = t2 + expm;
1175   Add12Cond(expm1h,expm1m,t1,t3);
1176 
1177 
1178   /* Rounding test */
1179   TEST_AND_RETURN_RU(expm1h, expm1m, ROUNDCSTCOMMONRD);
1180   {
1181     /* Rest of argument reduction for accurate phase */
1182 
1183     Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
1184     t1 = x + msLog2Div2LMultKh;
1185     Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
1186     Add12Cond(rm,rl,t2,msLog2Div2LMultKl);
1187 
1188     /* Table reads for accurate phase */
1189     tbl1l = twoPowerIndex1[index1].lo;
1190     tbl2l = twoPowerIndex2[index2].lo;
1191 
1192     /* Call accurate phase */
1193     expm1_common_td(&expm1h, &expm1m, &expm1l, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l, M);
1194 
1195     /* Final rounding */
1196 
1197     ReturnRoundUpwards3(expm1h, expm1m, expm1l);
1198   } /* Accurate phase launched */
1199 
1200   /* We cannot be here since we return before in any case */
1201 }
1202 
expm1_rz(double x)1203 double expm1_rz(double x) {
1204   db_number xdb, shiftedXMultdb, polyTblhdb, polyTblmdb;
1205   int xIntHi, expoX, k, M, index1, index2;
1206   double highPoly, tt1h, t1h, t1l, xSqh, xSql, xSqHalfh, xSqHalfl, xCubeh, xCubel, t2h, t2l, templ, tt3h, tt3l;
1207   double polyh, polyl, expm1h, expm1m, expm1l;
1208   double r1h, r1l, r1t, rr1h, rr1l;
1209   double r2h, r2l, r2t, rr2h, rr2l;
1210   double r3h, r3l, r3t, rr3h, rr3l;
1211   double xMultLog2InvMult2L, shiftedXMult, kd, s1, s2, s3, s4, s5, rh, rm, rl;
1212   double rhSquare, rhC3, rhSquareHalf, monomialCube, rhFour, monomialFour;
1213   double tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l;
1214   double highPolyWithSquare, tablesh, tablesl, t8, t9, t10, t11, t12, t13;
1215   double exph, expm, t1, t2, t3;
1216   double msLog2Div2LMultKh, msLog2Div2LMultKm, msLog2Div2LMultKl;
1217   double middlePoly, doublePoly;
1218 
1219   xdb.d = x;
1220 
1221   /* Strip off the sign of x for the following tests */
1222 
1223   xIntHi = xdb.i[HI] & 0x7fffffff;
1224 
1225   /* Test if we are so small that we can return (a corrected) x as correct rounding */
1226   if (xIntHi < RETURNXBOUND) {
1227     /* The only algebraic result is 0 for x = +/- 0; in this case, we can return x = +/- 0
1228        expm1 is positive for positive x, negative for negative x
1229        The truncation rest x^2/2 + x^3/6 + ... is always positive
1230        but less than 1 ulp in this case, so we round as follows:
1231 
1232        - x is positive => expm1 is positive => round downwards => truncate by returning x
1233        - x is negative => expm1 is negative => round upwards => add 1 ulp
1234     */
1235 
1236     if (x == 0.0) return x;
1237 
1238     if (xdb.i[HI] & 0x80000000) {
1239       /* x is negative
1240 	 We add 1 ulp by subtracting 1 in long
1241       */
1242       xdb.l--;
1243       return xdb.d;
1244     } else {
1245       /* x is positive
1246 	 We do nothing (see above)
1247       */
1248       return x;
1249     }
1250   }
1251 
1252 
1253   /* Filter out special cases like overflow, -1 in result, infinities and NaNs
1254      The filters are not sharp, we have positive arguments that flow through
1255   */
1256   if (xIntHi >= SIMPLEOVERFLOWBOUND) {
1257     /* Test if we are +/-inf or NaN */
1258     if (xIntHi >= 0x7ff00000) {
1259       /* Test if NaN */
1260       if (((xIntHi & 0x000fffff) | xdb.i[LO]) != 0) {
1261 	/* NaN */
1262 	return x+x;  /* return NaN */
1263       }
1264       /* Test if +inf or -inf */
1265       if (xdb.i[HI] > 0) {
1266 	/* +inf */
1267 	return x+x;  /* return +inf */
1268       }
1269 
1270       /* If we are here, we are -inf */
1271       return -1.0;
1272     }
1273 
1274     /* If we are here, we are overflowed or a common case that flows through */
1275 
1276     /* Test if we are actually overflowed */
1277     if (x > OVERFLOWBOUND) {
1278       /* We would be overflowed but as we are rounding towards zero, i.e. downwards,
1279 	 the nearest number lesser than the exact result is the greatest
1280 	 normal. In any case, we must raise the inexact flag.
1281       */
1282       return LARGEST * (1.0 + SMALLEST);
1283     }
1284   }
1285 
1286   /* Test if we know already that we are -1.0 (+ correction depending on rounding mode) in result */
1287   if (x < MINUSONEBOUND) {
1288     /* We round towards zero, i.e. upwards, so we return -1.0+1ulp */
1289     return MINUSONEPLUSONEULP;
1290   }
1291 
1292   /* Test if we have |x| <= 1/4-1/2ulp(1/4) for knowing if we use exp(x) or approximate directly */
1293 
1294   if (xIntHi < DIRECTINTERVALBOUND) {
1295 
1296     /* We approximate expm1 directly after a range reduction as follows
1297 
1298        expm1(x) = (expm1(x/2) + 2) * expm1(x/2)
1299 
1300        We perform the range reduction in such a way that finally |x| < 1/32
1301     */
1302 
1303     /* Extract the exponent of |x| and add 5 (2^5 = 32) */
1304     expoX = ((xIntHi & 0x7ff00000) >> 20) - (1023 - 5);
1305 
1306     /* If this particularily biased exponent expoX is negative, we are already less than 1/32 */
1307     if (expoX >= 0) {
1308       /* If we are here, we must perform range reduction */
1309 
1310 
1311       /* We multiply x by 2^(-expoX-1) by bit manipulation
1312 	 x cannot be denormalized so there is no danger
1313       */
1314       xdb.i[HI] += (-expoX-1) << 20;
1315 
1316       /* We reassign the new x and maintain xIntHi */
1317 
1318       xIntHi = xdb.i[HI] & 0x7fffffff;
1319       x = xdb.d;
1320     }
1321     /* Here, we have always |x| < 1/32 */
1322 
1323 
1324     /* Double precision evaluation steps and one double-double step */
1325 
1326     Mul12(&xSqh,&xSql,x,x);
1327 
1328 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1329     middlePoly = FMA(quickDirectpolyC5h,x,quickDirectpolyC4h);
1330 #else
1331     middlePoly = quickDirectpolyC4h + x * quickDirectpolyC5h;
1332 #endif
1333 
1334     doublePoly = middlePoly;
1335 
1336     /* Special path: for small |x| we can truncate the polynomial */
1337 
1338     if (xIntHi > SPECIALINTERVALBOUND) {
1339 
1340 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1341       highPoly = FMA(FMA(FMA(quickDirectpolyC9h ,x,quickDirectpolyC8h),x,
1342                              quickDirectpolyC7h),x,quickDirectpolyC6h);
1343 #else
1344       highPoly = quickDirectpolyC6h + x * (quickDirectpolyC7h + x * (
1345 	         quickDirectpolyC8h + x *  quickDirectpolyC9h));
1346 #endif
1347 
1348       highPolyWithSquare = xSqh * highPoly;
1349 
1350       doublePoly = middlePoly + highPolyWithSquare;
1351 
1352     }
1353 
1354     /* Double-double evaluation steps */
1355     tt1h = x * doublePoly;
1356 
1357     xSqHalfh = 0.5 * xSqh;
1358     xSqHalfl = 0.5 * xSql;
1359     Add12(t2h,templ,x,xSqHalfh);
1360     t2l = templ + xSqHalfl;
1361 
1362     Add12(t1h,t1l,quickDirectpolyC3h,tt1h);
1363     Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
1364     Mul22(&tt3h,&tt3l,xCubeh,xCubel,t1h,t1l);
1365 
1366     Add22(&polyh,&polyl,t2h,t2l,tt3h,tt3l);
1367 
1368     /* Reconstruction */
1369 
1370     /* If we have not performed any range reduction, we have no reconstruction to do */
1371     if (expoX >= 0) {
1372       /* If we are here, we must perform reconstruction */
1373 
1374       /* First reconstruction step */
1375       Add12(r1h,r1t,2,polyh);
1376       r1l = r1t + polyl;
1377       Mul22(&rr1h,&rr1l,r1h,r1l,polyh,polyl);
1378 
1379       if (expoX >= 1) {
1380 
1381 	/* Second reconstruction step */
1382 	Add12(r2h,r2t,2,rr1h);
1383 	r2l = r2t + rr1l;
1384 	Mul22(&rr2h,&rr2l,r2h,r2l,rr1h,rr1l);
1385 
1386 	if (expoX >= 2) {
1387 
1388 	  /* Third reconstruction step */
1389 	  Add12(r3h,r3t,2,rr2h);
1390 	  r3l = r3t + rr2l;
1391 	  Mul22(&rr3h,&rr3l,r3h,r3l,rr2h,rr2l);
1392 
1393 	  /* expoX may be maximally 2 */
1394 
1395 	  expm1h = rr3h;
1396 	  expm1m = rr3l;
1397 
1398 	} else {
1399 	  expm1h = rr2h;
1400 	  expm1m = rr2l;
1401 	}
1402 
1403       } else {
1404 	expm1h = rr1h;
1405 	expm1m = rr1l;
1406       }
1407 
1408     } else {
1409       expm1h = polyh;
1410       expm1m = polyl;
1411     }
1412 
1413     /* Rounding test */
1414     TEST_AND_RETURN_RZ(expm1h, expm1m, ROUNDCSTDIRECTRD);
1415     {
1416       expm1_direct_td(&expm1h, &expm1m, &expm1l, x, xSqHalfh, xSqHalfl, xSqh, xSql, expoX);
1417 
1418       ReturnRoundTowardsZero3(expm1h, expm1m, expm1l);
1419 
1420     } /* Accurate phase launched */
1421 
1422     /* We cannot be here, since we return in all cases before */
1423   }
1424 
1425   /* If we are here, we can use expm1(x) = exp(x) - 1 */
1426 
1427   /* Range reduction - exact part: compute k as double and as int */
1428 
1429   xMultLog2InvMult2L = x * log2InvMult2L;
1430   shiftedXMult = xMultLog2InvMult2L + shiftConst;
1431   kd = shiftedXMult - shiftConst;
1432   shiftedXMultdb.d = shiftedXMult;
1433   k = shiftedXMultdb.i[LO];
1434   M = k >> 12;
1435   index1 = k & INDEXMASK1;
1436   index2 = (k & INDEXMASK2) >> 6;
1437 
1438   /* Range reduction - part affected by error - must be redone in accurate phase */
1439   Mul12(&s1,&s2,msLog2Div2Lh,kd);
1440   s3 = kd * msLog2Div2Lm;
1441   s4 = s2 + s3;
1442   s5 = x + s1;
1443   Add12Cond(rh,rm,s5,s4);
1444 
1445   /* Table reads - read only two double-doubles by now */
1446   tbl1h = twoPowerIndex1[index1].hi;
1447   tbl1m = twoPowerIndex1[index1].mi;
1448   tbl2h = twoPowerIndex2[index2].hi;
1449   tbl2m = twoPowerIndex2[index2].mi;
1450 
1451   /* Quick phase starts here */
1452 
1453   rhSquare = rh * rh;
1454   rhC3 = quickCommonpolyC3h * rh;
1455 
1456   rhSquareHalf = 0.5 * rhSquare;
1457   monomialCube = rhC3 * rhSquare;
1458   rhFour = rhSquare * rhSquare;
1459 
1460   monomialFour = quickCommonpolyC4h * rhFour;
1461 
1462   highPoly = monomialCube + monomialFour;
1463 
1464   highPolyWithSquare = rhSquareHalf + highPoly;
1465 
1466   /* Reconstruction: integration of table values */
1467 
1468   Mul22(&tablesh,&tablesl,tbl1h,tbl1m,tbl2h,tbl2m);
1469 
1470   t8 = rm + highPolyWithSquare;
1471   t9 = rh + t8;
1472 
1473   t10 = tablesh * t9;
1474 
1475   Add12(t11,t12,tablesh,t10);
1476   t13 = t12 + tablesl;
1477   Add12(polyTblhdb.d,polyTblmdb.d,t11,t13);
1478 
1479   /* Reconstruction: multiplication by 2^M */
1480 
1481   /* Implement the multiplication by multiplication to overcome the
1482      problem of the non-representability of 2^1024 (M = 1024)
1483      This case is possible if polyTblhdb.d < 1
1484   */
1485 
1486   polyTblhdb.i[HI] += M << 20;
1487   polyTblmdb.i[HI] += M << 20;
1488 
1489   exph = polyTblhdb.d;
1490   expm = polyTblmdb.d;
1491 
1492   /* Substraction of 1
1493 
1494      Testing if the operation is necessary is more expensive than
1495      performing it in any case.
1496 
1497      We may cancellate at most 2 bits in the subtraction for
1498      arguments 1/4 <= x <= ln(2) (0.25 <= x <= 0.69)
1499      We must therefore use conditional Add12s
1500 
1501      Since we perform a substraction, we may not have addition overflow towards +inf
1502 
1503   */
1504 
1505   Add12Cond(t1,t2,-1,exph);
1506   t3 = t2 + expm;
1507   Add12Cond(expm1h,expm1m,t1,t3);
1508 
1509 
1510   /* Rounding test */
1511   TEST_AND_RETURN_RZ(expm1h, expm1m, ROUNDCSTCOMMONRD);
1512   {
1513 
1514     /* Rest of argument reduction for accurate phase */
1515 
1516     Mul133(&msLog2Div2LMultKh,&msLog2Div2LMultKm,&msLog2Div2LMultKl,kd,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
1517     t1 = x + msLog2Div2LMultKh;
1518     Add12Cond(rh,t2,t1,msLog2Div2LMultKm);
1519     Add12Cond(rm,rl,t2,msLog2Div2LMultKl);
1520 
1521     /* Table reads for accurate phase */
1522     tbl1l = twoPowerIndex1[index1].lo;
1523     tbl2l = twoPowerIndex2[index2].lo;
1524 
1525     /* Call accurate phase */
1526     expm1_common_td(&expm1h, &expm1m, &expm1l, rh, rm, rl, tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l, M);
1527 
1528     /* Final rounding */
1529 
1530     ReturnRoundTowardsZero3(expm1h, expm1m, expm1l);
1531   } /* Accurate phase launched */
1532 
1533   /* We cannot be here since we return before in any case */
1534 }
1535 
1536 #ifdef BUILD_INTERVAL_FUNCTIONS
j_expm1(interval x)1537 interval j_expm1(interval x)
1538 {
1539   interval res;
1540   db_number xdb_inf, shiftedXMultdb_inf, polyTblhdb_inf, polyTblmdb_inf;
1541   int xIntHi_inf, expoX_inf, k_inf, M_inf, index1_inf, index2_inf;
1542   double x_inf, x_sup;
1543   double res_inf,res_sup;
1544   double restemp_inf, restemp_sup;
1545   double highPoly_inf, tt1h_inf, t1h_inf, t1l_inf, xSqh_inf, xSql_inf, xSqHalfh_inf, xSqHalfl_inf, xCubeh_inf, xCubel_inf, t2h_inf, t2l_inf, templ_inf, tt3h_inf, tt3l_inf;
1546   double polyh_inf, polyl_inf, expm1h_inf, expm1m_inf, expm1l_inf;
1547   double r1h_inf, r1l_inf, r1t_inf, rr1h_inf, rr1l_inf;
1548   double r2h_inf, r2l_inf, r2t_inf, rr2h_inf, rr2l_inf;
1549   double r3h_inf, r3l_inf, r3t_inf, rr3h_inf, rr3l_inf;
1550   double xMultLog2InvMult2L_inf, shiftedXMult_inf, kd_inf, s1_inf, s2_inf, s3_inf, s4_inf, s5_inf, rh_inf, rm_inf, rl_inf;
1551   double rhSquare_inf, rhC3_inf, rhSquareHalf_inf, monomialCube_inf, rhFour_inf, monomialFour_inf;
1552   double tbl1h_inf, tbl1m_inf, tbl1l_inf, tbl2h_inf, tbl2m_inf, tbl2l_inf;
1553   double highPolyWithSquare_inf, tablesh_inf, tablesl_inf, t8_inf, t9_inf, t10_inf, t11_inf, t12_inf, t13_inf;
1554   double exph_inf, expm_inf, t1_inf, t2_inf, t3_inf;
1555   double msLog2Div2LMultKh_inf, msLog2Div2LMultKm_inf, msLog2Div2LMultKl_inf;
1556   double middlePoly_inf, doublePoly_inf;
1557   int roundable;
1558   int infDone, supDone;
1559   infDone=0; supDone=0;
1560 
1561   db_number xdb_sup, shiftedXMultdb_sup, polyTblhdb_sup, polyTblmdb_sup;
1562   int xIntHi_sup, expoX_sup, k_sup, M_sup, index1_sup, index2_sup;
1563   double highPoly_sup, tt1h_sup, t1h_sup, t1l_sup, xSqh_sup, xSql_sup, xSqHalfh_sup, xSqHalfl_sup, xCubeh_sup, xCubel_sup, t2h_sup, t2l_sup, templ_sup, tt3h_sup, tt3l_sup;
1564   double polyh_sup, polyl_sup, expm1h_sup, expm1m_sup, expm1l_sup;
1565   double r1h_sup, r1l_sup, r1t_sup, rr1h_sup, rr1l_sup;
1566   double r2h_sup, r2l_sup, r2t_sup, rr2h_sup, rr2l_sup;
1567   double r3h_sup, r3l_sup, r3t_sup, rr3h_sup, rr3l_sup;
1568   double xMultLog2InvMult2L_sup, shiftedXMult_sup, kd_sup, s1_sup, s2_sup, s3_sup, s4_sup, s5_sup, rh_sup, rm_sup, rl_sup;
1569   double rhSquare_sup, rhC3_sup, rhSquareHalf_sup, monomialCube_sup, rhFour_sup, monomialFour_sup;
1570   double tbl1h_sup, tbl1m_sup, tbl1l_sup, tbl2h_sup, tbl2m_sup, tbl2l_sup;
1571   double highPolyWithSquare_sup, tablesh_sup, tablesl_sup, t8_sup, t9_sup, t10_sup, t11_sup, t12_sup, t13_sup;
1572   double exph_sup, expm_sup, t1_sup, t2_sup, t3_sup;
1573   double msLog2Div2LMultKh_sup, msLog2Div2LMultKm_sup, msLog2Div2LMultKl_sup;
1574   double middlePoly_sup, doublePoly_sup;
1575 
1576   x_inf=LOW(x);
1577   x_sup=UP(x);
1578 
1579   xdb_inf.d = x_inf;
1580   xdb_sup.d = x_sup;
1581 
1582 
1583   /* Strip off the sign of x for the following tests */
1584 
1585   xIntHi_inf = xdb_inf.i[HI] & 0x7fffffff;
1586   xIntHi_sup = xdb_sup.i[HI] & 0x7fffffff;
1587 
1588   /* Test if we are so small that we can return (a corrected) x as correct rounding */
1589 
1590   if ( __builtin_expect(
1591       (xIntHi_inf < RETURNXBOUND) ||
1592       ((xIntHi_inf >= SIMPLEOVERFLOWBOUND) && (xIntHi_inf >= 0x7ff00000)) ||
1593       ((xIntHi_inf >= SIMPLEOVERFLOWBOUND) && (xIntHi_inf >= 0x7ff00000) && (((xIntHi_inf & 0x000fffff) | xdb_inf.i[LO]) != 0)) ||
1594       ((xIntHi_inf >= SIMPLEOVERFLOWBOUND) && (xIntHi_inf >= 0x7ff00000) && (xdb_inf.i[HI] > 0)) ||
1595       ((xIntHi_inf >= SIMPLEOVERFLOWBOUND) && (x_inf > OVERFLOWBOUND)) ||
1596       (x_inf < MINUSONEBOUND) ||
1597       (xIntHi_sup < RETURNXBOUND) ||
1598       ((xIntHi_sup < RETURNXBOUND) && (x_sup == 0.0)) ||
1599       ((xIntHi_sup >= SIMPLEOVERFLOWBOUND) && (xIntHi_sup >= 0x7ff00000)) ||
1600       ((xIntHi_sup >= SIMPLEOVERFLOWBOUND) && (xIntHi_sup >= 0x7ff00000) && (((xIntHi_sup & 0x000fffff) | xdb_sup.i[LO]) != 0)) ||
1601       ((xIntHi_sup >= SIMPLEOVERFLOWBOUND) && (xIntHi_sup >= 0x7ff00000) && (xdb_sup.i[HI] > 0)) ||
1602       ((xIntHi_sup >= SIMPLEOVERFLOWBOUND) && (x_sup > OVERFLOWBOUND)) ||
1603       (x_sup < MINUSONEBOUND)
1604      ,FALSE))
1605   {
1606     ASSIGN_LOW(res,expm1_rd(LOW(x)));
1607     ASSIGN_UP(res,expm1_ru(UP(x)));
1608     return res;
1609   }
1610 
1611   if (__builtin_expect((xIntHi_inf < DIRECTINTERVALBOUND) && (xIntHi_sup < DIRECTINTERVALBOUND),TRUE)) {
1612     /* We approximate expm1 directly after a range reduction as follows
1613 
1614        expm1(x) = (expm1(x/2) + 2) * expm1(x/2)
1615 
1616        We perform the range reduction in such a way that finally |x| < 1/32
1617     */
1618 
1619     /* Extract the exponent of |x| and add 5 (2^5 = 32) */
1620     expoX_inf = ((xIntHi_inf & 0x7ff00000) >> 20) - (1023 - 5);
1621     expoX_sup = ((xIntHi_sup & 0x7ff00000) >> 20) - (1023 - 5);
1622 
1623     /* If this particularily biased exponent expoX is negative, we are already less than 1/32 */
1624     if (expoX_inf >= 0) {
1625       /* If we are here, we must perform range reduction */
1626 
1627 
1628       /* We multiply x by 2^(-expoX-1) by bit manipulation
1629 	 x cannot be denormalized so there is no danger
1630       */
1631       xdb_inf.i[HI] += (-expoX_inf-1) << 20;
1632 
1633       /* We reassign the new x and maintain xIntHi */
1634 
1635       xIntHi_inf = xdb_inf.i[HI] & 0x7fffffff;
1636       x_inf = xdb_inf.d;
1637     }
1638 
1639     if (expoX_sup >= 0) {
1640       /* If we are here, we must perform range reduction */
1641 
1642 
1643       /* We multiply x by 2^(-expoX-1) by bit manipulation
1644 	 x cannot be denormalized so there is no danger
1645       */
1646       xdb_sup.i[HI] += (-expoX_sup-1) << 20;
1647 
1648       /* We reassign the new x and maintain xIntHi */
1649 
1650       xIntHi_sup = xdb_sup.i[HI] & 0x7fffffff;
1651       x_sup = xdb_sup.d;
1652     }
1653 
1654     /* Here, we have always |x| < 1/32 */
1655 
1656 
1657     /* Double precision evaluation steps and one double-double step */
1658 
1659     Mul12(&xSqh_inf,&xSql_inf,x_inf,x_inf);
1660     Mul12(&xSqh_sup,&xSql_sup,x_sup,x_sup);
1661 
1662 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1663     middlePoly_inf = FMA(quickDirectpolyC5h,x_inf,quickDirectpolyC4h);
1664     middlePoly_sup = FMA(quickDirectpolyC5h,x_sup,quickDirectpolyC4h);
1665 #else
1666     middlePoly_inf = quickDirectpolyC4h + x_inf * quickDirectpolyC5h;
1667     middlePoly_sup = quickDirectpolyC4h + x_sup * quickDirectpolyC5h;
1668 #endif
1669 
1670     doublePoly_inf = middlePoly_inf;
1671     doublePoly_sup = middlePoly_sup;
1672 
1673     /* Special path: for small |x| we can truncate the polynomial */
1674 
1675     if (xIntHi_inf > SPECIALINTERVALBOUND) {
1676 
1677 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1678       highPoly_inf = FMA(FMA(FMA(quickDirectpolyC9h ,x_inf,quickDirectpolyC8h),x_inf,
1679                              quickDirectpolyC7h),x_inf,quickDirectpolyC6h);
1680 #else
1681       highPoly_inf = quickDirectpolyC6h + x_inf * (quickDirectpolyC7h + x_inf * (
1682 	         quickDirectpolyC8h + x_inf *  quickDirectpolyC9h));
1683 #endif
1684 
1685       highPolyWithSquare_inf = xSqh_inf * highPoly_inf;
1686 
1687       doublePoly_inf = middlePoly_inf + highPolyWithSquare_inf;
1688 
1689     }
1690     if (xIntHi_sup > SPECIALINTERVALBOUND) {
1691 
1692 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1693       highPoly_sup = FMA(FMA(FMA(quickDirectpolyC9h ,x_sup,quickDirectpolyC8h),x_sup,
1694                              quickDirectpolyC7h),x_sup,quickDirectpolyC6h);
1695 #else
1696       highPoly_sup = quickDirectpolyC6h + x_sup * (quickDirectpolyC7h + x_sup * (
1697 	         quickDirectpolyC8h + x_sup *  quickDirectpolyC9h));
1698 #endif
1699 
1700       highPolyWithSquare_sup = xSqh_sup * highPoly_sup;
1701 
1702       doublePoly_sup = middlePoly_sup + highPolyWithSquare_sup;
1703 
1704     }
1705 
1706     /* Double-double evaluation steps */
1707     tt1h_inf = x_inf * doublePoly_inf;
1708     tt1h_sup = x_sup * doublePoly_sup;
1709 
1710     xSqHalfh_inf = 0.5 * xSqh_inf;
1711     xSqHalfh_sup = 0.5 * xSqh_sup;
1712     xSqHalfl_inf = 0.5 * xSql_inf;
1713     xSqHalfl_sup = 0.5 * xSql_sup;
1714     Add12(t2h_inf,templ_inf,x_inf,xSqHalfh_inf);
1715     Add12(t2h_sup,templ_sup,x_sup,xSqHalfh_sup);
1716     t2l_inf = templ_inf + xSqHalfl_inf;
1717     t2l_sup = templ_sup + xSqHalfl_sup;
1718 
1719     Add12(t1h_inf,t1l_inf,quickDirectpolyC3h,tt1h_inf);
1720     Add12(t1h_sup,t1l_sup,quickDirectpolyC3h,tt1h_sup);
1721     Mul122(&xCubeh_inf,&xCubel_inf,x_inf,xSqh_inf,xSql_inf);
1722     Mul122(&xCubeh_sup,&xCubel_sup,x_sup,xSqh_sup,xSql_sup);
1723     Mul22(&tt3h_inf,&tt3l_inf,xCubeh_inf,xCubel_inf,t1h_inf,t1l_inf);
1724     Mul22(&tt3h_sup,&tt3l_sup,xCubeh_sup,xCubel_sup,t1h_sup,t1l_sup);
1725 
1726     Add22(&polyh_inf,&polyl_inf,t2h_inf,t2l_inf,tt3h_inf,tt3l_inf);
1727     Add22(&polyh_sup,&polyl_sup,t2h_sup,t2l_sup,tt3h_sup,tt3l_sup);
1728 
1729     /* Reconstruction */
1730 
1731     /* If we have not performed any range reduction, we have no reconstruction to do */
1732     if (expoX_inf >= 0) {
1733       /* If we are here, we must perform reconstruction */
1734 
1735       /* First reconstruction step */
1736       Add12(r1h_inf,r1t_inf,2,polyh_inf);
1737       r1l_inf = r1t_inf + polyl_inf;
1738       Mul22(&rr1h_inf,&rr1l_inf,r1h_inf,r1l_inf,polyh_inf,polyl_inf);
1739 
1740       if (expoX_inf >= 1) {
1741 
1742 	/* Second reconstruction step */
1743 	Add12(r2h_inf,r2t_inf,2,rr1h_inf);
1744 	r2l_inf = r2t_inf + rr1l_inf;
1745 	Mul22(&rr2h_inf,&rr2l_inf,r2h_inf,r2l_inf,rr1h_inf,rr1l_inf);
1746 
1747 	if (expoX_inf >= 2) {
1748 
1749 	  /* Third reconstruction step */
1750 	  Add12(r3h_inf,r3t_inf,2,rr2h_inf);
1751 	  r3l_inf = r3t_inf + rr2l_inf;
1752 	  Mul22(&rr3h_inf,&rr3l_inf,r3h_inf,r3l_inf,rr2h_inf,rr2l_inf);
1753 
1754 	  /* expoX may be maximally 2 */
1755 
1756 	  expm1h_inf = rr3h_inf;
1757 	  expm1m_inf = rr3l_inf;
1758 
1759 	} else {
1760 	  expm1h_inf = rr2h_inf;
1761 	  expm1m_inf = rr2l_inf;
1762 	}
1763 
1764       } else {
1765 	expm1h_inf = rr1h_inf;
1766 	expm1m_inf = rr1l_inf;
1767       }
1768 
1769     } else {
1770       expm1h_inf = polyh_inf;
1771       expm1m_inf = polyl_inf;
1772     }
1773     if (expoX_sup >= 0) {
1774       /* If we are here, we must perform reconstruction */
1775 
1776       /* First reconstruction step */
1777       Add12(r1h_sup,r1t_sup,2,polyh_sup);
1778       r1l_sup = r1t_sup + polyl_sup;
1779       Mul22(&rr1h_sup,&rr1l_sup,r1h_sup,r1l_sup,polyh_sup,polyl_sup);
1780 
1781       if (expoX_sup >= 1) {
1782 
1783 	/* Second reconstruction step */
1784 	Add12(r2h_sup,r2t_sup,2,rr1h_sup);
1785 	r2l_sup = r2t_sup + rr1l_sup;
1786 	Mul22(&rr2h_sup,&rr2l_sup,r2h_sup,r2l_sup,rr1h_sup,rr1l_sup);
1787 
1788 	if (expoX_sup >= 2) {
1789 
1790 	  /* Third reconstruction step */
1791 	  Add12(r3h_sup,r3t_sup,2,rr2h_sup);
1792 	  r3l_sup = r3t_sup + rr2l_sup;
1793 	  Mul22(&rr3h_sup,&rr3l_sup,r3h_sup,r3l_sup,rr2h_sup,rr2l_sup);
1794 
1795 	  /* expoX may be maximally 2 */
1796 
1797 	  expm1h_sup = rr3h_sup;
1798 	  expm1m_sup = rr3l_sup;
1799 
1800 	} else {
1801 	  expm1h_sup = rr2h_sup;
1802 	  expm1m_sup = rr2l_sup;
1803 	}
1804 
1805       } else {
1806 	expm1h_sup = rr1h_sup;
1807 	expm1m_sup = rr1l_sup;
1808       }
1809 
1810     } else {
1811       expm1h_sup = polyh_sup;
1812       expm1m_sup = polyl_sup;
1813     }
1814 
1815 
1816     /* Rounding test */
1817     TEST_AND_COPY_RDRU_EXPM1(roundable,restemp_inf,expm1h_inf, expm1m_inf, restemp_sup,expm1h_sup, expm1m_sup, ROUNDCSTDIRECTRD);
1818     if((roundable==2) || (roundable==0))
1819     {
1820       expm1_direct_td(&expm1h_inf, &expm1m_inf, &expm1l_inf, x_inf, xSqHalfh_inf, xSqHalfl_inf, xSqh_inf, xSql_inf, expoX_inf);
1821 
1822       RoundDownwards3(&restemp_inf,expm1h_inf, expm1m_inf, expm1l_inf);
1823 
1824     } /* Accurate phase launched */
1825     if((roundable==1) || (roundable==0))
1826     {
1827       expm1_direct_td(&expm1h_sup, &expm1m_sup, &expm1l_sup, x_sup, xSqHalfh_sup, xSqHalfl_sup, xSqh_sup, xSql_sup, expoX_sup);
1828 
1829       RoundUpwards3(&restemp_sup,expm1h_sup, expm1m_sup, expm1l_sup);
1830 
1831     } /* Accurate phase launched */
1832     ASSIGN_LOW(res,restemp_inf);
1833     ASSIGN_UP(res,restemp_sup);
1834     return res;
1835   }
1836 
1837 
1838   /* Test if we have |x| <= 1/4-1/2ulp(1/4) for knowing if we use exp(x) or approximate directly */
1839 
1840   if (xIntHi_inf < DIRECTINTERVALBOUND) {
1841     /* We approximate expm1 directly after a range reduction as follows
1842 
1843        expm1(x) = (expm1(x/2) + 2) * expm1(x/2)
1844 
1845        We perform the range reduction in such a way that finally |x| < 1/32
1846     */
1847 
1848     /* Extract the exponent of |x| and add 5 (2^5 = 32) */
1849     expoX_inf = ((xIntHi_inf & 0x7ff00000) >> 20) - (1023 - 5);
1850 
1851     /* If this particularily biased exponent expoX is negative, we are already less than 1/32 */
1852     if (expoX_inf >= 0) {
1853       /* If we are here, we must perform range reduction */
1854 
1855 
1856       /* We multiply x by 2^(-expoX-1) by bit manipulation
1857 	 x cannot be denormalized so there is no danger
1858       */
1859       xdb_inf.i[HI] += (-expoX_inf-1) << 20;
1860 
1861       /* We reassign the new x and maintain xIntHi */
1862 
1863       xIntHi_inf = xdb_inf.i[HI] & 0x7fffffff;
1864       x_inf = xdb_inf.d;
1865     }
1866 
1867     /* Here, we have always |x| < 1/32 */
1868 
1869 
1870     /* Double precision evaluation steps and one double-double step */
1871 
1872     Mul12(&xSqh_inf,&xSql_inf,x_inf,x_inf);
1873 
1874 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1875     middlePoly_inf = FMA(quickDirectpolyC5h,x_inf,quickDirectpolyC4h);
1876 #else
1877     middlePoly_inf = quickDirectpolyC4h + x_inf * quickDirectpolyC5h;
1878 #endif
1879 
1880     doublePoly_inf = middlePoly_inf;
1881 
1882     /* Special path: for small |x| we can truncate the polynomial */
1883 
1884     if (xIntHi_inf > SPECIALINTERVALBOUND) {
1885 
1886 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1887       highPoly_inf = FMA(FMA(FMA(quickDirectpolyC9h ,x_inf,quickDirectpolyC8h),x_inf,
1888                              quickDirectpolyC7h),x_inf,quickDirectpolyC6h);
1889 #else
1890       highPoly_inf = quickDirectpolyC6h + x_inf * (quickDirectpolyC7h + x_inf * (
1891 	         quickDirectpolyC8h + x_inf *  quickDirectpolyC9h));
1892 #endif
1893 
1894       highPolyWithSquare_inf = xSqh_inf * highPoly_inf;
1895 
1896       doublePoly_inf = middlePoly_inf + highPolyWithSquare_inf;
1897 
1898     }
1899 
1900     /* Double-double evaluation steps */
1901     tt1h_inf = x_inf * doublePoly_inf;
1902 
1903     xSqHalfh_inf = 0.5 * xSqh_inf;
1904     xSqHalfl_inf = 0.5 * xSql_inf;
1905     Add12(t2h_inf,templ_inf,x_inf,xSqHalfh_inf);
1906     t2l_inf = templ_inf + xSqHalfl_inf;
1907 
1908     Add12(t1h_inf,t1l_inf,quickDirectpolyC3h,tt1h_inf);
1909     Mul122(&xCubeh_inf,&xCubel_inf,x_inf,xSqh_inf,xSql_inf);
1910     Mul22(&tt3h_inf,&tt3l_inf,xCubeh_inf,xCubel_inf,t1h_inf,t1l_inf);
1911 
1912     Add22(&polyh_inf,&polyl_inf,t2h_inf,t2l_inf,tt3h_inf,tt3l_inf);
1913 
1914     /* Reconstruction */
1915 
1916     /* If we have not performed any range reduction, we have no reconstruction to do */
1917     if (expoX_inf >= 0) {
1918       /* If we are here, we must perform reconstruction */
1919 
1920       /* First reconstruction step */
1921       Add12(r1h_inf,r1t_inf,2,polyh_inf);
1922       r1l_inf = r1t_inf + polyl_inf;
1923       Mul22(&rr1h_inf,&rr1l_inf,r1h_inf,r1l_inf,polyh_inf,polyl_inf);
1924 
1925       if (expoX_inf >= 1) {
1926 
1927 	/* Second reconstruction step */
1928 	Add12(r2h_inf,r2t_inf,2,rr1h_inf);
1929 	r2l_inf = r2t_inf + rr1l_inf;
1930 	Mul22(&rr2h_inf,&rr2l_inf,r2h_inf,r2l_inf,rr1h_inf,rr1l_inf);
1931 
1932 	if (expoX_inf >= 2) {
1933 
1934 	  /* Third reconstruction step */
1935 	  Add12(r3h_inf,r3t_inf,2,rr2h_inf);
1936 	  r3l_inf = r3t_inf + rr2l_inf;
1937 	  Mul22(&rr3h_inf,&rr3l_inf,r3h_inf,r3l_inf,rr2h_inf,rr2l_inf);
1938 
1939 	  /* expoX may be maximally 2 */
1940 
1941 	  expm1h_inf = rr3h_inf;
1942 	  expm1m_inf = rr3l_inf;
1943 
1944 	} else {
1945 	  expm1h_inf = rr2h_inf;
1946 	  expm1m_inf = rr2l_inf;
1947 	}
1948 
1949       } else {
1950 	expm1h_inf = rr1h_inf;
1951 	expm1m_inf = rr1l_inf;
1952       }
1953 
1954     } else {
1955       expm1h_inf = polyh_inf;
1956       expm1m_inf = polyl_inf;
1957     }
1958 
1959     /* Rounding test */
1960     infDone=1;
1961     TEST_AND_COPY_RD(roundable,restemp_inf,expm1h_inf, expm1m_inf, ROUNDCSTDIRECTRD);
1962     if(roundable==0)
1963     {
1964       expm1_direct_td(&expm1h_inf, &expm1m_inf, &expm1l_inf, x_inf, xSqHalfh_inf, xSqHalfl_inf, xSqh_inf, xSql_inf, expoX_inf);
1965 
1966       RoundDownwards3(&restemp_inf,expm1h_inf, expm1m_inf, expm1l_inf);
1967 
1968     } /* Accurate phase launched */
1969 
1970   }
1971 
1972   /* Test if we have |x| <= 1/4-1/2ulp(1/4) for knowing if we use exp(x) or approximate directly */
1973 
1974   if (xIntHi_sup < DIRECTINTERVALBOUND) {
1975     /* We approximate expm1 directly after a range reduction as follows
1976 
1977        expm1(x) = (expm1(x/2) + 2) * expm1(x/2)
1978 
1979        We perform the range reduction in such a way that finally |x| < 1/32
1980     */
1981 
1982     /* Extract the exponent of |x| and add 5 (2^5 = 32) */
1983     expoX_sup = ((xIntHi_sup & 0x7ff00000) >> 20) - (1023 - 5);
1984 
1985     /* If this particularily biased exponent expoX is negative, we are already less than 1/32 */
1986     if (expoX_sup >= 0) {
1987       /* If we are here, we must perform range reduction */
1988 
1989 
1990       /* We multiply x by 2^(-expoX-1) by bit manipulation
1991 	 x cannot be denormalized so there is no danger
1992       */
1993       xdb_sup.i[HI] += (-expoX_sup-1) << 20;
1994 
1995       /* We reassign the new x and maintain xIntHi */
1996 
1997       xIntHi_sup = xdb_sup.i[HI] & 0x7fffffff;
1998       x_sup = xdb_sup.d;
1999     }
2000 
2001     /* Here, we have always |x| < 1/32 */
2002 
2003 
2004     /* Double precision evaluation steps and one double-double step */
2005 
2006     Mul12(&xSqh_sup,&xSql_sup,x_sup,x_sup);
2007 
2008 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
2009     middlePoly_sup = FMA(quickDirectpolyC5h,x_sup,quickDirectpolyC4h);
2010 #else
2011     middlePoly_sup = quickDirectpolyC4h + x_sup * quickDirectpolyC5h;
2012 #endif
2013 
2014     doublePoly_sup = middlePoly_sup;
2015 
2016     /* Special path: for small |x| we can truncate the polynomial */
2017 
2018     if (xIntHi_sup > SPECIALINTERVALBOUND) {
2019 
2020 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
2021       highPoly_sup = FMA(FMA(FMA(quickDirectpolyC9h ,x_sup,quickDirectpolyC8h),x_sup,
2022                              quickDirectpolyC7h),x_sup,quickDirectpolyC6h);
2023 #else
2024       highPoly_sup = quickDirectpolyC6h + x_sup * (quickDirectpolyC7h + x_sup * (
2025 	         quickDirectpolyC8h + x_sup *  quickDirectpolyC9h));
2026 #endif
2027 
2028       highPolyWithSquare_sup = xSqh_sup * highPoly_sup;
2029 
2030       doublePoly_sup = middlePoly_sup + highPolyWithSquare_sup;
2031 
2032     }
2033 
2034     /* Double-double evaluation steps */
2035     tt1h_sup = x_sup * doublePoly_sup;
2036 
2037     xSqHalfh_sup = 0.5 * xSqh_sup;
2038     xSqHalfl_sup = 0.5 * xSql_sup;
2039     Add12(t2h_sup,templ_sup,x_sup,xSqHalfh_sup);
2040     t2l_sup = templ_sup + xSqHalfl_sup;
2041 
2042     Add12(t1h_sup,t1l_sup,quickDirectpolyC3h,tt1h_sup);
2043     Mul122(&xCubeh_sup,&xCubel_sup,x_sup,xSqh_sup,xSql_sup);
2044     Mul22(&tt3h_sup,&tt3l_sup,xCubeh_sup,xCubel_sup,t1h_sup,t1l_sup);
2045 
2046     Add22(&polyh_sup,&polyl_sup,t2h_sup,t2l_sup,tt3h_sup,tt3l_sup);
2047 
2048     /* Reconstruction */
2049 
2050     /* If we have not performed any range reduction, we have no reconstruction to do */
2051     if (expoX_sup >= 0) {
2052       /* If we are here, we must perform reconstruction */
2053 
2054       /* First reconstruction step */
2055       Add12(r1h_sup,r1t_sup,2,polyh_sup);
2056       r1l_sup = r1t_sup + polyl_sup;
2057       Mul22(&rr1h_sup,&rr1l_sup,r1h_sup,r1l_sup,polyh_sup,polyl_sup);
2058 
2059       if (expoX_sup >= 1) {
2060 
2061 	/* Second reconstruction step */
2062 	Add12(r2h_sup,r2t_sup,2,rr1h_sup);
2063 	r2l_sup = r2t_sup + rr1l_sup;
2064 	Mul22(&rr2h_sup,&rr2l_sup,r2h_sup,r2l_sup,rr1h_sup,rr1l_sup);
2065 
2066 	if (expoX_sup >= 2) {
2067 
2068 	  /* Third reconstruction step */
2069 	  Add12(r3h_sup,r3t_sup,2,rr2h_sup);
2070 	  r3l_sup = r3t_sup + rr2l_sup;
2071 	  Mul22(&rr3h_sup,&rr3l_sup,r3h_sup,r3l_sup,rr2h_sup,rr2l_sup);
2072 
2073 	  /* expoX may be maximally 2 */
2074 
2075 	  expm1h_sup = rr3h_sup;
2076 	  expm1m_sup = rr3l_sup;
2077 
2078 	} else {
2079 	  expm1h_sup = rr2h_sup;
2080 	  expm1m_sup = rr2l_sup;
2081 	}
2082 
2083       } else {
2084 	expm1h_sup = rr1h_sup;
2085 	expm1m_sup = rr1l_sup;
2086       }
2087 
2088     } else {
2089       expm1h_sup = polyh_sup;
2090       expm1m_sup = polyl_sup;
2091     }
2092 
2093     /* Rounding test */
2094     supDone=1;
2095     TEST_AND_COPY_RU(roundable,restemp_sup,expm1h_sup, expm1m_sup, ROUNDCSTDIRECTRD);
2096     if(roundable==0)
2097     {
2098       expm1_direct_td(&expm1h_sup, &expm1m_sup, &expm1l_sup, x_sup, xSqHalfh_sup, xSqHalfl_sup, xSqh_sup, xSql_sup, expoX_sup);
2099 
2100       RoundUpwards3(&restemp_sup,expm1h_sup, expm1m_sup, expm1l_sup);
2101 
2102     } /* Accurate phase launched */
2103 
2104   }
2105   if((infDone==0) && (supDone==0))
2106   {
2107     /* If we are here, we can use expm1(x) = exp(x) - 1 */
2108 
2109     /* Range reduction - exact part: compute k as double and as int */
2110 
2111     xMultLog2InvMult2L_inf = x_inf * log2InvMult2L;
2112     xMultLog2InvMult2L_sup = x_sup * log2InvMult2L;
2113     shiftedXMult_inf = xMultLog2InvMult2L_inf + shiftConst;
2114     shiftedXMult_sup = xMultLog2InvMult2L_sup + shiftConst;
2115     kd_inf = shiftedXMult_inf - shiftConst;
2116     kd_sup = shiftedXMult_sup - shiftConst;
2117     shiftedXMultdb_inf.d = shiftedXMult_inf;
2118     shiftedXMultdb_sup.d = shiftedXMult_sup;
2119     k_inf = shiftedXMultdb_inf.i[LO];
2120     k_sup = shiftedXMultdb_sup.i[LO];
2121     M_inf = k_inf >> 12;
2122     M_sup = k_sup >> 12;
2123     index1_inf = k_inf & INDEXMASK1;
2124     index1_sup = k_sup & INDEXMASK1;
2125     index2_inf = (k_inf & INDEXMASK2) >> 6;
2126     index2_sup = (k_sup & INDEXMASK2) >> 6;
2127 
2128 
2129     /* Range reduction - part affected by error - must be redone in accurate phase */
2130     Mul12(&s1_inf,&s2_inf,msLog2Div2Lh,kd_inf);
2131     Mul12(&s1_sup,&s2_sup,msLog2Div2Lh,kd_sup);
2132     s3_inf = kd_inf * msLog2Div2Lm;
2133     s3_sup = kd_sup * msLog2Div2Lm;
2134     s4_inf = s2_inf + s3_inf;
2135     s4_sup = s2_sup + s3_sup;
2136     s5_inf = x_inf + s1_inf;
2137     s5_sup = x_sup + s1_sup;
2138     Add12Cond(rh_inf,rm_inf,s5_inf,s4_inf);
2139     Add12Cond(rh_sup,rm_sup,s5_sup,s4_sup);
2140 
2141 
2142     /* Table reads - read only two double-doubles by now */
2143     tbl1h_inf = twoPowerIndex1[index1_inf].hi;
2144     tbl1h_sup = twoPowerIndex1[index1_sup].hi;
2145     tbl1m_inf = twoPowerIndex1[index1_inf].mi;
2146     tbl1m_sup = twoPowerIndex1[index1_sup].mi;
2147     tbl2h_inf = twoPowerIndex2[index2_inf].hi;
2148     tbl2h_sup = twoPowerIndex2[index2_sup].hi;
2149     tbl2m_inf = twoPowerIndex2[index2_inf].mi;
2150     tbl2m_sup = twoPowerIndex2[index2_sup].mi;
2151 
2152     /* Quick phase starts here */
2153 
2154     rhSquare_inf = rh_inf * rh_inf;
2155     rhSquare_sup = rh_sup * rh_sup;
2156     rhC3_inf = quickCommonpolyC3h * rh_inf;
2157     rhC3_sup = quickCommonpolyC3h * rh_sup;
2158 
2159     rhSquareHalf_inf = 0.5 * rhSquare_inf;
2160     rhSquareHalf_sup = 0.5 * rhSquare_sup;
2161 
2162     monomialCube_inf = rhC3_inf * rhSquare_inf;
2163     monomialCube_sup = rhC3_sup * rhSquare_sup;
2164     rhFour_inf = rhSquare_inf * rhSquare_inf;
2165     rhFour_sup = rhSquare_sup * rhSquare_sup;
2166 
2167     monomialFour_inf = quickCommonpolyC4h * rhFour_inf;
2168     monomialFour_sup = quickCommonpolyC4h * rhFour_sup;
2169     highPoly_inf = monomialCube_inf + monomialFour_inf;
2170     highPoly_sup = monomialCube_sup + monomialFour_sup;
2171     highPolyWithSquare_inf = rhSquareHalf_inf + highPoly_inf;
2172     highPolyWithSquare_sup = rhSquareHalf_sup + highPoly_sup;
2173 
2174     /* Reconstruction: integration of table values */
2175 
2176     Mul22(&tablesh_inf,&tablesl_inf,tbl1h_inf,tbl1m_inf,tbl2h_inf,tbl2m_inf);
2177     Mul22(&tablesh_sup,&tablesl_sup,tbl1h_sup,tbl1m_sup,tbl2h_sup,tbl2m_sup);
2178 
2179     t8_inf = rm_inf + highPolyWithSquare_inf;
2180     t8_sup = rm_sup + highPolyWithSquare_sup;
2181     t9_inf = rh_inf + t8_inf;
2182     t9_sup = rh_sup + t8_sup;
2183 
2184     t10_inf = tablesh_inf * t9_inf;
2185     t10_sup = tablesh_sup * t9_sup;
2186 
2187     Add12(t11_inf,t12_inf,tablesh_inf,t10_inf);
2188     Add12(t11_sup,t12_sup,tablesh_sup,t10_sup);
2189     t13_inf = t12_inf + tablesl_inf;
2190     t13_sup = t12_sup + tablesl_sup;
2191     Add12(polyTblhdb_inf.d,polyTblmdb_inf.d,t11_inf,t13_inf);
2192     Add12(polyTblhdb_sup.d,polyTblmdb_sup.d,t11_sup,t13_sup);
2193 
2194     /* Reconstruction: multiplication by 2^M */
2195 
2196     /* Implement the multiplication by multiplication to overcome the
2197        problem of the non-representability of 2^1024 (M = 1024)
2198        This case is possible if polyTblhdb.d < 1
2199     */
2200 
2201     polyTblhdb_inf.i[HI] += M_inf << 20;
2202     polyTblhdb_sup.i[HI] += M_sup << 20;
2203     polyTblmdb_inf.i[HI] += M_inf << 20;
2204     polyTblmdb_sup.i[HI] += M_sup << 20;
2205 
2206     exph_inf = polyTblhdb_inf.d;
2207     exph_sup = polyTblhdb_sup.d;
2208     expm_inf = polyTblmdb_inf.d;
2209     expm_sup = polyTblmdb_sup.d;
2210 
2211     /* Substraction of 1
2212 
2213        Testing if the operation is necessary is more expensive than
2214        performing it in any case.
2215 
2216        We may cancellate at most 2 bits in the subtraction for
2217        arguments 1/4 <= x <= ln(2) (0.25 <= x <= 0.69)
2218        We must therefore use conditional Add12s
2219 
2220        Since we perform a substraction, we may not have addition overflow towards +inf
2221 
2222     */
2223 
2224     Add12Cond(t1_inf,t2_inf,-1,exph_inf);
2225     Add12Cond(t1_sup,t2_sup,-1,exph_sup);
2226     t3_inf = t2_inf + expm_inf;
2227     t3_sup = t2_sup + expm_sup;
2228     Add12Cond(expm1h_inf,expm1m_inf,t1_inf,t3_inf);
2229     Add12Cond(expm1h_sup,expm1m_sup,t1_sup,t3_sup);
2230 
2231 
2232     /* Rounding test */
2233     TEST_AND_COPY_RDRU_EXPM1(roundable,res_inf,expm1h_inf, expm1m_inf, res_sup,expm1h_sup, expm1m_sup, ROUNDCSTCOMMONRD);
2234     if((roundable==2) || (roundable==0))
2235     {
2236       /* Rest of argument reduction for accurate phase */
2237 
2238       Mul133(&msLog2Div2LMultKh_inf,&msLog2Div2LMultKm_inf,&msLog2Div2LMultKl_inf,kd_inf,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
2239       t1_inf = x_inf + msLog2Div2LMultKh_inf;
2240       Add12Cond(rh_inf,t2_inf,t1_inf,msLog2Div2LMultKm_inf);
2241       Add12Cond(rm_inf,rl_inf,t2_inf,msLog2Div2LMultKl_inf);
2242 
2243       /* Table reads for accurate phase */
2244       tbl1l_inf = twoPowerIndex1[index1_inf].lo;
2245       tbl2l_inf = twoPowerIndex2[index2_inf].lo;
2246 
2247       /* Call accurate phase */
2248       expm1_common_td(&expm1h_inf, &expm1m_inf, &expm1l_inf, rh_inf, rm_inf, rl_inf, tbl1h_inf, tbl1m_inf, tbl1l_inf, tbl2h_inf, tbl2m_inf, tbl2l_inf, M_inf);
2249 
2250       /* Final rounding */
2251 
2252       RoundDownwards3(&res_inf,expm1h_inf, expm1m_inf, expm1l_inf);
2253     } /* Accurate phase launched */
2254     if((roundable==1) || (roundable==0))
2255     {
2256       /* Rest of argument reduction for accurate phase */
2257       Mul133(&msLog2Div2LMultKh_sup,&msLog2Div2LMultKm_sup,&msLog2Div2LMultKl_sup,kd_sup,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
2258       t1_sup = x_sup + msLog2Div2LMultKh_sup;
2259       Add12Cond(rh_sup,t2_sup,t1_sup,msLog2Div2LMultKm_sup);
2260       Add12Cond(rm_sup,rl_sup,t2_sup,msLog2Div2LMultKl_sup);
2261 
2262       /* Table reads for accurate phase */
2263       tbl1l_sup = twoPowerIndex1[index1_sup].lo;
2264       tbl2l_sup = twoPowerIndex2[index2_sup].lo;
2265 
2266       /* Call accurate phase */
2267       expm1_common_td(&expm1h_sup, &expm1m_sup, &expm1l_sup, rh_sup, rm_sup, rl_sup, tbl1h_sup, tbl1m_sup, tbl1l_sup, tbl2h_sup, tbl2m_sup, tbl2l_sup, M_sup);
2268 
2269       /* Final rounding */
2270 
2271       RoundUpwards3(&res_sup,expm1h_sup, expm1m_sup, expm1l_sup);
2272     } /* Accurate phase launched */
2273 
2274     ASSIGN_LOW(res,res_inf);
2275     ASSIGN_UP(res,res_sup);
2276     return res;
2277   }
2278   if((supDone==1))
2279   {
2280     /* If we are here, we can use expm1(x) = exp(x) - 1 */
2281 
2282     /* Range reduction - exact part: compute k as double and as int */
2283 
2284     xMultLog2InvMult2L_inf = x_inf * log2InvMult2L;
2285     shiftedXMult_inf = xMultLog2InvMult2L_inf + shiftConst;
2286     kd_inf = shiftedXMult_inf - shiftConst;
2287     shiftedXMultdb_inf.d = shiftedXMult_inf;
2288     k_inf = shiftedXMultdb_inf.i[LO];
2289     M_inf = k_inf >> 12;
2290     index1_inf = k_inf & INDEXMASK1;
2291     index2_inf = (k_inf & INDEXMASK2) >> 6;
2292 
2293 
2294     /* Range reduction - part affected by error - must be redone in accurate phase */
2295     Mul12(&s1_inf,&s2_inf,msLog2Div2Lh,kd_inf);
2296     s3_inf = kd_inf * msLog2Div2Lm;
2297     s4_inf = s2_inf + s3_inf;
2298     s5_inf = x_inf + s1_inf;
2299     Add12Cond(rh_inf,rm_inf,s5_inf,s4_inf);
2300 
2301 
2302     /* Table reads - read only two double-doubles by now */
2303     tbl1h_inf = twoPowerIndex1[index1_inf].hi;
2304     tbl1m_inf = twoPowerIndex1[index1_inf].mi;
2305     tbl2h_inf = twoPowerIndex2[index2_inf].hi;
2306     tbl2m_inf = twoPowerIndex2[index2_inf].mi;
2307 
2308     /* Quick phase starts here */
2309 
2310     rhSquare_inf = rh_inf * rh_inf;
2311     rhC3_inf = quickCommonpolyC3h * rh_inf;
2312 
2313     rhSquareHalf_inf = 0.5 * rhSquare_inf;
2314 
2315     monomialCube_inf = rhC3_inf * rhSquare_inf;
2316     rhFour_inf = rhSquare_inf * rhSquare_inf;
2317 
2318     monomialFour_inf = quickCommonpolyC4h * rhFour_inf;
2319     highPoly_inf = monomialCube_inf + monomialFour_inf;
2320     highPolyWithSquare_inf = rhSquareHalf_inf + highPoly_inf;
2321 
2322     /* Reconstruction: integration of table values */
2323 
2324     Mul22(&tablesh_inf,&tablesl_inf,tbl1h_inf,tbl1m_inf,tbl2h_inf,tbl2m_inf);
2325 
2326     t8_inf = rm_inf + highPolyWithSquare_inf;
2327     t9_inf = rh_inf + t8_inf;
2328 
2329     t10_inf = tablesh_inf * t9_inf;
2330 
2331     Add12(t11_inf,t12_inf,tablesh_inf,t10_inf);
2332     t13_inf = t12_inf + tablesl_inf;
2333     Add12(polyTblhdb_inf.d,polyTblmdb_inf.d,t11_inf,t13_inf);
2334 
2335     /* Reconstruction: multiplication by 2^M */
2336 
2337     /* Implement the multiplication by multiplication to overcome the
2338        problem of the non-representability of 2^1024 (M = 1024)
2339        This case is possible if polyTblhdb.d < 1
2340     */
2341 
2342     polyTblhdb_inf.i[HI] += M_inf << 20;
2343     polyTblmdb_inf.i[HI] += M_inf << 20;
2344 
2345     exph_inf = polyTblhdb_inf.d;
2346     expm_inf = polyTblmdb_inf.d;
2347 
2348     /* Substraction of 1
2349 
2350        Testing if the operation is necessary is more expensive than
2351        performing it in any case.
2352 
2353        We may cancellate at most 2 bits in the subtraction for
2354        arguments 1/4 <= x <= ln(2) (0.25 <= x <= 0.69)
2355        We must therefore use conditional Add12s
2356 
2357        Since we perform a substraction, we may not have addition overflow towards +inf
2358 
2359     */
2360 
2361     Add12Cond(t1_inf,t2_inf,-1,exph_inf);
2362     t3_inf = t2_inf + expm_inf;
2363     Add12Cond(expm1h_inf,expm1m_inf,t1_inf,t3_inf);
2364 
2365 
2366     /* Rounding test */
2367     TEST_AND_COPY_RD(roundable,res_inf,expm1h_inf, expm1m_inf, ROUNDCSTCOMMONRD);
2368     if(roundable==0)
2369     {
2370       /* Rest of argument reduction for accurate phase */
2371 
2372       Mul133(&msLog2Div2LMultKh_inf,&msLog2Div2LMultKm_inf,&msLog2Div2LMultKl_inf,kd_inf,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
2373       t1_inf = x_inf + msLog2Div2LMultKh_inf;
2374       Add12Cond(rh_inf,t2_inf,t1_inf,msLog2Div2LMultKm_inf);
2375       Add12Cond(rm_inf,rl_inf,t2_inf,msLog2Div2LMultKl_inf);
2376 
2377       /* Table reads for accurate phase */
2378       tbl1l_inf = twoPowerIndex1[index1_inf].lo;
2379       tbl2l_inf = twoPowerIndex2[index2_inf].lo;
2380 
2381       /* Call accurate phase */
2382       expm1_common_td(&expm1h_inf, &expm1m_inf, &expm1l_inf, rh_inf, rm_inf, rl_inf, tbl1h_inf, tbl1m_inf, tbl1l_inf, tbl2h_inf, tbl2m_inf, tbl2l_inf, M_inf);
2383 
2384       /* Final rounding */
2385 
2386       RoundDownwards3(&res_inf,expm1h_inf, expm1m_inf, expm1l_inf);
2387     } /* Accurate phase launched */
2388 
2389   }
2390   if(infDone==1)
2391   {
2392     /* If we are here, we can use expm1(x) = exp(x) - 1 */
2393 
2394     /* Range reduction - exact part: compute k as double and as int */
2395 
2396     xMultLog2InvMult2L_sup = x_sup * log2InvMult2L;
2397     shiftedXMult_sup = xMultLog2InvMult2L_sup + shiftConst;
2398     kd_sup = shiftedXMult_sup - shiftConst;
2399     shiftedXMultdb_sup.d = shiftedXMult_sup;
2400     k_sup = shiftedXMultdb_sup.i[LO];
2401     M_sup = k_sup >> 12;
2402     index1_sup = k_sup & INDEXMASK1;
2403     index2_sup = (k_sup & INDEXMASK2) >> 6;
2404 
2405 
2406     /* Range reduction - part affected by error - must be redone in accurate phase */
2407     Mul12(&s1_sup,&s2_sup,msLog2Div2Lh,kd_sup);
2408     s3_sup = kd_sup * msLog2Div2Lm;
2409     s4_sup = s2_sup + s3_sup;
2410     s5_sup = x_sup + s1_sup;
2411     Add12Cond(rh_sup,rm_sup,s5_sup,s4_sup);
2412 
2413 
2414     /* Table reads - read only two double-doubles by now */
2415     tbl1h_sup = twoPowerIndex1[index1_sup].hi;
2416     tbl1m_sup = twoPowerIndex1[index1_sup].mi;
2417     tbl2h_sup = twoPowerIndex2[index2_sup].hi;
2418     tbl2m_sup = twoPowerIndex2[index2_sup].mi;
2419 
2420     /* Quick phase starts here */
2421 
2422     rhSquare_sup = rh_sup * rh_sup;
2423     rhC3_sup = quickCommonpolyC3h * rh_sup;
2424 
2425     rhSquareHalf_sup = 0.5 * rhSquare_sup;
2426 
2427     monomialCube_sup = rhC3_sup * rhSquare_sup;
2428     rhFour_sup = rhSquare_sup * rhSquare_sup;
2429 
2430     monomialFour_sup = quickCommonpolyC4h * rhFour_sup;
2431     highPoly_sup = monomialCube_sup + monomialFour_sup;
2432     highPolyWithSquare_sup = rhSquareHalf_sup + highPoly_sup;
2433 
2434     /* Reconstruction: integration of table values */
2435 
2436     Mul22(&tablesh_sup,&tablesl_sup,tbl1h_sup,tbl1m_sup,tbl2h_sup,tbl2m_sup);
2437 
2438     t8_sup = rm_sup + highPolyWithSquare_sup;
2439     t9_sup = rh_sup + t8_sup;
2440 
2441     t10_sup = tablesh_sup * t9_sup;
2442 
2443     Add12(t11_sup,t12_sup,tablesh_sup,t10_sup);
2444     t13_sup = t12_sup + tablesl_sup;
2445     Add12(polyTblhdb_sup.d,polyTblmdb_sup.d,t11_sup,t13_sup);
2446 
2447     /* Reconstruction: multiplication by 2^M */
2448 
2449     /* Implement the multiplication by multiplication to overcome the
2450        problem of the non-representability of 2^1024 (M = 1024)
2451        This case is possible if polyTblhdb.d < 1
2452     */
2453 
2454     polyTblhdb_sup.i[HI] += M_sup << 20;
2455     polyTblmdb_sup.i[HI] += M_sup << 20;
2456 
2457     exph_sup = polyTblhdb_sup.d;
2458     expm_sup = polyTblmdb_sup.d;
2459 
2460     /* Substraction of 1
2461 
2462        Testing if the operation is necessary is more expensive than
2463        performing it in any case.
2464 
2465        We may cancellate at most 2 bits in the subtraction for
2466        arguments 1/4 <= x <= ln(2) (0.25 <= x <= 0.69)
2467        We must therefore use conditional Add12s
2468 
2469        Since we perform a substraction, we may not have addition overflow towards +inf
2470 
2471     */
2472 
2473     Add12Cond(t1_sup,t2_sup,-1,exph_sup);
2474     t3_sup = t2_sup + expm_sup;
2475     Add12Cond(expm1h_sup,expm1m_sup,t1_sup,t3_sup);
2476 
2477     /* Rounding test */
2478     TEST_AND_COPY_RU(roundable,res_sup,expm1h_sup, expm1m_sup, ROUNDCSTCOMMONRD);
2479     if(roundable==0)
2480     {
2481       /* Rest of argument reduction for accurate phase */
2482       Mul133(&msLog2Div2LMultKh_sup,&msLog2Div2LMultKm_sup,&msLog2Div2LMultKl_sup,kd_sup,msLog2Div2Lh,msLog2Div2Lm,msLog2Div2Ll);
2483       t1_sup = x_sup + msLog2Div2LMultKh_sup;
2484       Add12Cond(rh_sup,t2_sup,t1_sup,msLog2Div2LMultKm_sup);
2485       Add12Cond(rm_sup,rl_sup,t2_sup,msLog2Div2LMultKl_sup);
2486 
2487       /* Table reads for accurate phase */
2488       tbl1l_sup = twoPowerIndex1[index1_sup].lo;
2489       tbl2l_sup = twoPowerIndex2[index2_sup].lo;
2490 
2491       /* Call accurate phase */
2492       expm1_common_td(&expm1h_sup, &expm1m_sup, &expm1l_sup, rh_sup, rm_sup, rl_sup, tbl1h_sup, tbl1m_sup, tbl1l_sup, tbl2h_sup, tbl2m_sup, tbl2l_sup, M_sup);
2493 
2494       /* Final rounding */
2495 
2496       RoundUpwards3(&res_sup,expm1h_sup, expm1m_sup, expm1l_sup);
2497     } /* Accurate phase launched */
2498   }
2499 
2500   if (infDone==1) res_inf=restemp_inf;
2501   if (supDone==1) res_sup=restemp_sup;
2502   ASSIGN_LOW(res,res_inf);
2503   ASSIGN_UP(res,res_sup);
2504   return res;
2505 }
2506 #endif
2507