1 
2 #include <stdio.h>
3 #include "crlibm.h"
4 #include "crlibm_private.h"
5 #include "triple-double.h"
6 #include "pow.h"
7 
8 
9 /* Some macros for specific operations in power */
10 
11 #define USE_BUILTINS 0
12 
13 /* decompose
14 
15    Decomposes a positive double precision
16    variable x (normal or subnormal) such that
17 
18    2^(resE) * resm = x
19 
20    where resm = 2 * k + 1 for integer k
21 
22    resE is an integer variable pointer
23    resm is a double precision variable pointer
24    x is a double precision variable.
25 
26 */
27 
28 #if USE_BUILTINS
29 #define decompose(resm,resE,x)                                             \
30 {                                                                          \
31    int __decompose_ex, __decompose_d;                                      \
32    int64_t __decompose_temp;                                               \
33    db_number __decompose_xdb;                                              \
34                                                                            \
35    __decompose_xdb.d = (x);                                                \
36    __decompose_ex = 0;                                                     \
37    if (!(__decompose_xdb.i[HI] & 0xfff00000)) {                            \
38      __decompose_xdb.d = (x) * 0.18014398509481984e17;                     \
39      __decompose_ex = -54;                                                 \
40    }                                                                       \
41    __decompose_ex += (__decompose_xdb.i[HI] >> 20) - 1023;                 \
42    __decompose_xdb.i[HI] &= 0x000fffff;                                    \
43    __decompose_temp = __decompose_xdb.l | 0x0010000000000000llu;           \
44    __decompose_d = __builtin_ctz(__decompose_temp);                        \
45    __decompose_xdb.i[HI] |= (1075 - __decompose_d) << 20;                  \
46    *(resE) = __decompose_d - 52 + __decompose_ex;                          \
47    *(resm) = __decompose_xdb.d;                                            \
48 }
49 #else
50 #define decompose(resm,resE,x)                                             \
51 {                                                                          \
52    int __decompose_ex, __decompose_d;                                      \
53    int64_t __decompose_temp;                                               \
54    db_number __decompose_xdb, __decompose_tempdb;                          \
55                                                                            \
56    __decompose_xdb.d = (x);                                                \
57    __decompose_ex = 0;                                                     \
58    if (!(__decompose_xdb.i[HI] & 0xfff00000)) {                            \
59      __decompose_xdb.d = (x) * 0.18014398509481984e17;                     \
60      __decompose_ex = -54;                                                 \
61    }                                                                       \
62    __decompose_ex += (__decompose_xdb.i[HI] >> 20) - 1023;                 \
63    __decompose_xdb.i[HI] &= 0x000fffff;                                    \
64    __decompose_temp = __decompose_xdb.l | 0x0010000000000000llu;           \
65    __decompose_temp = (~__decompose_temp & (__decompose_temp - 1)) + 1;    \
66    __decompose_tempdb.d = (double) __decompose_temp;                       \
67    __decompose_d = (__decompose_tempdb.i[HI] >> 20) - 1023;                \
68    __decompose_xdb.i[HI] |= (1075 - __decompose_d) << 20;                  \
69    *(resE) = __decompose_d - 52 + __decompose_ex;                          \
70    *(resm) = __decompose_xdb.d;                                            \
71 }
72 #endif
73 
74 
75 /* isOddInteger
76 
77    Determines if a given double precision number x is an odd integer
78 
79 */
80 
81 #define isOddInteger(x) (ABS(((ABS(x) + 0.9007199254740992e16) - 0.9007199254740992e16) - ABS(x)) == 1.0)
82 
83 /* isInteger
84 
85    Determines if a given double precision number x is integer
86 
87 */
88 
89 #define isInteger(x) ((ABS(x) >= 0.4503599627370496e16) || (((ABS(x) + 0.4503599627370496e16) - 0.4503599627370496e16) == ABS(x)))
90 
91 
92 /* log2_130
93 
94    Approximates
95 
96    resh + resm + resl = (ed + log2(1 + (xh + xm)) + log2(r[index])) * (1 + eps)
97 
98    where ||eps|| <= 2^(-130)
99 
100 
101 */
log2_130(double * resh,double * resm,double * resl,int index,double ed,double xh,double xm)102 static inline void log2_130(double *resh, double *resm, double *resl,
103 	                    int index, double ed, double xh, double xm) {
104 
105   double p_t_1_0h;
106   double p_t_2_0h;
107   double p_t_3_0h;
108   double p_t_4_0h;
109   double p_t_5_0h;
110   double p_t_6_0h;
111   double p_t_7_0h;
112   double p_t_8_0h;
113   double p_t_9_0h, p_t_9_0m;
114   double p_t_10_0h, p_t_10_0m;
115   double p_t_11_0h, p_t_11_0m;
116   double p_t_12_0h, p_t_12_0m;
117   double p_t_13_0h, p_t_13_0m;
118   double p_t_14_0h, p_t_14_0m;
119   double p_t_15_0h, p_t_15_0m;
120   double p_t_16_0h, p_t_16_0m, p_t_16_0l;
121   double p_t_17_0h, p_t_17_0m, p_t_17_0l;
122   double p_t_18_0h, p_t_18_0m, p_t_18_0l;
123   double p_t_19_0h, p_t_19_0m, p_t_19_0l;
124   double p_t_20_0h, p_t_20_0m, p_t_20_0l;
125   double p_t_21_0h, p_t_21_0m, p_t_21_0l;
126   double p_t_21_1h, p_t_21_1m, p_t_21_1l;
127   double p_t_22_0h, p_t_22_0m, p_t_22_0l;
128   double p_t_23_0h, p_t_23_0m, p_t_23_0l;
129   double p_resh, p_resm, p_resl;
130   double log2yh, log2ym, log2yl;
131   double log2xh, log2xm, log2xl;
132   double logih, logim, logil;
133 
134 
135   p_t_1_0h = log2_130_p_coeff_13h;
136   p_t_2_0h = p_t_1_0h * xh;
137   p_t_3_0h = log2_130_p_coeff_12h + p_t_2_0h;
138   p_t_4_0h = p_t_3_0h * xh;
139   p_t_5_0h = log2_130_p_coeff_11h + p_t_4_0h;
140   p_t_6_0h = p_t_5_0h * xh;
141   p_t_7_0h = log2_130_p_coeff_10h + p_t_6_0h;
142   p_t_8_0h = p_t_7_0h * xh;
143   Add12(p_t_9_0h,p_t_9_0m,log2_130_p_coeff_9h,p_t_8_0h);
144   Mul22(&p_t_10_0h,&p_t_10_0m,p_t_9_0h,p_t_9_0m,xh,xm);
145   Add122(&p_t_11_0h,&p_t_11_0m,log2_130_p_coeff_8h,p_t_10_0h,p_t_10_0m);
146   MulAdd22(&p_t_12_0h,&p_t_12_0m,log2_130_p_coeff_7h,log2_130_p_coeff_7m,xh,xm,p_t_11_0h,p_t_11_0m);
147   MulAdd22(&p_t_13_0h,&p_t_13_0m,log2_130_p_coeff_6h,log2_130_p_coeff_6m,xh,xm,p_t_12_0h,p_t_12_0m);
148   MulAdd22(&p_t_14_0h,&p_t_14_0m,log2_130_p_coeff_5h,log2_130_p_coeff_5m,xh,xm,p_t_13_0h,p_t_13_0m);
149   Mul22(&p_t_15_0h,&p_t_15_0m,p_t_14_0h,p_t_14_0m,xh,xm);
150   Add23(&p_t_16_0h,&p_t_16_0m,&p_t_16_0l,log2_130_p_coeff_4h,log2_130_p_coeff_4m,p_t_15_0h,p_t_15_0m);
151   Mul233(&p_t_17_0h,&p_t_17_0m,&p_t_17_0l,xh,xm,p_t_16_0h,p_t_16_0m,p_t_16_0l);
152   Add233(&p_t_18_0h,&p_t_18_0m,&p_t_18_0l,log2_130_p_coeff_3h,log2_130_p_coeff_3m,p_t_17_0h,p_t_17_0m,p_t_17_0l);
153   Mul233(&p_t_19_0h,&p_t_19_0m,&p_t_19_0l,xh,xm,p_t_18_0h,p_t_18_0m,p_t_18_0l);
154   Add33(&p_t_20_0h,&p_t_20_0m,&p_t_20_0l,log2_130_p_coeff_2h,log2_130_p_coeff_2m,log2_130_p_coeff_2l,p_t_19_0h,p_t_19_0m,p_t_19_0l);
155   Mul233(&p_t_21_0h,&p_t_21_0m,&p_t_21_0l,xh,xm,p_t_20_0h,p_t_20_0m,p_t_20_0l);
156   Renormalize3(&p_t_21_1h,&p_t_21_1m,&p_t_21_1l,p_t_21_0h,p_t_21_0m,p_t_21_0l);
157   Add33(&p_t_22_0h,&p_t_22_0m,&p_t_22_0l,log2_130_p_coeff_1h,log2_130_p_coeff_1m,log2_130_p_coeff_1l,p_t_21_1h,p_t_21_1m,p_t_21_1l);
158   Mul233(&p_t_23_0h,&p_t_23_0m,&p_t_23_0l,xh,xm,p_t_22_0h,p_t_22_0m,p_t_22_0l);
159   p_resh = p_t_23_0h;
160   p_resm = p_t_23_0m;
161   p_resl = p_t_23_0l;
162 
163   logih = argredtable[index].logih;
164   logim = argredtable[index].logim;
165   logil = argredtable[index].logil;
166 
167   Add33(&log2yh,&log2ym,&log2yl,logih,logim,logil,p_resh,p_resm,p_resl);
168   Add133(&log2xh,&log2xm,&log2xl,ed,log2yh,log2ym,log2yl);
169 
170   Renormalize3(resh,resm,resl,log2xh,log2xm,log2xl);
171 
172 }
173 
174 /* exp2_120
175 
176    Approximates
177 
178    2^H * (resh + resm + resl) = 2^(xh + xm + xl) * (1 + eps)
179 
180    where ||eps|| <= 2^(-119.5)
181 
182 */
exp2_120(int * H,double * resh,double * resm,double * resl,double xh,double xm,double xl)183 static inline void exp2_120(int *H, double *resh, double *resm, double *resl,
184 			    double xh, double xm, double xl) {
185   double xhMult2L, rhMult2L, r;
186   int k, index1, index2;
187   db_number shiftedxhMult2Ldb;
188   double rh, rm, rl;
189   double t1h, t1m, t1l, t2, t3;
190   double p_t_1_0h;
191   double p_t_2_0h;
192   double p_t_3_0h;
193   double p_t_4_0h;
194   double p_t_5_0h, p_t_5_0m;
195   double p_t_6_0h, p_t_6_0m;
196   double p_t_7_0h, p_t_7_0m;
197   double p_t_8_0h, p_t_8_0m;
198   double p_t_9_0h, p_t_9_0m, p_t_9_0l;
199   double p_t_10_0h, p_t_10_0m, p_t_10_0l;
200   double p_t_11_0h, p_t_11_0m, p_t_11_0l;
201   double p_resh, p_resm, p_resl;
202   double tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l;
203   double tablesh, tablesm, tablesl;
204   double exp2h, exp2m, exp2l;
205 
206   /* Argument reduction
207 
208      Produce exactly
209 
210      2^H * 2^(i1/2^8) * 2^(i2/2^13) * 2^(rh + rm + rl)
211 
212   */
213 
214   xhMult2L = xh * two13;
215   shiftedxhMult2Ldb.d = shiftConst + xhMult2L;
216   rhMult2L = xhMult2L - (shiftedxhMult2Ldb.d - shiftConst);
217   r = rhMult2L * twoM13;
218   k = shiftedxhMult2Ldb.i[LO];
219   *H = k >> 13;
220   index1 = k & INDEXMASK1;
221   index2 = (k & INDEXMASK2) >> 5;
222   Add12Cond(t1h, t2, r, xm);
223   Add12Cond(t1m, t1l, t2, xl);
224   Add12(rh, t3, t1h, t1m);
225   Add12(rm, rl, t3, t1l);
226 
227   /* Polynomial approximation of 2^(rh + rm + rl) */
228 
229   p_t_1_0h = exp2_120_p_coeff_6h;
230   p_t_2_0h = p_t_1_0h * rh;
231   p_t_3_0h = exp2_120_p_coeff_5h + p_t_2_0h;
232   p_t_4_0h = p_t_3_0h * rh;
233   Add12(p_t_5_0h,p_t_5_0m,exp2_120_p_coeff_4h,p_t_4_0h);
234   MulAdd22(&p_t_6_0h,&p_t_6_0m,exp2_120_p_coeff_3h,exp2_120_p_coeff_3m,rh,rm,p_t_5_0h,p_t_5_0m);
235   MulAdd22(&p_t_7_0h,&p_t_7_0m,exp2_120_p_coeff_2h,exp2_120_p_coeff_2m,rh,rm,p_t_6_0h,p_t_6_0m);
236   Mul22(&p_t_8_0h,&p_t_8_0m,p_t_7_0h,p_t_7_0m,rh,rm);
237   Add23(&p_t_9_0h,&p_t_9_0m,&p_t_9_0l,exp2_120_p_coeff_1h,exp2_120_p_coeff_1m,p_t_8_0h,p_t_8_0m);
238   Mul33(&p_t_10_0h,&p_t_10_0m,&p_t_10_0l,rh,rm,rl,p_t_9_0h,p_t_9_0m,p_t_9_0l);
239   Add133(&p_t_11_0h,&p_t_11_0m,&p_t_11_0l,exp2_120_p_coeff_0h,p_t_10_0h,p_t_10_0m,p_t_10_0l);
240   Renormalize3(&p_resh,&p_resm,&p_resl,p_t_11_0h,p_t_11_0m,p_t_11_0l);
241 
242   /* Table access */
243 
244   tbl1h = twoPowerIndex1[index1].hi;
245   tbl1m = twoPowerIndex1[index1].mi;
246   tbl1l = twoPowerIndex1[index1].lo;
247   tbl2h = twoPowerIndex2[index2].hi;
248   tbl2m = twoPowerIndex2[index2].mi;
249   tbl2l = twoPowerIndex2[index2].lo;
250 
251   /* Reconstruction */
252 
253   Mul33(&tablesh,&tablesm,&tablesl,tbl1h,tbl1m,tbl1l,tbl2h,tbl2m,tbl2l);
254   Mul33(&exp2h,&exp2m,&exp2l,tablesh,tablesm,tablesl,p_resh,p_resm,p_resl);
255 
256   Renormalize3(resh,resm,resl,exp2h,exp2m,exp2l);
257 
258 }
259 
260 
261 
262 /* pow_120
263 
264    Approximates
265 
266    2^H * (resh + resm + resl) = 2^(y * (ed + log2(1 + (zh + zm)) + log2(r[index]))) * (1 + eps)
267 
268    where ||eps|| <= 2^(-118.5) if -1075 <= y * (ed + log2(1 + (zh + zm)) + log2(r[index])) <= 1024
269 
270    Approximates further (ed + log2(1 + (zh + zm)) + log2(r[index))) by log2xh
271    where
272 
273    log2xh = (ed + log2(1 + (zh + zm)) + log2(r[index))) * (1 + eps2)
274 
275    where |eps2| <= 2^(-52)
276    and log2xh is exact if 2^ed * ((1 + (zh + zm)) * r[index]) is an integer power of 2.
277 
278 */
pow_120(int * H,double * resh,double * resm,double * resl,double * log2xh,double y,int index,double ed,double zh,double zm)279 void pow_120(int *H, double *resh, double *resm, double *resl, double *log2xh,
280 	     double y, int index, double ed, double zh, double zm) {
281   double ylog2xh, ylog2xm, ylog2xl;
282   double log2xm, log2xl;
283 
284   /* Compute log2(x) */
285   log2_130(log2xh,&log2xm,&log2xl,index,ed,zh,zm);
286 
287   /* Compute y * log2(x) */
288   Mul133(&ylog2xh,&ylog2xm,&ylog2xl,y,*log2xh,log2xm,log2xl);
289 
290   /* Compute 2^(y * log2(x)) */
291   exp2_120(H,resh,resm,resl,ylog2xh,ylog2xm,ylog2xl);
292 
293 }
294 
295 /* pow_round_and_check_rn
296 
297    Checks whether
298 
299    2^H * (powh + powm + powl)
300 
301    which is an approximate to x^y with a relative error or less than 2^(-118.5),
302    can be rounded correctly to double precision in round-to-nearest-ties-to-even
303    mode or whether the Table Maker's Dilemma occurs or the case is exact.
304 
305    Returns 1 if rounding is possible and affects pow with the rounding
306    Returns 0 if rounding is not possible or if the case is exact
307 
308    If the returned value is 0, it affects
309 
310    G, kh, kl
311 
312    such that
313 
314    2^G * (kh + kl)
315 
316    is an approximate to x^y with an relative error of 2^(-117)
317 
318    and such that rounding
319 
320    2^G * kh
321 
322    to double precision is an exact operation.
323 
324 */
325 
pow_round_and_check_rn(double * pow,int H,double powh,double powm,double powl,int * G,double * kh,double * kl)326 static inline int pow_round_and_check_rn(double *pow,
327 					 int H, double powh, double powm, double powl,
328 					 int *G, double *kh, double *kl) {
329   double th, tm, tl;
330   int K, K1, K2;
331   db_number twodb, two2db;
332   double twoH1074powh, twoH1074powm, shiftedpowh, delta;
333   double scaledth;
334   double t1m, t1l;
335 
336   /* We start by bringing H and powh + powm + powl
337      to a form such that
338 
339      1 <= powh + powm + powl < 2
340   */
341   if ((powh < 1.0) ||
342       ((powh == 1.0) && (powm < 0.0))) {
343     powh *= 2.0;
344     powm *= 2.0;
345     powl *= 2.0;
346     H--;
347   }
348   if ((powh > 2.0) ||
349       ((powh == 2.0) && (powm >= 0.0))) {
350     powh *= 0.5;
351     powm *= 0.5;
352     powl *= 0.5;
353     H++;
354   }
355 
356   /* Check now whether we have normal or subnormal rounding
357 
358      The rounding is subnormal iff H <= -1023
359 
360      In both cases, we bring H, powh + powm + powl to a form
361 
362      2^K * (th + tm + tl) = 2^H * (powh + powm + powm) * (1 + eps)
363 
364      where
365 
366      (i)   |eps| <= 2^(-118 - 53) = 2^(-171)
367      (ii)  2^(-K) * ulp(2^K * th) = 1
368      (iii) the rounding of 2^K * th to double precision is exact
369            (or produces +inf)
370 
371   */
372   if (H <= -1023) {
373     /* Subnormal rounding
374 
375        In this case, we can neglect powl
376        because the rounding bit of the RN rounding
377        is in powh
378 
379     */
380     twodb.i[HI] = (H + (1074 + 1023)) << 20;
381     twodb.i[LO] = 0;
382 
383     twoH1074powh = twodb.d * powh;
384     twoH1074powm = twodb.d * powm;
385 
386     shiftedpowh = two52 + twoH1074powh;
387     th = shiftedpowh - two52;
388     delta = twoH1074powh - th;
389 
390     Add12Cond(tm,tl,delta,twoH1074powm);
391 
392     K = -1074;
393   } else {
394     /* Normal rounding
395 
396        In this case, we have exactly:
397 
398        2^K * (th + tm + tl) = 2^H * (powh + powm + powm)
399 
400     */
401     th = powh * two52;
402     tm = powm * two52;
403     tl = powl * two52;
404     K = H - 52;
405   }
406 
407   /* Return results for exactness case test */
408   *G = K;
409   *kh = th;
410   *kl = tm;
411 
412   /* Compute now
413 
414      delta = ABS(0.5 - ABS(tm + tl)) * (1 + eps)
415 
416      where |eps| <= 2^(-53)
417 
418      The addition 0.5 + (-ABS(tm)) is exact by Sterbenz' lemma
419 
420   */
421   if (tm > 0.0) {
422     t1m = - tm;
423     t1l = - tl;
424   } else {
425     t1m = tm;
426     t1l = tl;
427   }
428   delta = ABS((0.5 + t1m) - t1l);
429 
430   /* We cannot decide the rounding or have an exact case
431      iff
432 
433      delta <= 2^(-118) * th
434 
435      We can see this in the following drawing:
436 
437      result = round(
438 
439             +-----------------+------+----------...----+------------
440      2^K *  | th              | +/-1 | 0               | delta        )
441             +-----------------+------+----------...----+------------
442             | <------------- 118 bits -------------> |
443 
444   */
445 
446   scaledth = th * PRECISEROUNDCST;
447 
448   if (delta > scaledth) {
449     /* We can round exactly to nearest
450 
451        We must correct th to the rounding of
452        th + tm + tl iff tm is greater in
453        absolute value than 0.5
454        or if tm is equal to 0.5 in absolute value and
455        the sign of tm and tl
456        is equal:
457 
458                 |                 |
459        ...------|--------|--------|------...
460                 |------->-->      |
461                 ^   tm    tl      ^
462                 th                round(th + tm + tl)
463 
464        If tm is negative, we must correct th by decreasing it
465        otherwise we must correct th by increasing it.
466 
467     */
468     if (ABS(tm) >= 0.5) {
469       if (ABS(tm) == 0.5) {
470 	if (tm < 0.0) {
471 	  if (tl < 0.0) {
472 	    /* The same sign of tm and tl, tm is negative */
473 	    th -= 1.0;
474 	  }
475 	} else {
476 	  if (tl > 0.0) {
477 	    /* The same sign of tm and tl, tm is positive */
478 	    th += 1.0;
479 	  }
480 	}
481       } else {
482 	/* tm > 0.5 */
483 	if (tm < 0.0) {
484 	  th -= 1.0;
485 	} else {
486 	  th += 1.0;
487 	}
488       }
489     }
490 
491     /* Perform now the multiplication 2^K * th
492 
493        Note that we must be able to produce
494 
495        (i)   0 if we have total underflow
496        (ii)  a subnormal result
497        (iii) a normal result
498        (iv)  +inf if we have overflow in a TMD case
499 
500        We produce K1 + K2 = K with K1 = floor(K/2)
501        and multiply in two steps.
502 
503     */
504     K1 = K >> 1;
505     K2 = K - K1;
506 
507     twodb.i[HI] = (K1 + 1023) << 20;
508     twodb.i[LO] = 0;
509     two2db.i[HI] = (K2 + 1023) << 20;
510     two2db.i[LO] = 0;
511 
512     *pow = two2db.d * (twodb.d * th);
513 
514     return 1;
515   }
516   /* Otherwise we return 0 because we cannot round */
517 
518   return 0;
519 }
520 
521 
522 /* pow_exact_case
523 
524    Checks whether x^y is an exact or half-ulp case for rounding
525    into double precision.
526 
527    This means the procedure checks whether x^y can be written on
528    not more than 54 bits.
529 
530    The procedure uses 2^G * (kh + kl) supposing the following properties:
531 
532    * kh + kl holds on at most 54 bits
533    * 2^G * kh is representable in double precision (even in the subnormal range)
534    * 2^G * (kh + kl) approximates x^y with an error eps less than 2^(-117), i.e.
535 
536      2^G * (kh + kl) = x^y * (1 + eps) where |eps| <= 2^(-117)
537 
538    * log2xh approximates log2(x) with an accuracy equivalent to at least 52 bits
539      In particular log2xh is exact if x is an integer power of 2
540 
541    Returns 1 if the case is exact or half-ulp
542    Returns 0 otherwise
543 
544    If returning 1, affects pow with 2^G * (kh + kl) rounded to double precision
545 
546 */
pow_exact_case(double * pow,double x,double y,int G,double kh,double kl,double log2xh)547 int pow_exact_case(double *pow,
548 		   double x, double y,
549 		   int G, double kh, double kl, double log2xh) {
550   db_number xdb, ydb, tempdb, shiftedEydb, temp2db;
551   int E, F, G1, G2;
552   double m, n, yh, yl, Eyh, Eyl, ed, Ey;
553   double delta;
554   double nearestEy;
555   double value;
556 
557   /* For testing whether x^y is an exact or half-ulp case,
558      we have two main cases:
559 
560      (i)  x is an integer power of 2
561      (ii) x is not an integer power of 2
562 
563      We start by testing if x is an integer power of 2.
564 
565   */
566 
567   xdb.d = x;
568   if ((xdb.i[HI] & 0xfff00000) == 0) {
569     /* x is subnormal, scale by 2^52 */
570     xdb.d *= 0.4503599627370496e16;
571   }
572   if (((xdb.i[HI] & 0x000fffff) | xdb.i[LO]) == 0) {
573     /* x is an integer power of 2
574 
575        x^y is exact or midpoint iff log2(x) * y is integer
576 
577        Since we know that x is an integer power of 2,
578        log2xh is equal to this integer. Since the exponent
579        of the double precision number is bounded by the
580        exponent range, we know that log2xh is integer and
581        bounded by 2^11. It is therefore possible to
582        split y into two parts of 21 and 32 bits, to perform
583        the multiplication componentwise. Since the result
584        is bounded by 2^11, it suffices to compute the
585        nearest integer to the higher word product, and to
586        compare the rounding difference to the low word product.
587        This splitting is faster than the usual Dekker splitting.
588 
589     */
590     ydb.d = y;
591     ydb.i[LO] = 0;
592     yh = ydb.d;
593     yl = y - yh;
594     Eyh = log2xh * yh;
595     Eyl = log2xh * yl;
596     delta = ((0.6755399441055744e16 + Eyh) - 0.6755399441055744e16) - Eyh; /* addition rounds, subtractions exact */
597 
598     if (delta != Eyl) return 0;
599 
600   } else {
601 
602     /* x is not an integer power of 2
603 
604        We have clearly an inexact case if y is negative
605        or if y is greater than 35
606 
607     */
608 
609     if ((y < 0.0) || (y > 35.0)) return 0;
610 
611     /* Decompose now y into
612 
613        y = 2^F * n
614 
615        Checking F and n, we can then already decide
616        some cases using the fact that the worst-case
617        accuracy for x^n, n in [|0;35|], is less (in bits)
618        than the accuracy of the approximation of x^y we have
619        already in 2^G * (kh + kl).
620 
621     */
622     decompose(&n,&F,y);
623 
624     if ((n > 35.0) || (F < -5)) return 0;
625 
626     if (F < 0) {
627       /* Here, -5 <= F <= -1, 3 <= n <= 35, n an odd integer
628 
629          We decompose x into 2^E * m where m is an odd integer
630 
631 	 Let H, sh and sl such that 2^H * (sh + sl) = 2^G * (kh + kl) and
632 	 sh + sl is an odd integer.
633 
634 	 If we have E * 2^F * n = H, we can apply the worst case argument
635 	 because we know the worst case for
636 
637 	 m^(2^F * n) with m odd integer, -5 <= F <= -1, 3 <= n <= 35
638 
639 	 when rounding to 53 bits in both rounding modes.
640 
641 	 E is bounded in magnitude by 2^11. 2^F * n is equal to y by
642 	 construction. Since n <= 35, y contains at most 6 significant bits.
643 	 The arithmetical multiplication E * y is therefore exact and less
644 	 than or equal to 2^17.
645 
646 	 We check first whether E * y is an integer. If this is the case,
647 	 we compute sh + sl = 2^(G - E * y) * (kh + kl). Finally,
648 	 we check whether sh + sl is an odd integer.
649 
650       */
651 
652       decompose(&m,&E,x);
653 
654       ed = (double) E;
655 
656       Ey = ed * y; /* Exact */
657 
658 
659       /* Check whether Ey is an integer using the simple shift technique
660 	 The addition rounds, the substraction is exact by Sterbenz' lemma.
661 	 If Ey is an integer, the low order word of shiftedEydb is equal to this
662 	 integer.
663       */
664       shiftedEydb.d = 0.6755399441055744e16 + Ey;
665       nearestEy = shiftedEydb.d - 0.6755399441055744e16;
666 
667       if (nearestEy != Ey) return 0;
668 
669       /* Here E * y is integer.
670 	 Produce now 2^(G - E * y).
671       */
672       tempdb.i[HI] = (((G - shiftedEydb.i[LO]) + 1023) << 20);
673       tempdb.i[LO] = 0;
674 
675       /* Check now if sh + sl = tempdb.d * (kh + kl) is an odd
676 	 integer.
677 
678 	 Since kh and kl are not overlapped, we have two cases:
679 
680 	 (i)  kl is equal to 0, in which case tempdb.d * kh must be an odd integer
681 	 (ii) kl is not equal to 0, in which case tempdb.d * kl must be an odd integer
682 
683       */
684       if (kl == 0.0) value = kh; else value = kl;
685 
686       value *= tempdb.d; /* Exact because multiplication by positive integer power of 2 */
687 
688       if (!isOddInteger(value)) return 0;
689 
690       /* Here the case is exact by the worst-case argument */
691     }
692 
693     /* Here, we have either F >= 0 or an exact case
694 
695        If F >= 0, we also have an exact case because
696        2^F * n = y <= 35, y therefore integer and because
697        we can apply the worst case argument.
698 
699     */
700   }
701 
702   /*
703        Here, the case is exact, affect pow with 2^G * (kh + kl) rounded to
704        nearest in double precision
705 
706        Since kh + kl holds on at most 54 bits, 2^G * kh produces
707        never any rounding error and kh and kl are not overlapping,
708        2^G * kh is equal to the rounding if
709        (i)  kl is equal to 0
710        (ii) kl is not equal to 0 and the mantissa of 2^G * kh is even
711 
712        If in condition (ii), kl is not equal to 0 and the mantissa of
713        2^G * kh is not even, we correct it depending on the sign of kl.
714 
715        Remark that G can be such that 2^G is no longer a normal.
716        Produce therefore 2^(floor(G/2)) and 2^(G - floor(G/2)) and
717        multiply in two steps.
718 
719   */
720 
721   G1 = G >> 1;
722   G2 = G - G1;
723 
724   tempdb.i[HI] = (G1 + 1023) << 20;
725   tempdb.i[LO] = 0;
726   temp2db.i[HI] = (G2 + 1023) << 20;
727   temp2db.i[LO] = 0;
728 
729   tempdb.d *= (kh * temp2db.d);
730 
731   if ((kl != 0.0) && ((tempdb.i[LO] & 1) != 0)) {
732     /* We must correct the rounding to the rounding to nearest ties to even */
733     if (kl > 0.0) {
734       tempdb.l++;
735     } else {
736       tempdb.l++;
737     }
738   }
739   *pow = tempdb.d;
740 
741   return 1;
742 }
743 
744 
pow_exact_rn(double x,double y,double sign,int index,double ed,double zh,double zm)745 double pow_exact_rn(double x, double y, double sign,
746 		    int index, double ed, double zh, double zm) {
747   int H, G;
748   double powh, powm, powl;
749   double pow;
750   double kh, kl;
751   double log2xh;
752 
753   pow_120(&H, &powh, &powm, &powl, &log2xh, y, index, ed, zh, zm);
754 
755   if (pow_round_and_check_rn(&pow,H,powh,powm,powl,&G,&kh,&kl))
756     return sign * pow;
757 
758   if (pow_exact_case(&pow,x,y,G,kh,kl,log2xh))
759     return sign * pow;
760 
761   //  printf("Could not decide the rounding to nearest of power.\n");
762 
763   return -5.0;
764 }
765 
766 
pow_rn(double x,double y)767 double pow_rn(double x, double y) {
768   db_number xdb, ydb, yhdb, shiftedylog2xhMult2Ldb, powdb, twodb;
769   double sign;
770   int E, index;
771   double log2FastApprox, ed, ylog2xFast, f;
772   double yh, yl, ri, logih, logim, yrih, yril, th, zh, zm;
773   double p_t_1_0h;
774   double p_t_2_0h;
775   double p_t_3_0h;
776   double p_t_4_0h;
777   double p_t_5_0h;
778   double p_t_9_0h;
779   double p_t_10_0h;
780   double p_t_11_0h, p_t_11_0m;
781   double p_t_12_0h, p_t_12_0m;
782   double log2zh, log2zm;
783   double log2yh, log2ym;
784   double log2xh, log2xm;
785   double ylog2xh, ylog2xm;
786   double rh, r;
787   int k, index1, index2, H;
788   double tbl1, tbl2h, tbl2m;
789   double ph;
790   double powh, powm;
791   double twoH1074powh, twoH1074powm;
792   double shiftedpowh, nearestintpowh, delta, deltaint;
793   double rest;
794   double lowerTerms;
795   double temp1;
796   double zhSq, zhFour, p35, p46, p36, p7;
797   double xSq;
798 
799   /* Fast rejection of special cases */
800   xdb.d = x;
801   ydb.d = y;
802 
803   if (((((xdb.i[HI] >> 20) + 1) & 0x3ff) <= 1) ||
804       ((((ydb.i[HI] >> 20) + 1) & 0x3ff) <= 1)) {
805 
806     /* Handle special cases before handling NaNs and Infinities */
807     if (x == 1.0)  return 1.0;
808     if (y == 0.0)  return 1.0;
809     if (y == 1.0)  return x;
810     if (y == 2.0)  return x * x; /* Remark: may yield uncorrect rounding on x86 for subnormal results */
811     if (y == -1.0) return 1 / x;
812 
813     if ((x == 0.0) && ((ydb.i[HI] & 0x7ff00000) != 0x7ff00000)) {
814       /* x = +/-0 and y is neither NaN nor Infinity
815 
816       We have four cases
817       (i)  y < 0:
818       (a) y odd integer: return +/- Inf and raise divide-by-zero
819       (b) y not odd integer: return + Ind and raise divide-by-zero
820       (ii) (a) y odd integer: return +/- 0
821       (b) y not odd integer: return +0
822 
823       Note that y = 0.0 has already been filtered out.
824       */
825       if (y < 0.0) {
826 	if (isOddInteger(y))
827 	  return 1/x;
828 	else
829 	  return 1/(x * x);
830       } else {
831 	if (isOddInteger(y))
832 	  return x;
833 	else
834 	  return x * x;
835       }
836     }
837 
838     /* Handle NaNs and Infinities
839 
840     Note: the cases (x,y) = (1,NaN) and (x,y) = (NaN,0) have already been handled.
841 
842     */
843     if ((ydb.i[HI] & 0x7ff00000) == 0x7ff00000) {
844       /* Here y is NaN or Inf */
845       if (((ydb.i[HI] & 0x000fffff) | ydb.i[LO]) != 0) {
846 	/* Here y is NaN, we return NaN */
847 	return y;
848       }
849       /* Here y is +/- Inf
850 
851       There are three main cases:
852       (i)   x = -1: return 1
853       (ii)  abs(x) > 1:
854       (a) y = +Inf: return +Inf
855       (b) y = -Inf: return +0
856       (iii) abs(x) < 1:
857       (a) y = +Inf: return +0
858       (b) y = -Inf: return +Inf
859 
860       Note: the case x = 1 has already been filtered out
861       */
862       if (x == -1.0)
863 	return 1.0;
864 
865       /* Here x != 1, x != -1 */
866       if ((ABS(x) > 1.0) ^ ((ydb.i[HI] & 0x80000000) == 0)) {
867 	/* abs(x) > 1 and y = -Inf or abs(x) < 1 and y = +Inf */
868 	return 0.0;
869       } else {
870 	/* abs(x) > 1 and y = +Inf or abs(x) < 1 and y = -Inf */
871 	return ABS(y);
872       }
873     }
874 
875     /* Here y is neither Inf nor NaN */
876 
877     if ((xdb.i[HI] & 0x7ff00000) == 0x7ff00000) {
878       /* Here x is NaN or Inf */
879       if (((xdb.i[HI] & 0x000fffff) | xdb.i[LO]) != 0) {
880 	/* Here x is NaN, we return NaN */
881 	return x;
882       }
883       /* Here x is +/- Inf
884 
885       There are two main cases:
886 
887       (i)  x is +Inf
888       (ii) x is -Inf
889       */
890       if ((xdb.i[HI] & 0x80000000) == 0) {
891 	/* x is +Inf
892 
893 	(a) y > 0: return +Inf
894 	(b) y < 0: return +0
895 
896 	Note: y = 0 has already been filtered out
897 	*/
898 	if (y > 0.0) {
899 	  return x;
900 	} else {
901 	  return 0.0;
902 	}
903       } else {
904 	/* x is -Inf
905 
906 	There are four cases:
907 
908 	(a) y > 0:
909 	(*)  y is an odd integer: return -Inf
910 	(**) y is not an odd integer: return +Inf
911 	(b) y < 0:
912 	(*)  y is an odd integer: return -0
913 	(**) y is not an odd integer: return +0
914 
915 	Note: y = 0 has already been filtered out
916 	*/
917 	if (y > 0.0) {
918 	  if (isOddInteger(y)) {
919 	    return x;
920 	  } else {
921 	    return -x;
922 	  }
923 	} else {
924 	  if (isOddInteger(y)) {
925 	    return -0.0;
926 	  } else {
927 	    return 0.0;
928 	  }
929 	}
930       }
931     }
932   }
933   /* Here both x and y are finite numbers */
934 
935   /* Test now whether we have the case
936 
937      (-x)^y = (-1)^y * x^y
938 
939      where x is positive
940 
941   */
942 
943   sign = 1.0;
944   if (x < 0.0) {
945     /* x is negative
946        x^y is defined only if y is integer
947     */
948     if (!isInteger(y)) {
949       /* y is not integer
950 
951          return NaN and raise invalid exception
952       */
953       return 0.0/0.0;
954     }
955     /* Here y is integer
956 
957        Remove the sign of x and put (-1)^y in sign
958 
959     */
960     x = -x; xdb.i[HI] &= 0x7fffffff;
961     if (isOddInteger(y)) {
962       sign = -sign;
963     }
964   }
965 
966   /* Here x is strictly positive and finite */
967 
968   /* Test now if abs(y) is in a range giving finite, non-trivial results x^y
969 
970      We have
971 
972      x^y = 2^(y * log2(x)) = 2^(y * log2(2^E * m)) = 2^(y * (E + log2(1 + f)))
973 
974      We have overflow iff x^y >= 2^(1024), i.e. y * (E + log2(1 + f)) >= 1024
975      We have underflow (flush to zero) iff x^y <= 2^(-1076), i.e. y * (E + log2(1 + f)) <= - 1076
976      We round to 1.0 iff abs(y * (E + log2(1 + f))) <= 2^(-54)
977 
978      We approximate log2(1 + f) as
979 
980      log2(1 + f) = c * f * alpha(f)
981 
982      where c is a constant and  0.853 < alpha(f) < 1.21
983 
984      So we have surely
985      (i)  overflow or underflow if abs(y * (E + c * f)) >= ceil(1076/0.853) = 1261
986      (ii) trivial rounding to 1.0 if abs(y * (E + c * f)) <= 2^(-55) <= 2^(-54)/1.21
987 
988   */
989 
990   /* We start the computation of the logarithm of x */
991 
992   E = 0;
993   if ((xdb.i[HI] & 0xfff00000) == 0) {
994     /* x is subnormal, make it normal */
995     xdb.d *= two52;
996     E = -52;
997   }
998 
999   E += (xdb.i[HI]>>20)-1023;             /* extract the exponent */
1000   index = (xdb.i[HI] & 0x000fffff);
1001   xdb.i[HI] =  index | 0x3ff00000;	/* do exponent = 0 */
1002   index = (index + (1<<(20-L-1))) >> (20-L);
1003 
1004   /* reduce  such that sqrt(2)/2 < xdb.d < sqrt(2) */
1005   if (index >= MAXINDEX) { /* corresponds to xdb>sqrt(2)*/
1006     xdb.i[HI] -= 0x00100000;
1007     E++;
1008   }
1009 
1010   ed = (double) E;
1011 
1012   /* Continue with the exact range reduction of the logarithm of x */
1013 
1014   yhdb.i[HI] = xdb.i[HI];
1015   yhdb.i[LO] = 0;
1016   yh = yhdb.d;
1017   yl = xdb.d - yh;
1018 
1019 
1020   /* Special handling for y = 3 or y = 4 and x on not more than 21 bits (without subnormals) */
1021   if ((yl == 0) && ((y == 3.0) || (y == 4.0)) && (E > -255)) {
1022     if (y == 3.0)
1023       return sign * (x * (x * x));
1024     else {
1025       xSq = x * x;
1026       return sign * (xSq * xSq);
1027     }
1028   }
1029 
1030   index = index & INDEXMASK;
1031 
1032   /* Now we have
1033 
1034      log2(x) = E + log2(xdb.d)
1035 
1036      with sqrt(2)/2 < xdb.d < sqrt(2)
1037 
1038      Compute now f such that 1 + f = xdb.d and
1039      approximate
1040 
1041      log2(1 + f) = logFastCoeff * f
1042 
1043   */
1044 
1045   f = xdb.d - 1.0;
1046   log2FastApprox = ed + logFastCoeff * f;
1047   ylog2xFast = y * log2FastApprox;
1048 
1049   if (ABS(ylog2xFast) >= 1261.0) {
1050     if (ylog2xFast > 0.0) {
1051       /* y * log2(x) is positive, i.e. we overflow */
1052       return (sign * LARGEST) * LARGEST;
1053     } else {
1054       /* y * log2(x) is negative, i.e. we underflow */
1055       return (sign * SMALLEST) * SMALLEST;
1056     }
1057   }
1058 
1059   if (ABS(ylog2xFast) <= twoM55) {
1060     /* abs(y * log2(x)) <= 2^(-55),
1061        we return 1.0 and set the inexact flag
1062     */
1063     return sign * (1.0 + SMALLEST);
1064   }
1065 
1066   /* Now, we may still overflow or underflow but on some inputs only */
1067 
1068   /*
1069      Read tables:
1070      Read one float for ri
1071      Read the first two doubles for -log(r_i) (out of three)
1072 
1073      Organization of the table:
1074 
1075      one struct entry per index, the struct entry containing
1076      r, logih, logim and logil in this order
1077   */
1078 
1079   ri = argredtable[index].ri;
1080   /*
1081      Actually we don't need the logarithm entries now
1082      Move the following two lines to the eventual reconstruction
1083      As long as we don't have any if in the following code, we can overlap
1084      memory access with calculations
1085   */
1086   logih = argredtable[index].logih;
1087   logim = argredtable[index].logim;
1088 
1089   /* Do range reduction:
1090 
1091      zh + zm = y * ri - 1.0 correctly
1092 
1093      Correctness is assured by use of two part yh + yl and 21 bit ri and Add12
1094 
1095      Discard zl for higher monome degrees
1096   */
1097 
1098   yrih = yh * ri;
1099   yril = yl * ri;
1100   th = yrih - 1.0;
1101   Add12Cond(zh, zm, th, yril);
1102 
1103   /* Polynomial approximation of log2(1 + (zh + zm)) */
1104 
1105   zhSq = zh * zh;
1106 
1107   p35 = log2_70_p_coeff_3h + zhSq * log2_70_p_coeff_5h;
1108   p46 = log2_70_p_coeff_4h + zhSq * log2_70_p_coeff_6h;
1109   zhFour = zhSq * zhSq;
1110 
1111   p36 = p35 + zh * p46;
1112   p7 = log2_70_p_coeff_7h * zhFour;
1113 
1114   p_t_9_0h = p36 + p7;
1115 
1116   p_t_10_0h = p_t_9_0h * zh;
1117   Add212(&p_t_11_0h,&p_t_11_0m,log2_70_p_coeff_2h,log2_70_p_coeff_2m,p_t_10_0h);
1118   MulAdd22(&p_t_12_0h,&p_t_12_0m,log2_70_p_coeff_1h,log2_70_p_coeff_1m,zh,zm,p_t_11_0h,p_t_11_0m);
1119   Mul22(&log2zh,&log2zm,p_t_12_0h,p_t_12_0m,zh,zm);
1120 
1121   /* Reconstruction */
1122 
1123   Add122(&log2yh,&log2ym,ed,logih,logim);
1124   Add22(&log2xh,&log2xm,log2yh,log2ym,log2zh,log2zm);
1125 
1126   /* Produce ylog2xh + ylog2xm approximating y * log2(x)
1127 
1128      Note: neither y nor y * log2(x) may not be subnormal and non-zero
1129      because of the fast overflow/underflow/trivial rounding test.
1130 
1131   */
1132 
1133   Mul12(&ylog2xh,&temp1,y,log2xh);
1134   ylog2xm = temp1 + y * log2xm;
1135 
1136   /* Approximate now 2^(y * log2(x))
1137 
1138      We have
1139 
1140      2^(ylog2xh + ylog2xm)
1141           = 2^ylog2xh * 2^ylog2xm
1142           = 2^H * 2^(ylog2xh - H) * 2^ylog2xm
1143 	  = 2^H * 2^(i2/2^8) * 2^(i1/2^13) * 2^(ylog2xh - H - i2/2^8 - i1/2^13) * 2^ylog2xm
1144 	  = 2^H * tbl2[i2] * (1 + tbl1[i1]) * (1 + p(rh)) + delta
1145 	  where rh \approx ylog2xh - H - i1/2^7 - i2/2^13 + ylog2xm
1146   */
1147 
1148   shiftedylog2xhMult2Ldb.d = shiftConstTwoM13 + ylog2xh;
1149   r = ylog2xh - (shiftedylog2xhMult2Ldb.d - shiftConstTwoM13);
1150   k = shiftedylog2xhMult2Ldb.i[LO];
1151   H = k >> 13;
1152   index1 = k & INDEXMASK1;
1153   index2 = (k & INDEXMASK2) >> 5;
1154 
1155   /* Renormalize reduced argument
1156 
1157      This operation produces an error
1158 
1159      2^(z * (1 + eps)) = 2^z * (1 + eps')
1160 
1161      where |eps| <= 2^(-53) and |eps'| <= 2^(-65)
1162 
1163   */
1164   rh = r + ylog2xm;
1165 
1166   /* Table reads */
1167   tbl1 = twoPowerIndex1[index1].hiM1;
1168   tbl2h = twoPowerIndex2[index2].hi;
1169   tbl2m = twoPowerIndex2[index2].mi;
1170 
1171   /* Polynomial approximation */
1172   p_t_1_0h = exp2_p_coeff_3h;
1173   p_t_2_0h = p_t_1_0h * rh;
1174   p_t_3_0h = exp2_p_coeff_2h + p_t_2_0h;
1175   p_t_4_0h = p_t_3_0h * rh;
1176   p_t_5_0h = exp2_p_coeff_1h + p_t_4_0h;
1177   ph = p_t_5_0h * rh;
1178 
1179   /* Reconstruction */
1180 
1181   lowerTerms = tbl1 + (ph + tbl1 * ph);
1182 
1183   Add212(&powh,&powm,tbl2h,tbl2m,tbl2h * lowerTerms);
1184 
1185   /* Here we have
1186 
1187      2^H * (powh + powm) = x^y * (1 + eps)
1188 
1189      with ||eps|| <= 2^(-60)
1190 
1191      Check first if we overflow or underflow.
1192 
1193      Check then if we can perform normal rounding or
1194      if we must perhaps perform subnormal rounding.
1195      We are sure that
1196 
1197      0.5 <= powh + powm <= 4.0
1198 
1199   */
1200 
1201   if (H >= 1025) {
1202     /* Here, we surely overflow
1203        Return sign * inf
1204     */
1205     return (sign * LARGEST) * LARGEST;
1206   }
1207 
1208   if (H <= -1077) {
1209     /* Here, we surely underflow
1210        Return sign * 0 and set inexact flag
1211     */
1212     return (sign * SMALLEST) * SMALLEST;
1213   }
1214 
1215   if (H > -1022) {
1216     /* We are sure to be able to perform normal rounding
1217        We must still be aware of the fact that the final
1218        result may overflow
1219     */
1220     if(powh == (powh + (powm * RNROUNDCST))) {
1221       powdb.d = powh;
1222       if (H < 1023) {
1223 	powdb.i[HI] += H << 20;
1224 	return sign * powdb.d;
1225       } else {
1226 	/* May overflow: multiply by 2^H in two steps */
1227 	powdb.i[HI] += (H - 3) << 20;
1228 	return sign * powdb.d * 8.0;
1229       }
1230     }
1231   } else {
1232     /* We must perhaps perform subnormal rounding
1233 
1234        We start by renormalizing the double-double
1235        powh + powm such that
1236 
1237        1 <= powh + powm < 2
1238 
1239     */
1240 
1241     if ((powh < 1.0) || ((powh == 1.0) && (powm < 0.0))) {
1242       powh *= 2.0;
1243       powm *= 2.0;
1244       H--;
1245     }
1246     if ((powh > 2.0) || ((powh == 2.0) && (powm >= 0.0))) {
1247       powh *= 0.5;
1248       powm *= 0.5;
1249       H++;
1250     }
1251 
1252     /* Here we know that 1 <= powh + powm < 2
1253 
1254        2^H * (powh + powm) is subnormal or equal to the least normal
1255        iff H <= -1023
1256 
1257     */
1258     if (H > -1023) {
1259       /* We have nevertheless normal rounding */
1260       if(powh == (powh + (powm * RNROUNDCST))) {
1261 	powdb.d = powh;
1262 	powdb.i[HI] += H << 20;
1263 	return sign * powdb.d;
1264       }
1265       /* Here we have normal rounding but could not decide the rounding */
1266     } else {
1267       /* We have surely denormal rounding
1268 
1269          This means
1270 
1271 	 round(2^H * (powh + powm)) = 2^(-1074) * nearestint(2^(+1074) * 2^H * (powh + powm))
1272 
1273 	 We compute the rounding (and test its TMD) of
1274 
1275 	 nearestint(2^(H + 1074) * (powh + powm))
1276 
1277 	 by computing first
1278 
1279 	 nearestint(2^(H + 1074) * powh)
1280 
1281 	 and then the absolute error
1282 
1283 	 delta = 2^(H + 1074) * powh - nearestint(2^(H + 1074) * powh) + 2^(H + 1074) * powl
1284 
1285 	 We can then refute or correct the rounding by tests on delta.
1286 
1287 	 We know that 2^(H + 1074) <= 2^(51) and powh <= 2.0. Thus 2^(H + 1074) * powh <= 2^(52)
1288 	 We can hence compute nearestint using the shift trick.
1289 
1290 	 We start by constructing 2^(H + 1074)
1291       */
1292 
1293       twodb.i[HI] = (H + (1074 + 1023)) << 20;
1294       twodb.i[LO] = 0;
1295 
1296       twoH1074powh = twodb.d * powh;
1297       twoH1074powm = twodb.d * powm;
1298 
1299       shiftedpowh = two52 + twoH1074powh;
1300       nearestintpowh = shiftedpowh - two52;
1301       deltaint = twoH1074powh - nearestintpowh;
1302 
1303       /* The next addition produces a very small
1304 	 rounding error we must account for in the rounding test.
1305       */
1306       delta = deltaint + twoH1074powm;
1307 
1308       /* Correct now the rounding */
1309 
1310       if (ABS(delta) > 0.5) {
1311 	/* ABS(delta) > 0.5 */
1312 	if (delta > 0.0) {
1313 	  /* nearestintpowh is too small by 1.0 */
1314 	  nearestintpowh += 1.0; /* exact */
1315 	  delta -= 1.0; /* exact */
1316 	} else {
1317 	  /* nearestintpowh is too great by 1.0 */
1318 	  nearestintpowh -= 1.0; /* exact */
1319 	  delta += 1.0; /* exact */
1320 	}
1321       }
1322 
1323       /* Perform now the rounding test
1324 
1325          This test filters out also half-ulp exact cases.
1326 	 Exact cases are not filtered out because C99 allows
1327 	 setting the underflow and/or inexact flags also
1328 	 for exact cases.
1329 
1330          Compute first
1331 
1332 	 rest = abs(0.5 - abs(delta)) * (1 + eps)
1333 
1334 	 where abs(eps) <= 2^(-53)
1335 
1336 	 We cannot decide the rounding if
1337 
1338 	 rest <= 2^(H + 1074) * epsmax * 2
1339 
1340 	 where epsmax is the bound for the approximating polynomials,
1341 
1342 	 here epsmax = 2^(-60)
1343 
1344 	 as follows by the following drawing
1345 
1346 	 2^52                2^(H + 1074)     2^0            2^(H + 1074) * epsmax
1347 	 +-------------------+----------------+---+------... |
1348 	 | 0 ...           0 | nearestintpowh | 1 |          rest
1349          +-------------------+----------------+---+------... |
1350                              | <---- 60 bits approx. ------->|
1351 
1352 	 This means that we cannot decide the rounding if
1353 
1354 	 rest * 1/epsmax * 0.5 <= 2^(H + 1074)
1355 
1356 	 We store 1/epsmax * 0.5 on SUBNORMROUNDCST
1357       */
1358 
1359       rest = ABS(0.5 - ABS(delta));
1360 
1361       if (rest * SUBNORMROUNDCST > twodb.d) {
1362 	/* Here we could decide the rounding
1363 
1364 	   The result to return is
1365 
1366 	   sign * 2^(-1074) * nearestintpowh
1367 
1368 	   which is either subnormal or equal to the
1369 	   least normal.
1370 
1371 	   Since we know that the multiplication
1372 	   does not imply any rounding error, we can
1373 	   perform it using the shift trick and a mask.
1374 
1375 	   This trick yields to a 200 cycle speed-up on Athlon.
1376 
1377 	*/
1378 
1379 	twodb.d = nearestintpowh + two52;
1380 	twodb.i[HI] = (twodb.i[HI] + (1 << 20)) & 0x001fffff;
1381 
1382 	return sign * twodb.d;
1383       }
1384     }
1385   }
1386 
1387   /* If we are here, we could not decide the rounding or are half-ulp
1388 
1389      Launch now exacter phases and exact and half-ulp testing
1390 
1391   */
1392 
1393   return pow_exact_rn(x,y,sign,index,ed,zh,zm);
1394 
1395 }
1396 
1397