crlibm/crlibm-1.0beta4/pow.c

#include <stdio.h>
#include "crlibm.h"
#include "crlibm_private.h"
#include "triple-double.h"
#include "pow.h"


/* Some macros for specific operations in power */

#define USE_BUILTINS 0

/* decompose

   Decomposes a positive double precision
   variable x (normal or subnormal) such that

   2^(resE) * resm = x

   where resm = 2 * k + 1 for integer k

   resE is an integer variable pointer
   resm is a double precision variable pointer
   x is a double precision variable.

*/

#if USE_BUILTINS
#define decompose(resm,resE,x)                                             \
{                                                                          \
   int __decompose_ex, __decompose_d;                                      \
   int64_t __decompose_temp;                                               \
   db_number __decompose_xdb;                                              \
                                                                           \
   __decompose_xdb.d = (x);                                                \
   __decompose_ex = 0;                                                     \
   if (!(__decompose_xdb.i[HI] & 0xfff00000)) {                            \
     __decompose_xdb.d = (x) * 0.18014398509481984e17;                     \
     __decompose_ex = -54;                                                 \
   }                                                                       \
   __decompose_ex += (__decompose_xdb.i[HI] >> 20) - 1023;                 \
   __decompose_xdb.i[HI] &= 0x000fffff;                                    \
   __decompose_temp = __decompose_xdb.l | 0x0010000000000000llu;           \
   __decompose_d = __builtin_ctz(__decompose_temp);                        \
   __decompose_xdb.i[HI] |= (1075 - __decompose_d) << 20;                  \
   *(resE) = __decompose_d - 52 + __decompose_ex;                          \
   *(resm) = __decompose_xdb.d;                                            \
}
#else
#define decompose(resm,resE,x)                                             \
{                                                                          \
   int __decompose_ex, __decompose_d;                                      \
   int64_t __decompose_temp;                                               \
   db_number __decompose_xdb, __decompose_tempdb;                          \
                                                                           \
   __decompose_xdb.d = (x);                                                \
   __decompose_ex = 0;                                                     \
   if (!(__decompose_xdb.i[HI] & 0xfff00000)) {                            \
     __decompose_xdb.d = (x) * 0.18014398509481984e17;                     \
     __decompose_ex = -54;                                                 \
   }                                                                       \
   __decompose_ex += (__decompose_xdb.i[HI] >> 20) - 1023;                 \
   __decompose_xdb.i[HI] &= 0x000fffff;                                    \
   __decompose_temp = __decompose_xdb.l | 0x0010000000000000llu;           \
   __decompose_temp = (~__decompose_temp & (__decompose_temp - 1)) + 1;    \
   __decompose_tempdb.d = (double) __decompose_temp;                       \
   __decompose_d = (__decompose_tempdb.i[HI] >> 20) - 1023;                \
   __decompose_xdb.i[HI] |= (1075 - __decompose_d) << 20;                  \
   *(resE) = __decompose_d - 52 + __decompose_ex;                          \
   *(resm) = __decompose_xdb.d;                                            \
}
#endif


/* isOddInteger

   Determines if a given double precision number x is an odd integer

*/

#define isOddInteger(x) (ABS(((ABS(x) + 0.9007199254740992e16) - 0.9007199254740992e16) - ABS(x)) == 1.0)

/* isInteger

   Determines if a given double precision number x is integer

*/

#define isInteger(x) ((ABS(x) >= 0.4503599627370496e16) || (((ABS(x) + 0.4503599627370496e16) - 0.4503599627370496e16) == ABS(x)))


/* log2_130

   Approximates

   resh + resm + resl = (ed + log2(1 + (xh + xm)) + log2(r[index])) * (1 + eps)

   where ||eps|| <= 2^(-130)


*/
static inline void log2_130(double *resh, double *resm, double *resl,
	                    int index, double ed, double xh, double xm) {

  double p_t_1_0h;
  double p_t_2_0h;
  double p_t_3_0h;
  double p_t_4_0h;
  double p_t_5_0h;
  double p_t_6_0h;
  double p_t_7_0h;
  double p_t_8_0h;
  double p_t_9_0h, p_t_9_0m;
  double p_t_10_0h, p_t_10_0m;
  double p_t_11_0h, p_t_11_0m;
  double p_t_12_0h, p_t_12_0m;
  double p_t_13_0h, p_t_13_0m;
  double p_t_14_0h, p_t_14_0m;
  double p_t_15_0h, p_t_15_0m;
  double p_t_16_0h, p_t_16_0m, p_t_16_0l;
  double p_t_17_0h, p_t_17_0m, p_t_17_0l;
  double p_t_18_0h, p_t_18_0m, p_t_18_0l;
  double p_t_19_0h, p_t_19_0m, p_t_19_0l;
  double p_t_20_0h, p_t_20_0m, p_t_20_0l;
  double p_t_21_0h, p_t_21_0m, p_t_21_0l;
  double p_t_21_1h, p_t_21_1m, p_t_21_1l;
  double p_t_22_0h, p_t_22_0m, p_t_22_0l;
  double p_t_23_0h, p_t_23_0m, p_t_23_0l;
  double p_resh, p_resm, p_resl;
  double log2yh, log2ym, log2yl;
  double log2xh, log2xm, log2xl;
  double logih, logim, logil;


  p_t_1_0h = log2_130_p_coeff_13h;
  p_t_2_0h = p_t_1_0h * xh;
  p_t_3_0h = log2_130_p_coeff_12h + p_t_2_0h;
  p_t_4_0h = p_t_3_0h * xh;
  p_t_5_0h = log2_130_p_coeff_11h + p_t_4_0h;
  p_t_6_0h = p_t_5_0h * xh;
  p_t_7_0h = log2_130_p_coeff_10h + p_t_6_0h;
  p_t_8_0h = p_t_7_0h * xh;
  Add12(p_t_9_0h,p_t_9_0m,log2_130_p_coeff_9h,p_t_8_0h);
  Mul22(&p_t_10_0h,&p_t_10_0m,p_t_9_0h,p_t_9_0m,xh,xm);
  Add122(&p_t_11_0h,&p_t_11_0m,log2_130_p_coeff_8h,p_t_10_0h,p_t_10_0m);
  MulAdd22(&p_t_12_0h,&p_t_12_0m,log2_130_p_coeff_7h,log2_130_p_coeff_7m,xh,xm,p_t_11_0h,p_t_11_0m);
  MulAdd22(&p_t_13_0h,&p_t_13_0m,log2_130_p_coeff_6h,log2_130_p_coeff_6m,xh,xm,p_t_12_0h,p_t_12_0m);
  MulAdd22(&p_t_14_0h,&p_t_14_0m,log2_130_p_coeff_5h,log2_130_p_coeff_5m,xh,xm,p_t_13_0h,p_t_13_0m);
  Mul22(&p_t_15_0h,&p_t_15_0m,p_t_14_0h,p_t_14_0m,xh,xm);
  Add23(&p_t_16_0h,&p_t_16_0m,&p_t_16_0l,log2_130_p_coeff_4h,log2_130_p_coeff_4m,p_t_15_0h,p_t_15_0m);
  Mul233(&p_t_17_0h,&p_t_17_0m,&p_t_17_0l,xh,xm,p_t_16_0h,p_t_16_0m,p_t_16_0l);
  Add233(&p_t_18_0h,&p_t_18_0m,&p_t_18_0l,log2_130_p_coeff_3h,log2_130_p_coeff_3m,p_t_17_0h,p_t_17_0m,p_t_17_0l);
  Mul233(&p_t_19_0h,&p_t_19_0m,&p_t_19_0l,xh,xm,p_t_18_0h,p_t_18_0m,p_t_18_0l);
  Add33(&p_t_20_0h,&p_t_20_0m,&p_t_20_0l,log2_130_p_coeff_2h,log2_130_p_coeff_2m,log2_130_p_coeff_2l,p_t_19_0h,p_t_19_0m,p_t_19_0l);
  Mul233(&p_t_21_0h,&p_t_21_0m,&p_t_21_0l,xh,xm,p_t_20_0h,p_t_20_0m,p_t_20_0l);
  Renormalize3(&p_t_21_1h,&p_t_21_1m,&p_t_21_1l,p_t_21_0h,p_t_21_0m,p_t_21_0l);
  Add33(&p_t_22_0h,&p_t_22_0m,&p_t_22_0l,log2_130_p_coeff_1h,log2_130_p_coeff_1m,log2_130_p_coeff_1l,p_t_21_1h,p_t_21_1m,p_t_21_1l);
  Mul233(&p_t_23_0h,&p_t_23_0m,&p_t_23_0l,xh,xm,p_t_22_0h,p_t_22_0m,p_t_22_0l);
  p_resh = p_t_23_0h;
  p_resm = p_t_23_0m;
  p_resl = p_t_23_0l;

  logih = argredtable[index].logih;
  logim = argredtable[index].logim;
  logil = argredtable[index].logil;

  Add33(&log2yh,&log2ym,&log2yl,logih,logim,logil,p_resh,p_resm,p_resl);
  Add133(&log2xh,&log2xm,&log2xl,ed,log2yh,log2ym,log2yl);

  Renormalize3(resh,resm,resl,log2xh,log2xm,log2xl);

}

/* exp2_120

   Approximates

   2^H * (resh + resm + resl) = 2^(xh + xm + xl) * (1 + eps)

   where ||eps|| <= 2^(-119.5)

*/
static inline void exp2_120(int *H, double *resh, double *resm, double *resl,
			    double xh, double xm, double xl) {
  double xhMult2L, rhMult2L, r;
  int k, index1, index2;
  db_number shiftedxhMult2Ldb;
  double rh, rm, rl;
  double t1h, t1m, t1l, t2, t3;
  double p_t_1_0h;
  double p_t_2_0h;
  double p_t_3_0h;
  double p_t_4_0h;
  double p_t_5_0h, p_t_5_0m;
  double p_t_6_0h, p_t_6_0m;
  double p_t_7_0h, p_t_7_0m;
  double p_t_8_0h, p_t_8_0m;
  double p_t_9_0h, p_t_9_0m, p_t_9_0l;
  double p_t_10_0h, p_t_10_0m, p_t_10_0l;
  double p_t_11_0h, p_t_11_0m, p_t_11_0l;
  double p_resh, p_resm, p_resl;
  double tbl1h, tbl1m, tbl1l, tbl2h, tbl2m, tbl2l;
  double tablesh, tablesm, tablesl;
  double exp2h, exp2m, exp2l;

  /* Argument reduction

     Produce exactly

     2^H * 2^(i1/2^8) * 2^(i2/2^13) * 2^(rh + rm + rl)

  */

  xhMult2L = xh * two13;
  shiftedxhMult2Ldb.d = shiftConst + xhMult2L;
  rhMult2L = xhMult2L - (shiftedxhMult2Ldb.d - shiftConst);
  r = rhMult2L * twoM13;
  k = shiftedxhMult2Ldb.i[LO];
  *H = k >> 13;
  index1 = k & INDEXMASK1;
  index2 = (k & INDEXMASK2) >> 5;
  Add12Cond(t1h, t2, r, xm);
  Add12Cond(t1m, t1l, t2, xl);
  Add12(rh, t3, t1h, t1m);
  Add12(rm, rl, t3, t1l);

  /* Polynomial approximation of 2^(rh + rm + rl) */

  p_t_1_0h = exp2_120_p_coeff_6h;
  p_t_2_0h = p_t_1_0h * rh;
  p_t_3_0h = exp2_120_p_coeff_5h + p_t_2_0h;
  p_t_4_0h = p_t_3_0h * rh;
  Add12(p_t_5_0h,p_t_5_0m,exp2_120_p_coeff_4h,p_t_4_0h);
  MulAdd22(&p_t_6_0h,&p_t_6_0m,exp2_120_p_coeff_3h,exp2_120_p_coeff_3m,rh,rm,p_t_5_0h,p_t_5_0m);
  MulAdd22(&p_t_7_0h,&p_t_7_0m,exp2_120_p_coeff_2h,exp2_120_p_coeff_2m,rh,rm,p_t_6_0h,p_t_6_0m);
  Mul22(&p_t_8_0h,&p_t_8_0m,p_t_7_0h,p_t_7_0m,rh,rm);
  Add23(&p_t_9_0h,&p_t_9_0m,&p_t_9_0l,exp2_120_p_coeff_1h,exp2_120_p_coeff_1m,p_t_8_0h,p_t_8_0m);
  Mul33(&p_t_10_0h,&p_t_10_0m,&p_t_10_0l,rh,rm,rl,p_t_9_0h,p_t_9_0m,p_t_9_0l);
  Add133(&p_t_11_0h,&p_t_11_0m,&p_t_11_0l,exp2_120_p_coeff_0h,p_t_10_0h,p_t_10_0m,p_t_10_0l);
  Renormalize3(&p_resh,&p_resm,&p_resl,p_t_11_0h,p_t_11_0m,p_t_11_0l);

  /* Table access */

  tbl1h = twoPowerIndex1[index1].hi;
  tbl1m = twoPowerIndex1[index1].mi;
  tbl1l = twoPowerIndex1[index1].lo;
  tbl2h = twoPowerIndex2[index2].hi;
  tbl2m = twoPowerIndex2[index2].mi;
  tbl2l = twoPowerIndex2[index2].lo;

  /* Reconstruction */

  Mul33(&tablesh,&tablesm,&tablesl,tbl1h,tbl1m,tbl1l,tbl2h,tbl2m,tbl2l);
  Mul33(&exp2h,&exp2m,&exp2l,tablesh,tablesm,tablesl,p_resh,p_resm,p_resl);

  Renormalize3(resh,resm,resl,exp2h,exp2m,exp2l);

}


/* pow_120

   Approximates

   2^H * (resh + resm + resl) = 2^(y * (ed + log2(1 + (zh + zm)) + log2(r[index]))) * (1 + eps)

   where ||eps|| <= 2^(-118.5) if -1075 <= y * (ed + log2(1 + (zh + zm)) + log2(r[index])) <= 1024

   Approximates further (ed + log2(1 + (zh + zm)) + log2(r[index))) by log2xh
   where

   log2xh = (ed + log2(1 + (zh + zm)) + log2(r[index))) * (1 + eps2)

   where |eps2| <= 2^(-52)
   and log2xh is exact if 2^ed * ((1 + (zh + zm)) * r[index]) is an integer power of 2.

*/
void pow_120(int *H, double *resh, double *resm, double *resl, double *log2xh,
	     double y, int index, double ed, double zh, double zm) {
  double ylog2xh, ylog2xm, ylog2xl;
  double log2xm, log2xl;

  /* Compute log2(x) */
  log2_130(log2xh,&log2xm,&log2xl,index,ed,zh,zm);

  /* Compute y * log2(x) */
  Mul133(&ylog2xh,&ylog2xm,&ylog2xl,y,*log2xh,log2xm,log2xl);

  /* Compute 2^(y * log2(x)) */
  exp2_120(H,resh,resm,resl,ylog2xh,ylog2xm,ylog2xl);

}

/* pow_round_and_check_rn

   Checks whether

   2^H * (powh + powm + powl)

   which is an approximate to x^y with a relative error or less than 2^(-118.5),
   can be rounded correctly to double precision in round-to-nearest-ties-to-even
   mode or whether the Table Maker's Dilemma occurs or the case is exact.

   Returns 1 if rounding is possible and affects pow with the rounding
   Returns 0 if rounding is not possible or if the case is exact

   If the returned value is 0, it affects

   G, kh, kl

   such that

   2^G * (kh + kl)

   is an approximate to x^y with an relative error of 2^(-117)

   and such that rounding

   2^G * kh

   to double precision is an exact operation.

*/

static inline int pow_round_and_check_rn(double *pow,
					 int H, double powh, double powm, double powl,
					 int *G, double *kh, double *kl) {
  double th, tm, tl;
  int K, K1, K2;
  db_number twodb, two2db;
  double twoH1074powh, twoH1074powm, shiftedpowh, delta;
  double scaledth;
  double t1m, t1l;

  /* We start by bringing H and powh + powm + powl
     to a form such that

     1 <= powh + powm + powl < 2
  */
  if ((powh < 1.0) ||
      ((powh == 1.0) && (powm < 0.0))) {
    powh *= 2.0;
    powm *= 2.0;
    powl *= 2.0;
    H--;
  }
  if ((powh > 2.0) ||
      ((powh == 2.0) && (powm >= 0.0))) {
    powh *= 0.5;
    powm *= 0.5;
    powl *= 0.5;
    H++;
  }

  /* Check now whether we have normal or subnormal rounding

     The rounding is subnormal iff H <= -1023

     In both cases, we bring H, powh + powm + powl to a form

     2^K * (th + tm + tl) = 2^H * (powh + powm + powm) * (1 + eps)

     where

     (i)   |eps| <= 2^(-118 - 53) = 2^(-171)
     (ii)  2^(-K) * ulp(2^K * th) = 1
     (iii) the rounding of 2^K * th to double precision is exact
           (or produces +inf)

  */
  if (H <= -1023) {
    /* Subnormal rounding

       In this case, we can neglect powl
       because the rounding bit of the RN rounding
       is in powh

    */
    twodb.i[HI] = (H + (1074 + 1023)) << 20;
    twodb.i[LO] = 0;

    twoH1074powh = twodb.d * powh;
    twoH1074powm = twodb.d * powm;

    shiftedpowh = two52 + twoH1074powh;
    th = shiftedpowh - two52;
    delta = twoH1074powh - th;

    Add12Cond(tm,tl,delta,twoH1074powm);

    K = -1074;
  } else {
    /* Normal rounding

       In this case, we have exactly:

       2^K * (th + tm + tl) = 2^H * (powh + powm + powm)

    */
    th = powh * two52;
    tm = powm * two52;
    tl = powl * two52;
    K = H - 52;
  }

  /* Return results for exactness case test */
  *G = K;
  *kh = th;
  *kl = tm;

  /* Compute now

     delta = ABS(0.5 - ABS(tm + tl)) * (1 + eps)

     where |eps| <= 2^(-53)

     The addition 0.5 + (-ABS(tm)) is exact by Sterbenz' lemma

  */
  if (tm > 0.0) {
    t1m = - tm;
    t1l = - tl;
  } else {
    t1m = tm;
    t1l = tl;
  }
  delta = ABS((0.5 + t1m) - t1l);

  /* We cannot decide the rounding or have an exact case
     iff

     delta <= 2^(-118) * th

     We can see this in the following drawing:

     result = round(

            +-----------------+------+----------...----+------------
     2^K *  | th              | +/-1 | 0               | delta        )
            +-----------------+------+----------...----+------------
            | <------------- 118 bits -------------> |

  */

  scaledth = th * PRECISEROUNDCST;

  if (delta > scaledth) {
    /* We can round exactly to nearest

       We must correct th to the rounding of
       th + tm + tl iff tm is greater in
       absolute value than 0.5
       or if tm is equal to 0.5 in absolute value and
       the sign of tm and tl
       is equal:

                |                 |
       ...------|--------|--------|------...
                |------->-->      |
                ^   tm    tl      ^
                th                round(th + tm + tl)

       If tm is negative, we must correct th by decreasing it
       otherwise we must correct th by increasing it.

    */
    if (ABS(tm) >= 0.5) {
      if (ABS(tm) == 0.5) {
	if (tm < 0.0) {
	  if (tl < 0.0) {
	    /* The same sign of tm and tl, tm is negative */
	    th -= 1.0;
	  }
	} else {
	  if (tl > 0.0) {
	    /* The same sign of tm and tl, tm is positive */
	    th += 1.0;
	  }
	}
      } else {
	/* tm > 0.5 */
	if (tm < 0.0) {
	  th -= 1.0;
	} else {
	  th += 1.0;
	}
      }
    }

    /* Perform now the multiplication 2^K * th

       Note that we must be able to produce

       (i)   0 if we have total underflow
       (ii)  a subnormal result
       (iii) a normal result
       (iv)  +inf if we have overflow in a TMD case

       We produce K1 + K2 = K with K1 = floor(K/2)
       and multiply in two steps.

    */
    K1 = K >> 1;
    K2 = K - K1;

    twodb.i[HI] = (K1 + 1023) << 20;
    twodb.i[LO] = 0;
    two2db.i[HI] = (K2 + 1023) << 20;
    two2db.i[LO] = 0;

    *pow = two2db.d * (twodb.d * th);

    return 1;
  }
  /* Otherwise we return 0 because we cannot round */

  return 0;
}


/* pow_exact_case

   Checks whether x^y is an exact or half-ulp case for rounding
   into double precision.

   This means the procedure checks whether x^y can be written on
   not more than 54 bits.

   The procedure uses 2^G * (kh + kl) supposing the following properties:

   * kh + kl holds on at most 54 bits
   * 2^G * kh is representable in double precision (even in the subnormal range)
   * 2^G * (kh + kl) approximates x^y with an error eps less than 2^(-117), i.e.

     2^G * (kh + kl) = x^y * (1 + eps) where |eps| <= 2^(-117)

   * log2xh approximates log2(x) with an accuracy equivalent to at least 52 bits
     In particular log2xh is exact if x is an integer power of 2

   Returns 1 if the case is exact or half-ulp
   Returns 0 otherwise

   If returning 1, affects pow with 2^G * (kh + kl) rounded to double precision

*/
int pow_exact_case(double *pow,
		   double x, double y,
		   int G, double kh, double kl, double log2xh) {
  db_number xdb, ydb, tempdb, shiftedEydb, temp2db;
  int E, F, G1, G2;
  double m, n, yh, yl, Eyh, Eyl, ed, Ey;
  double delta;
  double nearestEy;
  double value;

  /* For testing whether x^y is an exact or half-ulp case,
     we have two main cases:

     (i)  x is an integer power of 2
     (ii) x is not an integer power of 2

     We start by testing if x is an integer power of 2.

  */

  xdb.d = x;
  if ((xdb.i[HI] & 0xfff00000) == 0) {
    /* x is subnormal, scale by 2^52 */
    xdb.d *= 0.4503599627370496e16;
  }
  if (((xdb.i[HI] & 0x000fffff) | xdb.i[LO]) == 0) {
    /* x is an integer power of 2

       x^y is exact or midpoint iff log2(x) * y is integer

       Since we know that x is an integer power of 2,
       log2xh is equal to this integer. Since the exponent
       of the double precision number is bounded by the
       exponent range, we know that log2xh is integer and
       bounded by 2^11. It is therefore possible to
       split y into two parts of 21 and 32 bits, to perform
       the multiplication componentwise. Since the result
       is bounded by 2^11, it suffices to compute the
       nearest integer to the higher word product, and to
       compare the rounding difference to the low word product.
       This splitting is faster than the usual Dekker splitting.

    */
    ydb.d = y;
    ydb.i[LO] = 0;
    yh = ydb.d;
    yl = y - yh;
    Eyh = log2xh * yh;
    Eyl = log2xh * yl;
    delta = ((0.6755399441055744e16 + Eyh) - 0.6755399441055744e16) - Eyh; /* addition rounds, subtractions exact */

    if (delta != Eyl) return 0;

  } else {

    /* x is not an integer power of 2

       We have clearly an inexact case if y is negative
       or if y is greater than 35

    */

    if ((y < 0.0) || (y > 35.0)) return 0;

    /* Decompose now y into

       y = 2^F * n

       Checking F and n, we can then already decide
       some cases using the fact that the worst-case
       accuracy for x^n, n in [|0;35|], is less (in bits)
       than the accuracy of the approximation of x^y we have
       already in 2^G * (kh + kl).

    */
    decompose(&n,&F,y);

    if ((n > 35.0) || (F < -5)) return 0;

    if (F < 0) {
      /* Here, -5 <= F <= -1, 3 <= n <= 35, n an odd integer

         We decompose x into 2^E * m where m is an odd integer

	 Let H, sh and sl such that 2^H * (sh + sl) = 2^G * (kh + kl) and
	 sh + sl is an odd integer.

	 If we have E * 2^F * n = H, we can apply the worst case argument
	 because we know the worst case for

	 m^(2^F * n) with m odd integer, -5 <= F <= -1, 3 <= n <= 35

	 when rounding to 53 bits in both rounding modes.

	 E is bounded in magnitude by 2^11. 2^F * n is equal to y by
	 construction. Since n <= 35, y contains at most 6 significant bits.
	 The arithmetical multiplication E * y is therefore exact and less
	 than or equal to 2^17.

	 We check first whether E * y is an integer. If this is the case,
	 we compute sh + sl = 2^(G - E * y) * (kh + kl). Finally,
	 we check whether sh + sl is an odd integer.

      */

      decompose(&m,&E,x);

      ed = (double) E;

      Ey = ed * y; /* Exact */


      /* Check whether Ey is an integer using the simple shift technique
	 The addition rounds, the substraction is exact by Sterbenz' lemma.
	 If Ey is an integer, the low order word of shiftedEydb is equal to this
	 integer.
      */
      shiftedEydb.d = 0.6755399441055744e16 + Ey;
      nearestEy = shiftedEydb.d - 0.6755399441055744e16;

      if (nearestEy != Ey) return 0;

      /* Here E * y is integer.
	 Produce now 2^(G - E * y).
      */
      tempdb.i[HI] = (((G - shiftedEydb.i[LO]) + 1023) << 20);
      tempdb.i[LO] = 0;

      /* Check now if sh + sl = tempdb.d * (kh + kl) is an odd
	 integer.

	 Since kh and kl are not overlapped, we have two cases:

	 (i)  kl is equal to 0, in which case tempdb.d * kh must be an odd integer
	 (ii) kl is not equal to 0, in which case tempdb.d * kl must be an odd integer

      */
      if (kl == 0.0) value = kh; else value = kl;

      value *= tempdb.d; /* Exact because multiplication by positive integer power of 2 */

      if (!isOddInteger(value)) return 0;

      /* Here the case is exact by the worst-case argument */
    }

    /* Here, we have either F >= 0 or an exact case

       If F >= 0, we also have an exact case because
       2^F * n = y <= 35, y therefore integer and because
       we can apply the worst case argument.

    */
  }

  /*
       Here, the case is exact, affect pow with 2^G * (kh + kl) rounded to
       nearest in double precision

       Since kh + kl holds on at most 54 bits, 2^G * kh produces
       never any rounding error and kh and kl are not overlapping,
       2^G * kh is equal to the rounding if
       (i)  kl is equal to 0
       (ii) kl is not equal to 0 and the mantissa of 2^G * kh is even

       If in condition (ii), kl is not equal to 0 and the mantissa of
       2^G * kh is not even, we correct it depending on the sign of kl.

       Remark that G can be such that 2^G is no longer a normal.
       Produce therefore 2^(floor(G/2)) and 2^(G - floor(G/2)) and
       multiply in two steps.

  */

  G1 = G >> 1;
  G2 = G - G1;

  tempdb.i[HI] = (G1 + 1023) << 20;
  tempdb.i[LO] = 0;
  temp2db.i[HI] = (G2 + 1023) << 20;
  temp2db.i[LO] = 0;

  tempdb.d *= (kh * temp2db.d);

  if ((kl != 0.0) && ((tempdb.i[LO] & 1) != 0)) {
    /* We must correct the rounding to the rounding to nearest ties to even */
    if (kl > 0.0) {
      tempdb.l++;
    } else {
      tempdb.l++;
    }
  }
  *pow = tempdb.d;

  return 1;
}


double pow_exact_rn(double x, double y, double sign,
		    int index, double ed, double zh, double zm) {
  int H, G;
  double powh, powm, powl;
  double pow;
  double kh, kl;
  double log2xh;

  pow_120(&H, &powh, &powm, &powl, &log2xh, y, index, ed, zh, zm);

  if (pow_round_and_check_rn(&pow,H,powh,powm,powl,&G,&kh,&kl))
    return sign * pow;

  if (pow_exact_case(&pow,x,y,G,kh,kl,log2xh))
    return sign * pow;

  //  printf("Could not decide the rounding to nearest of power.\n");

  return -5.0;
}


double pow_rn(double x, double y) {
  db_number xdb, ydb, yhdb, shiftedylog2xhMult2Ldb, powdb, twodb;
  double sign;
  int E, index;
  double log2FastApprox, ed, ylog2xFast, f;
  double yh, yl, ri, logih, logim, yrih, yril, th, zh, zm;
  double p_t_1_0h;
  double p_t_2_0h;
  double p_t_3_0h;
  double p_t_4_0h;
  double p_t_5_0h;
  double p_t_9_0h;
  double p_t_10_0h;
  double p_t_11_0h, p_t_11_0m;
  double p_t_12_0h, p_t_12_0m;
  double log2zh, log2zm;
  double log2yh, log2ym;
  double log2xh, log2xm;
  double ylog2xh, ylog2xm;
  double rh, r;
  int k, index1, index2, H;
  double tbl1, tbl2h, tbl2m;
  double ph;
  double powh, powm;
  double twoH1074powh, twoH1074powm;
  double shiftedpowh, nearestintpowh, delta, deltaint;
  double rest;
  double lowerTerms;
  double temp1;
  double zhSq, zhFour, p35, p46, p36, p7;
  double xSq;

  /* Fast rejection of special cases */
  xdb.d = x;
  ydb.d = y;

  if (((((xdb.i[HI] >> 20) + 1) & 0x3ff) <= 1) ||
      ((((ydb.i[HI] >> 20) + 1) & 0x3ff) <= 1)) {

    /* Handle special cases before handling NaNs and Infinities */
    if (x == 1.0)  return 1.0;
    if (y == 0.0)  return 1.0;
    if (y == 1.0)  return x;
    if (y == 2.0)  return x * x; /* Remark: may yield uncorrect rounding on x86 for subnormal results */
    if (y == -1.0) return 1 / x;

    if ((x == 0.0) && ((ydb.i[HI] & 0x7ff00000) != 0x7ff00000)) {
      /* x = +/-0 and y is neither NaN nor Infinity

      We have four cases
      (i)  y < 0:
      (a) y odd integer: return +/- Inf and raise divide-by-zero
      (b) y not odd integer: return + Ind and raise divide-by-zero
      (ii) (a) y odd integer: return +/- 0
      (b) y not odd integer: return +0

      Note that y = 0.0 has already been filtered out.
      */
      if (y < 0.0) {
	if (isOddInteger(y))
	  return 1/x;
	else
	  return 1/(x * x);
      } else {
	if (isOddInteger(y))
	  return x;
	else
	  return x * x;
      }
    }

    /* Handle NaNs and Infinities

    Note: the cases (x,y) = (1,NaN) and (x,y) = (NaN,0) have already been handled.

    */
    if ((ydb.i[HI] & 0x7ff00000) == 0x7ff00000) {
      /* Here y is NaN or Inf */
      if (((ydb.i[HI] & 0x000fffff) | ydb.i[LO]) != 0) {
	/* Here y is NaN, we return NaN */
	return y;
      }
      /* Here y is +/- Inf

      There are three main cases:
      (i)   x = -1: return 1
      (ii)  abs(x) > 1:
      (a) y = +Inf: return +Inf
      (b) y = -Inf: return +0
      (iii) abs(x) < 1:
      (a) y = +Inf: return +0
      (b) y = -Inf: return +Inf

      Note: the case x = 1 has already been filtered out
      */
      if (x == -1.0)
	return 1.0;

      /* Here x != 1, x != -1 */
      if ((ABS(x) > 1.0) ^ ((ydb.i[HI] & 0x80000000) == 0)) {
	/* abs(x) > 1 and y = -Inf or abs(x) < 1 and y = +Inf */
	return 0.0;
      } else {
	/* abs(x) > 1 and y = +Inf or abs(x) < 1 and y = -Inf */
	return ABS(y);
      }
    }

    /* Here y is neither Inf nor NaN */

    if ((xdb.i[HI] & 0x7ff00000) == 0x7ff00000) {
      /* Here x is NaN or Inf */
      if (((xdb.i[HI] & 0x000fffff) | xdb.i[LO]) != 0) {
	/* Here x is NaN, we return NaN */
	return x;
      }
      /* Here x is +/- Inf

      There are two main cases:

      (i)  x is +Inf
      (ii) x is -Inf
      */
      if ((xdb.i[HI] & 0x80000000) == 0) {
	/* x is +Inf

	(a) y > 0: return +Inf
	(b) y < 0: return +0

	Note: y = 0 has already been filtered out
	*/
	if (y > 0.0) {
	  return x;
	} else {
	  return 0.0;
	}
      } else {
	/* x is -Inf

	There are four cases:

	(a) y > 0:
	(*)  y is an odd integer: return -Inf
	(**) y is not an odd integer: return +Inf
	(b) y < 0:
	(*)  y is an odd integer: return -0
	(**) y is not an odd integer: return +0

	Note: y = 0 has already been filtered out
	*/
	if (y > 0.0) {
	  if (isOddInteger(y)) {
	    return x;
	  } else {
	    return -x;
	  }
	} else {
	  if (isOddInteger(y)) {
	    return -0.0;
	  } else {
	    return 0.0;
	  }
	}
      }
    }
  }
  /* Here both x and y are finite numbers */

  /* Test now whether we have the case

     (-x)^y = (-1)^y * x^y

     where x is positive

  */

  sign = 1.0;
  if (x < 0.0) {
    /* x is negative
       x^y is defined only if y is integer
    */
    if (!isInteger(y)) {
      /* y is not integer

         return NaN and raise invalid exception
      */
      return 0.0/0.0;
    }
    /* Here y is integer

       Remove the sign of x and put (-1)^y in sign

    */
    x = -x; xdb.i[HI] &= 0x7fffffff;
    if (isOddInteger(y)) {
      sign = -sign;
    }
  }

  /* Here x is strictly positive and finite */

  /* Test now if abs(y) is in a range giving finite, non-trivial results x^y

     We have

     x^y = 2^(y * log2(x)) = 2^(y * log2(2^E * m)) = 2^(y * (E + log2(1 + f)))

     We have overflow iff x^y >= 2^(1024), i.e. y * (E + log2(1 + f)) >= 1024
     We have underflow (flush to zero) iff x^y <= 2^(-1076), i.e. y * (E + log2(1 + f)) <= - 1076
     We round to 1.0 iff abs(y * (E + log2(1 + f))) <= 2^(-54)

     We approximate log2(1 + f) as

     log2(1 + f) = c * f * alpha(f)

     where c is a constant and  0.853 < alpha(f) < 1.21

     So we have surely
     (i)  overflow or underflow if abs(y * (E + c * f)) >= ceil(1076/0.853) = 1261
     (ii) trivial rounding to 1.0 if abs(y * (E + c * f)) <= 2^(-55) <= 2^(-54)/1.21

  */

  /* We start the computation of the logarithm of x */

  E = 0;
  if ((xdb.i[HI] & 0xfff00000) == 0) {
    /* x is subnormal, make it normal */
    xdb.d *= two52;
    E = -52;
  }

  E += (xdb.i[HI]>>20)-1023;             /* extract the exponent */
  index = (xdb.i[HI] & 0x000fffff);
  xdb.i[HI] =  index | 0x3ff00000;	/* do exponent = 0 */
  index = (index + (1<<(20-L-1))) >> (20-L);

  /* reduce  such that sqrt(2)/2 < xdb.d < sqrt(2) */
  if (index >= MAXINDEX) { /* corresponds to xdb>sqrt(2)*/
    xdb.i[HI] -= 0x00100000;
    E++;
  }

  ed = (double) E;

  /* Continue with the exact range reduction of the logarithm of x */

  yhdb.i[HI] = xdb.i[HI];
  yhdb.i[LO] = 0;
  yh = yhdb.d;
  yl = xdb.d - yh;


  /* Special handling for y = 3 or y = 4 and x on not more than 21 bits (without subnormals) */
  if ((yl == 0) && ((y == 3.0) || (y == 4.0)) && (E > -255)) {
    if (y == 3.0)
      return sign * (x * (x * x));
    else {
      xSq = x * x;
      return sign * (xSq * xSq);
    }
  }

  index = index & INDEXMASK;

  /* Now we have

     log2(x) = E + log2(xdb.d)

     with sqrt(2)/2 < xdb.d < sqrt(2)

     Compute now f such that 1 + f = xdb.d and
     approximate

     log2(1 + f) = logFastCoeff * f

  */

  f = xdb.d - 1.0;
  log2FastApprox = ed + logFastCoeff * f;
  ylog2xFast = y * log2FastApprox;

  if (ABS(ylog2xFast) >= 1261.0) {
    if (ylog2xFast > 0.0) {
      /* y * log2(x) is positive, i.e. we overflow */
      return (sign * LARGEST) * LARGEST;
    } else {
      /* y * log2(x) is negative, i.e. we underflow */
      return (sign * SMALLEST) * SMALLEST;
    }
  }

  if (ABS(ylog2xFast) <= twoM55) {
    /* abs(y * log2(x)) <= 2^(-55),
       we return 1.0 and set the inexact flag
    */
    return sign * (1.0 + SMALLEST);
  }

  /* Now, we may still overflow or underflow but on some inputs only */

  /*
     Read tables:
     Read one float for ri
     Read the first two doubles for -log(r_i) (out of three)

     Organization of the table:

     one struct entry per index, the struct entry containing
     r, logih, logim and logil in this order
  */

  ri = argredtable[index].ri;
  /*
     Actually we don't need the logarithm entries now
     Move the following two lines to the eventual reconstruction
     As long as we don't have any if in the following code, we can overlap
     memory access with calculations
  */
  logih = argredtable[index].logih;
  logim = argredtable[index].logim;

  /* Do range reduction:

     zh + zm = y * ri - 1.0 correctly

     Correctness is assured by use of two part yh + yl and 21 bit ri and Add12

     Discard zl for higher monome degrees
  */

  yrih = yh * ri;
  yril = yl * ri;
  th = yrih - 1.0;
  Add12Cond(zh, zm, th, yril);

  /* Polynomial approximation of log2(1 + (zh + zm)) */

  zhSq = zh * zh;

  p35 = log2_70_p_coeff_3h + zhSq * log2_70_p_coeff_5h;
  p46 = log2_70_p_coeff_4h + zhSq * log2_70_p_coeff_6h;
  zhFour = zhSq * zhSq;

  p36 = p35 + zh * p46;
  p7 = log2_70_p_coeff_7h * zhFour;

  p_t_9_0h = p36 + p7;

  p_t_10_0h = p_t_9_0h * zh;
  Add212(&p_t_11_0h,&p_t_11_0m,log2_70_p_coeff_2h,log2_70_p_coeff_2m,p_t_10_0h);
  MulAdd22(&p_t_12_0h,&p_t_12_0m,log2_70_p_coeff_1h,log2_70_p_coeff_1m,zh,zm,p_t_11_0h,p_t_11_0m);
  Mul22(&log2zh,&log2zm,p_t_12_0h,p_t_12_0m,zh,zm);

  /* Reconstruction */

  Add122(&log2yh,&log2ym,ed,logih,logim);
  Add22(&log2xh,&log2xm,log2yh,log2ym,log2zh,log2zm);

  /* Produce ylog2xh + ylog2xm approximating y * log2(x)

     Note: neither y nor y * log2(x) may not be subnormal and non-zero
     because of the fast overflow/underflow/trivial rounding test.

  */

  Mul12(&ylog2xh,&temp1,y,log2xh);
  ylog2xm = temp1 + y * log2xm;

  /* Approximate now 2^(y * log2(x))

     We have

     2^(ylog2xh + ylog2xm)
          = 2^ylog2xh * 2^ylog2xm
          = 2^H * 2^(ylog2xh - H) * 2^ylog2xm
	  = 2^H * 2^(i2/2^8) * 2^(i1/2^13) * 2^(ylog2xh - H - i2/2^8 - i1/2^13) * 2^ylog2xm
	  = 2^H * tbl2[i2] * (1 + tbl1[i1]) * (1 + p(rh)) + delta
	  where rh \approx ylog2xh - H - i1/2^7 - i2/2^13 + ylog2xm
  */

  shiftedylog2xhMult2Ldb.d = shiftConstTwoM13 + ylog2xh;
  r = ylog2xh - (shiftedylog2xhMult2Ldb.d - shiftConstTwoM13);
  k = shiftedylog2xhMult2Ldb.i[LO];
  H = k >> 13;
  index1 = k & INDEXMASK1;
  index2 = (k & INDEXMASK2) >> 5;

  /* Renormalize reduced argument

     This operation produces an error

     2^(z * (1 + eps)) = 2^z * (1 + eps')

     where |eps| <= 2^(-53) and |eps'| <= 2^(-65)

  */
  rh = r + ylog2xm;

  /* Table reads */
  tbl1 = twoPowerIndex1[index1].hiM1;
  tbl2h = twoPowerIndex2[index2].hi;
  tbl2m = twoPowerIndex2[index2].mi;

  /* Polynomial approximation */
  p_t_1_0h = exp2_p_coeff_3h;
  p_t_2_0h = p_t_1_0h * rh;
  p_t_3_0h = exp2_p_coeff_2h + p_t_2_0h;
  p_t_4_0h = p_t_3_0h * rh;
  p_t_5_0h = exp2_p_coeff_1h + p_t_4_0h;
  ph = p_t_5_0h * rh;

  /* Reconstruction */

  lowerTerms = tbl1 + (ph + tbl1 * ph);

  Add212(&powh,&powm,tbl2h,tbl2m,tbl2h * lowerTerms);

  /* Here we have

     2^H * (powh + powm) = x^y * (1 + eps)

     with ||eps|| <= 2^(-60)

     Check first if we overflow or underflow.

     Check then if we can perform normal rounding or
     if we must perhaps perform subnormal rounding.
     We are sure that

     0.5 <= powh + powm <= 4.0

  */

  if (H >= 1025) {
    /* Here, we surely overflow
       Return sign * inf
    */
    return (sign * LARGEST) * LARGEST;
  }

  if (H <= -1077) {
    /* Here, we surely underflow
       Return sign * 0 and set inexact flag
    */
    return (sign * SMALLEST) * SMALLEST;
  }

  if (H > -1022) {
    /* We are sure to be able to perform normal rounding
       We must still be aware of the fact that the final
       result may overflow
    */
    if(powh == (powh + (powm * RNROUNDCST))) {
      powdb.d = powh;
      if (H < 1023) {
	powdb.i[HI] += H << 20;
	return sign * powdb.d;
      } else {
	/* May overflow: multiply by 2^H in two steps */
	powdb.i[HI] += (H - 3) << 20;
	return sign * powdb.d * 8.0;
      }
    }
  } else {
    /* We must perhaps perform subnormal rounding

       We start by renormalizing the double-double
       powh + powm such that

       1 <= powh + powm < 2

    */

    if ((powh < 1.0) || ((powh == 1.0) && (powm < 0.0))) {
      powh *= 2.0;
      powm *= 2.0;
      H--;
    }
    if ((powh > 2.0) || ((powh == 2.0) && (powm >= 0.0))) {
      powh *= 0.5;
      powm *= 0.5;
      H++;
    }

    /* Here we know that 1 <= powh + powm < 2

       2^H * (powh + powm) is subnormal or equal to the least normal
       iff H <= -1023

    */
    if (H > -1023) {
      /* We have nevertheless normal rounding */
      if(powh == (powh + (powm * RNROUNDCST))) {
	powdb.d = powh;
	powdb.i[HI] += H << 20;
	return sign * powdb.d;
      }
      /* Here we have normal rounding but could not decide the rounding */
    } else {
      /* We have surely denormal rounding

         This means

	 round(2^H * (powh + powm)) = 2^(-1074) * nearestint(2^(+1074) * 2^H * (powh + powm))

	 We compute the rounding (and test its TMD) of

	 nearestint(2^(H + 1074) * (powh + powm))

	 by computing first

	 nearestint(2^(H + 1074) * powh)

	 and then the absolute error

	 delta = 2^(H + 1074) * powh - nearestint(2^(H + 1074) * powh) + 2^(H + 1074) * powl

	 We can then refute or correct the rounding by tests on delta.

	 We know that 2^(H + 1074) <= 2^(51) and powh <= 2.0. Thus 2^(H + 1074) * powh <= 2^(52)
	 We can hence compute nearestint using the shift trick.

	 We start by constructing 2^(H + 1074)
      */

      twodb.i[HI] = (H + (1074 + 1023)) << 20;
      twodb.i[LO] = 0;

      twoH1074powh = twodb.d * powh;
      twoH1074powm = twodb.d * powm;

      shiftedpowh = two52 + twoH1074powh;
      nearestintpowh = shiftedpowh - two52;
      deltaint = twoH1074powh - nearestintpowh;

      /* The next addition produces a very small
	 rounding error we must account for in the rounding test.
      */
      delta = deltaint + twoH1074powm;

      /* Correct now the rounding */

      if (ABS(delta) > 0.5) {
	/* ABS(delta) > 0.5 */
	if (delta > 0.0) {
	  /* nearestintpowh is too small by 1.0 */
	  nearestintpowh += 1.0; /* exact */
	  delta -= 1.0; /* exact */
	} else {
	  /* nearestintpowh is too great by 1.0 */
	  nearestintpowh -= 1.0; /* exact */
	  delta += 1.0; /* exact */
	}
      }

      /* Perform now the rounding test

         This test filters out also half-ulp exact cases.
	 Exact cases are not filtered out because C99 allows
	 setting the underflow and/or inexact flags also
	 for exact cases.

         Compute first

	 rest = abs(0.5 - abs(delta)) * (1 + eps)

	 where abs(eps) <= 2^(-53)

	 We cannot decide the rounding if

	 rest <= 2^(H + 1074) * epsmax * 2

	 where epsmax is the bound for the approximating polynomials,

	 here epsmax = 2^(-60)

	 as follows by the following drawing

	 2^52                2^(H + 1074)     2^0            2^(H + 1074) * epsmax
	 +-------------------+----------------+---+------... |
	 | 0 ...           0 | nearestintpowh | 1 |          rest
         +-------------------+----------------+---+------... |
                             | <---- 60 bits approx. ------->|

	 This means that we cannot decide the rounding if

	 rest * 1/epsmax * 0.5 <= 2^(H + 1074)

	 We store 1/epsmax * 0.5 on SUBNORMROUNDCST
      */

      rest = ABS(0.5 - ABS(delta));

      if (rest * SUBNORMROUNDCST > twodb.d) {
	/* Here we could decide the rounding

	   The result to return is

	   sign * 2^(-1074) * nearestintpowh

	   which is either subnormal or equal to the
	   least normal.

	   Since we know that the multiplication
	   does not imply any rounding error, we can
	   perform it using the shift trick and a mask.

	   This trick yields to a 200 cycle speed-up on Athlon.

	*/

	twodb.d = nearestintpowh + two52;
	twodb.i[HI] = (twodb.i[HI] + (1 << 20)) & 0x001fffff;

	return sign * twodb.d;
      }
    }
  }

  /* If we are here, we could not decide the rounding or are half-ulp

     Launch now exacter phases and exact and half-ulp testing

  */

  return pow_exact_rn(x,y,sign,index,ed,zh,zm);

}