1 /* mpfr_exp -- exponential of a floating-point number 2 3 Copyright 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. 4 Contributed by the AriC and Caramel projects, INRIA. 5 6 This file is part of the GNU MPFR Library. 7 8 The GNU MPFR Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MPFR Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23 #define MPFR_NEED_LONGLONG_H /* for MPFR_MPZ_SIZEINBASE2 */ 24 #include "mpfr-impl.h" 25 26 /* y <- exp(p/2^r) within 1 ulp, using 2^m terms from the series 27 Assume |p/2^r| < 1. 28 We use the following binary splitting formula: 29 P(a,b) = p if a+1=b, P(a,c)*P(c,b) otherwise 30 Q(a,b) = a*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise 31 T(a,b) = P(a,b) if a+1=b, Q(c,b)*T(a,c)+P(a,c)*T(c,b) otherwise 32 Then exp(p/2^r) ~ T(0,i)/Q(0,i) for i so that (p/2^r)^i/i! is small enough. 33 34 Since P(a,b) = p^(b-a), and we consider only values of b-a of the form 2^j, 35 we don't need to compute P(), we only precompute p^(2^j) in the ptoj[] array 36 below. 37 38 Since Q(a,b) is divisible by 2^(r*(b-a-1)), we don't compute the power of 39 two part. 40 */ 41 static void 42 mpfr_exp_rational (mpfr_ptr y, mpz_ptr p, long r, int m, 43 mpz_t *Q, mpfr_prec_t *mult) 44 { 45 unsigned long n, i, j; 46 mpz_t *S, *ptoj; 47 mpfr_prec_t *log2_nb_terms; 48 mpfr_exp_t diff, expo; 49 mpfr_prec_t precy = MPFR_PREC(y), prec_i_have, prec_ptoj; 50 int k, l; 51 52 MPFR_ASSERTN ((size_t) m < sizeof (long) * CHAR_BIT - 1); 53 54 S = Q + (m+1); 55 ptoj = Q + 2*(m+1); /* ptoj[i] = mantissa^(2^i) */ 56 log2_nb_terms = mult + (m+1); 57 58 /* Normalize p */ 59 MPFR_ASSERTD (mpz_cmp_ui (p, 0) != 0); 60 n = mpz_scan1 (p, 0); /* number of trailing zeros in p */ 61 mpz_tdiv_q_2exp (p, p, n); 62 r -= n; /* since |p/2^r| < 1 and p >= 1, r >= 1 */ 63 64 /* Set initial var */ 65 mpz_set (ptoj[0], p); 66 for (k = 1; k < m; k++) 67 mpz_mul (ptoj[k], ptoj[k-1], ptoj[k-1]); /* ptoj[k] = p^(2^k) */ 68 mpz_set_ui (Q[0], 1); 69 mpz_set_ui (S[0], 1); 70 k = 0; 71 mult[0] = 0; /* the multiplier P[k]/Q[k] for the remaining terms 72 satisfies P[k]/Q[k] <= 2^(-mult[k]) */ 73 log2_nb_terms[0] = 0; /* log2(#terms) [exact in 1st loop where 2^k] */ 74 prec_i_have = 0; 75 76 /* Main Loop */ 77 n = 1UL << m; 78 for (i = 1; (prec_i_have < precy) && (i < n); i++) 79 { 80 /* invariant: Q[0]*Q[1]*...*Q[k] equals i! */ 81 k++; 82 log2_nb_terms[k] = 0; /* 1 term */ 83 mpz_set_ui (Q[k], i + 1); 84 mpz_set_ui (S[k], i + 1); 85 j = i + 1; /* we have computed j = i+1 terms so far */ 86 l = 0; 87 while ((j & 1) == 0) /* combine and reduce */ 88 { 89 /* invariant: S[k] corresponds to 2^l consecutive terms */ 90 mpz_mul (S[k], S[k], ptoj[l]); 91 mpz_mul (S[k-1], S[k-1], Q[k]); 92 /* Q[k] corresponds to 2^l consecutive terms too. 93 Since it does not contains the factor 2^(r*2^l), 94 when going from l to l+1 we need to multiply 95 by 2^(r*2^(l+1))/2^(r*2^l) = 2^(r*2^l) */ 96 mpz_mul_2exp (S[k-1], S[k-1], r << l); 97 mpz_add (S[k-1], S[k-1], S[k]); 98 mpz_mul (Q[k-1], Q[k-1], Q[k]); 99 log2_nb_terms[k-1] ++; /* number of terms in S[k-1] 100 is a power of 2 by construction */ 101 MPFR_MPZ_SIZEINBASE2 (prec_i_have, Q[k]); 102 MPFR_MPZ_SIZEINBASE2 (prec_ptoj, ptoj[l]); 103 mult[k-1] += prec_i_have + (r << l) - prec_ptoj - 1; 104 prec_i_have = mult[k] = mult[k-1]; 105 /* since mult[k] >= mult[k-1] + nbits(Q[k]), 106 we have Q[0]*...*Q[k] <= 2^mult[k] = 2^prec_i_have */ 107 l ++; 108 j >>= 1; 109 k --; 110 } 111 } 112 113 /* accumulate all products in S[0] and Q[0]. Warning: contrary to above, 114 here we do not have log2_nb_terms[k-1] = log2_nb_terms[k]+1. */ 115 l = 0; /* number of accumulated terms in the right part S[k]/Q[k] */ 116 while (k > 0) 117 { 118 j = log2_nb_terms[k-1]; 119 mpz_mul (S[k], S[k], ptoj[j]); 120 mpz_mul (S[k-1], S[k-1], Q[k]); 121 l += 1 << log2_nb_terms[k]; 122 mpz_mul_2exp (S[k-1], S[k-1], r * l); 123 mpz_add (S[k-1], S[k-1], S[k]); 124 mpz_mul (Q[k-1], Q[k-1], Q[k]); 125 k--; 126 } 127 128 /* Q[0] now equals i! */ 129 MPFR_MPZ_SIZEINBASE2 (prec_i_have, S[0]); 130 diff = (mpfr_exp_t) prec_i_have - 2 * (mpfr_exp_t) precy; 131 expo = diff; 132 if (diff >= 0) 133 mpz_fdiv_q_2exp (S[0], S[0], diff); 134 else 135 mpz_mul_2exp (S[0], S[0], -diff); 136 137 MPFR_MPZ_SIZEINBASE2 (prec_i_have, Q[0]); 138 diff = (mpfr_exp_t) prec_i_have - (mpfr_prec_t) precy; 139 expo -= diff; 140 if (diff > 0) 141 mpz_fdiv_q_2exp (Q[0], Q[0], diff); 142 else 143 mpz_mul_2exp (Q[0], Q[0], -diff); 144 145 mpz_tdiv_q (S[0], S[0], Q[0]); 146 mpfr_set_z (y, S[0], MPFR_RNDD); 147 MPFR_SET_EXP (y, MPFR_GET_EXP (y) + expo - r * (i - 1) ); 148 } 149 150 #define shift (GMP_NUMB_BITS/2) 151 152 int 153 mpfr_exp_3 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode) 154 { 155 mpfr_t t, x_copy, tmp; 156 mpz_t uk; 157 mpfr_exp_t ttt, shift_x; 158 unsigned long twopoweri; 159 mpz_t *P; 160 mpfr_prec_t *mult; 161 int i, k, loop; 162 int prec_x; 163 mpfr_prec_t realprec, Prec; 164 int iter; 165 int inexact = 0; 166 MPFR_SAVE_EXPO_DECL (expo); 167 MPFR_ZIV_DECL (ziv_loop); 168 169 MPFR_LOG_FUNC 170 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec(x), mpfr_log_prec, x, rnd_mode), 171 ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec(y), mpfr_log_prec, y, 172 inexact)); 173 174 MPFR_SAVE_EXPO_MARK (expo); 175 176 /* decompose x */ 177 /* we first write x = 1.xxxxxxxxxxxxx 178 ----- k bits -- */ 179 prec_x = MPFR_INT_CEIL_LOG2 (MPFR_PREC (x)) - MPFR_LOG2_GMP_NUMB_BITS; 180 if (prec_x < 0) 181 prec_x = 0; 182 183 ttt = MPFR_GET_EXP (x); 184 mpfr_init2 (x_copy, MPFR_PREC(x)); 185 mpfr_set (x_copy, x, MPFR_RNDD); 186 187 /* we shift to get a number less than 1 */ 188 if (ttt > 0) 189 { 190 shift_x = ttt; 191 mpfr_div_2ui (x_copy, x, ttt, MPFR_RNDN); 192 ttt = MPFR_GET_EXP (x_copy); 193 } 194 else 195 shift_x = 0; 196 MPFR_ASSERTD (ttt <= 0); 197 198 /* Init prec and vars */ 199 realprec = MPFR_PREC (y) + MPFR_INT_CEIL_LOG2 (prec_x + MPFR_PREC (y)); 200 Prec = realprec + shift + 2 + shift_x; 201 mpfr_init2 (t, Prec); 202 mpfr_init2 (tmp, Prec); 203 mpz_init (uk); 204 205 /* Main loop */ 206 MPFR_ZIV_INIT (ziv_loop, realprec); 207 for (;;) 208 { 209 int scaled = 0; 210 MPFR_BLOCK_DECL (flags); 211 212 k = MPFR_INT_CEIL_LOG2 (Prec) - MPFR_LOG2_GMP_NUMB_BITS; 213 214 /* now we have to extract */ 215 twopoweri = GMP_NUMB_BITS; 216 217 /* Allocate tables */ 218 P = (mpz_t*) (*__gmp_allocate_func) (3*(k+2)*sizeof(mpz_t)); 219 for (i = 0; i < 3*(k+2); i++) 220 mpz_init (P[i]); 221 mult = (mpfr_prec_t*) (*__gmp_allocate_func) (2*(k+2)*sizeof(mpfr_prec_t)); 222 223 /* Particular case for i==0 */ 224 mpfr_extract (uk, x_copy, 0); 225 MPFR_ASSERTD (mpz_cmp_ui (uk, 0) != 0); 226 mpfr_exp_rational (tmp, uk, shift + twopoweri - ttt, k + 1, P, mult); 227 for (loop = 0; loop < shift; loop++) 228 mpfr_sqr (tmp, tmp, MPFR_RNDD); 229 twopoweri *= 2; 230 231 /* General case */ 232 iter = (k <= prec_x) ? k : prec_x; 233 for (i = 1; i <= iter; i++) 234 { 235 mpfr_extract (uk, x_copy, i); 236 if (MPFR_LIKELY (mpz_cmp_ui (uk, 0) != 0)) 237 { 238 mpfr_exp_rational (t, uk, twopoweri - ttt, k - i + 1, P, mult); 239 mpfr_mul (tmp, tmp, t, MPFR_RNDD); 240 } 241 MPFR_ASSERTN (twopoweri <= LONG_MAX/2); 242 twopoweri *=2; 243 } 244 245 /* Clear tables */ 246 for (i = 0; i < 3*(k+2); i++) 247 mpz_clear (P[i]); 248 (*__gmp_free_func) (P, 3*(k+2)*sizeof(mpz_t)); 249 (*__gmp_free_func) (mult, 2*(k+2)*sizeof(mpfr_prec_t)); 250 251 if (shift_x > 0) 252 { 253 MPFR_BLOCK (flags, { 254 for (loop = 0; loop < shift_x - 1; loop++) 255 mpfr_sqr (tmp, tmp, MPFR_RNDD); 256 mpfr_sqr (t, tmp, MPFR_RNDD); 257 } ); 258 259 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags))) 260 { 261 /* tmp <= exact result, so that it is a real overflow. */ 262 inexact = mpfr_overflow (y, rnd_mode, 1); 263 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_OVERFLOW); 264 break; 265 } 266 267 if (MPFR_UNLIKELY (MPFR_UNDERFLOW (flags))) 268 { 269 /* This may be a spurious underflow. So, let's scale 270 the result. */ 271 mpfr_mul_2ui (tmp, tmp, 1, MPFR_RNDD); /* no overflow, exact */ 272 mpfr_sqr (t, tmp, MPFR_RNDD); 273 if (MPFR_IS_ZERO (t)) 274 { 275 /* approximate result < 2^(emin - 3), thus 276 exact result < 2^(emin - 2). */ 277 inexact = mpfr_underflow (y, (rnd_mode == MPFR_RNDN) ? 278 MPFR_RNDZ : rnd_mode, 1); 279 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW); 280 break; 281 } 282 scaled = 1; 283 } 284 } 285 286 if (mpfr_can_round (shift_x > 0 ? t : tmp, realprec, MPFR_RNDD, MPFR_RNDZ, 287 MPFR_PREC(y) + (rnd_mode == MPFR_RNDN))) 288 { 289 inexact = mpfr_set (y, shift_x > 0 ? t : tmp, rnd_mode); 290 if (MPFR_UNLIKELY (scaled && MPFR_IS_PURE_FP (y))) 291 { 292 int inex2; 293 mpfr_exp_t ey; 294 295 /* The result has been scaled and needs to be corrected. */ 296 ey = MPFR_GET_EXP (y); 297 inex2 = mpfr_mul_2si (y, y, -2, rnd_mode); 298 if (inex2) /* underflow */ 299 { 300 if (rnd_mode == MPFR_RNDN && inexact < 0 && 301 MPFR_IS_ZERO (y) && ey == __gmpfr_emin + 1) 302 { 303 /* Double rounding case: in MPFR_RNDN, the scaled 304 result has been rounded downward to 2^emin. 305 As the exact result is > 2^(emin - 2), correct 306 rounding must be done upward. */ 307 /* TODO: make sure in coverage tests that this line 308 is reached. */ 309 inexact = mpfr_underflow (y, MPFR_RNDU, 1); 310 } 311 else 312 { 313 /* No double rounding. */ 314 inexact = inex2; 315 } 316 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW); 317 } 318 } 319 break; 320 } 321 322 MPFR_ZIV_NEXT (ziv_loop, realprec); 323 Prec = realprec + shift + 2 + shift_x; 324 mpfr_set_prec (t, Prec); 325 mpfr_set_prec (tmp, Prec); 326 } 327 MPFR_ZIV_FREE (ziv_loop); 328 329 mpz_clear (uk); 330 mpfr_clear (tmp); 331 mpfr_clear (t); 332 mpfr_clear (x_copy); 333 MPFR_SAVE_EXPO_FREE (expo); 334 return inexact; 335 } 336