1 /* mpfr_exp_2 -- exponential of a floating-point number 2 using algorithms in O(n^(1/2)*M(n)) and O(n^(1/3)*M(n)) 3 4 Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. 5 Contributed by the Arenaire and Caramel projects, INRIA. 6 7 This file is part of the GNU MPFR Library. 8 9 The GNU MPFR Library is free software; you can redistribute it and/or modify 10 it under the terms of the GNU Lesser General Public License as published by 11 the Free Software Foundation; either version 3 of the License, or (at your 12 option) any later version. 13 14 The GNU MPFR Library is distributed in the hope that it will be useful, but 15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 17 License for more details. 18 19 You should have received a copy of the GNU Lesser General Public License 20 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 21 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 22 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 23 24 /* #define DEBUG */ 25 #define MPFR_NEED_LONGLONG_H /* for count_leading_zeros */ 26 #include "mpfr-impl.h" 27 28 static unsigned long 29 mpfr_exp2_aux (mpz_t, mpfr_srcptr, mpfr_prec_t, mpfr_exp_t *); 30 static unsigned long 31 mpfr_exp2_aux2 (mpz_t, mpfr_srcptr, mpfr_prec_t, mpfr_exp_t *); 32 static mpfr_exp_t 33 mpz_normalize (mpz_t, mpz_t, mpfr_exp_t); 34 static mpfr_exp_t 35 mpz_normalize2 (mpz_t, mpz_t, mpfr_exp_t, mpfr_exp_t); 36 37 /* if k = the number of bits of z > q, divides z by 2^(k-q) and returns k-q. 38 Otherwise do nothing and return 0. 39 */ 40 static mpfr_exp_t 41 mpz_normalize (mpz_t rop, mpz_t z, mpfr_exp_t q) 42 { 43 size_t k; 44 45 MPFR_MPZ_SIZEINBASE2 (k, z); 46 MPFR_ASSERTD (k == (mpfr_uexp_t) k); 47 if (q < 0 || (mpfr_uexp_t) k > (mpfr_uexp_t) q) 48 { 49 mpz_fdiv_q_2exp (rop, z, (unsigned long) ((mpfr_uexp_t) k - q)); 50 return (mpfr_exp_t) k - q; 51 } 52 if (MPFR_UNLIKELY(rop != z)) 53 mpz_set (rop, z); 54 return 0; 55 } 56 57 /* if expz > target, shift z by (expz-target) bits to the left. 58 if expz < target, shift z by (target-expz) bits to the right. 59 Returns target. 60 */ 61 static mpfr_exp_t 62 mpz_normalize2 (mpz_t rop, mpz_t z, mpfr_exp_t expz, mpfr_exp_t target) 63 { 64 if (target > expz) 65 mpz_fdiv_q_2exp (rop, z, target - expz); 66 else 67 mpz_mul_2exp (rop, z, expz - target); 68 return target; 69 } 70 71 /* use Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n 72 where x = n*log(2)+(2^K)*r 73 together with the Paterson-Stockmeyer O(t^(1/2)) algorithm for the 74 evaluation of power series. The resulting complexity is O(n^(1/3)*M(n)). 75 This function returns with the exact flags due to exp. 76 */ 77 int 78 mpfr_exp_2 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode) 79 { 80 long n; 81 unsigned long K, k, l, err; /* FIXME: Which type ? */ 82 int error_r; 83 mpfr_exp_t exps, expx; 84 mpfr_prec_t q, precy; 85 int inexact; 86 mpfr_t r, s; 87 mpz_t ss; 88 MPFR_ZIV_DECL (loop); 89 90 MPFR_LOG_FUNC 91 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec(x), mpfr_log_prec, x, rnd_mode), 92 ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec(y), mpfr_log_prec, y, 93 inexact)); 94 95 expx = MPFR_GET_EXP (x); 96 precy = MPFR_PREC(y); 97 98 /* Warning: we cannot use the 'double' type here, since on 64-bit machines 99 x may be as large as 2^62*log(2) without overflow, and then x/log(2) 100 is about 2^62: not every integer of that size can be represented as a 101 'double', thus the argument reduction would fail. */ 102 if (expx <= -2) 103 /* |x| <= 0.25, thus n = round(x/log(2)) = 0 */ 104 n = 0; 105 else 106 { 107 mpfr_init2 (r, sizeof (long) * CHAR_BIT); 108 mpfr_const_log2 (r, MPFR_RNDZ); 109 mpfr_div (r, x, r, MPFR_RNDN); 110 n = mpfr_get_si (r, MPFR_RNDN); 111 mpfr_clear (r); 112 } 113 /* we have |x| <= (|n|+1)*log(2) */ 114 MPFR_LOG_MSG (("d(x)=%1.30e n=%ld\n", mpfr_get_d1(x), n)); 115 116 /* error_r bounds the cancelled bits in x - n*log(2) */ 117 if (MPFR_UNLIKELY (n == 0)) 118 error_r = 0; 119 else 120 { 121 count_leading_zeros (error_r, (mp_limb_t) SAFE_ABS (unsigned long, n) + 1); 122 error_r = GMP_NUMB_BITS - error_r; 123 /* we have |x| <= 2^error_r * log(2) */ 124 } 125 126 /* for the O(n^(1/2)*M(n)) method, the Taylor series computation of 127 n/K terms costs about n/(2K) multiplications when computed in fixed 128 point */ 129 K = (precy < MPFR_EXP_2_THRESHOLD) ? __gmpfr_isqrt ((precy + 1) / 2) 130 : __gmpfr_cuberoot (4*precy); 131 l = (precy - 1) / K + 1; 132 err = K + MPFR_INT_CEIL_LOG2 (2 * l + 18); 133 /* add K extra bits, i.e. failure probability <= 1/2^K = O(1/precy) */ 134 q = precy + err + K + 8; 135 /* if |x| >> 1, take into account the cancelled bits */ 136 if (expx > 0) 137 q += expx; 138 139 /* Note: due to the mpfr_prec_round below, it is not possible to use 140 the MPFR_GROUP_* macros here. */ 141 142 mpfr_init2 (r, q + error_r); 143 mpfr_init2 (s, q + error_r); 144 145 /* the algorithm consists in computing an upper bound of exp(x) using 146 a precision of q bits, and see if we can round to MPFR_PREC(y) taking 147 into account the maximal error. Otherwise we increase q. */ 148 MPFR_ZIV_INIT (loop, q); 149 for (;;) 150 { 151 MPFR_LOG_MSG (("n=%ld K=%lu l=%lu q=%lu error_r=%d\n", 152 n, K, l, (unsigned long) q, error_r)); 153 154 /* First reduce the argument to r = x - n * log(2), 155 so that r is small in absolute value. We want an upper 156 bound on r to get an upper bound on exp(x). */ 157 158 /* if n<0, we have to get an upper bound of log(2) 159 in order to get an upper bound of r = x-n*log(2) */ 160 mpfr_const_log2 (s, (n >= 0) ? MPFR_RNDZ : MPFR_RNDU); 161 /* s is within 1 ulp(s) of log(2) */ 162 163 mpfr_mul_ui (r, s, (n < 0) ? -n : n, (n >= 0) ? MPFR_RNDZ : MPFR_RNDU); 164 /* r is within 3 ulps of |n|*log(2) */ 165 if (n < 0) 166 MPFR_CHANGE_SIGN (r); 167 /* r <= n*log(2), within 3 ulps */ 168 169 MPFR_LOG_VAR (x); 170 MPFR_LOG_VAR (r); 171 172 mpfr_sub (r, x, r, MPFR_RNDU); 173 174 if (MPFR_IS_PURE_FP (r)) 175 { 176 while (MPFR_IS_NEG (r)) 177 { /* initial approximation n was too large */ 178 n--; 179 mpfr_add (r, r, s, MPFR_RNDU); 180 } 181 182 /* since there was a cancellation in x - n*log(2), the low error_r 183 bits from r are zero and thus non significant, thus we can reduce 184 the working precision */ 185 if (error_r > 0) 186 mpfr_prec_round (r, q, MPFR_RNDU); 187 /* the error on r is at most 3 ulps (3 ulps if error_r = 0, 188 and 1 + 3/2 if error_r > 0) */ 189 MPFR_LOG_VAR (r); 190 MPFR_ASSERTD (MPFR_IS_POS (r)); 191 mpfr_div_2ui (r, r, K, MPFR_RNDU); /* r = (x-n*log(2))/2^K, exact */ 192 193 mpz_init (ss); 194 exps = mpfr_get_z_2exp (ss, s); 195 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */ 196 MPFR_ASSERTD (MPFR_IS_PURE_FP (r) && MPFR_EXP (r) < 0); 197 l = (precy < MPFR_EXP_2_THRESHOLD) 198 ? mpfr_exp2_aux (ss, r, q, &exps) /* naive method */ 199 : mpfr_exp2_aux2 (ss, r, q, &exps); /* Paterson/Stockmeyer meth */ 200 201 MPFR_LOG_MSG (("l=%lu q=%lu (K+l)*q^2=%1.3e\n", 202 l, (unsigned long) q, (K + l) * (double) q * q)); 203 204 for (k = 0; k < K; k++) 205 { 206 mpz_mul (ss, ss, ss); 207 exps <<= 1; 208 exps += mpz_normalize (ss, ss, q); 209 } 210 mpfr_set_z (s, ss, MPFR_RNDN); 211 212 MPFR_SET_EXP(s, MPFR_GET_EXP (s) + exps); 213 mpz_clear (ss); 214 215 /* error is at most 2^K*l, plus 2 to take into account of 216 the error of 3 ulps on r */ 217 err = K + MPFR_INT_CEIL_LOG2 (l) + 2; 218 219 MPFR_LOG_MSG (("before mult. by 2^n:\n", 0)); 220 MPFR_LOG_VAR (s); 221 MPFR_LOG_MSG (("err=%lu bits\n", K)); 222 223 if (MPFR_LIKELY (MPFR_CAN_ROUND (s, q - err, precy, rnd_mode))) 224 { 225 mpfr_clear_flags (); 226 inexact = mpfr_mul_2si (y, s, n, rnd_mode); 227 break; 228 } 229 } 230 231 MPFR_ZIV_NEXT (loop, q); 232 mpfr_set_prec (r, q + error_r); 233 mpfr_set_prec (s, q + error_r); 234 } 235 MPFR_ZIV_FREE (loop); 236 237 mpfr_clear (r); 238 mpfr_clear (s); 239 240 return inexact; 241 } 242 243 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q 244 using naive method with O(l) multiplications. 245 Return the number of iterations l. 246 The absolute error on s is less than 3*l*(l+1)*2^(-q). 247 Version using fixed-point arithmetic with mpz instead 248 of mpfr for internal computations. 249 NOTE[VL]: the following sentence seems to be obsolete since MY_INIT_MPZ 250 is no longer used (r6919); qn was the number of limbs of q. 251 s must have at least qn+1 limbs (qn should be enough, but currently fails 252 since mpz_mul_2exp(s, s, q-1) reallocates qn+1 limbs) 253 */ 254 static unsigned long 255 mpfr_exp2_aux (mpz_t s, mpfr_srcptr r, mpfr_prec_t q, mpfr_exp_t *exps) 256 { 257 unsigned long l; 258 mpfr_exp_t dif, expt, expr; 259 mpz_t t, rr; 260 mp_size_t sbit, tbit; 261 262 MPFR_ASSERTN (MPFR_IS_PURE_FP (r)); 263 264 expt = 0; 265 *exps = 1 - (mpfr_exp_t) q; /* s = 2^(q-1) */ 266 mpz_init (t); 267 mpz_init (rr); 268 mpz_set_ui(t, 1); 269 mpz_set_ui(s, 1); 270 mpz_mul_2exp(s, s, q-1); 271 expr = mpfr_get_z_2exp(rr, r); /* no error here */ 272 273 l = 0; 274 for (;;) { 275 l++; 276 mpz_mul(t, t, rr); 277 expt += expr; 278 MPFR_MPZ_SIZEINBASE2 (sbit, s); 279 MPFR_MPZ_SIZEINBASE2 (tbit, t); 280 dif = *exps + sbit - expt - tbit; 281 /* truncates the bits of t which are < ulp(s) = 2^(1-q) */ 282 expt += mpz_normalize(t, t, (mpfr_exp_t) q-dif); /* error at most 2^(1-q) */ 283 mpz_fdiv_q_ui (t, t, l); /* error at most 2^(1-q) */ 284 /* the error wrt t^l/l! is here at most 3*l*ulp(s) */ 285 MPFR_ASSERTD (expt == *exps); 286 if (mpz_sgn (t) == 0) 287 break; 288 mpz_add(s, s, t); /* no error here: exact */ 289 /* ensures rr has the same size as t: after several shifts, the error 290 on rr is still at most ulp(t)=ulp(s) */ 291 MPFR_MPZ_SIZEINBASE2 (tbit, t); 292 expr += mpz_normalize(rr, rr, tbit); 293 } 294 295 mpz_clear (t); 296 mpz_clear (rr); 297 298 return 3 * l * (l + 1); 299 } 300 301 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q 302 using Paterson-Stockmeyer algorithm with O(sqrt(l)) multiplications. 303 Return l. 304 Uses m multiplications of full size and 2l/m of decreasing size, 305 i.e. a total equivalent to about m+l/m full multiplications, 306 i.e. 2*sqrt(l) for m=sqrt(l). 307 NOTE[VL]: The following sentence seems to be obsolete since MY_INIT_MPZ 308 is no longer used (r6919); sizer was the number of limbs of r. 309 Version using mpz. ss must have at least (sizer+1) limbs. 310 The error is bounded by (l^2+4*l) ulps where l is the return value. 311 */ 312 static unsigned long 313 mpfr_exp2_aux2 (mpz_t s, mpfr_srcptr r, mpfr_prec_t q, mpfr_exp_t *exps) 314 { 315 mpfr_exp_t expr, *expR, expt; 316 mpfr_prec_t ql; 317 unsigned long l, m, i; 318 mpz_t t, *R, rr, tmp; 319 mp_size_t sbit, rrbit; 320 MPFR_TMP_DECL(marker); 321 322 /* estimate value of l */ 323 MPFR_ASSERTD (MPFR_GET_EXP (r) < 0); 324 l = q / (- MPFR_GET_EXP (r)); 325 m = __gmpfr_isqrt (l); 326 /* we access R[2], thus we need m >= 2 */ 327 if (m < 2) 328 m = 2; 329 330 MPFR_TMP_MARK(marker); 331 R = (mpz_t*) MPFR_TMP_ALLOC ((m + 1) * sizeof (mpz_t)); /* R[i] is r^i */ 332 expR = (mpfr_exp_t*) MPFR_TMP_ALLOC((m + 1) * sizeof (mpfr_exp_t)); 333 /* expR[i] is the exponent for R[i] */ 334 mpz_init (tmp); 335 mpz_init (rr); 336 mpz_init (t); 337 mpz_set_ui (s, 0); 338 *exps = 1 - q; /* 1 ulp = 2^(1-q) */ 339 for (i = 0 ; i <= m ; i++) 340 mpz_init (R[i]); 341 expR[1] = mpfr_get_z_2exp (R[1], r); /* exact operation: no error */ 342 expR[1] = mpz_normalize2 (R[1], R[1], expR[1], 1 - q); /* error <= 1 ulp */ 343 mpz_mul (t, R[1], R[1]); /* err(t) <= 2 ulps */ 344 mpz_fdiv_q_2exp (R[2], t, q - 1); /* err(R[2]) <= 3 ulps */ 345 expR[2] = 1 - q; 346 for (i = 3 ; i <= m ; i++) 347 { 348 if ((i & 1) == 1) 349 mpz_mul (t, R[i-1], R[1]); /* err(t) <= 2*i-2 */ 350 else 351 mpz_mul (t, R[i/2], R[i/2]); 352 mpz_fdiv_q_2exp (R[i], t, q - 1); /* err(R[i]) <= 2*i-1 ulps */ 353 expR[i] = 1 - q; 354 } 355 mpz_set_ui (R[0], 1); 356 mpz_mul_2exp (R[0], R[0], q-1); 357 expR[0] = 1-q; /* R[0]=1 */ 358 mpz_set_ui (rr, 1); 359 expr = 0; /* rr contains r^l/l! */ 360 /* by induction: err(rr) <= 2*l ulps */ 361 362 l = 0; 363 ql = q; /* precision used for current giant step */ 364 do 365 { 366 /* all R[i] must have exponent 1-ql */ 367 if (l != 0) 368 for (i = 0 ; i < m ; i++) 369 expR[i] = mpz_normalize2 (R[i], R[i], expR[i], 1 - ql); 370 /* the absolute error on R[i]*rr is still 2*i-1 ulps */ 371 expt = mpz_normalize2 (t, R[m-1], expR[m-1], 1 - ql); 372 /* err(t) <= 2*m-1 ulps */ 373 /* computes t = 1 + r/(l+1) + ... + r^(m-1)*l!/(l+m-1)! 374 using Horner's scheme */ 375 for (i = m-1 ; i-- != 0 ; ) 376 { 377 mpz_fdiv_q_ui (t, t, l+i+1); /* err(t) += 1 ulp */ 378 mpz_add (t, t, R[i]); 379 } 380 /* now err(t) <= (3m-2) ulps */ 381 382 /* now multiplies t by r^l/l! and adds to s */ 383 mpz_mul (t, t, rr); 384 expt += expr; 385 expt = mpz_normalize2 (t, t, expt, *exps); 386 /* err(t) <= (3m-1) + err_rr(l) <= (3m-2) + 2*l */ 387 MPFR_ASSERTD (expt == *exps); 388 mpz_add (s, s, t); /* no error here */ 389 390 /* updates rr, the multiplication of the factors l+i could be done 391 using binary splitting too, but it is not sure it would save much */ 392 mpz_mul (t, rr, R[m]); /* err(t) <= err(rr) + 2m-1 */ 393 expr += expR[m]; 394 mpz_set_ui (tmp, 1); 395 for (i = 1 ; i <= m ; i++) 396 mpz_mul_ui (tmp, tmp, l + i); 397 mpz_fdiv_q (t, t, tmp); /* err(t) <= err(rr) + 2m */ 398 l += m; 399 if (MPFR_UNLIKELY (mpz_sgn (t) == 0)) 400 break; 401 expr += mpz_normalize (rr, t, ql); /* err_rr(l+1) <= err_rr(l) + 2m+1 */ 402 if (MPFR_UNLIKELY (mpz_sgn (rr) == 0)) 403 rrbit = 1; 404 else 405 MPFR_MPZ_SIZEINBASE2 (rrbit, rr); 406 MPFR_MPZ_SIZEINBASE2 (sbit, s); 407 ql = q - *exps - sbit + expr + rrbit; 408 /* TODO: Wrong cast. I don't want what is right, but this is 409 certainly wrong */ 410 } 411 while ((size_t) expr + rrbit > (size_t) -q); 412 413 for (i = 0 ; i <= m ; i++) 414 mpz_clear (R[i]); 415 MPFR_TMP_FREE(marker); 416 mpz_clear (rr); 417 mpz_clear (t); 418 mpz_clear (tmp); 419 420 return l * (l + 4); 421 } 422