1 /* mpfr_lngamma -- lngamma function 2 3 Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. 4 Contributed by the AriC and Caramel projects, INRIA. 5 6 This file is part of the GNU MPFR Library. 7 8 The GNU MPFR Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MPFR Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23 #define MPFR_NEED_LONGLONG_H 24 #include "mpfr-impl.h" 25 26 /* given a precision p, return alpha, such that the argument reduction 27 will use k = alpha*p*log(2). 28 29 Warning: we should always have alpha >= log(2)/(2Pi) ~ 0.11, 30 and the smallest value of alpha multiplied by the smallest working 31 precision should be >= 4. 32 */ 33 static void 34 mpfr_gamma_alpha (mpfr_t s, mpfr_prec_t p) 35 { 36 if (p <= 100) 37 mpfr_set_ui_2exp (s, 614, -10, MPFR_RNDN); /* about 0.6 */ 38 else if (p <= 500) 39 mpfr_set_ui_2exp (s, 819, -10, MPFR_RNDN); /* about 0.8 */ 40 else if (p <= 1000) 41 mpfr_set_ui_2exp (s, 1331, -10, MPFR_RNDN); /* about 1.3 */ 42 else if (p <= 2000) 43 mpfr_set_ui_2exp (s, 1741, -10, MPFR_RNDN); /* about 1.7 */ 44 else if (p <= 5000) 45 mpfr_set_ui_2exp (s, 2253, -10, MPFR_RNDN); /* about 2.2 */ 46 else if (p <= 10000) 47 mpfr_set_ui_2exp (s, 3482, -10, MPFR_RNDN); /* about 3.4 */ 48 else 49 mpfr_set_ui_2exp (s, 9, -1, MPFR_RNDN); /* 4.5 */ 50 } 51 52 #ifdef IS_GAMMA 53 54 /* This function is called in case of intermediate overflow/underflow. 55 The s1 and s2 arguments are temporary MPFR numbers, having the 56 working precision. If the result could be determined, then the 57 flags are updated via pexpo, y is set to the result, and the 58 (non-zero) ternary value is returned. Otherwise 0 is returned 59 in order to perform the next Ziv iteration. */ 60 static int 61 mpfr_explgamma (mpfr_ptr y, mpfr_srcptr x, mpfr_save_expo_t *pexpo, 62 mpfr_ptr s1, mpfr_ptr s2, mpfr_rnd_t rnd) 63 { 64 mpfr_t t1, t2; 65 int inex1, inex2, sign; 66 MPFR_BLOCK_DECL (flags1); 67 MPFR_BLOCK_DECL (flags2); 68 MPFR_GROUP_DECL (group); 69 70 MPFR_BLOCK (flags1, inex1 = mpfr_lgamma (s1, &sign, x, MPFR_RNDD)); 71 MPFR_ASSERTN (inex1 != 0); 72 /* s1 = RNDD(lngamma(x)), inexact */ 73 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags1))) 74 { 75 if (MPFR_SIGN (s1) > 0) 76 { 77 MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, MPFR_FLAGS_OVERFLOW); 78 return mpfr_overflow (y, rnd, sign); 79 } 80 else 81 { 82 MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, MPFR_FLAGS_UNDERFLOW); 83 return mpfr_underflow (y, rnd == MPFR_RNDN ? MPFR_RNDZ : rnd, sign); 84 } 85 } 86 87 mpfr_set (s2, s1, MPFR_RNDN); /* exact */ 88 mpfr_nextabove (s2); /* v = RNDU(lngamma(z0)) */ 89 90 if (sign < 0) 91 rnd = MPFR_INVERT_RND (rnd); /* since the result with be negated */ 92 MPFR_GROUP_INIT_2 (group, MPFR_PREC (y), t1, t2); 93 MPFR_BLOCK (flags1, inex1 = mpfr_exp (t1, s1, rnd)); 94 MPFR_BLOCK (flags2, inex2 = mpfr_exp (t2, s2, rnd)); 95 /* t1 is the rounding with mode 'rnd' of a lower bound on |Gamma(x)|, 96 t2 is the rounding with mode 'rnd' of an upper bound, thus if both 97 are equal, so is the wanted result. If t1 and t2 differ or the flags 98 differ, at some point of Ziv's loop they should agree. */ 99 if (mpfr_equal_p (t1, t2) && flags1 == flags2) 100 { 101 MPFR_ASSERTN ((inex1 > 0 && inex2 > 0) || (inex1 < 0 && inex2 < 0)); 102 mpfr_set4 (y, t1, MPFR_RNDN, sign); /* exact */ 103 if (sign < 0) 104 inex1 = - inex1; 105 MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, flags1); 106 } 107 else 108 inex1 = 0; /* couldn't determine the result */ 109 MPFR_GROUP_CLEAR (group); 110 111 return inex1; 112 } 113 114 #else 115 116 static int 117 unit_bit (mpfr_srcptr x) 118 { 119 mpfr_exp_t expo; 120 mpfr_prec_t prec; 121 mp_limb_t x0; 122 123 expo = MPFR_GET_EXP (x); 124 if (expo <= 0) 125 return 0; /* |x| < 1 */ 126 127 prec = MPFR_PREC (x); 128 if (expo > prec) 129 return 0; /* y is a multiple of 2^(expo-prec), thus an even integer */ 130 131 /* Now, the unit bit is represented. */ 132 133 prec = MPFR_PREC2LIMBS (prec) * GMP_NUMB_BITS - expo; 134 /* number of represented fractional bits (including the trailing 0's) */ 135 136 x0 = *(MPFR_MANT (x) + prec / GMP_NUMB_BITS); 137 /* limb containing the unit bit */ 138 139 return (x0 >> (prec % GMP_NUMB_BITS)) & 1; 140 } 141 142 #endif 143 144 /* lngamma(x) = log(gamma(x)). 145 We use formula [6.1.40] from Abramowitz&Stegun: 146 lngamma(z) = (z-1/2)*log(z) - z + 1/2*log(2*Pi) 147 + sum (Bernoulli[2m]/(2m)/(2m-1)/z^(2m-1),m=1..infinity) 148 According to [6.1.42], if the sum is truncated after m=n, the error 149 R_n(z) is bounded by |B[2n+2]|*K(z)/(2n+1)/(2n+2)/|z|^(2n+1) 150 where K(z) = max (z^2/(u^2+z^2)) for u >= 0. 151 For z real, |K(z)| <= 1 thus R_n(z) is bounded by the first neglected term. 152 */ 153 #ifdef IS_GAMMA 154 #define GAMMA_FUNC mpfr_gamma_aux 155 #else 156 #define GAMMA_FUNC mpfr_lngamma_aux 157 #endif 158 159 static int 160 GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd) 161 { 162 mpfr_prec_t precy, w; /* working precision */ 163 mpfr_t s, t, u, v, z; 164 unsigned long m, k, maxm; 165 mpz_t *INITIALIZED(B); /* variable B declared as initialized */ 166 int compared; 167 int inexact = 0; /* 0 means: result y not set yet */ 168 mpfr_exp_t err_s, err_t; 169 unsigned long Bm = 0; /* number of allocated B[] */ 170 unsigned long oldBm; 171 double d; 172 MPFR_SAVE_EXPO_DECL (expo); 173 MPFR_ZIV_DECL (loop); 174 175 compared = mpfr_cmp_ui (z0, 1); 176 177 MPFR_SAVE_EXPO_MARK (expo); 178 179 #ifndef IS_GAMMA /* lngamma or lgamma */ 180 if (compared == 0 || (compared > 0 && mpfr_cmp_ui (z0, 2) == 0)) 181 { 182 MPFR_SAVE_EXPO_FREE (expo); 183 return mpfr_set_ui (y, 0, MPFR_RNDN); /* lngamma(1 or 2) = +0 */ 184 } 185 186 /* Deal here with tiny inputs. We have for -0.3 <= x <= 0.3: 187 - log|x| - gamma*x <= log|gamma(x)| <= - log|x| - gamma*x + x^2 */ 188 if (MPFR_EXP(z0) <= - (mpfr_exp_t) MPFR_PREC(y)) 189 { 190 mpfr_t l, h, g; 191 int ok, inex1, inex2; 192 mpfr_prec_t prec = MPFR_PREC(y) + 14; 193 MPFR_ZIV_DECL (loop); 194 195 MPFR_ZIV_INIT (loop, prec); 196 do 197 { 198 mpfr_init2 (l, prec); 199 if (MPFR_IS_POS(z0)) 200 { 201 mpfr_log (l, z0, MPFR_RNDU); /* upper bound for log(z0) */ 202 mpfr_init2 (h, MPFR_PREC(l)); 203 } 204 else 205 { 206 mpfr_init2 (h, MPFR_PREC(z0)); 207 mpfr_neg (h, z0, MPFR_RNDN); /* exact */ 208 mpfr_log (l, h, MPFR_RNDU); /* upper bound for log(-z0) */ 209 mpfr_set_prec (h, MPFR_PREC(l)); 210 } 211 mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(|z0|) */ 212 mpfr_set (h, l, MPFR_RNDD); /* exact */ 213 mpfr_nextabove (h); /* upper bound for -log(|z0|), avoids two calls 214 to mpfr_log */ 215 mpfr_init2 (g, MPFR_PREC(l)); 216 /* if z0>0, we need an upper approximation of Euler's constant 217 for the left bound */ 218 mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDU : MPFR_RNDD); 219 mpfr_mul (g, g, z0, MPFR_RNDD); 220 mpfr_sub (l, l, g, MPFR_RNDD); 221 mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDD : MPFR_RNDU); /* cached */ 222 mpfr_mul (g, g, z0, MPFR_RNDU); 223 mpfr_sub (h, h, g, MPFR_RNDD); 224 mpfr_mul (g, z0, z0, MPFR_RNDU); 225 mpfr_add (h, h, g, MPFR_RNDU); 226 inex1 = mpfr_prec_round (l, MPFR_PREC(y), rnd); 227 inex2 = mpfr_prec_round (h, MPFR_PREC(y), rnd); 228 /* Caution: we not only need l = h, but both inexact flags should 229 agree. Indeed, one of the inexact flags might be zero. In that 230 case if we assume lngamma(z0) cannot be exact, the other flag 231 should be correct. We are conservative here and request that both 232 inexact flags agree. */ 233 ok = SAME_SIGN (inex1, inex2) && mpfr_cmp (l, h) == 0; 234 if (ok) 235 mpfr_set (y, h, rnd); /* exact */ 236 mpfr_clear (l); 237 mpfr_clear (h); 238 mpfr_clear (g); 239 if (ok) 240 { 241 MPFR_ZIV_FREE (loop); 242 MPFR_SAVE_EXPO_FREE (expo); 243 return mpfr_check_range (y, inex1, rnd); 244 } 245 /* since we have log|gamma(x)| = - log|x| - gamma*x + O(x^2), 246 if x ~ 2^(-n), then we have a n-bit approximation, thus 247 we can try again with a working precision of n bits, 248 especially when n >> PREC(y). 249 Otherwise we would use the reflection formula evaluating x-1, 250 which would need precision n. */ 251 MPFR_ZIV_NEXT (loop, prec); 252 } 253 while (prec <= -MPFR_EXP(z0)); 254 MPFR_ZIV_FREE (loop); 255 } 256 #endif 257 258 precy = MPFR_PREC(y); 259 260 mpfr_init2 (s, MPFR_PREC_MIN); 261 mpfr_init2 (t, MPFR_PREC_MIN); 262 mpfr_init2 (u, MPFR_PREC_MIN); 263 mpfr_init2 (v, MPFR_PREC_MIN); 264 mpfr_init2 (z, MPFR_PREC_MIN); 265 266 if (compared < 0) 267 { 268 mpfr_exp_t err_u; 269 270 /* use reflection formula: 271 gamma(x) = Pi*(x-1)/sin(Pi*(2-x))/gamma(2-x) 272 thus lngamma(x) = log(Pi*(x-1)/sin(Pi*(2-x))) - lngamma(2-x) */ 273 274 w = precy + MPFR_INT_CEIL_LOG2 (precy); 275 w += MPFR_INT_CEIL_LOG2 (w) + 14; 276 MPFR_ZIV_INIT (loop, w); 277 while (1) 278 { 279 MPFR_ASSERTD(w >= 3); 280 mpfr_set_prec (s, w); 281 mpfr_set_prec (t, w); 282 mpfr_set_prec (u, w); 283 mpfr_set_prec (v, w); 284 /* In the following, we write r for a real of absolute value 285 at most 2^(-w). Different instances of r may represent different 286 values. */ 287 mpfr_ui_sub (s, 2, z0, MPFR_RNDD); /* s = (2-z0) * (1+2r) >= 1 */ 288 mpfr_const_pi (t, MPFR_RNDN); /* t = Pi * (1+r) */ 289 mpfr_lngamma (u, s, MPFR_RNDN); /* lngamma(2-x) */ 290 /* Let s = (2-z0) + h. By construction, -(2-z0)*2^(1-w) <= h <= 0. 291 We have lngamma(s) = lngamma(2-z0) + h*Psi(z), z in [2-z0+h,2-z0]. 292 Since 2-z0+h = s >= 1 and |Psi(x)| <= max(1,log(x)) for x >= 1, 293 the error on u is bounded by 294 ulp(u)/2 + (2-z0)*max(1,log(2-z0))*2^(1-w) 295 = (1/2 + (2-z0)*max(1,log(2-z0))*2^(1-E(u))) ulp(u) */ 296 d = (double) MPFR_GET_EXP(s) * 0.694; /* upper bound for log(2-z0) */ 297 err_u = MPFR_GET_EXP(s) + __gmpfr_ceil_log2 (d) + 1 - MPFR_GET_EXP(u); 298 err_u = (err_u >= 0) ? err_u + 1 : 0; 299 /* now the error on u is bounded by 2^err_u ulps */ 300 301 mpfr_mul (s, s, t, MPFR_RNDN); /* Pi*(2-x) * (1+r)^4 */ 302 err_s = MPFR_GET_EXP(s); /* 2-x <= 2^err_s */ 303 mpfr_sin (s, s, MPFR_RNDN); /* sin(Pi*(2-x)) */ 304 /* the error on s is bounded by 1/2*ulp(s) + [(1+2^(-w))^4-1]*(2-x) 305 <= 1/2*ulp(s) + 5*2^(-w)*(2-x) for w >= 3 306 <= (1/2 + 5 * 2^(-E(s)) * (2-x)) ulp(s) */ 307 err_s += 3 - MPFR_GET_EXP(s); 308 err_s = (err_s >= 0) ? err_s + 1 : 0; 309 /* the error on s is bounded by 2^err_s ulp(s), thus by 310 2^(err_s+1)*2^(-w)*|s| since ulp(s) <= 2^(1-w)*|s|. 311 Now n*2^(-w) can always be written |(1+r)^n-1| for some 312 |r|<=2^(-w), thus taking n=2^(err_s+1) we see that 313 |S - s| <= |(1+r)^(2^(err_s+1))-1| * |s|, where S is the 314 true value. 315 In fact if ulp(s) <= ulp(S) the same inequality holds for 316 |S| instead of |s| in the right hand side, i.e., we can 317 write s = (1+r)^(2^(err_s+1)) * S. 318 But if ulp(S) < ulp(s), we need to add one ``bit'' to the error, 319 to get s = (1+r)^(2^(err_s+2)) * S. This is true since with 320 E = n*2^(-w) we have |s - S| <= E * |s|, thus 321 |s - S| <= E/(1-E) * |S|. 322 Now E/(1-E) is bounded by 2E as long as E<=1/2, 323 and 2E can be written (1+r)^(2n)-1 as above. 324 */ 325 err_s += 2; /* exponent of relative error */ 326 327 mpfr_sub_ui (v, z0, 1, MPFR_RNDN); /* v = (x-1) * (1+r) */ 328 mpfr_mul (v, v, t, MPFR_RNDN); /* v = Pi*(x-1) * (1+r)^3 */ 329 mpfr_div (v, v, s, MPFR_RNDN); /* Pi*(x-1)/sin(Pi*(2-x)) */ 330 mpfr_abs (v, v, MPFR_RNDN); 331 /* (1+r)^(3+2^err_s+1) */ 332 err_s = (err_s <= 1) ? 3 : err_s + 1; 333 /* now (1+r)^M with M <= 2^err_s */ 334 mpfr_log (v, v, MPFR_RNDN); 335 /* log(v*(1+e)) = log(v)+log(1+e) where |e| <= 2^(err_s-w). 336 Since |log(1+e)| <= 2*e for |e| <= 1/4, the error on v is 337 bounded by ulp(v)/2 + 2^(err_s+1-w). */ 338 if (err_s + 2 > w) 339 { 340 w += err_s + 2; 341 } 342 else 343 { 344 err_s += 1 - MPFR_GET_EXP(v); 345 err_s = (err_s >= 0) ? err_s + 1 : 0; 346 /* the error on v is bounded by 2^err_s ulps */ 347 err_u += MPFR_GET_EXP(u); /* absolute error on u */ 348 err_s += MPFR_GET_EXP(v); /* absolute error on v */ 349 mpfr_sub (s, v, u, MPFR_RNDN); 350 /* the total error on s is bounded by ulp(s)/2 + 2^(err_u-w) 351 + 2^(err_s-w) <= ulp(s)/2 + 2^(max(err_u,err_s)+1-w) */ 352 err_s = (err_s >= err_u) ? err_s : err_u; 353 err_s += 1 - MPFR_GET_EXP(s); /* error is 2^err_s ulp(s) */ 354 err_s = (err_s >= 0) ? err_s + 1 : 0; 355 if (mpfr_can_round (s, w - err_s, MPFR_RNDN, MPFR_RNDZ, precy 356 + (rnd == MPFR_RNDN))) 357 goto end; 358 } 359 MPFR_ZIV_NEXT (loop, w); 360 } 361 MPFR_ZIV_FREE (loop); 362 } 363 364 /* now z0 > 1 */ 365 366 MPFR_ASSERTD (compared > 0); 367 368 /* since k is O(w), the value of log(z0*...*(z0+k-1)) is about w*log(w), 369 so there is a cancellation of ~log(w) in the argument reconstruction */ 370 w = precy + MPFR_INT_CEIL_LOG2 (precy); 371 w += MPFR_INT_CEIL_LOG2 (w) + 13; 372 MPFR_ZIV_INIT (loop, w); 373 while (1) 374 { 375 MPFR_ASSERTD (w >= 3); 376 377 /* argument reduction: we compute gamma(z0 + k), where the series 378 has error term B_{2n}/(z0+k)^(2n) ~ (n/(Pi*e*(z0+k)))^(2n) 379 and we need k steps of argument reconstruction. Assuming k is large 380 with respect to z0, and k = n, we get 1/(Pi*e)^(2n) ~ 2^(-w), i.e., 381 k ~ w*log(2)/2/log(Pi*e) ~ 0.1616 * w. 382 However, since the series is more expensive to compute, the optimal 383 value seems to be k ~ 4.5 * w experimentally. */ 384 mpfr_set_prec (s, 53); 385 mpfr_gamma_alpha (s, w); 386 mpfr_set_ui_2exp (s, 9, -1, MPFR_RNDU); 387 mpfr_mul_ui (s, s, w, MPFR_RNDU); 388 if (mpfr_cmp (z0, s) < 0) 389 { 390 mpfr_sub (s, s, z0, MPFR_RNDU); 391 k = mpfr_get_ui (s, MPFR_RNDU); 392 if (k < 3) 393 k = 3; 394 } 395 else 396 k = 3; 397 398 mpfr_set_prec (s, w); 399 mpfr_set_prec (t, w); 400 mpfr_set_prec (u, w); 401 mpfr_set_prec (v, w); 402 mpfr_set_prec (z, w); 403 404 mpfr_add_ui (z, z0, k, MPFR_RNDN); 405 /* z = (z0+k)*(1+t1) with |t1| <= 2^(-w) */ 406 407 /* z >= 4 ensures the relative error on log(z) is small, 408 and also (z-1/2)*log(z)-z >= 0 */ 409 MPFR_ASSERTD (mpfr_cmp_ui (z, 4) >= 0); 410 411 mpfr_log (s, z, MPFR_RNDN); /* log(z) */ 412 /* we have s = log((z0+k)*(1+t1))*(1+t2) with |t1|, |t2| <= 2^(-w). 413 Since w >= 2 and z0+k >= 4, we can write log((z0+k)*(1+t1)) 414 = log(z0+k) * (1+t3) with |t3| <= 2^(-w), thus we have 415 s = log(z0+k) * (1+t4)^2 with |t4| <= 2^(-w) */ 416 mpfr_mul_2ui (t, z, 1, MPFR_RNDN); /* t = 2z * (1+t5) */ 417 mpfr_sub_ui (t, t, 1, MPFR_RNDN); /* t = 2z-1 * (1+t6)^3 */ 418 /* since we can write 2z*(1+t5) = (2z-1)*(1+t5') with 419 t5' = 2z/(2z-1) * t5, thus |t5'| <= 8/7 * t5 */ 420 mpfr_mul (s, s, t, MPFR_RNDN); /* (2z-1)*log(z) * (1+t7)^6 */ 421 mpfr_div_2ui (s, s, 1, MPFR_RNDN); /* (z-1/2)*log(z) * (1+t7)^6 */ 422 mpfr_sub (s, s, z, MPFR_RNDN); /* (z-1/2)*log(z)-z */ 423 /* s = [(z-1/2)*log(z)-z]*(1+u)^14, s >= 1/2 */ 424 425 mpfr_ui_div (u, 1, z, MPFR_RNDN); /* 1/z * (1+u), u <= 1/4 since z >= 4 */ 426 427 /* the first term is B[2]/2/z = 1/12/z: t=1/12/z, C[2]=1 */ 428 mpfr_div_ui (t, u, 12, MPFR_RNDN); /* 1/(12z) * (1+u)^2, t <= 3/128 */ 429 mpfr_set (v, t, MPFR_RNDN); /* (1+u)^2, v < 2^(-5) */ 430 mpfr_add (s, s, v, MPFR_RNDN); /* (1+u)^15 */ 431 432 mpfr_mul (u, u, u, MPFR_RNDN); /* 1/z^2 * (1+u)^3 */ 433 434 if (Bm == 0) 435 { 436 B = mpfr_bernoulli_internal ((mpz_t *) 0, 0); 437 B = mpfr_bernoulli_internal (B, 1); 438 Bm = 2; 439 } 440 441 /* m <= maxm ensures that 2*m*(2*m+1) <= ULONG_MAX */ 442 maxm = 1UL << (GMP_NUMB_BITS / 2 - 1); 443 444 /* s:(1+u)^15, t:(1+u)^2, t <= 3/128 */ 445 446 for (m = 2; MPFR_GET_EXP(v) + (mpfr_exp_t) w >= MPFR_GET_EXP(s); m++) 447 { 448 mpfr_mul (t, t, u, MPFR_RNDN); /* (1+u)^(10m-14) */ 449 if (m <= maxm) 450 { 451 mpfr_mul_ui (t, t, 2*(m-1)*(2*m-3), MPFR_RNDN); 452 mpfr_div_ui (t, t, 2*m*(2*m-1), MPFR_RNDN); 453 mpfr_div_ui (t, t, 2*m*(2*m+1), MPFR_RNDN); 454 } 455 else 456 { 457 mpfr_mul_ui (t, t, 2*(m-1), MPFR_RNDN); 458 mpfr_mul_ui (t, t, 2*m-3, MPFR_RNDN); 459 mpfr_div_ui (t, t, 2*m, MPFR_RNDN); 460 mpfr_div_ui (t, t, 2*m-1, MPFR_RNDN); 461 mpfr_div_ui (t, t, 2*m, MPFR_RNDN); 462 mpfr_div_ui (t, t, 2*m+1, MPFR_RNDN); 463 } 464 /* (1+u)^(10m-8) */ 465 /* invariant: t=1/(2m)/(2m-1)/z^(2m-1)/(2m+1)! */ 466 if (Bm <= m) 467 { 468 B = mpfr_bernoulli_internal (B, m); /* B[2m]*(2m+1)!, exact */ 469 Bm ++; 470 } 471 mpfr_mul_z (v, t, B[m], MPFR_RNDN); /* (1+u)^(10m-7) */ 472 MPFR_ASSERTD(MPFR_GET_EXP(v) <= - (2 * m + 3)); 473 mpfr_add (s, s, v, MPFR_RNDN); 474 } 475 /* m <= 1/2*Pi*e*z ensures that |v[m]| < 1/2^(2m+3) */ 476 MPFR_ASSERTD ((double) m <= 4.26 * mpfr_get_d (z, MPFR_RNDZ)); 477 478 /* We have sum([(1+u)^(10m-7)-1]*1/2^(2m+3), m=2..infinity) 479 <= 1.46*u for u <= 2^(-3). 480 We have 0 < lngamma(z) - [(z - 1/2) ln(z) - z + 1/2 ln(2 Pi)] < 0.021 481 for z >= 4, thus since the initial s >= 0.85, the different values of 482 s differ by at most one binade, and the total rounding error on s 483 in the for-loop is bounded by 2*(m-1)*ulp(final_s). 484 The error coming from the v's is bounded by 485 1.46*2^(-w) <= 2*ulp(final_s). 486 Thus the total error so far is bounded by [(1+u)^15-1]*s+2m*ulp(s) 487 <= (2m+47)*ulp(s). 488 Taking into account the truncation error (which is bounded by the last 489 term v[] according to 6.1.42 in A&S), the bound is (2m+48)*ulp(s). 490 */ 491 492 /* add 1/2*log(2*Pi) and subtract log(z0*(z0+1)*...*(z0+k-1)) */ 493 mpfr_const_pi (v, MPFR_RNDN); /* v = Pi*(1+u) */ 494 mpfr_mul_2ui (v, v, 1, MPFR_RNDN); /* v = 2*Pi * (1+u) */ 495 if (k) 496 { 497 unsigned long l; 498 mpfr_set (t, z0, MPFR_RNDN); /* t = z0*(1+u) */ 499 for (l = 1; l < k; l++) 500 { 501 mpfr_add_ui (u, z0, l, MPFR_RNDN); /* u = (z0+l)*(1+u) */ 502 mpfr_mul (t, t, u, MPFR_RNDN); /* (1+u)^(2l+1) */ 503 } 504 /* now t: (1+u)^(2k-1) */ 505 /* instead of computing log(sqrt(2*Pi)/t), we compute 506 1/2*log(2*Pi/t^2), which trades a square root for a square */ 507 mpfr_mul (t, t, t, MPFR_RNDN); /* (z0*...*(z0+k-1))^2, (1+u)^(4k-1) */ 508 mpfr_div (v, v, t, MPFR_RNDN); 509 /* 2*Pi/(z0*...*(z0+k-1))^2 (1+u)^(4k+1) */ 510 } 511 #ifdef IS_GAMMA 512 err_s = MPFR_GET_EXP(s); 513 mpfr_exp (s, s, MPFR_RNDN); 514 /* If s is +Inf, we compute exp(lngamma(z0)). */ 515 if (mpfr_inf_p (s)) 516 { 517 inexact = mpfr_explgamma (y, z0, &expo, s, t, rnd); 518 if (inexact) 519 goto end0; 520 else 521 goto ziv_next; 522 } 523 /* before the exponential, we have s = s0 + h where 524 |h| <= (2m+48)*ulp(s), thus exp(s0) = exp(s) * exp(-h). 525 For |h| <= 1/4, we have |exp(h)-1| <= 1.2*|h| thus 526 |exp(s) - exp(s0)| <= 1.2 * exp(s) * (2m+48)* 2^(EXP(s)-w). */ 527 d = 1.2 * (2.0 * (double) m + 48.0); 528 /* the error on s is bounded by d*2^err_s * 2^(-w) */ 529 mpfr_sqrt (t, v, MPFR_RNDN); 530 /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1), 531 thus t = sqrt(v0)*(1+u)^(2k+3/2). */ 532 mpfr_mul (s, s, t, MPFR_RNDN); 533 /* the error on input s is bounded by (1+u)^(d*2^err_s), 534 and that on t is (1+u)^(2k+3/2), thus the 535 total error is (1+u)^(d*2^err_s+2k+5/2) */ 536 err_s += __gmpfr_ceil_log2 (d); 537 err_t = __gmpfr_ceil_log2 (2.0 * (double) k + 2.5); 538 err_s = (err_s >= err_t) ? err_s + 1 : err_t + 1; 539 #else 540 mpfr_log (t, v, MPFR_RNDN); 541 /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1), 542 thus log(v) = log(v0) + (4k+1)*log(1+u). Since |log(1+u)/u| <= 1.07 543 for |u| <= 2^(-3), the absolute error on log(v) is bounded by 544 1.07*(4k+1)*u, and the rounding error by ulp(t). */ 545 mpfr_div_2ui (t, t, 1, MPFR_RNDN); 546 /* the error on t is now bounded by ulp(t) + 0.54*(4k+1)*2^(-w). 547 We have sqrt(2*Pi)/(z0*(z0+1)*...*(z0+k-1)) <= sqrt(2*Pi)/k! <= 0.5 548 since k>=3, thus t <= -0.5 and ulp(t) >= 2^(-w). 549 Thus the error on t is bounded by (2.16*k+1.54)*ulp(t). */ 550 err_t = MPFR_GET_EXP(t) + (mpfr_exp_t) 551 __gmpfr_ceil_log2 (2.2 * (double) k + 1.6); 552 err_s = MPFR_GET_EXP(s) + (mpfr_exp_t) 553 __gmpfr_ceil_log2 (2.0 * (double) m + 48.0); 554 mpfr_add (s, s, t, MPFR_RNDN); /* this is a subtraction in fact */ 555 /* the final error in ulp(s) is 556 <= 1 + 2^(err_t-EXP(s)) + 2^(err_s-EXP(s)) 557 <= 2^(1+max(err_t,err_s)-EXP(s)) if err_t <> err_s 558 <= 2^(2+max(err_t,err_s)-EXP(s)) if err_t = err_s */ 559 err_s = (err_t == err_s) ? 1 + err_s : ((err_t > err_s) ? err_t : err_s); 560 err_s += 1 - MPFR_GET_EXP(s); 561 #endif 562 if (MPFR_LIKELY (MPFR_CAN_ROUND (s, w - err_s, precy, rnd))) 563 break; 564 #ifdef IS_GAMMA 565 ziv_next: 566 #endif 567 MPFR_ZIV_NEXT (loop, w); 568 } 569 570 #ifdef IS_GAMMA 571 end0: 572 #endif 573 oldBm = Bm; 574 while (Bm--) 575 mpz_clear (B[Bm]); 576 (*__gmp_free_func) (B, oldBm * sizeof (mpz_t)); 577 578 end: 579 if (inexact == 0) 580 inexact = mpfr_set (y, s, rnd); 581 MPFR_ZIV_FREE (loop); 582 583 mpfr_clear (s); 584 mpfr_clear (t); 585 mpfr_clear (u); 586 mpfr_clear (v); 587 mpfr_clear (z); 588 589 MPFR_SAVE_EXPO_FREE (expo); 590 return mpfr_check_range (y, inexact, rnd); 591 } 592 593 #ifndef IS_GAMMA 594 595 int 596 mpfr_lngamma (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd) 597 { 598 int inex; 599 600 MPFR_LOG_FUNC 601 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd), 602 ("y[%Pu]=%.*Rg inexact=%d", 603 mpfr_get_prec (y), mpfr_log_prec, y, inex)); 604 605 /* special cases */ 606 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x))) 607 { 608 if (MPFR_IS_NAN (x) || MPFR_IS_NEG (x)) 609 { 610 MPFR_SET_NAN (y); 611 MPFR_RET_NAN; 612 } 613 else /* lngamma(+Inf) = lngamma(+0) = +Inf */ 614 { 615 if (MPFR_IS_ZERO (x)) 616 mpfr_set_divby0 (); 617 MPFR_SET_INF (y); 618 MPFR_SET_POS (y); 619 MPFR_RET (0); /* exact */ 620 } 621 } 622 623 /* if x < 0 and -2k-1 <= x <= -2k, then lngamma(x) = NaN */ 624 if (MPFR_IS_NEG (x) && (unit_bit (x) == 0 || mpfr_integer_p (x))) 625 { 626 MPFR_SET_NAN (y); 627 MPFR_RET_NAN; 628 } 629 630 inex = mpfr_lngamma_aux (y, x, rnd); 631 return inex; 632 } 633 634 int 635 mpfr_lgamma (mpfr_ptr y, int *signp, mpfr_srcptr x, mpfr_rnd_t rnd) 636 { 637 int inex; 638 639 MPFR_LOG_FUNC 640 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd), 641 ("y[%Pu]=%.*Rg signp=%d inexact=%d", 642 mpfr_get_prec (y), mpfr_log_prec, y, *signp, inex)); 643 644 *signp = 1; /* most common case */ 645 646 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x))) 647 { 648 if (MPFR_IS_NAN (x)) 649 { 650 MPFR_SET_NAN (y); 651 MPFR_RET_NAN; 652 } 653 else 654 { 655 if (MPFR_IS_ZERO (x)) 656 mpfr_set_divby0 (); 657 *signp = MPFR_INT_SIGN (x); 658 MPFR_SET_INF (y); 659 MPFR_SET_POS (y); 660 MPFR_RET (0); 661 } 662 } 663 664 if (MPFR_IS_NEG (x)) 665 { 666 if (mpfr_integer_p (x)) 667 { 668 MPFR_SET_INF (y); 669 MPFR_SET_POS (y); 670 mpfr_set_divby0 (); 671 MPFR_RET (0); 672 } 673 674 if (unit_bit (x) == 0) 675 *signp = -1; 676 677 /* For tiny negative x, we have gamma(x) = 1/x - euler + O(x), 678 thus |gamma(x)| = -1/x + euler + O(x), and 679 log |gamma(x)| = -log(-x) - euler*x + O(x^2). 680 More precisely we have for -0.4 <= x < 0: 681 -log(-x) <= log |gamma(x)| <= -log(-x) - x. 682 Since log(x) is not representable, we may have an instance of the 683 Table Maker Dilemma. The only way to ensure correct rounding is to 684 compute an interval [l,h] such that l <= -log(-x) and 685 -log(-x) - x <= h, and check whether l and h round to the same number 686 for the target precision and rounding modes. */ 687 if (MPFR_EXP(x) + 1 <= - (mpfr_exp_t) MPFR_PREC(y)) 688 /* since PREC(y) >= 1, this ensures EXP(x) <= -2, 689 thus |x| <= 0.25 < 0.4 */ 690 { 691 mpfr_t l, h; 692 int ok, inex2; 693 mpfr_prec_t w = MPFR_PREC (y) + 14; 694 mpfr_exp_t expl; 695 696 while (1) 697 { 698 mpfr_init2 (l, w); 699 mpfr_init2 (h, w); 700 /* we want a lower bound on -log(-x), thus an upper bound 701 on log(-x), thus an upper bound on -x. */ 702 mpfr_neg (l, x, MPFR_RNDU); /* upper bound on -x */ 703 mpfr_log (l, l, MPFR_RNDU); /* upper bound for log(-x) */ 704 mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(-x) */ 705 mpfr_neg (h, x, MPFR_RNDD); /* lower bound on -x */ 706 mpfr_log (h, h, MPFR_RNDD); /* lower bound on log(-x) */ 707 mpfr_neg (h, h, MPFR_RNDU); /* upper bound for -log(-x) */ 708 mpfr_sub (h, h, x, MPFR_RNDU); /* upper bound for -log(-x) - x */ 709 inex = mpfr_prec_round (l, MPFR_PREC (y), rnd); 710 inex2 = mpfr_prec_round (h, MPFR_PREC (y), rnd); 711 /* Caution: we not only need l = h, but both inexact flags 712 should agree. Indeed, one of the inexact flags might be 713 zero. In that case if we assume ln|gamma(x)| cannot be 714 exact, the other flag should be correct. We are conservative 715 here and request that both inexact flags agree. */ 716 ok = SAME_SIGN (inex, inex2) && mpfr_equal_p (l, h); 717 if (ok) 718 mpfr_set (y, h, rnd); /* exact */ 719 else 720 expl = MPFR_EXP (l); 721 mpfr_clear (l); 722 mpfr_clear (h); 723 if (ok) 724 return inex; 725 /* if ulp(log(-x)) <= |x| there is no reason to loop, 726 since the width of [l, h] will be at least |x| */ 727 if (expl < MPFR_EXP(x) + (mpfr_exp_t) w) 728 break; 729 w += MPFR_INT_CEIL_LOG2(w) + 3; 730 } 731 } 732 } 733 734 inex = mpfr_lngamma_aux (y, x, rnd); 735 return inex; 736 } 737 738 #endif 739