1 /* mpfr_lngamma -- lngamma function 2 3 Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. 4 Contributed by the Arenaire and Caramel projects, INRIA. 5 6 This file is part of the GNU MPFR Library. 7 8 The GNU MPFR Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MPFR Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23 #define MPFR_NEED_LONGLONG_H 24 #include "mpfr-impl.h" 25 26 /* given a precision p, return alpha, such that the argument reduction 27 will use k = alpha*p*log(2). 28 29 Warning: we should always have alpha >= log(2)/(2Pi) ~ 0.11, 30 and the smallest value of alpha multiplied by the smallest working 31 precision should be >= 4. 32 */ 33 static void 34 mpfr_gamma_alpha (mpfr_t s, mpfr_prec_t p) 35 { 36 if (p <= 100) 37 mpfr_set_ui_2exp (s, 614, -10, MPFR_RNDN); /* about 0.6 */ 38 else if (p <= 500) 39 mpfr_set_ui_2exp (s, 819, -10, MPFR_RNDN); /* about 0.8 */ 40 else if (p <= 1000) 41 mpfr_set_ui_2exp (s, 1331, -10, MPFR_RNDN); /* about 1.3 */ 42 else if (p <= 2000) 43 mpfr_set_ui_2exp (s, 1741, -10, MPFR_RNDN); /* about 1.7 */ 44 else if (p <= 5000) 45 mpfr_set_ui_2exp (s, 2253, -10, MPFR_RNDN); /* about 2.2 */ 46 else if (p <= 10000) 47 mpfr_set_ui_2exp (s, 3482, -10, MPFR_RNDN); /* about 3.4 */ 48 else 49 mpfr_set_ui_2exp (s, 9, -1, MPFR_RNDN); /* 4.5 */ 50 } 51 52 #ifndef IS_GAMMA 53 static int 54 unit_bit (mpfr_srcptr (x)) 55 { 56 mpfr_exp_t expo; 57 mpfr_prec_t prec; 58 mp_limb_t x0; 59 60 expo = MPFR_GET_EXP (x); 61 if (expo <= 0) 62 return 0; /* |x| < 1 */ 63 64 prec = MPFR_PREC (x); 65 if (expo > prec) 66 return 0; /* y is a multiple of 2^(expo-prec), thus an even integer */ 67 68 /* Now, the unit bit is represented. */ 69 70 prec = ((prec - 1) / GMP_NUMB_BITS + 1) * GMP_NUMB_BITS - expo; 71 /* number of represented fractional bits (including the trailing 0's) */ 72 73 x0 = *(MPFR_MANT (x) + prec / GMP_NUMB_BITS); 74 /* limb containing the unit bit */ 75 76 return (x0 >> (prec % GMP_NUMB_BITS)) & 1; 77 } 78 #endif 79 80 /* lngamma(x) = log(gamma(x)). 81 We use formula [6.1.40] from Abramowitz&Stegun: 82 lngamma(z) = (z-1/2)*log(z) - z + 1/2*log(2*Pi) 83 + sum (Bernoulli[2m]/(2m)/(2m-1)/z^(2m-1),m=1..infinity) 84 According to [6.1.42], if the sum is truncated after m=n, the error 85 R_n(z) is bounded by |B[2n+2]|*K(z)/(2n+1)/(2n+2)/|z|^(2n+1) 86 where K(z) = max (z^2/(u^2+z^2)) for u >= 0. 87 For z real, |K(z)| <= 1 thus R_n(z) is bounded by the first neglected term. 88 */ 89 #ifdef IS_GAMMA 90 #define GAMMA_FUNC mpfr_gamma_aux 91 #else 92 #define GAMMA_FUNC mpfr_lngamma_aux 93 #endif 94 95 static int 96 GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd) 97 { 98 mpfr_prec_t precy, w; /* working precision */ 99 mpfr_t s, t, u, v, z; 100 unsigned long m, k, maxm; 101 mpz_t *INITIALIZED(B); /* variable B declared as initialized */ 102 int inexact, compared; 103 mpfr_exp_t err_s, err_t; 104 unsigned long Bm = 0; /* number of allocated B[] */ 105 unsigned long oldBm; 106 double d; 107 MPFR_SAVE_EXPO_DECL (expo); 108 109 compared = mpfr_cmp_ui (z0, 1); 110 111 MPFR_SAVE_EXPO_MARK (expo); 112 113 #ifndef IS_GAMMA /* lngamma or lgamma */ 114 if (compared == 0 || (compared > 0 && mpfr_cmp_ui (z0, 2) == 0)) 115 { 116 MPFR_SAVE_EXPO_FREE (expo); 117 return mpfr_set_ui (y, 0, MPFR_RNDN); /* lngamma(1 or 2) = +0 */ 118 } 119 120 /* Deal here with tiny inputs. We have for -0.3 <= x <= 0.3: 121 - log|x| - gamma*x <= log|gamma(x)| <= - log|x| - gamma*x + x^2 */ 122 if (MPFR_EXP(z0) <= - (mpfr_exp_t) MPFR_PREC(y)) 123 { 124 mpfr_t l, h, g; 125 int ok, inex2; 126 mpfr_prec_t prec = MPFR_PREC(y) + 14; 127 MPFR_ZIV_DECL (loop); 128 129 MPFR_ZIV_INIT (loop, prec); 130 do 131 { 132 mpfr_init2 (l, prec); 133 if (MPFR_IS_POS(z0)) 134 { 135 mpfr_log (l, z0, MPFR_RNDU); /* upper bound for log(z0) */ 136 mpfr_init2 (h, MPFR_PREC(l)); 137 } 138 else 139 { 140 mpfr_init2 (h, MPFR_PREC(z0)); 141 mpfr_neg (h, z0, MPFR_RNDN); /* exact */ 142 mpfr_log (l, h, MPFR_RNDU); /* upper bound for log(-z0) */ 143 mpfr_set_prec (h, MPFR_PREC(l)); 144 } 145 mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(|z0|) */ 146 mpfr_set (h, l, MPFR_RNDD); /* exact */ 147 mpfr_nextabove (h); /* upper bound for -log(|z0|), avoids two calls 148 to mpfr_log */ 149 mpfr_init2 (g, MPFR_PREC(l)); 150 /* if z0>0, we need an upper approximation of Euler's constant 151 for the left bound */ 152 mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDU : MPFR_RNDD); 153 mpfr_mul (g, g, z0, MPFR_RNDD); 154 mpfr_sub (l, l, g, MPFR_RNDD); 155 mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDD : MPFR_RNDU); /* cached */ 156 mpfr_mul (g, g, z0, MPFR_RNDU); 157 mpfr_sub (h, h, g, MPFR_RNDD); 158 mpfr_mul (g, z0, z0, MPFR_RNDU); 159 mpfr_add (h, h, g, MPFR_RNDU); 160 inexact = mpfr_prec_round (l, MPFR_PREC(y), rnd); 161 inex2 = mpfr_prec_round (h, MPFR_PREC(y), rnd); 162 /* Caution: we not only need l = h, but both inexact flags should 163 agree. Indeed, one of the inexact flags might be zero. In that 164 case if we assume lngamma(z0) cannot be exact, the other flag 165 should be correct. We are conservative here and request that both 166 inexact flags agree. */ 167 ok = SAME_SIGN (inexact, inex2) && mpfr_cmp (l, h) == 0; 168 if (ok) 169 mpfr_set (y, h, rnd); /* exact */ 170 mpfr_clear (l); 171 mpfr_clear (h); 172 mpfr_clear (g); 173 if (ok) 174 { 175 MPFR_SAVE_EXPO_FREE (expo); 176 return mpfr_check_range (y, inexact, rnd); 177 } 178 /* since we have log|gamma(x)| = - log|x| - gamma*x + O(x^2), 179 if x ~ 2^(-n), then we have a n-bit approximation, thus 180 we can try again with a working precision of n bits, 181 especially when n >> PREC(y). 182 Otherwise we would use the reflection formula evaluating x-1, 183 which would need precision n. */ 184 MPFR_ZIV_NEXT (loop, prec); 185 } 186 while (prec <= -MPFR_EXP(z0)); 187 MPFR_ZIV_FREE (loop); 188 } 189 #endif 190 191 precy = MPFR_PREC(y); 192 193 mpfr_init2 (s, MPFR_PREC_MIN); 194 mpfr_init2 (t, MPFR_PREC_MIN); 195 mpfr_init2 (u, MPFR_PREC_MIN); 196 mpfr_init2 (v, MPFR_PREC_MIN); 197 mpfr_init2 (z, MPFR_PREC_MIN); 198 199 if (compared < 0) 200 { 201 mpfr_exp_t err_u; 202 203 /* use reflection formula: 204 gamma(x) = Pi*(x-1)/sin(Pi*(2-x))/gamma(2-x) 205 thus lngamma(x) = log(Pi*(x-1)/sin(Pi*(2-x))) - lngamma(2-x) */ 206 207 w = precy + MPFR_INT_CEIL_LOG2 (precy); 208 while (1) 209 { 210 w += MPFR_INT_CEIL_LOG2 (w) + 14; 211 MPFR_ASSERTD(w >= 3); 212 mpfr_set_prec (s, w); 213 mpfr_set_prec (t, w); 214 mpfr_set_prec (u, w); 215 mpfr_set_prec (v, w); 216 /* In the following, we write r for a real of absolute value 217 at most 2^(-w). Different instances of r may represent different 218 values. */ 219 mpfr_ui_sub (s, 2, z0, MPFR_RNDD); /* s = (2-z0) * (1+2r) >= 1 */ 220 mpfr_const_pi (t, MPFR_RNDN); /* t = Pi * (1+r) */ 221 mpfr_lngamma (u, s, MPFR_RNDN); /* lngamma(2-x) */ 222 /* Let s = (2-z0) + h. By construction, -(2-z0)*2^(1-w) <= h <= 0. 223 We have lngamma(s) = lngamma(2-z0) + h*Psi(z), z in [2-z0+h,2-z0]. 224 Since 2-z0+h = s >= 1 and |Psi(x)| <= max(1,log(x)) for x >= 1, 225 the error on u is bounded by 226 ulp(u)/2 + (2-z0)*max(1,log(2-z0))*2^(1-w) 227 = (1/2 + (2-z0)*max(1,log(2-z0))*2^(1-E(u))) ulp(u) */ 228 d = (double) MPFR_GET_EXP(s) * 0.694; /* upper bound for log(2-z0) */ 229 err_u = MPFR_GET_EXP(s) + __gmpfr_ceil_log2 (d) + 1 - MPFR_GET_EXP(u); 230 err_u = (err_u >= 0) ? err_u + 1 : 0; 231 /* now the error on u is bounded by 2^err_u ulps */ 232 233 mpfr_mul (s, s, t, MPFR_RNDN); /* Pi*(2-x) * (1+r)^4 */ 234 err_s = MPFR_GET_EXP(s); /* 2-x <= 2^err_s */ 235 mpfr_sin (s, s, MPFR_RNDN); /* sin(Pi*(2-x)) */ 236 /* the error on s is bounded by 1/2*ulp(s) + [(1+2^(-w))^4-1]*(2-x) 237 <= 1/2*ulp(s) + 5*2^(-w)*(2-x) for w >= 3 238 <= (1/2 + 5 * 2^(-E(s)) * (2-x)) ulp(s) */ 239 err_s += 3 - MPFR_GET_EXP(s); 240 err_s = (err_s >= 0) ? err_s + 1 : 0; 241 /* the error on s is bounded by 2^err_s ulp(s), thus by 242 2^(err_s+1)*2^(-w)*|s| since ulp(s) <= 2^(1-w)*|s|. 243 Now n*2^(-w) can always be written |(1+r)^n-1| for some 244 |r|<=2^(-w), thus taking n=2^(err_s+1) we see that 245 |S - s| <= |(1+r)^(2^(err_s+1))-1| * |s|, where S is the 246 true value. 247 In fact if ulp(s) <= ulp(S) the same inequality holds for 248 |S| instead of |s| in the right hand side, i.e., we can 249 write s = (1+r)^(2^(err_s+1)) * S. 250 But if ulp(S) < ulp(s), we need to add one ``bit'' to the error, 251 to get s = (1+r)^(2^(err_s+2)) * S. This is true since with 252 E = n*2^(-w) we have |s - S| <= E * |s|, thus 253 |s - S| <= E/(1-E) * |S|. 254 Now E/(1-E) is bounded by 2E as long as E<=1/2, 255 and 2E can be written (1+r)^(2n)-1 as above. 256 */ 257 err_s += 2; /* exponent of relative error */ 258 259 mpfr_sub_ui (v, z0, 1, MPFR_RNDN); /* v = (x-1) * (1+r) */ 260 mpfr_mul (v, v, t, MPFR_RNDN); /* v = Pi*(x-1) * (1+r)^3 */ 261 mpfr_div (v, v, s, MPFR_RNDN); /* Pi*(x-1)/sin(Pi*(2-x)) */ 262 mpfr_abs (v, v, MPFR_RNDN); 263 /* (1+r)^(3+2^err_s+1) */ 264 err_s = (err_s <= 1) ? 3 : err_s + 1; 265 /* now (1+r)^M with M <= 2^err_s */ 266 mpfr_log (v, v, MPFR_RNDN); 267 /* log(v*(1+e)) = log(v)+log(1+e) where |e| <= 2^(err_s-w). 268 Since |log(1+e)| <= 2*e for |e| <= 1/4, the error on v is 269 bounded by ulp(v)/2 + 2^(err_s+1-w). */ 270 if (err_s + 2 > w) 271 { 272 w += err_s + 2; 273 } 274 else 275 { 276 err_s += 1 - MPFR_GET_EXP(v); 277 err_s = (err_s >= 0) ? err_s + 1 : 0; 278 /* the error on v is bounded by 2^err_s ulps */ 279 err_u += MPFR_GET_EXP(u); /* absolute error on u */ 280 err_s += MPFR_GET_EXP(v); /* absolute error on v */ 281 mpfr_sub (s, v, u, MPFR_RNDN); 282 /* the total error on s is bounded by ulp(s)/2 + 2^(err_u-w) 283 + 2^(err_s-w) <= ulp(s)/2 + 2^(max(err_u,err_s)+1-w) */ 284 err_s = (err_s >= err_u) ? err_s : err_u; 285 err_s += 1 - MPFR_GET_EXP(s); /* error is 2^err_s ulp(s) */ 286 err_s = (err_s >= 0) ? err_s + 1 : 0; 287 if (mpfr_can_round (s, w - err_s, MPFR_RNDN, MPFR_RNDZ, precy 288 + (rnd == MPFR_RNDN))) 289 goto end; 290 } 291 } 292 } 293 294 /* now z0 > 1 */ 295 296 MPFR_ASSERTD (compared > 0); 297 298 /* since k is O(w), the value of log(z0*...*(z0+k-1)) is about w*log(w), 299 so there is a cancellation of ~log(w) in the argument reconstruction */ 300 w = precy + MPFR_INT_CEIL_LOG2 (precy); 301 302 do 303 { 304 w += MPFR_INT_CEIL_LOG2 (w) + 13; 305 MPFR_ASSERTD (w >= 3); 306 307 /* argument reduction: we compute gamma(z0 + k), where the series 308 has error term B_{2n}/(z0+k)^(2n) ~ (n/(Pi*e*(z0+k)))^(2n) 309 and we need k steps of argument reconstruction. Assuming k is large 310 with respect to z0, and k = n, we get 1/(Pi*e)^(2n) ~ 2^(-w), i.e., 311 k ~ w*log(2)/2/log(Pi*e) ~ 0.1616 * w. 312 However, since the series is more expensive to compute, the optimal 313 value seems to be k ~ 4.5 * w experimentally. */ 314 mpfr_set_prec (s, 53); 315 mpfr_gamma_alpha (s, w); 316 mpfr_set_ui_2exp (s, 9, -1, MPFR_RNDU); 317 mpfr_mul_ui (s, s, w, MPFR_RNDU); 318 if (mpfr_cmp (z0, s) < 0) 319 { 320 mpfr_sub (s, s, z0, MPFR_RNDU); 321 k = mpfr_get_ui (s, MPFR_RNDU); 322 if (k < 3) 323 k = 3; 324 } 325 else 326 k = 3; 327 328 mpfr_set_prec (s, w); 329 mpfr_set_prec (t, w); 330 mpfr_set_prec (u, w); 331 mpfr_set_prec (v, w); 332 mpfr_set_prec (z, w); 333 334 mpfr_add_ui (z, z0, k, MPFR_RNDN); 335 /* z = (z0+k)*(1+t1) with |t1| <= 2^(-w) */ 336 337 /* z >= 4 ensures the relative error on log(z) is small, 338 and also (z-1/2)*log(z)-z >= 0 */ 339 MPFR_ASSERTD (mpfr_cmp_ui (z, 4) >= 0); 340 341 mpfr_log (s, z, MPFR_RNDN); /* log(z) */ 342 /* we have s = log((z0+k)*(1+t1))*(1+t2) with |t1|, |t2| <= 2^(-w). 343 Since w >= 2 and z0+k >= 4, we can write log((z0+k)*(1+t1)) 344 = log(z0+k) * (1+t3) with |t3| <= 2^(-w), thus we have 345 s = log(z0+k) * (1+t4)^2 with |t4| <= 2^(-w) */ 346 mpfr_mul_2ui (t, z, 1, MPFR_RNDN); /* t = 2z * (1+t5) */ 347 mpfr_sub_ui (t, t, 1, MPFR_RNDN); /* t = 2z-1 * (1+t6)^3 */ 348 /* since we can write 2z*(1+t5) = (2z-1)*(1+t5') with 349 t5' = 2z/(2z-1) * t5, thus |t5'| <= 8/7 * t5 */ 350 mpfr_mul (s, s, t, MPFR_RNDN); /* (2z-1)*log(z) * (1+t7)^6 */ 351 mpfr_div_2ui (s, s, 1, MPFR_RNDN); /* (z-1/2)*log(z) * (1+t7)^6 */ 352 mpfr_sub (s, s, z, MPFR_RNDN); /* (z-1/2)*log(z)-z */ 353 /* s = [(z-1/2)*log(z)-z]*(1+u)^14, s >= 1/2 */ 354 355 mpfr_ui_div (u, 1, z, MPFR_RNDN); /* 1/z * (1+u), u <= 1/4 since z >= 4 */ 356 357 /* the first term is B[2]/2/z = 1/12/z: t=1/12/z, C[2]=1 */ 358 mpfr_div_ui (t, u, 12, MPFR_RNDN); /* 1/(12z) * (1+u)^2, t <= 3/128 */ 359 mpfr_set (v, t, MPFR_RNDN); /* (1+u)^2, v < 2^(-5) */ 360 mpfr_add (s, s, v, MPFR_RNDN); /* (1+u)^15 */ 361 362 mpfr_mul (u, u, u, MPFR_RNDN); /* 1/z^2 * (1+u)^3 */ 363 364 if (Bm == 0) 365 { 366 B = mpfr_bernoulli_internal ((mpz_t *) 0, 0); 367 B = mpfr_bernoulli_internal (B, 1); 368 Bm = 2; 369 } 370 371 /* m <= maxm ensures that 2*m*(2*m+1) <= ULONG_MAX */ 372 maxm = 1UL << (GMP_NUMB_BITS / 2 - 1); 373 374 /* s:(1+u)^15, t:(1+u)^2, t <= 3/128 */ 375 376 for (m = 2; MPFR_GET_EXP(v) + (mpfr_exp_t) w >= MPFR_GET_EXP(s); m++) 377 { 378 mpfr_mul (t, t, u, MPFR_RNDN); /* (1+u)^(10m-14) */ 379 if (m <= maxm) 380 { 381 mpfr_mul_ui (t, t, 2*(m-1)*(2*m-3), MPFR_RNDN); 382 mpfr_div_ui (t, t, 2*m*(2*m-1), MPFR_RNDN); 383 mpfr_div_ui (t, t, 2*m*(2*m+1), MPFR_RNDN); 384 } 385 else 386 { 387 mpfr_mul_ui (t, t, 2*(m-1), MPFR_RNDN); 388 mpfr_mul_ui (t, t, 2*m-3, MPFR_RNDN); 389 mpfr_div_ui (t, t, 2*m, MPFR_RNDN); 390 mpfr_div_ui (t, t, 2*m-1, MPFR_RNDN); 391 mpfr_div_ui (t, t, 2*m, MPFR_RNDN); 392 mpfr_div_ui (t, t, 2*m+1, MPFR_RNDN); 393 } 394 /* (1+u)^(10m-8) */ 395 /* invariant: t=1/(2m)/(2m-1)/z^(2m-1)/(2m+1)! */ 396 if (Bm <= m) 397 { 398 B = mpfr_bernoulli_internal (B, m); /* B[2m]*(2m+1)!, exact */ 399 Bm ++; 400 } 401 mpfr_mul_z (v, t, B[m], MPFR_RNDN); /* (1+u)^(10m-7) */ 402 MPFR_ASSERTD(MPFR_GET_EXP(v) <= - (2 * m + 3)); 403 mpfr_add (s, s, v, MPFR_RNDN); 404 } 405 /* m <= 1/2*Pi*e*z ensures that |v[m]| < 1/2^(2m+3) */ 406 MPFR_ASSERTD ((double) m <= 4.26 * mpfr_get_d (z, MPFR_RNDZ)); 407 408 /* We have sum([(1+u)^(10m-7)-1]*1/2^(2m+3), m=2..infinity) 409 <= 1.46*u for u <= 2^(-3). 410 We have 0 < lngamma(z) - [(z - 1/2) ln(z) - z + 1/2 ln(2 Pi)] < 0.021 411 for z >= 4, thus since the initial s >= 0.85, the different values of 412 s differ by at most one binade, and the total rounding error on s 413 in the for-loop is bounded by 2*(m-1)*ulp(final_s). 414 The error coming from the v's is bounded by 415 1.46*2^(-w) <= 2*ulp(final_s). 416 Thus the total error so far is bounded by [(1+u)^15-1]*s+2m*ulp(s) 417 <= (2m+47)*ulp(s). 418 Taking into account the truncation error (which is bounded by the last 419 term v[] according to 6.1.42 in A&S), the bound is (2m+48)*ulp(s). 420 */ 421 422 /* add 1/2*log(2*Pi) and subtract log(z0*(z0+1)*...*(z0+k-1)) */ 423 mpfr_const_pi (v, MPFR_RNDN); /* v = Pi*(1+u) */ 424 mpfr_mul_2ui (v, v, 1, MPFR_RNDN); /* v = 2*Pi * (1+u) */ 425 if (k) 426 { 427 unsigned long l; 428 mpfr_set (t, z0, MPFR_RNDN); /* t = z0*(1+u) */ 429 for (l = 1; l < k; l++) 430 { 431 mpfr_add_ui (u, z0, l, MPFR_RNDN); /* u = (z0+l)*(1+u) */ 432 mpfr_mul (t, t, u, MPFR_RNDN); /* (1+u)^(2l+1) */ 433 } 434 /* now t: (1+u)^(2k-1) */ 435 /* instead of computing log(sqrt(2*Pi)/t), we compute 436 1/2*log(2*Pi/t^2), which trades a square root for a square */ 437 mpfr_mul (t, t, t, MPFR_RNDN); /* (z0*...*(z0+k-1))^2, (1+u)^(4k-1) */ 438 mpfr_div (v, v, t, MPFR_RNDN); 439 /* 2*Pi/(z0*...*(z0+k-1))^2 (1+u)^(4k+1) */ 440 } 441 #ifdef IS_GAMMA 442 err_s = MPFR_GET_EXP(s); 443 mpfr_exp (s, s, MPFR_RNDN); 444 /* before the exponential, we have s = s0 + h where 445 |h| <= (2m+48)*ulp(s), thus exp(s0) = exp(s) * exp(-h). 446 For |h| <= 1/4, we have |exp(h)-1| <= 1.2*|h| thus 447 |exp(s) - exp(s0)| <= 1.2 * exp(s) * (2m+48)* 2^(EXP(s)-w). */ 448 d = 1.2 * (2.0 * (double) m + 48.0); 449 /* the error on s is bounded by d*2^err_s * 2^(-w) */ 450 mpfr_sqrt (t, v, MPFR_RNDN); 451 /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1), 452 thus t = sqrt(v0)*(1+u)^(2k+3/2). */ 453 mpfr_mul (s, s, t, MPFR_RNDN); 454 /* the error on input s is bounded by (1+u)^(d*2^err_s), 455 and that on t is (1+u)^(2k+3/2), thus the 456 total error is (1+u)^(d*2^err_s+2k+5/2) */ 457 err_s += __gmpfr_ceil_log2 (d); 458 err_t = __gmpfr_ceil_log2 (2.0 * (double) k + 2.5); 459 err_s = (err_s >= err_t) ? err_s + 1 : err_t + 1; 460 #else 461 mpfr_log (t, v, MPFR_RNDN); 462 /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1), 463 thus log(v) = log(v0) + (4k+1)*log(1+u). Since |log(1+u)/u| <= 1.07 464 for |u| <= 2^(-3), the absolute error on log(v) is bounded by 465 1.07*(4k+1)*u, and the rounding error by ulp(t). */ 466 mpfr_div_2ui (t, t, 1, MPFR_RNDN); 467 /* the error on t is now bounded by ulp(t) + 0.54*(4k+1)*2^(-w). 468 We have sqrt(2*Pi)/(z0*(z0+1)*...*(z0+k-1)) <= sqrt(2*Pi)/k! <= 0.5 469 since k>=3, thus t <= -0.5 and ulp(t) >= 2^(-w). 470 Thus the error on t is bounded by (2.16*k+1.54)*ulp(t). */ 471 err_t = MPFR_GET_EXP(t) + (mpfr_exp_t) 472 __gmpfr_ceil_log2 (2.2 * (double) k + 1.6); 473 err_s = MPFR_GET_EXP(s) + (mpfr_exp_t) 474 __gmpfr_ceil_log2 (2.0 * (double) m + 48.0); 475 mpfr_add (s, s, t, MPFR_RNDN); /* this is a subtraction in fact */ 476 /* the final error in ulp(s) is 477 <= 1 + 2^(err_t-EXP(s)) + 2^(err_s-EXP(s)) 478 <= 2^(1+max(err_t,err_s)-EXP(s)) if err_t <> err_s 479 <= 2^(2+max(err_t,err_s)-EXP(s)) if err_t = err_s */ 480 err_s = (err_t == err_s) ? 1 + err_s : ((err_t > err_s) ? err_t : err_s); 481 err_s += 1 - MPFR_GET_EXP(s); 482 #endif 483 } 484 while (MPFR_UNLIKELY (!MPFR_CAN_ROUND (s, w - err_s, precy, rnd))); 485 486 oldBm = Bm; 487 while (Bm--) 488 mpz_clear (B[Bm]); 489 (*__gmp_free_func) (B, oldBm * sizeof (mpz_t)); 490 491 end: 492 inexact = mpfr_set (y, s, rnd); 493 494 mpfr_clear (s); 495 mpfr_clear (t); 496 mpfr_clear (u); 497 mpfr_clear (v); 498 mpfr_clear (z); 499 500 MPFR_SAVE_EXPO_FREE (expo); 501 return mpfr_check_range (y, inexact, rnd); 502 } 503 504 #ifndef IS_GAMMA 505 506 int 507 mpfr_lngamma (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd) 508 { 509 int inex; 510 511 MPFR_LOG_FUNC 512 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd), 513 ("y[%Pu]=%.*Rg inexact=%d", 514 mpfr_get_prec (y), mpfr_log_prec, y, inex)); 515 516 /* special cases */ 517 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x))) 518 { 519 if (MPFR_IS_NAN (x) || MPFR_IS_NEG (x)) 520 { 521 MPFR_SET_NAN (y); 522 MPFR_RET_NAN; 523 } 524 else /* lngamma(+Inf) = lngamma(+0) = +Inf */ 525 { 526 if (MPFR_IS_ZERO (x)) 527 mpfr_set_divby0 (); 528 MPFR_SET_INF (y); 529 MPFR_SET_POS (y); 530 MPFR_RET (0); /* exact */ 531 } 532 } 533 534 /* if x < 0 and -2k-1 <= x <= -2k, then lngamma(x) = NaN */ 535 if (MPFR_IS_NEG (x) && (unit_bit (x) == 0 || mpfr_integer_p (x))) 536 { 537 MPFR_SET_NAN (y); 538 MPFR_RET_NAN; 539 } 540 541 inex = mpfr_lngamma_aux (y, x, rnd); 542 return inex; 543 } 544 545 int 546 mpfr_lgamma (mpfr_ptr y, int *signp, mpfr_srcptr x, mpfr_rnd_t rnd) 547 { 548 int inex; 549 550 MPFR_LOG_FUNC 551 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd), 552 ("y[%Pu]=%.*Rg signp=%d inexact=%d", 553 mpfr_get_prec (y), mpfr_log_prec, y, *signp, inex)); 554 555 *signp = 1; /* most common case */ 556 557 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x))) 558 { 559 if (MPFR_IS_NAN (x)) 560 { 561 MPFR_SET_NAN (y); 562 MPFR_RET_NAN; 563 } 564 else 565 { 566 if (MPFR_IS_ZERO (x)) 567 mpfr_set_divby0 (); 568 *signp = MPFR_INT_SIGN (x); 569 MPFR_SET_INF (y); 570 MPFR_SET_POS (y); 571 MPFR_RET (0); 572 } 573 } 574 575 if (MPFR_IS_NEG (x)) 576 { 577 if (mpfr_integer_p (x)) 578 { 579 MPFR_SET_INF (y); 580 MPFR_SET_POS (y); 581 mpfr_set_divby0 (); 582 MPFR_RET (0); 583 } 584 585 if (unit_bit (x) == 0) 586 *signp = -1; 587 588 /* For tiny negative x, we have gamma(x) = 1/x - euler + O(x), 589 thus |gamma(x)| = -1/x + euler + O(x), and 590 log |gamma(x)| = -log(-x) - euler*x + O(x^2). 591 More precisely we have for -0.4 <= x < 0: 592 -log(-x) <= log |gamma(x)| <= -log(-x) - x. 593 Since log(x) is not representable, we may have an instance of the 594 Table Maker Dilemma. The only way to ensure correct rounding is to 595 compute an interval [l,h] such that l <= -log(-x) and 596 -log(-x) - x <= h, and check whether l and h round to the same number 597 for the target precision and rounding modes. */ 598 if (MPFR_EXP(x) + 1 <= - (mpfr_exp_t) MPFR_PREC(y)) 599 /* since PREC(y) >= 1, this ensures EXP(x) <= -2, 600 thus |x| <= 0.25 < 0.4 */ 601 { 602 mpfr_t l, h; 603 int ok, inex2; 604 mpfr_prec_t w = MPFR_PREC (y) + 14; 605 mpfr_exp_t expl; 606 607 while (1) 608 { 609 mpfr_init2 (l, w); 610 mpfr_init2 (h, w); 611 /* we want a lower bound on -log(-x), thus an upper bound 612 on log(-x), thus an upper bound on -x. */ 613 mpfr_neg (l, x, MPFR_RNDU); /* upper bound on -x */ 614 mpfr_log (l, l, MPFR_RNDU); /* upper bound for log(-x) */ 615 mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(-x) */ 616 mpfr_neg (h, x, MPFR_RNDD); /* lower bound on -x */ 617 mpfr_log (h, h, MPFR_RNDD); /* lower bound on log(-x) */ 618 mpfr_neg (h, h, MPFR_RNDU); /* upper bound for -log(-x) */ 619 mpfr_sub (h, h, x, MPFR_RNDU); /* upper bound for -log(-x) - x */ 620 inex = mpfr_prec_round (l, MPFR_PREC (y), rnd); 621 inex2 = mpfr_prec_round (h, MPFR_PREC (y), rnd); 622 /* Caution: we not only need l = h, but both inexact flags 623 should agree. Indeed, one of the inexact flags might be 624 zero. In that case if we assume ln|gamma(x)| cannot be 625 exact, the other flag should be correct. We are conservative 626 here and request that both inexact flags agree. */ 627 ok = SAME_SIGN (inex, inex2) && mpfr_equal_p (l, h); 628 if (ok) 629 mpfr_set (y, h, rnd); /* exact */ 630 else 631 expl = MPFR_EXP (l); 632 mpfr_clear (l); 633 mpfr_clear (h); 634 if (ok) 635 return inex; 636 /* if ulp(log(-x)) <= |x| there is no reason to loop, 637 since the width of [l, h] will be at least |x| */ 638 if (expl < MPFR_EXP(x) + (mpfr_exp_t) w) 639 break; 640 w += MPFR_INT_CEIL_LOG2(w) + 3; 641 } 642 } 643 } 644 645 inex = mpfr_lngamma_aux (y, x, rnd); 646 return inex; 647 } 648 649 #endif 650