1 /* mpfr_gamma -- gamma function 2 3 Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. 4 Contributed by the AriC and Caramel projects, INRIA. 5 6 This file is part of the GNU MPFR Library. 7 8 The GNU MPFR Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MPFR Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23 #define MPFR_NEED_LONGLONG_H 24 #include "mpfr-impl.h" 25 26 #define IS_GAMMA 27 #include "lngamma.c" 28 #undef IS_GAMMA 29 30 /* return a sufficient precision such that 2-x is exact, assuming x < 0 */ 31 static mpfr_prec_t 32 mpfr_gamma_2_minus_x_exact (mpfr_srcptr x) 33 { 34 /* Since x < 0, 2-x = 2+y with y := -x. 35 If y < 2, a precision w >= PREC(y) + EXP(2)-EXP(y) = PREC(y) + 2 - EXP(y) 36 is enough, since no overlap occurs in 2+y, so no carry happens. 37 If y >= 2, either ULP(y) <= 2, and we need w >= PREC(y)+1 since a 38 carry can occur, or ULP(y) > 2, and we need w >= EXP(y)-1: 39 (a) if EXP(y) <= 1, w = PREC(y) + 2 - EXP(y) 40 (b) if EXP(y) > 1 and EXP(y)-PREC(y) <= 1, w = PREC(y) + 1 41 (c) if EXP(y) > 1 and EXP(y)-PREC(y) > 1, w = EXP(y) - 1 */ 42 return (MPFR_GET_EXP(x) <= 1) ? MPFR_PREC(x) + 2 - MPFR_GET_EXP(x) 43 : ((MPFR_GET_EXP(x) <= MPFR_PREC(x) + 1) ? MPFR_PREC(x) + 1 44 : MPFR_GET_EXP(x) - 1); 45 } 46 47 /* return a sufficient precision such that 1-x is exact, assuming x < 1 */ 48 static mpfr_prec_t 49 mpfr_gamma_1_minus_x_exact (mpfr_srcptr x) 50 { 51 if (MPFR_IS_POS(x)) 52 return MPFR_PREC(x) - MPFR_GET_EXP(x); 53 else if (MPFR_GET_EXP(x) <= 0) 54 return MPFR_PREC(x) + 1 - MPFR_GET_EXP(x); 55 else if (MPFR_PREC(x) >= MPFR_GET_EXP(x)) 56 return MPFR_PREC(x) + 1; 57 else 58 return MPFR_GET_EXP(x); 59 } 60 61 /* returns a lower bound of the number of significant bits of n! 62 (not counting the low zero bits). 63 We know n! >= (n/e)^n*sqrt(2*Pi*n) for n >= 1, and the number of zero bits 64 is floor(n/2) + floor(n/4) + floor(n/8) + ... 65 This approximation is exact for n <= 500000, except for n = 219536, 235928, 66 298981, 355854, 464848, 493725, 498992 where it returns a value 1 too small. 67 */ 68 static unsigned long 69 bits_fac (unsigned long n) 70 { 71 mpfr_t x, y; 72 unsigned long r, k; 73 mpfr_init2 (x, 38); 74 mpfr_init2 (y, 38); 75 mpfr_set_ui (x, n, MPFR_RNDZ); 76 mpfr_set_str_binary (y, "10.101101111110000101010001011000101001"); /* upper bound of e */ 77 mpfr_div (x, x, y, MPFR_RNDZ); 78 mpfr_pow_ui (x, x, n, MPFR_RNDZ); 79 mpfr_const_pi (y, MPFR_RNDZ); 80 mpfr_mul_ui (y, y, 2 * n, MPFR_RNDZ); 81 mpfr_sqrt (y, y, MPFR_RNDZ); 82 mpfr_mul (x, x, y, MPFR_RNDZ); 83 mpfr_log2 (x, x, MPFR_RNDZ); 84 r = mpfr_get_ui (x, MPFR_RNDU); 85 for (k = 2; k <= n; k *= 2) 86 r -= n / k; 87 mpfr_clear (x); 88 mpfr_clear (y); 89 return r; 90 } 91 92 /* We use the reflection formula 93 Gamma(1+t) Gamma(1-t) = - Pi t / sin(Pi (1 + t)) 94 in order to treat the case x <= 1, 95 i.e. with x = 1-t, then Gamma(x) = -Pi*(1-x)/sin(Pi*(2-x))/GAMMA(2-x) 96 */ 97 int 98 mpfr_gamma (mpfr_ptr gamma, mpfr_srcptr x, mpfr_rnd_t rnd_mode) 99 { 100 mpfr_t xp, GammaTrial, tmp, tmp2; 101 mpz_t fact; 102 mpfr_prec_t realprec; 103 int compared, is_integer; 104 int inex = 0; /* 0 means: result gamma not set yet */ 105 MPFR_GROUP_DECL (group); 106 MPFR_SAVE_EXPO_DECL (expo); 107 MPFR_ZIV_DECL (loop); 108 109 MPFR_LOG_FUNC 110 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd_mode), 111 ("gamma[%Pu]=%.*Rg inexact=%d", 112 mpfr_get_prec (gamma), mpfr_log_prec, gamma, inex)); 113 114 /* Trivial cases */ 115 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x))) 116 { 117 if (MPFR_IS_NAN (x)) 118 { 119 MPFR_SET_NAN (gamma); 120 MPFR_RET_NAN; 121 } 122 else if (MPFR_IS_INF (x)) 123 { 124 if (MPFR_IS_NEG (x)) 125 { 126 MPFR_SET_NAN (gamma); 127 MPFR_RET_NAN; 128 } 129 else 130 { 131 MPFR_SET_INF (gamma); 132 MPFR_SET_POS (gamma); 133 MPFR_RET (0); /* exact */ 134 } 135 } 136 else /* x is zero */ 137 { 138 MPFR_ASSERTD(MPFR_IS_ZERO(x)); 139 MPFR_SET_INF(gamma); 140 MPFR_SET_SAME_SIGN(gamma, x); 141 mpfr_set_divby0 (); 142 MPFR_RET (0); /* exact */ 143 } 144 } 145 146 /* Check for tiny arguments, where gamma(x) ~ 1/x - euler + .... 147 We know from "Bound on Runs of Zeros and Ones for Algebraic Functions", 148 Proceedings of Arith15, T. Lang and J.-M. Muller, 2001, that the maximal 149 number of consecutive zeroes or ones after the round bit is n-1 for an 150 input of n bits. But we need a more precise lower bound. Assume x has 151 n bits, and 1/x is near a floating-point number y of n+1 bits. We can 152 write x = X*2^e, y = Y/2^f with X, Y integers of n and n+1 bits. 153 Thus X*Y^2^(e-f) is near from 1, i.e., X*Y is near from 2^(f-e). 154 Two cases can happen: 155 (i) either X*Y is exactly 2^(f-e), but this can happen only if X and Y 156 are themselves powers of two, i.e., x is a power of two; 157 (ii) or X*Y is at distance at least one from 2^(f-e), thus 158 |xy-1| >= 2^(e-f), or |y-1/x| >= 2^(e-f)/x = 2^(-f)/X >= 2^(-f-n). 159 Since ufp(y) = 2^(n-f) [ufp = unit in first place], this means 160 that the distance |y-1/x| >= 2^(-2n) ufp(y). 161 Now assuming |gamma(x)-1/x| <= 1, which is true for x <= 1, 162 if 2^(-2n) ufp(y) >= 2, the error is at most 2^(-2n-1) ufp(y), 163 and round(1/x) with precision >= 2n+2 gives the correct result. 164 If x < 2^E, then y > 2^(-E), thus ufp(y) > 2^(-E-1). 165 A sufficient condition is thus EXP(x) + 2 <= -2 MAX(PREC(x),PREC(Y)). 166 */ 167 if (MPFR_GET_EXP (x) + 2 168 <= -2 * (mpfr_exp_t) MAX(MPFR_PREC(x), MPFR_PREC(gamma))) 169 { 170 int sign = MPFR_SIGN (x); /* retrieve sign before possible override */ 171 int special; 172 MPFR_BLOCK_DECL (flags); 173 174 MPFR_SAVE_EXPO_MARK (expo); 175 176 /* for overflow cases, see below; this needs to be done 177 before x possibly gets overridden. */ 178 special = 179 MPFR_GET_EXP (x) == 1 - MPFR_EMAX_MAX && 180 MPFR_IS_POS_SIGN (sign) && 181 MPFR_IS_LIKE_RNDD (rnd_mode, sign) && 182 mpfr_powerof2_raw (x); 183 184 MPFR_BLOCK (flags, inex = mpfr_ui_div (gamma, 1, x, rnd_mode)); 185 if (inex == 0) /* x is a power of two */ 186 { 187 /* return RND(1/x - euler) = RND(+/- 2^k - eps) with eps > 0 */ 188 if (rnd_mode == MPFR_RNDN || MPFR_IS_LIKE_RNDU (rnd_mode, sign)) 189 inex = 1; 190 else 191 { 192 mpfr_nextbelow (gamma); 193 inex = -1; 194 } 195 } 196 else if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags))) 197 { 198 /* Overflow in the division 1/x. This is a real overflow, except 199 in RNDZ or RNDD when 1/x = 2^emax, i.e. x = 2^(-emax): due to 200 the "- euler", the rounded value in unbounded exponent range 201 is 0.111...11 * 2^emax (not an overflow). */ 202 if (!special) 203 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, flags); 204 } 205 MPFR_SAVE_EXPO_FREE (expo); 206 /* Note: an overflow is possible with an infinite result; 207 in this case, the overflow flag will automatically be 208 restored by mpfr_check_range. */ 209 return mpfr_check_range (gamma, inex, rnd_mode); 210 } 211 212 is_integer = mpfr_integer_p (x); 213 /* gamma(x) for x a negative integer gives NaN */ 214 if (is_integer && MPFR_IS_NEG(x)) 215 { 216 MPFR_SET_NAN (gamma); 217 MPFR_RET_NAN; 218 } 219 220 compared = mpfr_cmp_ui (x, 1); 221 if (compared == 0) 222 return mpfr_set_ui (gamma, 1, rnd_mode); 223 224 /* if x is an integer that fits into an unsigned long, use mpfr_fac_ui 225 if argument is not too large. 226 If precision is p, fac_ui costs O(u*p), whereas gamma costs O(p*M(p)), 227 so for u <= M(p), fac_ui should be faster. 228 We approximate here M(p) by p*log(p)^2, which is not a bad guess. 229 Warning: since the generic code does not handle exact cases, 230 we want all cases where gamma(x) is exact to be treated here. 231 */ 232 if (is_integer && mpfr_fits_ulong_p (x, MPFR_RNDN)) 233 { 234 unsigned long int u; 235 mpfr_prec_t p = MPFR_PREC(gamma); 236 u = mpfr_get_ui (x, MPFR_RNDN); 237 if (u < 44787929UL && bits_fac (u - 1) <= p + (rnd_mode == MPFR_RNDN)) 238 /* bits_fac: lower bound on the number of bits of m, 239 where gamma(x) = (u-1)! = m*2^e with m odd. */ 240 return mpfr_fac_ui (gamma, u - 1, rnd_mode); 241 /* if bits_fac(...) > p (resp. p+1 for rounding to nearest), 242 then gamma(x) cannot be exact in precision p (resp. p+1). 243 FIXME: remove the test u < 44787929UL after changing bits_fac 244 to return a mpz_t or mpfr_t. */ 245 } 246 247 MPFR_SAVE_EXPO_MARK (expo); 248 249 /* check for overflow: according to (6.1.37) in Abramowitz & Stegun, 250 gamma(x) >= exp(-x) * x^(x-1/2) * sqrt(2*Pi) 251 >= 2 * (x/e)^x / x for x >= 1 */ 252 if (compared > 0) 253 { 254 mpfr_t yp; 255 mpfr_exp_t expxp; 256 MPFR_BLOCK_DECL (flags); 257 258 /* 1/e rounded down to 53 bits */ 259 #define EXPM1_STR "0.010111100010110101011000110110001011001110111100111" 260 mpfr_init2 (xp, 53); 261 mpfr_init2 (yp, 53); 262 mpfr_set_str_binary (xp, EXPM1_STR); 263 mpfr_mul (xp, x, xp, MPFR_RNDZ); 264 mpfr_sub_ui (yp, x, 2, MPFR_RNDZ); 265 mpfr_pow (xp, xp, yp, MPFR_RNDZ); /* (x/e)^(x-2) */ 266 mpfr_set_str_binary (yp, EXPM1_STR); 267 mpfr_mul (xp, xp, yp, MPFR_RNDZ); /* x^(x-2) / e^(x-1) */ 268 mpfr_mul (xp, xp, yp, MPFR_RNDZ); /* x^(x-2) / e^x */ 269 mpfr_mul (xp, xp, x, MPFR_RNDZ); /* lower bound on x^(x-1) / e^x */ 270 MPFR_BLOCK (flags, mpfr_mul_2ui (xp, xp, 1, MPFR_RNDZ)); 271 expxp = MPFR_GET_EXP (xp); 272 mpfr_clear (xp); 273 mpfr_clear (yp); 274 MPFR_SAVE_EXPO_FREE (expo); 275 return MPFR_OVERFLOW (flags) || expxp > __gmpfr_emax ? 276 mpfr_overflow (gamma, rnd_mode, 1) : 277 mpfr_gamma_aux (gamma, x, rnd_mode); 278 } 279 280 /* now compared < 0 */ 281 282 /* check for underflow: for x < 1, 283 gamma(x) = Pi*(x-1)/sin(Pi*(2-x))/gamma(2-x). 284 Since gamma(2-x) >= 2 * ((2-x)/e)^(2-x) / (2-x), we have 285 |gamma(x)| <= Pi*(1-x)*(2-x)/2/((2-x)/e)^(2-x) / |sin(Pi*(2-x))| 286 <= 12 * ((2-x)/e)^x / |sin(Pi*(2-x))|. 287 To avoid an underflow in ((2-x)/e)^x, we compute the logarithm. 288 */ 289 if (MPFR_IS_NEG(x)) 290 { 291 int underflow = 0, sgn, ck; 292 mpfr_prec_t w; 293 294 mpfr_init2 (xp, 53); 295 mpfr_init2 (tmp, 53); 296 mpfr_init2 (tmp2, 53); 297 /* we want an upper bound for x * [log(2-x)-1]. 298 since x < 0, we need a lower bound on log(2-x) */ 299 mpfr_ui_sub (xp, 2, x, MPFR_RNDD); 300 mpfr_log (xp, xp, MPFR_RNDD); 301 mpfr_sub_ui (xp, xp, 1, MPFR_RNDD); 302 mpfr_mul (xp, xp, x, MPFR_RNDU); 303 304 /* we need an upper bound on 1/|sin(Pi*(2-x))|, 305 thus a lower bound on |sin(Pi*(2-x))|. 306 If 2-x is exact, then the error of Pi*(2-x) is (1+u)^2 with u = 2^(-p) 307 thus the error on sin(Pi*(2-x)) is less than 1/2ulp + 3Pi(2-x)u, 308 assuming u <= 1, thus <= u + 3Pi(2-x)u */ 309 310 w = mpfr_gamma_2_minus_x_exact (x); /* 2-x is exact for prec >= w */ 311 w += 17; /* to get tmp2 small enough */ 312 mpfr_set_prec (tmp, w); 313 mpfr_set_prec (tmp2, w); 314 ck = mpfr_ui_sub (tmp, 2, x, MPFR_RNDN); 315 MPFR_ASSERTD (ck == 0); (void) ck; /* use ck to avoid a warning */ 316 mpfr_const_pi (tmp2, MPFR_RNDN); 317 mpfr_mul (tmp2, tmp2, tmp, MPFR_RNDN); /* Pi*(2-x) */ 318 mpfr_sin (tmp, tmp2, MPFR_RNDN); /* sin(Pi*(2-x)) */ 319 sgn = mpfr_sgn (tmp); 320 mpfr_abs (tmp, tmp, MPFR_RNDN); 321 mpfr_mul_ui (tmp2, tmp2, 3, MPFR_RNDU); /* 3Pi(2-x) */ 322 mpfr_add_ui (tmp2, tmp2, 1, MPFR_RNDU); /* 3Pi(2-x)+1 */ 323 mpfr_div_2ui (tmp2, tmp2, mpfr_get_prec (tmp), MPFR_RNDU); 324 /* if tmp2<|tmp|, we get a lower bound */ 325 if (mpfr_cmp (tmp2, tmp) < 0) 326 { 327 mpfr_sub (tmp, tmp, tmp2, MPFR_RNDZ); /* low bnd on |sin(Pi*(2-x))| */ 328 mpfr_ui_div (tmp, 12, tmp, MPFR_RNDU); /* upper bound */ 329 mpfr_log2 (tmp, tmp, MPFR_RNDU); 330 mpfr_add (xp, tmp, xp, MPFR_RNDU); 331 /* The assert below checks that expo.saved_emin - 2 always 332 fits in a long. FIXME if we want to allow mpfr_exp_t to 333 be a long long, for instance. */ 334 MPFR_ASSERTN (MPFR_EMIN_MIN - 2 >= LONG_MIN); 335 underflow = mpfr_cmp_si (xp, expo.saved_emin - 2) <= 0; 336 } 337 338 mpfr_clear (xp); 339 mpfr_clear (tmp); 340 mpfr_clear (tmp2); 341 if (underflow) /* the sign is the opposite of that of sin(Pi*(2-x)) */ 342 { 343 MPFR_SAVE_EXPO_FREE (expo); 344 return mpfr_underflow (gamma, (rnd_mode == MPFR_RNDN) ? MPFR_RNDZ : rnd_mode, -sgn); 345 } 346 } 347 348 realprec = MPFR_PREC (gamma); 349 /* we want both 1-x and 2-x to be exact */ 350 { 351 mpfr_prec_t w; 352 w = mpfr_gamma_1_minus_x_exact (x); 353 if (realprec < w) 354 realprec = w; 355 w = mpfr_gamma_2_minus_x_exact (x); 356 if (realprec < w) 357 realprec = w; 358 } 359 realprec = realprec + MPFR_INT_CEIL_LOG2 (realprec) + 20; 360 MPFR_ASSERTD(realprec >= 5); 361 362 MPFR_GROUP_INIT_4 (group, realprec + MPFR_INT_CEIL_LOG2 (realprec) + 20, 363 xp, tmp, tmp2, GammaTrial); 364 mpz_init (fact); 365 MPFR_ZIV_INIT (loop, realprec); 366 for (;;) 367 { 368 mpfr_exp_t err_g; 369 int ck; 370 MPFR_GROUP_REPREC_4 (group, realprec, xp, tmp, tmp2, GammaTrial); 371 372 /* reflection formula: gamma(x) = Pi*(x-1)/sin(Pi*(2-x))/gamma(2-x) */ 373 374 ck = mpfr_ui_sub (xp, 2, x, MPFR_RNDN); /* 2-x, exact */ 375 MPFR_ASSERTD(ck == 0); (void) ck; /* use ck to avoid a warning */ 376 mpfr_gamma (tmp, xp, MPFR_RNDN); /* gamma(2-x), error (1+u) */ 377 mpfr_const_pi (tmp2, MPFR_RNDN); /* Pi, error (1+u) */ 378 mpfr_mul (GammaTrial, tmp2, xp, MPFR_RNDN); /* Pi*(2-x), error (1+u)^2 */ 379 err_g = MPFR_GET_EXP(GammaTrial); 380 mpfr_sin (GammaTrial, GammaTrial, MPFR_RNDN); /* sin(Pi*(2-x)) */ 381 /* If tmp is +Inf, we compute exp(lngamma(x)). */ 382 if (mpfr_inf_p (tmp)) 383 { 384 inex = mpfr_explgamma (gamma, x, &expo, tmp, tmp2, rnd_mode); 385 if (inex) 386 goto end; 387 else 388 goto ziv_next; 389 } 390 err_g = err_g + 1 - MPFR_GET_EXP(GammaTrial); 391 /* let g0 the true value of Pi*(2-x), g the computed value. 392 We have g = g0 + h with |h| <= |(1+u^2)-1|*g. 393 Thus sin(g) = sin(g0) + h' with |h'| <= |(1+u^2)-1|*g. 394 The relative error is thus bounded by |(1+u^2)-1|*g/sin(g) 395 <= |(1+u^2)-1|*2^err_g. <= 2.25*u*2^err_g for |u|<=1/4. 396 With the rounding error, this gives (0.5 + 2.25*2^err_g)*u. */ 397 ck = mpfr_sub_ui (xp, x, 1, MPFR_RNDN); /* x-1, exact */ 398 MPFR_ASSERTD(ck == 0); (void) ck; /* use ck to avoid a warning */ 399 mpfr_mul (xp, tmp2, xp, MPFR_RNDN); /* Pi*(x-1), error (1+u)^2 */ 400 mpfr_mul (GammaTrial, GammaTrial, tmp, MPFR_RNDN); 401 /* [1 + (0.5 + 2.25*2^err_g)*u]*(1+u)^2 = 1 + (2.5 + 2.25*2^err_g)*u 402 + (0.5 + 2.25*2^err_g)*u*(2u+u^2) + u^2. 403 For err_g <= realprec-2, we have (0.5 + 2.25*2^err_g)*u <= 404 0.5*u + 2.25/4 <= 0.6875 and u^2 <= u/4, thus 405 (0.5 + 2.25*2^err_g)*u*(2u+u^2) + u^2 <= 0.6875*(2u+u/4) + u/4 406 <= 1.8*u, thus the rel. error is bounded by (4.5 + 2.25*2^err_g)*u. */ 407 mpfr_div (GammaTrial, xp, GammaTrial, MPFR_RNDN); 408 /* the error is of the form (1+u)^3/[1 + (4.5 + 2.25*2^err_g)*u]. 409 For realprec >= 5 and err_g <= realprec-2, [(4.5 + 2.25*2^err_g)*u]^2 410 <= 0.71, and for |y|<=0.71, 1/(1-y) can be written 1+a*y with a<=4. 411 (1+u)^3 * (1+4*(4.5 + 2.25*2^err_g)*u) 412 = 1 + (21 + 9*2^err_g)*u + (57+27*2^err_g)*u^2 + (55+27*2^err_g)*u^3 413 + (18+9*2^err_g)*u^4 414 <= 1 + (21 + 9*2^err_g)*u + (57+27*2^err_g)*u^2 + (56+28*2^err_g)*u^3 415 <= 1 + (21 + 9*2^err_g)*u + (59+28*2^err_g)*u^2 416 <= 1 + (23 + 10*2^err_g)*u. 417 The final error is thus bounded by (23 + 10*2^err_g) ulps, 418 which is <= 2^6 for err_g<=2, and <= 2^(err_g+4) for err_g >= 2. */ 419 err_g = (err_g <= 2) ? 6 : err_g + 4; 420 421 if (MPFR_LIKELY (MPFR_CAN_ROUND (GammaTrial, realprec - err_g, 422 MPFR_PREC(gamma), rnd_mode))) 423 break; 424 425 ziv_next: 426 MPFR_ZIV_NEXT (loop, realprec); 427 } 428 429 end: 430 MPFR_ZIV_FREE (loop); 431 432 if (inex == 0) 433 inex = mpfr_set (gamma, GammaTrial, rnd_mode); 434 MPFR_GROUP_CLEAR (group); 435 mpz_clear (fact); 436 437 MPFR_SAVE_EXPO_FREE (expo); 438 return mpfr_check_range (gamma, inex, rnd_mode); 439 } 440