1 /* mpfr_gamma -- gamma function 2 3 Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. 4 Contributed by the Arenaire and Caramel projects, INRIA. 5 6 This file is part of the GNU MPFR Library. 7 8 The GNU MPFR Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MPFR Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23 #define MPFR_NEED_LONGLONG_H 24 #include "mpfr-impl.h" 25 26 #define IS_GAMMA 27 #include "lngamma.c" 28 #undef IS_GAMMA 29 30 /* return a sufficient precision such that 2-x is exact, assuming x < 0 */ 31 static mpfr_prec_t 32 mpfr_gamma_2_minus_x_exact (mpfr_srcptr x) 33 { 34 /* Since x < 0, 2-x = 2+y with y := -x. 35 If y < 2, a precision w >= PREC(y) + EXP(2)-EXP(y) = PREC(y) + 2 - EXP(y) 36 is enough, since no overlap occurs in 2+y, so no carry happens. 37 If y >= 2, either ULP(y) <= 2, and we need w >= PREC(y)+1 since a 38 carry can occur, or ULP(y) > 2, and we need w >= EXP(y)-1: 39 (a) if EXP(y) <= 1, w = PREC(y) + 2 - EXP(y) 40 (b) if EXP(y) > 1 and EXP(y)-PREC(y) <= 1, w = PREC(y) + 1 41 (c) if EXP(y) > 1 and EXP(y)-PREC(y) > 1, w = EXP(y) - 1 */ 42 return (MPFR_GET_EXP(x) <= 1) ? MPFR_PREC(x) + 2 - MPFR_GET_EXP(x) 43 : ((MPFR_GET_EXP(x) <= MPFR_PREC(x) + 1) ? MPFR_PREC(x) + 1 44 : MPFR_GET_EXP(x) - 1); 45 } 46 47 /* return a sufficient precision such that 1-x is exact, assuming x < 1 */ 48 static mpfr_prec_t 49 mpfr_gamma_1_minus_x_exact (mpfr_srcptr x) 50 { 51 if (MPFR_IS_POS(x)) 52 return MPFR_PREC(x) - MPFR_GET_EXP(x); 53 else if (MPFR_GET_EXP(x) <= 0) 54 return MPFR_PREC(x) + 1 - MPFR_GET_EXP(x); 55 else if (MPFR_PREC(x) >= MPFR_GET_EXP(x)) 56 return MPFR_PREC(x) + 1; 57 else 58 return MPFR_GET_EXP(x); 59 } 60 61 /* returns a lower bound of the number of significant bits of n! 62 (not counting the low zero bits). 63 We know n! >= (n/e)^n*sqrt(2*Pi*n) for n >= 1, and the number of zero bits 64 is floor(n/2) + floor(n/4) + floor(n/8) + ... 65 This approximation is exact for n <= 500000, except for n = 219536, 235928, 66 298981, 355854, 464848, 493725, 498992 where it returns a value 1 too small. 67 */ 68 static unsigned long 69 bits_fac (unsigned long n) 70 { 71 mpfr_t x, y; 72 unsigned long r, k; 73 mpfr_init2 (x, 38); 74 mpfr_init2 (y, 38); 75 mpfr_set_ui (x, n, MPFR_RNDZ); 76 mpfr_set_str_binary (y, "10.101101111110000101010001011000101001"); /* upper bound of e */ 77 mpfr_div (x, x, y, MPFR_RNDZ); 78 mpfr_pow_ui (x, x, n, MPFR_RNDZ); 79 mpfr_const_pi (y, MPFR_RNDZ); 80 mpfr_mul_ui (y, y, 2 * n, MPFR_RNDZ); 81 mpfr_sqrt (y, y, MPFR_RNDZ); 82 mpfr_mul (x, x, y, MPFR_RNDZ); 83 mpfr_log2 (x, x, MPFR_RNDZ); 84 r = mpfr_get_ui (x, MPFR_RNDU); 85 for (k = 2; k <= n; k *= 2) 86 r -= n / k; 87 mpfr_clear (x); 88 mpfr_clear (y); 89 return r; 90 } 91 92 /* We use the reflection formula 93 Gamma(1+t) Gamma(1-t) = - Pi t / sin(Pi (1 + t)) 94 in order to treat the case x <= 1, 95 i.e. with x = 1-t, then Gamma(x) = -Pi*(1-x)/sin(Pi*(2-x))/GAMMA(2-x) 96 */ 97 int 98 mpfr_gamma (mpfr_ptr gamma, mpfr_srcptr x, mpfr_rnd_t rnd_mode) 99 { 100 mpfr_t xp, GammaTrial, tmp, tmp2; 101 mpz_t fact; 102 mpfr_prec_t realprec; 103 int compared, inex, is_integer; 104 MPFR_GROUP_DECL (group); 105 MPFR_SAVE_EXPO_DECL (expo); 106 MPFR_ZIV_DECL (loop); 107 108 MPFR_LOG_FUNC 109 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd_mode), 110 ("gamma[%Pu]=%.*Rg inexact=%d", 111 mpfr_get_prec (gamma), mpfr_log_prec, gamma, inex)); 112 113 /* Trivial cases */ 114 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x))) 115 { 116 if (MPFR_IS_NAN (x)) 117 { 118 MPFR_SET_NAN (gamma); 119 MPFR_RET_NAN; 120 } 121 else if (MPFR_IS_INF (x)) 122 { 123 if (MPFR_IS_NEG (x)) 124 { 125 MPFR_SET_NAN (gamma); 126 MPFR_RET_NAN; 127 } 128 else 129 { 130 MPFR_SET_INF (gamma); 131 MPFR_SET_POS (gamma); 132 MPFR_RET (0); /* exact */ 133 } 134 } 135 else /* x is zero */ 136 { 137 MPFR_ASSERTD(MPFR_IS_ZERO(x)); 138 MPFR_SET_INF(gamma); 139 MPFR_SET_SAME_SIGN(gamma, x); 140 mpfr_set_divby0 (); 141 MPFR_RET (0); /* exact */ 142 } 143 } 144 145 /* Check for tiny arguments, where gamma(x) ~ 1/x - euler + .... 146 We know from "Bound on Runs of Zeros and Ones for Algebraic Functions", 147 Proceedings of Arith15, T. Lang and J.-M. Muller, 2001, that the maximal 148 number of consecutive zeroes or ones after the round bit is n-1 for an 149 input of n bits. But we need a more precise lower bound. Assume x has 150 n bits, and 1/x is near a floating-point number y of n+1 bits. We can 151 write x = X*2^e, y = Y/2^f with X, Y integers of n and n+1 bits. 152 Thus X*Y^2^(e-f) is near from 1, i.e., X*Y is near from 2^(f-e). 153 Two cases can happen: 154 (i) either X*Y is exactly 2^(f-e), but this can happen only if X and Y 155 are themselves powers of two, i.e., x is a power of two; 156 (ii) or X*Y is at distance at least one from 2^(f-e), thus 157 |xy-1| >= 2^(e-f), or |y-1/x| >= 2^(e-f)/x = 2^(-f)/X >= 2^(-f-n). 158 Since ufp(y) = 2^(n-f) [ufp = unit in first place], this means 159 that the distance |y-1/x| >= 2^(-2n) ufp(y). 160 Now assuming |gamma(x)-1/x| <= 1, which is true for x <= 1, 161 if 2^(-2n) ufp(y) >= 2, the error is at most 2^(-2n-1) ufp(y), 162 and round(1/x) with precision >= 2n+2 gives the correct result. 163 If x < 2^E, then y > 2^(-E), thus ufp(y) > 2^(-E-1). 164 A sufficient condition is thus EXP(x) + 2 <= -2 MAX(PREC(x),PREC(Y)). 165 */ 166 if (MPFR_GET_EXP (x) + 2 167 <= -2 * (mpfr_exp_t) MAX(MPFR_PREC(x), MPFR_PREC(gamma))) 168 { 169 int sign = MPFR_SIGN (x); /* retrieve sign before possible override */ 170 int special; 171 MPFR_BLOCK_DECL (flags); 172 173 MPFR_SAVE_EXPO_MARK (expo); 174 175 /* for overflow cases, see below; this needs to be done 176 before x possibly gets overridden. */ 177 special = 178 MPFR_GET_EXP (x) == 1 - MPFR_EMAX_MAX && 179 MPFR_IS_POS_SIGN (sign) && 180 MPFR_IS_LIKE_RNDD (rnd_mode, sign) && 181 mpfr_powerof2_raw (x); 182 183 MPFR_BLOCK (flags, inex = mpfr_ui_div (gamma, 1, x, rnd_mode)); 184 if (inex == 0) /* x is a power of two */ 185 { 186 /* return RND(1/x - euler) = RND(+/- 2^k - eps) with eps > 0 */ 187 if (rnd_mode == MPFR_RNDN || MPFR_IS_LIKE_RNDU (rnd_mode, sign)) 188 inex = 1; 189 else 190 { 191 mpfr_nextbelow (gamma); 192 inex = -1; 193 } 194 } 195 else if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags))) 196 { 197 /* Overflow in the division 1/x. This is a real overflow, except 198 in RNDZ or RNDD when 1/x = 2^emax, i.e. x = 2^(-emax): due to 199 the "- euler", the rounded value in unbounded exponent range 200 is 0.111...11 * 2^emax (not an overflow). */ 201 if (!special) 202 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, flags); 203 } 204 MPFR_SAVE_EXPO_FREE (expo); 205 /* Note: an overflow is possible with an infinite result; 206 in this case, the overflow flag will automatically be 207 restored by mpfr_check_range. */ 208 return mpfr_check_range (gamma, inex, rnd_mode); 209 } 210 211 is_integer = mpfr_integer_p (x); 212 /* gamma(x) for x a negative integer gives NaN */ 213 if (is_integer && MPFR_IS_NEG(x)) 214 { 215 MPFR_SET_NAN (gamma); 216 MPFR_RET_NAN; 217 } 218 219 compared = mpfr_cmp_ui (x, 1); 220 if (compared == 0) 221 return mpfr_set_ui (gamma, 1, rnd_mode); 222 223 /* if x is an integer that fits into an unsigned long, use mpfr_fac_ui 224 if argument is not too large. 225 If precision is p, fac_ui costs O(u*p), whereas gamma costs O(p*M(p)), 226 so for u <= M(p), fac_ui should be faster. 227 We approximate here M(p) by p*log(p)^2, which is not a bad guess. 228 Warning: since the generic code does not handle exact cases, 229 we want all cases where gamma(x) is exact to be treated here. 230 */ 231 if (is_integer && mpfr_fits_ulong_p (x, MPFR_RNDN)) 232 { 233 unsigned long int u; 234 mpfr_prec_t p = MPFR_PREC(gamma); 235 u = mpfr_get_ui (x, MPFR_RNDN); 236 if (u < 44787929UL && bits_fac (u - 1) <= p + (rnd_mode == MPFR_RNDN)) 237 /* bits_fac: lower bound on the number of bits of m, 238 where gamma(x) = (u-1)! = m*2^e with m odd. */ 239 return mpfr_fac_ui (gamma, u - 1, rnd_mode); 240 /* if bits_fac(...) > p (resp. p+1 for rounding to nearest), 241 then gamma(x) cannot be exact in precision p (resp. p+1). 242 FIXME: remove the test u < 44787929UL after changing bits_fac 243 to return a mpz_t or mpfr_t. */ 244 } 245 246 MPFR_SAVE_EXPO_MARK (expo); 247 248 /* check for overflow: according to (6.1.37) in Abramowitz & Stegun, 249 gamma(x) >= exp(-x) * x^(x-1/2) * sqrt(2*Pi) 250 >= 2 * (x/e)^x / x for x >= 1 */ 251 if (compared > 0) 252 { 253 mpfr_t yp; 254 mpfr_exp_t expxp; 255 MPFR_BLOCK_DECL (flags); 256 257 /* 1/e rounded down to 53 bits */ 258 #define EXPM1_STR "0.010111100010110101011000110110001011001110111100111" 259 mpfr_init2 (xp, 53); 260 mpfr_init2 (yp, 53); 261 mpfr_set_str_binary (xp, EXPM1_STR); 262 mpfr_mul (xp, x, xp, MPFR_RNDZ); 263 mpfr_sub_ui (yp, x, 2, MPFR_RNDZ); 264 mpfr_pow (xp, xp, yp, MPFR_RNDZ); /* (x/e)^(x-2) */ 265 mpfr_set_str_binary (yp, EXPM1_STR); 266 mpfr_mul (xp, xp, yp, MPFR_RNDZ); /* x^(x-2) / e^(x-1) */ 267 mpfr_mul (xp, xp, yp, MPFR_RNDZ); /* x^(x-2) / e^x */ 268 mpfr_mul (xp, xp, x, MPFR_RNDZ); /* lower bound on x^(x-1) / e^x */ 269 MPFR_BLOCK (flags, mpfr_mul_2ui (xp, xp, 1, MPFR_RNDZ)); 270 expxp = MPFR_GET_EXP (xp); 271 mpfr_clear (xp); 272 mpfr_clear (yp); 273 MPFR_SAVE_EXPO_FREE (expo); 274 return MPFR_OVERFLOW (flags) || expxp > __gmpfr_emax ? 275 mpfr_overflow (gamma, rnd_mode, 1) : 276 mpfr_gamma_aux (gamma, x, rnd_mode); 277 } 278 279 /* now compared < 0 */ 280 281 /* check for underflow: for x < 1, 282 gamma(x) = Pi*(x-1)/sin(Pi*(2-x))/gamma(2-x). 283 Since gamma(2-x) >= 2 * ((2-x)/e)^(2-x) / (2-x), we have 284 |gamma(x)| <= Pi*(1-x)*(2-x)/2/((2-x)/e)^(2-x) / |sin(Pi*(2-x))| 285 <= 12 * ((2-x)/e)^x / |sin(Pi*(2-x))|. 286 To avoid an underflow in ((2-x)/e)^x, we compute the logarithm. 287 */ 288 if (MPFR_IS_NEG(x)) 289 { 290 int underflow = 0, sgn, ck; 291 mpfr_prec_t w; 292 293 mpfr_init2 (xp, 53); 294 mpfr_init2 (tmp, 53); 295 mpfr_init2 (tmp2, 53); 296 /* we want an upper bound for x * [log(2-x)-1]. 297 since x < 0, we need a lower bound on log(2-x) */ 298 mpfr_ui_sub (xp, 2, x, MPFR_RNDD); 299 mpfr_log2 (xp, xp, MPFR_RNDD); 300 mpfr_sub_ui (xp, xp, 1, MPFR_RNDD); 301 mpfr_mul (xp, xp, x, MPFR_RNDU); 302 303 /* we need an upper bound on 1/|sin(Pi*(2-x))|, 304 thus a lower bound on |sin(Pi*(2-x))|. 305 If 2-x is exact, then the error of Pi*(2-x) is (1+u)^2 with u = 2^(-p) 306 thus the error on sin(Pi*(2-x)) is less than 1/2ulp + 3Pi(2-x)u, 307 assuming u <= 1, thus <= u + 3Pi(2-x)u */ 308 309 w = mpfr_gamma_2_minus_x_exact (x); /* 2-x is exact for prec >= w */ 310 w += 17; /* to get tmp2 small enough */ 311 mpfr_set_prec (tmp, w); 312 mpfr_set_prec (tmp2, w); 313 ck = mpfr_ui_sub (tmp, 2, x, MPFR_RNDN); 314 MPFR_ASSERTD (ck == 0); (void) ck; /* use ck to avoid a warning */ 315 mpfr_const_pi (tmp2, MPFR_RNDN); 316 mpfr_mul (tmp2, tmp2, tmp, MPFR_RNDN); /* Pi*(2-x) */ 317 mpfr_sin (tmp, tmp2, MPFR_RNDN); /* sin(Pi*(2-x)) */ 318 sgn = mpfr_sgn (tmp); 319 mpfr_abs (tmp, tmp, MPFR_RNDN); 320 mpfr_mul_ui (tmp2, tmp2, 3, MPFR_RNDU); /* 3Pi(2-x) */ 321 mpfr_add_ui (tmp2, tmp2, 1, MPFR_RNDU); /* 3Pi(2-x)+1 */ 322 mpfr_div_2ui (tmp2, tmp2, mpfr_get_prec (tmp), MPFR_RNDU); 323 /* if tmp2<|tmp|, we get a lower bound */ 324 if (mpfr_cmp (tmp2, tmp) < 0) 325 { 326 mpfr_sub (tmp, tmp, tmp2, MPFR_RNDZ); /* low bnd on |sin(Pi*(2-x))| */ 327 mpfr_ui_div (tmp, 12, tmp, MPFR_RNDU); /* upper bound */ 328 mpfr_log2 (tmp, tmp, MPFR_RNDU); 329 mpfr_add (xp, tmp, xp, MPFR_RNDU); 330 /* The assert below checks that expo.saved_emin - 2 always 331 fits in a long. FIXME if we want to allow mpfr_exp_t to 332 be a long long, for instance. */ 333 MPFR_ASSERTN (MPFR_EMIN_MIN - 2 >= LONG_MIN); 334 underflow = mpfr_cmp_si (xp, expo.saved_emin - 2) <= 0; 335 } 336 337 mpfr_clear (xp); 338 mpfr_clear (tmp); 339 mpfr_clear (tmp2); 340 if (underflow) /* the sign is the opposite of that of sin(Pi*(2-x)) */ 341 { 342 MPFR_SAVE_EXPO_FREE (expo); 343 return mpfr_underflow (gamma, (rnd_mode == MPFR_RNDN) ? MPFR_RNDZ : rnd_mode, -sgn); 344 } 345 } 346 347 realprec = MPFR_PREC (gamma); 348 /* we want both 1-x and 2-x to be exact */ 349 { 350 mpfr_prec_t w; 351 w = mpfr_gamma_1_minus_x_exact (x); 352 if (realprec < w) 353 realprec = w; 354 w = mpfr_gamma_2_minus_x_exact (x); 355 if (realprec < w) 356 realprec = w; 357 } 358 realprec = realprec + MPFR_INT_CEIL_LOG2 (realprec) + 20; 359 MPFR_ASSERTD(realprec >= 5); 360 361 MPFR_GROUP_INIT_4 (group, realprec + MPFR_INT_CEIL_LOG2 (realprec) + 20, 362 xp, tmp, tmp2, GammaTrial); 363 mpz_init (fact); 364 MPFR_ZIV_INIT (loop, realprec); 365 for (;;) 366 { 367 mpfr_exp_t err_g; 368 int ck; 369 MPFR_GROUP_REPREC_4 (group, realprec, xp, tmp, tmp2, GammaTrial); 370 371 /* reflection formula: gamma(x) = Pi*(x-1)/sin(Pi*(2-x))/gamma(2-x) */ 372 373 ck = mpfr_ui_sub (xp, 2, x, MPFR_RNDN); /* 2-x, exact */ 374 MPFR_ASSERTD(ck == 0); (void) ck; /* use ck to avoid a warning */ 375 mpfr_gamma (tmp, xp, MPFR_RNDN); /* gamma(2-x), error (1+u) */ 376 mpfr_const_pi (tmp2, MPFR_RNDN); /* Pi, error (1+u) */ 377 mpfr_mul (GammaTrial, tmp2, xp, MPFR_RNDN); /* Pi*(2-x), error (1+u)^2 */ 378 err_g = MPFR_GET_EXP(GammaTrial); 379 mpfr_sin (GammaTrial, GammaTrial, MPFR_RNDN); /* sin(Pi*(2-x)) */ 380 err_g = err_g + 1 - MPFR_GET_EXP(GammaTrial); 381 /* let g0 the true value of Pi*(2-x), g the computed value. 382 We have g = g0 + h with |h| <= |(1+u^2)-1|*g. 383 Thus sin(g) = sin(g0) + h' with |h'| <= |(1+u^2)-1|*g. 384 The relative error is thus bounded by |(1+u^2)-1|*g/sin(g) 385 <= |(1+u^2)-1|*2^err_g. <= 2.25*u*2^err_g for |u|<=1/4. 386 With the rounding error, this gives (0.5 + 2.25*2^err_g)*u. */ 387 ck = mpfr_sub_ui (xp, x, 1, MPFR_RNDN); /* x-1, exact */ 388 MPFR_ASSERTD(ck == 0); (void) ck; /* use ck to avoid a warning */ 389 mpfr_mul (xp, tmp2, xp, MPFR_RNDN); /* Pi*(x-1), error (1+u)^2 */ 390 mpfr_mul (GammaTrial, GammaTrial, tmp, MPFR_RNDN); 391 /* [1 + (0.5 + 2.25*2^err_g)*u]*(1+u)^2 = 1 + (2.5 + 2.25*2^err_g)*u 392 + (0.5 + 2.25*2^err_g)*u*(2u+u^2) + u^2. 393 For err_g <= realprec-2, we have (0.5 + 2.25*2^err_g)*u <= 394 0.5*u + 2.25/4 <= 0.6875 and u^2 <= u/4, thus 395 (0.5 + 2.25*2^err_g)*u*(2u+u^2) + u^2 <= 0.6875*(2u+u/4) + u/4 396 <= 1.8*u, thus the rel. error is bounded by (4.5 + 2.25*2^err_g)*u. */ 397 mpfr_div (GammaTrial, xp, GammaTrial, MPFR_RNDN); 398 /* the error is of the form (1+u)^3/[1 + (4.5 + 2.25*2^err_g)*u]. 399 For realprec >= 5 and err_g <= realprec-2, [(4.5 + 2.25*2^err_g)*u]^2 400 <= 0.71, and for |y|<=0.71, 1/(1-y) can be written 1+a*y with a<=4. 401 (1+u)^3 * (1+4*(4.5 + 2.25*2^err_g)*u) 402 = 1 + (21 + 9*2^err_g)*u + (57+27*2^err_g)*u^2 + (55+27*2^err_g)*u^3 403 + (18+9*2^err_g)*u^4 404 <= 1 + (21 + 9*2^err_g)*u + (57+27*2^err_g)*u^2 + (56+28*2^err_g)*u^3 405 <= 1 + (21 + 9*2^err_g)*u + (59+28*2^err_g)*u^2 406 <= 1 + (23 + 10*2^err_g)*u. 407 The final error is thus bounded by (23 + 10*2^err_g) ulps, 408 which is <= 2^6 for err_g<=2, and <= 2^(err_g+4) for err_g >= 2. */ 409 err_g = (err_g <= 2) ? 6 : err_g + 4; 410 411 if (MPFR_LIKELY (MPFR_CAN_ROUND (GammaTrial, realprec - err_g, 412 MPFR_PREC(gamma), rnd_mode))) 413 break; 414 MPFR_ZIV_NEXT (loop, realprec); 415 } 416 MPFR_ZIV_FREE (loop); 417 418 inex = mpfr_set (gamma, GammaTrial, rnd_mode); 419 MPFR_GROUP_CLEAR (group); 420 mpz_clear (fact); 421 422 MPFR_SAVE_EXPO_FREE (expo); 423 return mpfr_check_range (gamma, inex, rnd_mode); 424 } 425