1 /* mpfr_lngamma -- lngamma function
2
3 Copyright 2005-2020 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5
6 This file is part of the GNU MPFR Library.
7
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25
26 /* given a precision p, return alpha, such that the argument reduction
27 will use k = alpha*p*log(2).
28
29 Warning: we should always have alpha >= log(2)/(2Pi) ~ 0.11,
30 and the smallest value of alpha multiplied by the smallest working
31 precision should be >= 4.
32 */
33 static void
mpfr_gamma_alpha(mpfr_ptr s,mpfr_prec_t p)34 mpfr_gamma_alpha (mpfr_ptr s, mpfr_prec_t p)
35 {
36 MPFR_LOG_FUNC
37 (("p=%Pu", p),
38 ("s[%Pu]=%.*Rg", mpfr_get_prec (s), mpfr_log_prec, s));
39
40 if (p <= 100)
41 mpfr_set_ui_2exp (s, 614, -10, MPFR_RNDN); /* about 0.6 */
42 else if (p <= 500)
43 mpfr_set_ui_2exp (s, 819, -10, MPFR_RNDN); /* about 0.8 */
44 else if (p <= 1000)
45 mpfr_set_ui_2exp (s, 1331, -10, MPFR_RNDN); /* about 1.3 */
46 else if (p <= 2000)
47 mpfr_set_ui_2exp (s, 1741, -10, MPFR_RNDN); /* about 1.7 */
48 else if (p <= 5000)
49 mpfr_set_ui_2exp (s, 2253, -10, MPFR_RNDN); /* about 2.2 */
50 else if (p <= 10000)
51 mpfr_set_ui_2exp (s, 3482, -10, MPFR_RNDN); /* about 3.4 */
52 else
53 mpfr_set_ui_2exp (s, 9, -1, MPFR_RNDN); /* 4.5 */
54 }
55
56 #ifdef IS_GAMMA
57
58 /* This function is called in case of intermediate overflow/underflow.
59 The s1 and s2 arguments are temporary MPFR numbers, having the
60 working precision. If the result could be determined, then the
61 flags are updated via pexpo, y is set to the result, and the
62 (non-zero) ternary value is returned. Otherwise 0 is returned
63 in order to perform the next Ziv iteration. */
64 static int
mpfr_explgamma(mpfr_ptr y,mpfr_srcptr x,mpfr_save_expo_t * pexpo,mpfr_ptr s1,mpfr_ptr s2,mpfr_rnd_t rnd)65 mpfr_explgamma (mpfr_ptr y, mpfr_srcptr x, mpfr_save_expo_t *pexpo,
66 mpfr_ptr s1, mpfr_ptr s2, mpfr_rnd_t rnd)
67 {
68 mpfr_t t1, t2;
69 int inex1, inex2, sign;
70 MPFR_BLOCK_DECL (flags1);
71 MPFR_BLOCK_DECL (flags2);
72 MPFR_GROUP_DECL (group);
73
74 MPFR_BLOCK (flags1, inex1 = mpfr_lgamma (s1, &sign, x, MPFR_RNDD));
75 MPFR_ASSERTN (inex1 != 0);
76 /* s1 = RNDD(lngamma(x)), inexact */
77 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags1)))
78 {
79 if (MPFR_IS_POS (s1))
80 {
81 MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, MPFR_FLAGS_OVERFLOW);
82 return mpfr_overflow (y, rnd, sign);
83 }
84 else
85 {
86 MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, MPFR_FLAGS_UNDERFLOW);
87 return mpfr_underflow (y, rnd == MPFR_RNDN ? MPFR_RNDZ : rnd, sign);
88 }
89 }
90
91 mpfr_set (s2, s1, MPFR_RNDN); /* exact */
92 mpfr_nextabove (s2); /* v = RNDU(lngamma(z0)) */
93
94 if (sign < 0)
95 rnd = MPFR_INVERT_RND (rnd); /* since the result with be negated */
96 MPFR_GROUP_INIT_2 (group, MPFR_PREC (y), t1, t2);
97 MPFR_BLOCK (flags1, inex1 = mpfr_exp (t1, s1, rnd));
98 MPFR_BLOCK (flags2, inex2 = mpfr_exp (t2, s2, rnd));
99 /* t1 is the rounding with mode 'rnd' of a lower bound on |Gamma(x)|,
100 t2 is the rounding with mode 'rnd' of an upper bound, thus if both
101 are equal, so is the wanted result. If t1 and t2 differ or the flags
102 differ, at some point of Ziv's loop they should agree. */
103 if (mpfr_equal_p (t1, t2) && flags1 == flags2)
104 {
105 MPFR_ASSERTN ((inex1 > 0 && inex2 > 0) || (inex1 < 0 && inex2 < 0));
106 mpfr_set4 (y, t1, MPFR_RNDN, sign); /* exact */
107 if (sign < 0)
108 inex1 = - inex1;
109 MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, flags1);
110 }
111 else
112 inex1 = 0; /* couldn't determine the result */
113 MPFR_GROUP_CLEAR (group);
114
115 return inex1;
116 }
117
118 #else
119
120 static int
unit_bit(mpfr_srcptr x)121 unit_bit (mpfr_srcptr x)
122 {
123 mpfr_exp_t expo;
124 mpfr_prec_t prec;
125 mp_limb_t x0;
126
127 expo = MPFR_GET_EXP (x);
128 if (expo <= 0)
129 return 0; /* |x| < 1 */
130
131 prec = MPFR_PREC (x);
132 if (expo > prec)
133 return 0; /* y is a multiple of 2^(expo-prec), thus an even integer */
134
135 /* Now, the unit bit is represented. */
136
137 prec = MPFR_PREC2LIMBS (prec) * GMP_NUMB_BITS - expo;
138 /* number of represented fractional bits (including the trailing 0's) */
139
140 x0 = *(MPFR_MANT (x) + prec / GMP_NUMB_BITS);
141 /* limb containing the unit bit */
142
143 return (x0 >> (prec % GMP_NUMB_BITS)) & 1;
144 }
145
146 #endif
147
148 /* FIXME: There is an internal overflow when z is very large.
149 Simple overflow detection with possible false negatives?
150 For the particular cases near the overflow boundary,
151 scaling by a power of two?
152 */
153
154 /* lngamma(x) = log(gamma(x)).
155 We use formula [6.1.40] from Abramowitz&Stegun:
156 lngamma(z) = (z-1/2)*log(z) - z + 1/2*log(2*Pi)
157 + sum (Bernoulli[2m]/(2m)/(2m-1)/z^(2m-1),m=1..infinity)
158 According to [6.1.42], if the sum is truncated after m=n, the error
159 R_n(z) is bounded by |B[2n+2]|*K(z)/(2n+1)/(2n+2)/|z|^(2n+1)
160 where K(z) = max (z^2/(u^2+z^2)) for u >= 0.
161 For z real, |K(z)| <= 1 thus R_n(z) is bounded by the first neglected term.
162 */
163 #ifdef IS_GAMMA
164 #define GAMMA_FUNC mpfr_gamma_aux
165 #else
166 #define GAMMA_FUNC mpfr_lngamma_aux
167 #endif
168
169 static int
GAMMA_FUNC(mpfr_ptr y,mpfr_srcptr z0,mpfr_rnd_t rnd)170 GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd)
171 {
172 mpfr_prec_t precy, w; /* working precision */
173 mpfr_t s, t, u, v, z;
174 unsigned long m, k, maxm, l;
175 int compared, inexact;
176 mpfr_exp_t err_s, err_t;
177 double d;
178 MPFR_SAVE_EXPO_DECL (expo);
179 MPFR_ZIV_DECL (loop);
180
181 MPFR_LOG_FUNC
182 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (z0), mpfr_log_prec, z0, rnd),
183 ("y[%Pu]=%.*Rg inexact=%d",
184 mpfr_get_prec (y), mpfr_log_prec, y, inexact));
185
186 compared = mpfr_cmp_ui (z0, 1);
187
188 MPFR_SAVE_EXPO_MARK (expo);
189
190 #ifndef IS_GAMMA /* lngamma or lgamma */
191 if (compared == 0 || (compared > 0 && mpfr_cmp_ui (z0, 2) == 0))
192 {
193 MPFR_SAVE_EXPO_FREE (expo);
194 return mpfr_set_ui (y, 0, MPFR_RNDN); /* lngamma(1 or 2) = +0 */
195 }
196
197 /* Deal with very large inputs: according to [6.1.42], if we denote
198 R_n(z) = lngamma(z) - (z-1/2)*log(z) + z - 1/2*log(2*Pi), we have
199 |R_n(z)| <= B_2/2/z, thus for z >= 2 we have
200 |lngamma(z) - [z*(log(z) - 1)]| < 1/2*log(z) + 1. */
201 if (compared > 0 && MPFR_GET_EXP (z0) >= (mpfr_exp_t) MPFR_PREC(y) + 2)
202 {
203 /* Since PREC(y) >= 2, this ensures EXP(z0) >= 4, thus |z0| >= 8,
204 thus 1/2*log(z0) + 1 < log(z0).
205 Since the largest possible z is < 2^(2^62) on a 64-bit machine,
206 the largest value of log(z) is 2^62*log(2.) < 3.2e18 < 2^62,
207 thus if we use at least 62 bits of precision, then log(t)-1 will
208 be exact */
209 mpfr_init2 (t, MPFR_PREC(y) >= 52 ? MPFR_PREC(y) + 10 : 62);
210 mpfr_log (t, z0, MPFR_RNDU); /* error < 1 ulp */
211 inexact = mpfr_sub_ui (t, t, 1, MPFR_RNDU); /* err < 2 ulps, since the
212 exponent of t might have
213 decreased */
214 MPFR_ASSERTD(inexact == 0);
215 mpfr_mul (t, z0, t, MPFR_RNDU); /* err < 1+2*2=5 ulps according to
216 "Generic error on multiplication"
217 in algorithms.tex */
218 if (MPFR_IS_INF(t))
219 {
220 mpfr_clear (t);
221 MPFR_SAVE_EXPO_FREE (expo);
222 inexact = mpfr_overflow (y, rnd, 1);
223 return inexact;
224 }
225 if (MPFR_GET_EXP(t) - MPFR_PREC(t) >= 62)
226 {
227 /* then ulp(t) >= 2^62 > log(z0) thus the total error is bounded
228 by 6 ulp(t) */
229 if (MPFR_CAN_ROUND (t, MPFR_PREC(t) - 3, MPFR_PREC(y), rnd))
230 {
231 inexact = mpfr_set (y, t, rnd);
232 mpfr_clear (t);
233 MPFR_SAVE_EXPO_FREE (expo);
234 return mpfr_check_range (y, inexact, rnd);
235 }
236 }
237 mpfr_clear (t);
238 }
239
240 /* Deal here with tiny inputs. We have for -0.3 <= x <= 0.3:
241 - log|x| - gamma*x <= log|gamma(x)| <= - log|x| - gamma*x + x^2 */
242 if (MPFR_GET_EXP (z0) <= - (mpfr_exp_t) MPFR_PREC(y))
243 {
244 mpfr_t l, h, g;
245 int ok, inex1, inex2;
246 mpfr_prec_t prec = MPFR_PREC(y) + 14;
247 MPFR_ZIV_DECL (loop);
248
249 MPFR_ZIV_INIT (loop, prec);
250 do
251 {
252 mpfr_init2 (l, prec);
253 if (MPFR_IS_POS(z0))
254 {
255 mpfr_log (l, z0, MPFR_RNDU); /* upper bound for log(z0) */
256 mpfr_init2 (h, MPFR_PREC(l));
257 }
258 else
259 {
260 mpfr_init2 (h, MPFR_PREC(z0));
261 mpfr_neg (h, z0, MPFR_RNDN); /* exact */
262 mpfr_log (l, h, MPFR_RNDU); /* upper bound for log(-z0) */
263 mpfr_set_prec (h, MPFR_PREC(l));
264 }
265 mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(|z0|) */
266 mpfr_set (h, l, MPFR_RNDD); /* exact */
267 mpfr_nextabove (h); /* upper bound for -log(|z0|), avoids two calls
268 to mpfr_log */
269 mpfr_init2 (g, MPFR_PREC(l));
270 /* if z0>0, we need an upper approximation of Euler's constant
271 for the left bound */
272 mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDU : MPFR_RNDD);
273 mpfr_mul (g, g, z0, MPFR_RNDD);
274 mpfr_sub (l, l, g, MPFR_RNDD);
275 mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDD : MPFR_RNDU); /* cached */
276 mpfr_mul (g, g, z0, MPFR_RNDU);
277 mpfr_sub (h, h, g, MPFR_RNDD);
278 mpfr_sqr (g, z0, MPFR_RNDU);
279 mpfr_add (h, h, g, MPFR_RNDU);
280 inex1 = mpfr_prec_round (l, MPFR_PREC(y), rnd);
281 inex2 = mpfr_prec_round (h, MPFR_PREC(y), rnd);
282 /* Caution: we not only need l = h, but both inexact flags should
283 agree. Indeed, one of the inexact flags might be zero. In that
284 case if we assume lngamma(z0) cannot be exact, the other flag
285 should be correct. We are conservative here and request that both
286 inexact flags agree. */
287 ok = SAME_SIGN (inex1, inex2) && mpfr_cmp (l, h) == 0;
288 if (ok)
289 mpfr_set (y, h, rnd); /* exact */
290 mpfr_clear (l);
291 mpfr_clear (h);
292 mpfr_clear (g);
293 if (ok)
294 {
295 MPFR_ZIV_FREE (loop);
296 MPFR_SAVE_EXPO_FREE (expo);
297 return mpfr_check_range (y, inex1, rnd);
298 }
299 /* since we have log|gamma(x)| = - log|x| - gamma*x + O(x^2),
300 if x ~ 2^(-n), then we have a n-bit approximation, thus
301 we can try again with a working precision of n bits,
302 especially when n >> PREC(y).
303 Otherwise we would use the reflection formula evaluating x-1,
304 which would need precision n. */
305 MPFR_ZIV_NEXT (loop, prec);
306 }
307 while (prec <= - MPFR_GET_EXP (z0));
308 MPFR_ZIV_FREE (loop);
309 }
310 #endif
311
312 precy = MPFR_PREC(y);
313
314 mpfr_init2 (s, MPFR_PREC_MIN);
315 mpfr_init2 (t, MPFR_PREC_MIN);
316 mpfr_init2 (u, MPFR_PREC_MIN);
317 mpfr_init2 (v, MPFR_PREC_MIN);
318 mpfr_init2 (z, MPFR_PREC_MIN);
319
320 inexact = 0; /* 0 means: result y not set yet */
321
322 if (compared < 0)
323 {
324 mpfr_exp_t err_u;
325
326 /* use reflection formula:
327 gamma(x) = Pi*(x-1)/sin(Pi*(2-x))/gamma(2-x)
328 thus lngamma(x) = log(Pi*(x-1)/sin(Pi*(2-x))) - lngamma(2-x) */
329
330 w = precy + MPFR_INT_CEIL_LOG2 (precy);
331 w += MPFR_INT_CEIL_LOG2 (w) + 14;
332 MPFR_ZIV_INIT (loop, w);
333 while (1)
334 {
335 MPFR_ASSERTD(w >= 3);
336 mpfr_set_prec (s, w);
337 mpfr_set_prec (t, w);
338 mpfr_set_prec (u, w);
339 mpfr_set_prec (v, w);
340 /* In the following, we write r for a real of absolute value
341 at most 2^(-w). Different instances of r may represent different
342 values. */
343 mpfr_ui_sub (s, 2, z0, MPFR_RNDD); /* s = (2-z0) * (1+2r) >= 1 */
344 mpfr_const_pi (t, MPFR_RNDN); /* t = Pi * (1+r) */
345 mpfr_lngamma (u, s, MPFR_RNDN); /* lngamma(2-x) */
346 /* Let s = (2-z0) + h. By construction, -(2-z0)*2^(1-w) <= h <= 0.
347 We have lngamma(s) = lngamma(2-z0) + h*Psi(z), z in [2-z0+h,2-z0].
348 Since 2-z0+h = s >= 1 and |Psi(x)| <= max(1,log(x)) for x >= 1,
349 the error on u is bounded by
350 ulp(u)/2 + (2-z0)*max(1,log(2-z0))*2^(1-w)
351 = (1/2 + (2-z0)*max(1,log(2-z0))*2^(1-E(u))) ulp(u) */
352 d = (double) MPFR_GET_EXP(s) * 0.694; /* upper bound for log(2-z0) */
353 if (MPFR_IS_ZERO(u)) /* in that case the error on u is zero */
354 err_u = 0;
355 else
356 err_u = MPFR_GET_EXP(s) + __gmpfr_ceil_log2 (d) + 1 - MPFR_GET_EXP(u);
357 err_u = (err_u >= 0) ? err_u + 1 : 0;
358 /* now the error on u is bounded by 2^err_u ulps */
359
360 mpfr_mul (s, s, t, MPFR_RNDN); /* Pi*(2-x) * (1+r)^4 */
361 err_s = MPFR_GET_EXP(s); /* 2-x <= 2^err_s */
362 mpfr_sin (s, s, MPFR_RNDN); /* sin(Pi*(2-x)) */
363 /* the error on s is bounded by 1/2*ulp(s) + [(1+2^(-w))^4-1]*(2-x)
364 <= 1/2*ulp(s) + 5*2^(-w)*(2-x) for w >= 3
365 <= (1/2 + 5 * 2^(-E(s)) * (2-x)) ulp(s) */
366 err_s += 3 - MPFR_GET_EXP(s);
367 err_s = (err_s >= 0) ? err_s + 1 : 0;
368 /* the error on s is bounded by 2^err_s ulp(s), thus by
369 2^(err_s+1)*2^(-w)*|s| since ulp(s) <= 2^(1-w)*|s|.
370 Now n*2^(-w) can always be written |(1+r)^n-1| for some
371 |r|<=2^(-w), thus taking n=2^(err_s+1) we see that
372 |S - s| <= |(1+r)^(2^(err_s+1))-1| * |s|, where S is the
373 true value.
374 In fact if ulp(s) <= ulp(S) the same inequality holds for
375 |S| instead of |s| in the right hand side, i.e., we can
376 write s = (1+r)^(2^(err_s+1)) * S.
377 But if ulp(S) < ulp(s), we need to add one ``bit'' to the error,
378 to get s = (1+r)^(2^(err_s+2)) * S. This is true since with
379 E = n*2^(-w) we have |s - S| <= E * |s|, thus
380 |s - S| <= E/(1-E) * |S|.
381 Now E/(1-E) is bounded by 2E as long as E<=1/2,
382 and 2E can be written (1+r)^(2n)-1 as above.
383 */
384 err_s += 2; /* exponent of relative error */
385
386 mpfr_sub_ui (v, z0, 1, MPFR_RNDN); /* v = (x-1) * (1+r) */
387 mpfr_mul (v, v, t, MPFR_RNDN); /* v = Pi*(x-1) * (1+r)^3 */
388 mpfr_div (v, v, s, MPFR_RNDN); /* Pi*(x-1)/sin(Pi*(2-x)) */
389 mpfr_abs (v, v, MPFR_RNDN);
390 /* (1+r)^(3+2^err_s+1) */
391 err_s = (err_s <= 1) ? 3 : err_s + 1;
392 /* now (1+r)^M with M <= 2^err_s */
393 mpfr_log (v, v, MPFR_RNDN);
394 /* log(v*(1+e)) = log(v)+log(1+e) where |e| <= 2^(err_s-w).
395 Since |log(1+e)| <= 2*e for |e| <= 1/4, the error on v is
396 bounded by ulp(v)/2 + 2^(err_s+1-w). */
397 if (err_s + 2 > w)
398 {
399 w += err_s + 2;
400 }
401 else
402 {
403 /* if v = 0 here, it was 1 before the call to mpfr_log,
404 thus the error on v was zero */
405 if (!MPFR_IS_ZERO(v))
406 err_s += 1 - MPFR_GET_EXP(v);
407 err_s = (err_s >= 0) ? err_s + 1 : 0;
408 /* the error on v is bounded by 2^err_s ulps */
409 err_u += MPFR_GET_EXP(u); /* absolute error on u */
410 if (!MPFR_IS_ZERO(v)) /* same as above */
411 err_s += MPFR_GET_EXP(v); /* absolute error on v */
412 mpfr_sub (s, v, u, MPFR_RNDN);
413 /* the total error on s is bounded by ulp(s)/2 + 2^(err_u-w)
414 + 2^(err_s-w) <= ulp(s)/2 + 2^(max(err_u,err_s)+1-w) */
415 err_s = (err_s >= err_u) ? err_s : err_u;
416 err_s += 1 - MPFR_GET_EXP(s); /* error is 2^err_s ulp(s) */
417 err_s = (err_s >= 0) ? err_s + 1 : 0;
418 if (MPFR_CAN_ROUND (s, w - err_s, precy, rnd))
419 goto end;
420 }
421 MPFR_ZIV_NEXT (loop, w);
422 }
423 MPFR_ZIV_FREE (loop);
424 }
425
426 /* now z0 > 1 */
427
428 MPFR_ASSERTD (compared > 0);
429
430 /* since k is O(w), the value of log(z0*...*(z0+k-1)) is about w*log(w),
431 so there is a cancellation of ~log(w) in the argument reconstruction */
432 w = precy + MPFR_INT_CEIL_LOG2 (precy);
433 w += MPFR_INT_CEIL_LOG2 (w) + 13;
434 MPFR_ZIV_INIT (loop, w);
435 while (1)
436 {
437 MPFR_ASSERTD (w >= 3);
438
439 /* argument reduction: we compute gamma(z0 + k), where the series
440 has error term B_{2n}/(z0+k)^(2n) ~ (n/(Pi*e*(z0+k)))^(2n)
441 and we need k steps of argument reconstruction. Assuming k is large
442 with respect to z0, and k = n, we get 1/(Pi*e)^(2n) ~ 2^(-w), i.e.,
443 k ~ w*log(2)/2/log(Pi*e) ~ 0.1616 * w.
444 However, since the series is slightly more expensive to compute,
445 the optimal value seems to be k ~ 0.25 * w experimentally (with
446 caching of Bernoulli numbers).
447 For only one computation of gamma with large precision, it is better
448 to set k to a larger value, say k ~ w. */
449 mpfr_set_prec (s, 53);
450 mpfr_gamma_alpha (s, w);
451 mpfr_set_ui_2exp (s, 4, -4, MPFR_RNDU);
452 mpfr_mul_ui (s, s, w, MPFR_RNDU);
453 if (mpfr_cmp (z0, s) < 0)
454 {
455 mpfr_sub (s, s, z0, MPFR_RNDU);
456 k = mpfr_get_ui (s, MPFR_RNDU);
457 if (k < 3)
458 k = 3;
459 }
460 else
461 k = 3;
462
463 mpfr_set_prec (s, w);
464 mpfr_set_prec (t, w);
465 mpfr_set_prec (u, w);
466 mpfr_set_prec (v, w);
467 mpfr_set_prec (z, w);
468
469 mpfr_add_ui (z, z0, k, MPFR_RNDN);
470 /* z = (z0+k)*(1+t1) with |t1| <= 2^(-w) */
471
472 /* z >= 4 ensures the relative error on log(z) is small,
473 and also (z-1/2)*log(z)-z >= 0 */
474 MPFR_ASSERTD (mpfr_cmp_ui (z, 4) >= 0);
475
476 mpfr_log (s, z, MPFR_RNDN); /* log(z) */
477 /* we have s = log((z0+k)*(1+t1))*(1+t2) with |t1|, |t2| <= 2^(-w).
478 Since w >= 2 and z0+k >= 4, we can write log((z0+k)*(1+t1))
479 = log(z0+k) * (1+t3) with |t3| <= 2^(-w), thus we have
480 s = log(z0+k) * (1+t4)^2 with |t4| <= 2^(-w) */
481 mpfr_mul_2ui (t, z, 1, MPFR_RNDN); /* t = 2z * (1+t5) */
482 mpfr_sub_ui (t, t, 1, MPFR_RNDN); /* t = 2z-1 * (1+t6)^3 */
483 /* since we can write 2z*(1+t5) = (2z-1)*(1+t5') with
484 t5' = 2z/(2z-1) * t5, thus |t5'| <= 8/7 * t5 */
485 mpfr_mul (s, s, t, MPFR_RNDN); /* (2z-1)*log(z) * (1+t7)^6 */
486 mpfr_div_2ui (s, s, 1, MPFR_RNDN); /* (z-1/2)*log(z) * (1+t7)^6 */
487 mpfr_sub (s, s, z, MPFR_RNDN); /* (z-1/2)*log(z)-z */
488 /* s = [(z-1/2)*log(z)-z]*(1+u)^14, s >= 1/2 */
489
490 mpfr_ui_div (u, 1, z, MPFR_RNDN); /* 1/z * (1+u), u <= 1/4 since z >= 4 */
491
492 /* the first term is B[2]/2/z = 1/12/z: t=1/12/z, C[2]=1 */
493 mpfr_div_ui (t, u, 12, MPFR_RNDN); /* 1/(12z) * (1+u)^2, t <= 3/128 */
494 mpfr_set (v, t, MPFR_RNDN); /* (1+u)^2, v < 2^(-5) */
495 mpfr_add (s, s, v, MPFR_RNDN); /* (1+u)^15 */
496
497 mpfr_sqr (u, u, MPFR_RNDN); /* 1/z^2 * (1+u)^3 */
498
499 /* m <= maxm ensures that 2*m*(2*m+1) <= ULONG_MAX */
500 maxm = 1UL << (sizeof(unsigned long) * CHAR_BIT / 2 - 1);
501
502 /* s:(1+u)^15, t:(1+u)^2, t <= 3/128 */
503
504 for (m = 2; MPFR_GET_EXP(v) + (mpfr_exp_t) w >= MPFR_GET_EXP(s); m++)
505 {
506 mpfr_mul (t, t, u, MPFR_RNDN); /* (1+u)^(10m-14) */
507 if (m <= maxm)
508 {
509 mpfr_mul_ui (t, t, 2*(m-1)*(2*m-3), MPFR_RNDN);
510 mpfr_div_ui (t, t, 2*m*(2*m-1), MPFR_RNDN);
511 mpfr_div_ui (t, t, 2*m*(2*m+1), MPFR_RNDN);
512 }
513 else
514 {
515 mpfr_mul_ui (t, t, 2*(m-1), MPFR_RNDN);
516 mpfr_mul_ui (t, t, 2*m-3, MPFR_RNDN);
517 mpfr_div_ui (t, t, 2*m, MPFR_RNDN);
518 mpfr_div_ui (t, t, 2*m-1, MPFR_RNDN);
519 mpfr_div_ui (t, t, 2*m, MPFR_RNDN);
520 mpfr_div_ui (t, t, 2*m+1, MPFR_RNDN);
521 }
522 /* (1+u)^(10m-8) */
523 /* invariant: t=1/(2m)/(2m-1)/z^(2m-1)/(2m+1)! */
524 mpfr_mul_z (v, t, mpfr_bernoulli_cache(m), MPFR_RNDN); /* (1+u)^(10m-7) */
525 MPFR_ASSERTD(MPFR_GET_EXP(v) <= - (2 * m + 3));
526 mpfr_add (s, s, v, MPFR_RNDN);
527 }
528 /* m <= 1/2*Pi*e*z ensures that |v[m]| < 1/2^(2m+3) */
529 MPFR_ASSERTD ((double) m <= 4.26 * mpfr_get_d (z, MPFR_RNDZ));
530
531 /* We have sum([(1+u)^(10m-7)-1]*1/2^(2m+3), m=2..infinity)
532 <= 1.46*u for u <= 2^(-3).
533 We have 0 < lngamma(z) - [(z - 1/2) ln(z) - z + 1/2 ln(2 Pi)] < 0.021
534 for z >= 4, thus since the initial s >= 0.85, the different values of
535 s differ by at most one binade, and the total rounding error on s
536 in the for-loop is bounded by 2*(m-1)*ulp(final_s).
537 The error coming from the v's is bounded by
538 1.46*2^(-w) <= 2*ulp(final_s).
539 Thus the total error so far is bounded by [(1+u)^15-1]*s+2m*ulp(s)
540 <= (2m+47)*ulp(s).
541 Taking into account the truncation error (which is bounded by the last
542 term v[] according to 6.1.42 in A&S), the bound is (2m+48)*ulp(s).
543 */
544
545 /* add 1/2*log(2*Pi) and subtract log(z0*(z0+1)*...*(z0+k-1)) */
546 mpfr_const_pi (v, MPFR_RNDN); /* v = Pi*(1+u) */
547 mpfr_mul_2ui (v, v, 1, MPFR_RNDN); /* v = 2*Pi * (1+u) */
548 /* k >= 3 */
549 mpfr_set (t, z0, MPFR_RNDN); /* t = z0*(1+u) */
550 l = 1;
551
552 /* replace #if 1 by #if 0 for the naive argument reconstruction */
553 #if 1
554
555 /* We multiply by (z0+1)*(z0+2)*...*(z0+k-1) by blocks of j consecutive
556 terms where j ~ sqrt(k).
557 If we multiply naively by z0+1, then by z0+2, ..., then by z0+j,
558 the multiplicative term for the rounding error is (1+u)^(2j).
559 The multiplicative term is not larger when we multiply by
560 Z[j] + c[j-1]*Z[j-1] + ... + c[2]*Z[2] + c[1]*z0 + c[0]
561 with c[p] integers, and Z[p] = z0^p * (1+u)^(p-1).
562 Note that all terms are positive.
563 Indeed, since c[1] is exact, c[1]*z0 corresponds to (1+u),
564 then c[1]*z0 + c[0] corresponds to (1+u)^2,
565 c[2]*Z[2] + c[1]*z0 + c[0] to (1+u)^3, ...,
566 c[j-1]*Z[j-1] + ... + c[0] to (1+u)^j,
567 and Z[j] + c[j-1]*Z[j-1] + ... + c[1]*z0 + c[0] to (1+u)^(j+1).
568 With the accumulation in t, we get (1+u)^(j+2) and j+2 <= 2j. */
569 {
570 unsigned long j, i, p;
571 mpfr_t *Z;
572 mpz_t *c;
573 for (j = 2; (j + 1) * (j + 1) < k; j++);
574 /* Z[i] stores z0^i for i <= j */
575 Z = (mpfr_t *) mpfr_allocate_func ((j + 1) * sizeof (mpfr_t));
576 for (i = 2; i <= j; i++)
577 mpfr_init2 (Z[i], w);
578 mpfr_sqr (Z[2], z0, MPFR_RNDN);
579 for (i = 3; i <= j; i++)
580 if ((i & 1) == 0)
581 mpfr_sqr (Z[i], Z[i >> 1], MPFR_RNDN);
582 else
583 mpfr_mul (Z[i], Z[i-1], z0, MPFR_RNDN);
584 c = (mpz_t *) mpfr_allocate_func ((j + 1) * sizeof (mpz_t));
585 for (i = 0; i <= j; i++)
586 mpz_init (c[i]);
587 for (; l + j <= k; l += j)
588 {
589 /* c[i] is the coefficient of x^i in (x+l)*...*(x+l+j-1) */
590 mpz_set_ui (c[0], 1);
591 for (i = 0; i < j; i++)
592 /* multiply (x+l)*(x+l+1)*...*(x+l+i-1) by x+l+i:
593 (b[i]*x^i + b[i-1]*x^(i-1) + ... + b[0])*(x+l+i) =
594 b[i]*x^(i+1) + (b[i-1]+(l+i)*b[i])*x^i + ...
595 + (b[0]+(l+i)*b[1])*x + i*b[0] */
596 {
597 mpz_set (c[i+1], c[i]); /* b[i]*x^(i+1) */
598 for (p = i; p > 0; p--)
599 {
600 mpz_mul_ui (c[p], c[p], l + i);
601 mpz_add (c[p], c[p], c[p-1]); /* b[p-1]+(l+i)*b[p] */
602 }
603 mpz_mul_ui (c[0], c[0], l+i); /* i*b[0] */
604 }
605 /* now compute z0^j + c[j-1]*z0^(j-1) + ... + c[1]*z0 + c[0] */
606 mpfr_set_z (u, c[0], MPFR_RNDN);
607 for (i = 0; i < j; i++)
608 {
609 mpfr_mul_z (z, (i == 0) ? z0 : Z[i+1], c[i+1], MPFR_RNDN);
610 mpfr_add (u, u, z, MPFR_RNDN);
611 }
612 mpfr_mul (t, t, u, MPFR_RNDN);
613 }
614 for (i = 0; i <= j; i++)
615 mpz_clear (c[i]);
616 mpfr_free_func (c, (j + 1) * sizeof (mpz_t));
617 for (i = 2; i <= j; i++)
618 mpfr_clear (Z[i]);
619 mpfr_free_func (Z, (j + 1) * sizeof (mpfr_t));
620 }
621 #endif /* end of fast argument reconstruction */
622
623 for (; l < k; l++)
624 {
625 mpfr_add_ui (u, z0, l, MPFR_RNDN); /* u = (z0+l)*(1+u) */
626 mpfr_mul (t, t, u, MPFR_RNDN); /* (1+u)^(2l+1) */
627 }
628 /* now t: (1+u)^(2k-1) */
629 /* instead of computing log(sqrt(2*Pi)/t), we compute
630 1/2*log(2*Pi/t^2), which trades a square root for a square */
631 mpfr_sqr (t, t, MPFR_RNDN); /* (z0*...*(z0+k-1))^2, (1+u)^(4k-1) */
632 mpfr_div (v, v, t, MPFR_RNDN);
633 /* 2*Pi/(z0*...*(z0+k-1))^2 (1+u)^(4k+1) */
634 #ifdef IS_GAMMA
635 err_s = MPFR_GET_EXP(s);
636 mpfr_exp (s, s, MPFR_RNDN);
637 /* If s is +Inf, we compute exp(lngamma(z0)). */
638 if (mpfr_inf_p (s))
639 {
640 inexact = mpfr_explgamma (y, z0, &expo, s, t, rnd);
641 if (inexact)
642 goto end0;
643 else
644 goto ziv_next;
645 }
646 /* before the exponential, we have s = s0 + h where
647 |h| <= (2m+48)*ulp(s), thus exp(s0) = exp(s) * exp(-h).
648 For |h| <= 1/4, we have |exp(h)-1| <= 1.2*|h| thus
649 |exp(s) - exp(s0)| <= 1.2 * exp(s) * (2m+48)* 2^(EXP(s)-w). */
650 /* d = 1.2 * (2.0 * (double) m + 48.0); */
651 /* the error on s is bounded by d*2^err_s * 2^(-w) */
652 mpfr_sqrt (t, v, MPFR_RNDN);
653 /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1),
654 thus t = sqrt(v0)*(1+u)^(2k+3/2). */
655 mpfr_mul (s, s, t, MPFR_RNDN);
656 /* the error on input s is bounded by (1+u)^(d*2^err_s),
657 and that on t is (1+u)^(2k+3/2), thus the
658 total error is (1+u)^(d*2^err_s+2k+5/2) */
659 /* err_s += __gmpfr_ceil_log2 (d); */
660 /* since d = 1.2 * (2m+48), ceil(log2(d)) = 2 + ceil(log2(0.6*m+14.4))
661 <= 2 + ceil(log2(0.6*m+15)) */
662 {
663 unsigned long mm = (1 + m / 5) * 3; /* 0.6*m <= mm */
664 err_s += 2 + __gmpfr_int_ceil_log2 (mm + 15);
665 }
666 err_t = __gmpfr_ceil_log2 (2.0 * (double) k + 2.5);
667 err_s = (err_s >= err_t) ? err_s + 1 : err_t + 1;
668 #else
669 mpfr_log (t, v, MPFR_RNDN);
670 /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1),
671 thus log(v) = log(v0) + (4k+1)*log(1+u). Since |log(1+u)/u| <= 1.07
672 for |u| <= 2^(-3), the absolute error on log(v) is bounded by
673 1.07*(4k+1)*u, and the rounding error by ulp(t). */
674 mpfr_div_2ui (t, t, 1, MPFR_RNDN);
675 /* the error on t is now bounded by ulp(t) + 0.54*(4k+1)*2^(-w).
676 We have sqrt(2*Pi)/(z0*(z0+1)*...*(z0+k-1)) <= sqrt(2*Pi)/k! <= 0.5
677 since k>=3, thus t <= -0.5 and ulp(t) >= 2^(-w).
678 Thus the error on t is bounded by (2.16*k+1.54)*ulp(t). */
679 err_t = MPFR_GET_EXP(t) + (mpfr_exp_t)
680 __gmpfr_ceil_log2 (2.2 * (double) k + 1.6);
681 err_s = MPFR_GET_EXP(s) + (mpfr_exp_t)
682 __gmpfr_ceil_log2 (2.0 * (double) m + 48.0);
683 mpfr_add (s, s, t, MPFR_RNDN); /* this is a subtraction in fact */
684 /* the final error in ulp(s) is
685 <= 1 + 2^(err_t-EXP(s)) + 2^(err_s-EXP(s))
686 <= 2^(1+max(err_t,err_s)-EXP(s)) if err_t <> err_s
687 <= 2^(2+max(err_t,err_s)-EXP(s)) if err_t = err_s */
688 err_s = (err_t == err_s) ? 1 + err_s : ((err_t > err_s) ? err_t : err_s);
689 err_s += 1 - MPFR_GET_EXP(s);
690 #endif
691 if (MPFR_LIKELY (MPFR_CAN_ROUND (s, w - err_s, precy, rnd)))
692 break;
693 #ifdef IS_GAMMA
694 ziv_next:
695 #endif
696 MPFR_ZIV_NEXT (loop, w);
697 }
698
699 #ifdef IS_GAMMA
700 end0:
701 #endif
702
703 end:
704 if (inexact == 0)
705 inexact = mpfr_set (y, s, rnd);
706 MPFR_ZIV_FREE (loop);
707
708 mpfr_clear (s);
709 mpfr_clear (t);
710 mpfr_clear (u);
711 mpfr_clear (v);
712 mpfr_clear (z);
713
714 MPFR_SAVE_EXPO_FREE (expo);
715 return mpfr_check_range (y, inexact, rnd);
716 }
717
718 #ifndef IS_GAMMA
719
720 int
mpfr_lngamma(mpfr_ptr y,mpfr_srcptr x,mpfr_rnd_t rnd)721 mpfr_lngamma (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd)
722 {
723 int inex;
724
725 MPFR_LOG_FUNC
726 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd),
727 ("y[%Pu]=%.*Rg inexact=%d",
728 mpfr_get_prec (y), mpfr_log_prec, y, inex));
729
730 /* special cases */
731 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x) ||
732 (MPFR_IS_NEG (x) && mpfr_integer_p (x))))
733 {
734 if (MPFR_IS_NAN (x))
735 {
736 MPFR_SET_NAN (y);
737 MPFR_RET_NAN;
738 }
739 else /* lngamma(+/-Inf) = lngamma(nonpositive integer) = +Inf */
740 {
741 if (!MPFR_IS_INF (x))
742 MPFR_SET_DIVBY0 ();
743 MPFR_SET_INF (y);
744 MPFR_SET_POS (y);
745 MPFR_RET (0); /* exact */
746 }
747 }
748
749 /* if -2k-1 < x < -2k <= 0, then lngamma(x) = NaN */
750 if (MPFR_IS_NEG (x) && unit_bit (x) == 0)
751 {
752 MPFR_SET_NAN (y);
753 MPFR_RET_NAN;
754 }
755
756 inex = mpfr_lngamma_aux (y, x, rnd);
757 return inex;
758 }
759
760 int
mpfr_lgamma(mpfr_ptr y,int * signp,mpfr_srcptr x,mpfr_rnd_t rnd)761 mpfr_lgamma (mpfr_ptr y, int *signp, mpfr_srcptr x, mpfr_rnd_t rnd)
762 {
763 int inex;
764
765 MPFR_LOG_FUNC
766 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd),
767 ("y[%Pu]=%.*Rg signp=%d inexact=%d",
768 mpfr_get_prec (y), mpfr_log_prec, y, *signp, inex));
769
770 *signp = 1; /* most common case */
771
772 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
773 {
774 if (MPFR_IS_NAN (x))
775 {
776 MPFR_SET_NAN (y);
777 MPFR_RET_NAN;
778 }
779 else
780 {
781 if (MPFR_IS_ZERO (x))
782 MPFR_SET_DIVBY0 ();
783 *signp = MPFR_INT_SIGN (x);
784 MPFR_SET_INF (y);
785 MPFR_SET_POS (y);
786 MPFR_RET (0);
787 }
788 }
789
790 if (MPFR_IS_NEG (x))
791 {
792 if (mpfr_integer_p (x))
793 {
794 MPFR_SET_INF (y);
795 MPFR_SET_POS (y);
796 MPFR_SET_DIVBY0 ();
797 MPFR_RET (0);
798 }
799
800 if (unit_bit (x) == 0)
801 *signp = -1;
802
803 /* For tiny negative x, we have gamma(x) = 1/x - euler + O(x),
804 thus |gamma(x)| = -1/x + euler + O(x), and
805 log |gamma(x)| = -log(-x) - euler*x + O(x^2).
806 More precisely we have for -0.4 <= x < 0:
807 -log(-x) <= log |gamma(x)| <= -log(-x) - x.
808 Since log(x) is not representable, we may have an instance of the
809 Table Maker Dilemma. The only way to ensure correct rounding is to
810 compute an interval [l,h] such that l <= -log(-x) and
811 -log(-x) - x <= h, and check whether l and h round to the same number
812 for the target precision and rounding modes. */
813 if (MPFR_EXP(x) + 1 <= - (mpfr_exp_t) MPFR_PREC(y))
814 /* since PREC(y) >= 1, this ensures EXP(x) <= -2,
815 thus |x| <= 0.25 < 0.4 */
816 {
817 mpfr_t l, h;
818 int ok, inex2;
819 mpfr_prec_t w = MPFR_PREC (y) + 14;
820 mpfr_exp_t expl;
821 MPFR_SAVE_EXPO_DECL (expo);
822
823 MPFR_SAVE_EXPO_MARK (expo);
824
825 while (1)
826 {
827 mpfr_init2 (l, w);
828 mpfr_init2 (h, w);
829 /* we want a lower bound on -log(-x), thus an upper bound
830 on log(-x), thus an upper bound on -x. */
831 mpfr_neg (l, x, MPFR_RNDU); /* upper bound on -x */
832 mpfr_log (l, l, MPFR_RNDU); /* upper bound for log(-x) */
833 mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(-x) */
834 mpfr_neg (h, x, MPFR_RNDD); /* lower bound on -x */
835 mpfr_log (h, h, MPFR_RNDD); /* lower bound on log(-x) */
836 mpfr_neg (h, h, MPFR_RNDU); /* upper bound for -log(-x) */
837 mpfr_sub (h, h, x, MPFR_RNDU); /* upper bound for -log(-x) - x */
838 inex = mpfr_prec_round (l, MPFR_PREC (y), rnd);
839 inex2 = mpfr_prec_round (h, MPFR_PREC (y), rnd);
840 /* Caution: we not only need l = h, but both inexact flags
841 should agree. Indeed, one of the inexact flags might be
842 zero. In that case if we assume ln|gamma(x)| cannot be
843 exact, the other flag should be correct. We are conservative
844 here and request that both inexact flags agree. */
845 ok = SAME_SIGN (inex, inex2) && mpfr_equal_p (l, h);
846 if (ok)
847 mpfr_set (y, h, rnd); /* exact */
848 else
849 expl = MPFR_EXP (l);
850 mpfr_clear (l);
851 mpfr_clear (h);
852 if (ok)
853 {
854 MPFR_SAVE_EXPO_FREE (expo);
855 return mpfr_check_range (y, inex, rnd);
856 }
857 /* if ulp(log(-x)) <= |x| there is no reason to loop,
858 since the width of [l, h] will be at least |x| */
859 if (expl < MPFR_EXP (x) + w)
860 break;
861 w += MPFR_INT_CEIL_LOG2(w) + 3;
862 }
863
864 MPFR_SAVE_EXPO_FREE (expo);
865 }
866 }
867
868 inex = mpfr_lngamma_aux (y, x, rnd);
869 return inex;
870 }
871
872 #endif
873