1 /*
2     Copyright (C) 2021 Fredrik Johansson
3 
4     This file is part of Arb.
5 
6     Arb is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <http://www.gnu.org/licenses/>.
10 */
11 
12 #include "arb_hypgeom.h"
13 
14 #define DEBUG 0
15 
16 
17 const double arb_hypgeom_rgamma_d_tab[128] = {
18     1.0,
19     0.57721566490153286061,
20     -0.65587807152025388108,
21     -0.042002635034095235529,
22     0.1665386113822914895,
23     -0.042197734555544336748,
24     -0.0096219715278769735621,
25     0.0072189432466630995424,
26     -0.0011651675918590651121,
27     -0.00021524167411495097282,
28     0.00012805028238811618615,
29     -0.000020134854780788238656,
30     -1.2504934821426706573e-6,
31     1.1330272319816958824e-6,
32     -2.0563384169776071035e-7,
33     6.1160951044814158179e-9,
34     5.0020076444692229301e-9,
35     -1.1812745704870201446e-9,
36     1.0434267116911005105e-10,
37     7.782263439905071254e-12,
38     -3.6968056186422057082e-12,
39     5.100370287454475979e-13,
40     -2.0583260535665067832e-14,
41     -5.3481225394230179824e-15,
42     1.2267786282382607902e-15,
43     -1.1812593016974587695e-16,
44     1.1866922547516003326e-18,
45     1.4123806553180317816e-18,
46     -2.2987456844353702066e-19,
47     1.7144063219273374334e-20,
48     1.3373517304936931149e-22,
49     -2.0542335517666727893e-22,
50     2.7360300486079998448e-23,
51     -1.7323564459105166391e-24,
52     -2.3606190244992872873e-26,
53     1.8649829417172944307e-26,
54     -2.2180956242071972044e-27,
55     1.2977819749479936688e-28,
56     1.1806974749665284062e-30,
57     -1.1245843492770880903e-30,
58     1.277085175140866204e-31,
59     -7.3914511696151408235e-33,
60     1.134750257554215761e-35,
61     4.6391346410587220299e-35,
62     -5.3473368184391988751e-36,
63     3.2079959236133526229e-37,
64     -4.4458297365507568821e-39,
65     -1.3111745188819887129e-39,
66     1.6470333525438138868e-40,
67     -1.0562331785035812186e-41,
68     2.6784429826430494784e-43,
69     2.4247154948517826897e-44,
70     -3.736587834535612554e-45,
71     2.6283329809401954491e-46,
72     -9.2981759953768862996e-48,
73     -2.3279424186994705986e-49,
74     6.1696208352443874204e-50,
75     -4.9282955867709899305e-51,
76     2.1835131834145106973e-52,
77     -1.2187221891475165553e-54,
78     -7.1171088416628746319e-55,
79     6.9205040543286892535e-56,
80     -3.6764384683566763277e-57,
81     8.563098056275654328e-59,
82     4.9630454283668443848e-60,
83     -7.1542945770816152182e-61,
84     4.5517276890885041177e-62,
85     -1.6183993053202944344e-63,
86     -3.8180434243999502464e-66,
87     5.1850524119058482295e-66,
88     -4.1671368092239208861e-67,
89     1.9162906929373887193e-68,
90     -3.8089281324683658733e-70,
91     -2.2063861055924121016e-71,
92     2.7722310960098954165e-72,
93     -1.5987660478100181057e-73,
94     5.3197307804174034028e-75,
95     -8.0517461416842390432e-78,
96     -1.2484629810263795113e-77,
97     9.6431887683992238428e-79,
98     -4.2827980483017479213e-80,
99     9.5087142369030441861e-82,
100     2.7131392138694383464e-83,
101     -4.0968779415069156659e-84,
102     2.3742980019740160598e-85,
103     -8.2770890210072789764e-87,
104     9.072497609426645865e-89,
105     1.0645558195026985633e-89,
106     -9.285335619603754493e-91,
107     4.3333135927203670323e-92,
108     -1.1745606334673315984e-93,
109     -2.6908010752365215433e-96,
110     2.3898952892036810357e-96,
111     -1.5569361182789167325e-97,
112     6.0488748201074133757e-99,
113     -1.2273370571029378615e-100,
114     -2.540738850916238751e-102,
115     3.7708800953170816508e-103,
116     -2.0089261677502892352e-104,
117     6.6158100911447349361e-106,
118     -9.2404702022121568081e-108,
119     -4.82072018655246532e-109,
120     4.4938898756858357188e-110,
121     -2.0497789059725778416e-111,
122     5.7862770569866937508e-113,
123     -4.5696744624334387424e-115,
124     -5.8267365553303743945e-116,
125     4.2025380699297338056e-117,
126     -1.6889318527713702846e-118,
127     4.1226213324018604871e-120,
128     -8.2451196593745569675e-123,
129     -5.2036993784470216679e-123,
130     3.1616685922306712047e-124,
131     -1.1432359131094236326e-125,
132     2.4359648735131490197e-127,
133     8.8701584767164321698e-130,
134     -3.6328610892429035156e-130,
135     1.9485148907440212068e-131,
136     -6.450096583602651512e-133,
137     1.215186561728963791e-134,
138     1.0637863819629713691e-136,
139     -2.0430980587447135517e-137,
140     9.9760876002985183681e-139,
141     -3.0707428945789381066e-140,
142     5.2091832948433107534e-142,
143     6.7131589510935005823e-144,
144     -9.434301219575868381e-145,
145     4.2908149482548296582e-146,
146 };
147 
148 #define GAMMA_MIN_X 1.4616321449683623413
149 #define GAMMA_MIN_Y 0.88560319441088870028
150 
151 /* Crude upper bound for psi(x) for x > 0, adequate for perturbation bounds
152    for gamma. */
153 double
d_abs_digamma_ubound(double x)154 d_abs_digamma_ubound(double x)
155 {
156     if (x <= 1.0)
157     {
158         return (1.0 + 1e-14) / x + 0.57721566490153286061 - x + 1e-14;
159     }
160     else if (x <= GAMMA_MIN_X)
161     {
162         return -1.250380137503405359*x + 1.8275958024049382196 + 1e-14;
163     }
164     else if (x <= 8.0)
165     {
166         return (x - GAMMA_MIN_X) * (1.7581621716802087234 +
167             x * (-0.74622516195984912595 + x * (0.17009872711678924164 +
168             x * (-0.018637559864260712285 + x * 0.00077747045691426195132)))) + 1e-12;
169     }
170     else if (x <= 128.0)
171     {
172         return 0.75334126757115431475 + x * (0.21045131598436795981 +
173             x * (-0.0075387469533717503617 + x * (0.00017308475161765275722 +
174                 x * (-2.4025446500822043239e-6 + x * (1.9547402969088507111e-8 +
175             x * (-8.5654894222045481692e-11 + x * 1.5584520745423393038e-13)))))) + 1e-12;
176     }
177     else
178     {
179         return (mag_d_log_upper_bound(x) + 1.0 / x) * (1.0 + 1e-14);
180     }
181 }
182 
183 /* Upper or lower bound (depending on direction) for gamma(x),
184    assuming x > 0, no overflow. */
185 double
_arb_hypgeom_d_gamma(double x,int direction)186 _arb_hypgeom_d_gamma(double x, int direction)
187 {
188     double s, t, p;
189     int i, r;
190 
191     if (direction == 1)
192         p = 1 + 1e-14;
193     else
194         p = 1 - 1e-14;
195 
196     if (x < 0.5)
197     {
198         s = d_polyval(arb_hypgeom_rgamma_d_tab, 19, x);
199         s = 1.0 / (s * x);
200     }
201     else if (x <= 1.5)
202     {
203         s = 1.0 / d_polyval(arb_hypgeom_rgamma_d_tab, 19, x - 1.0);
204     }
205     else
206     {
207         r = (int) (x + 0.5);
208 
209         s = d_polyval(arb_hypgeom_rgamma_d_tab, 19, x - r);
210 
211         t = 1.0;
212         for (i = 0; i < r - 1; i++)
213             t *= (x - i - 1) * p;
214 
215         s = t / s;
216     }
217 
218     return s * p;
219 }
220 
221 /* Set res = [a, b]; not checking overflow or underflow. */
arb_set_interval_d_fast(arb_t res,double a,double b,slong prec)222 void arb_set_interval_d_fast(arb_t res, double a, double b, slong prec)
223 {
224     double mid, rad;
225 
226     if (a > b)
227     {
228         flint_printf("arb_set_interval_d_fast: expected a < b\n");
229         flint_abort();
230     }
231 
232     mid = a + 0.5 * (b - a);
233     rad = (0.5 * (b - a) + (mid * 1e-15)) * (1 + 1e-15);
234     arf_set_d(arb_midref(res), mid);
235     mag_set_d(arb_radref(res), rad);
236     arb_set_round(res, res, prec);
237 }
238 
239 int _arf_increment_fast(arf_t x, slong prec);
240 
241 
242 
243 /* Try to compute gamma(x) using Taylor series. Returns 1 on success, 0 on
244    failure (x too large or precision too large). */
245 int
arb_hypgeom_gamma_taylor(arb_t res,const arb_t x,int reciprocal,slong prec)246 arb_hypgeom_gamma_taylor(arb_t res, const arb_t x, int reciprocal, slong prec)
247 {
248     double dx, dxerr, log2u, ds, du;
249     slong i, n, wp, r, tail_bound, rad_exp, mid_exp;
250     arf_t s, u, v;
251     short term_prec[ARB_HYPGEOM_GAMMA_TAB_NUM];
252     int success;
253 
254 #if DEBUG
255     printf("INPUT: "); arb_printd(x, 200); printf("\n");
256     printf("INPUT prec: %ld\n", prec);
257 #endif
258 
259     /* We don't want to deal with infinities or huge/tiny exponents here. */
260     if (!ARB_IS_LAGOM(x))
261         return 0;
262 
263     /* 2^e bounds for the midpoint and radius. */
264     mid_exp = arf_is_zero(arb_midref(x)) ? WORD_MIN : ARF_EXP(arb_midref(x));
265     rad_exp = mag_is_zero(arb_radref(x)) ? WORD_MIN : MAG_EXP(arb_radref(x));
266 
267     /* Containing zero. */
268     if (rad_exp >= mid_exp && arb_contains_zero(x))
269     {
270         if (reciprocal)
271         {
272             arb_t t;
273             arb_init(t);
274             arb_add_ui(t, x, 1, prec + 10);
275 
276             if (!arb_contains_zero(t))
277             {
278                 success = arb_hypgeom_gamma_taylor(t, t, reciprocal, prec + 10);
279                 if (success)
280                     arb_mul(res, x, t, prec);
281             }
282             else
283             {
284                 /* todo: accurate wide interval */
285                 success = 0;
286             }
287 
288             arb_clear(t);
289             return success;
290         }
291         else
292         {
293             arb_indeterminate(res);
294             return 1;
295         }
296     }
297 
298     /* Quick exclusion of too large numbers. */
299     if (mid_exp > 8 || rad_exp > 8)
300         return 0;
301 
302     /* Adjust precision if the input is not precise. */
303     if (rad_exp != WORD_MIN)
304         prec = FLINT_MIN(prec, -rad_exp + MAG_BITS);
305     prec = FLINT_MAX(prec, 2);
306 
307     /* Midpoint and radius as doubles. */
308     dx = arf_get_d(arb_midref(x), ARF_RND_NEAR);
309     dxerr = mag_get_d(arb_radref(x));
310 
311     /* Too large to be efficient (high precision), or gamma(x) may overflow
312        doubles (wide case). */
313     if (dx + dxerr > 160.0 || dx - dxerr < -160.0)
314         return 0;
315 
316     /* Very close to 0, reduce to gamma(x) = gamma(x + 1) / x. */
317     if (mid_exp < -32 || (dx - dxerr >= -0.5 && dx - dxerr < ldexp(1.0, -6)))
318     {
319         arb_t t;
320         arb_init(t);
321         arb_add_ui(t, x, 1, prec + 10);
322 
323 #if DEBUG
324     printf("DIVIDING NEAR 0\n");
325 #endif
326 
327         success = arb_hypgeom_gamma_taylor(t, t, reciprocal, prec + 10);
328         if (success)
329         {
330             if (reciprocal)
331                 arb_mul(res, x, t, prec);
332             else
333                 arb_div(res, t, x, prec);
334         }
335 
336         arb_clear(t);
337         return success;
338     }
339 
340     /* Nearest (roughly) integer to x, to use as shift for argument reduction
341        to move to the interval [-0.5,0.5]. It's OK that dx is approximate so
342        that the reduced argument will actually lie in [-0.5-eps,0.5+eps]. */
343     if (dx >= 0.0)
344         r = (slong) (dx + 0.5);
345     else
346         r = -(slong) (-dx + 0.5);
347 
348     /* Tuning cutoff. */
349     if (prec >= 40)
350     {
351         if (r < -(40 + (prec - 40) / 4))
352             return 0;
353 
354         if (r > 70 + (prec - 40) / 8)
355             return 0;
356     }
357 
358     /* For negative numbers, reduce to the positive case. */
359     /* gamma(x) = (-1)^r * gamma(1+x-r) / (rf(1+r-x,-r)*(x-r)) */
360     /* 1/gamma(x) = (-1)^r * rgamma(1+x-r) * rf(1+r-x,-r) * (x-r) */
361     if (dx < 0.0)
362     {
363         arb_t t, u, v;
364 
365         arb_init(t);
366         arb_init(u);
367         arb_init(v);
368 
369         arb_sub_si(t, x, r, prec + 10);
370 
371         /* Pole. */
372         if (!reciprocal && arb_contains_zero(t))
373         {
374             arb_indeterminate(res);
375             success = 1;
376         }
377         else
378         {
379             arb_add_si(u, x, 1 - r, prec + 10);
380 
381             success = 1;
382             if (reciprocal && !arb_is_positive(u))
383             {
384                 /* todo: accurate wide interval */
385                 success = 0;
386             }
387 
388             success = arb_hypgeom_gamma_taylor(u, u, reciprocal, prec + 10);
389 
390             if (success)
391             {
392                 /* Wide bounds for rising factorial. */
393                 if (prec < 44)
394                 {
395                     double a, b, c, d;
396 
397                     c = (-dx + r + 1 - dxerr) * (1 - 1e-14);
398                     d = (-dx + r + 1 + dxerr) * (1 + 1e-14);
399                     a = b = 1.0;
400 
401                     for (i = 0; i < -r; i++)
402                     {
403                         a = a * ((c + i) * (1 - 1e-15));
404                         b = b * ((d + i) * (1 + 1e-15));
405                     }
406 
407                     arb_set_interval_d_fast(v, a, b, 53);
408 
409                     if (reciprocal)
410                     {
411                         arb_mul(res, u, v, prec + 10);
412                         arb_mul(res, res, t, prec);
413                     }
414                     else
415                     {
416                         arb_div(res, u, v, prec + 10);
417                         arb_div(res, res, t, prec);
418                     }
419                 }
420                 else
421                 {
422                     arb_neg(v, x);
423                     arb_add_si(v, v, 1 + r, prec + 10);
424                     arb_hypgeom_rising_ui_rec(v, v, -r, prec + 10);
425                     arb_mul(v, v, t, prec + 10);
426 
427                     if (reciprocal)
428                         arb_mul(res, u, v, prec);
429                     else
430                         arb_div(res, u, v, prec);
431                 }
432 
433                 if (r % 2)
434                     arb_neg(res, res);
435             }
436         }
437 
438         arb_clear(t);
439         arb_clear(u);
440         arb_clear(v);
441         return success;
442     }
443 
444     /* Wide enclosure. */
445     if (prec < 40 || rad_exp > -16)
446     {
447         double a, b, c;
448 
449 #if DEBUG
450     printf("WIDE CASE\n");
451 #endif
452 
453         dxerr += ldexp(1.0, mid_exp - 51);
454         dxerr *= (1 + 1e-15);
455 
456         a = (dx - dxerr) * (1 - 1e-15);
457         b = (dx + dxerr) * (1 + 1e-15);
458 
459         if (a >= GAMMA_MIN_X)
460         {
461             a = _arb_hypgeom_d_gamma(a, -1);
462             b = _arb_hypgeom_d_gamma(b, 1);
463         }
464         else if (b <= GAMMA_MIN_X)
465         {
466             c = _arb_hypgeom_d_gamma(a, 1);
467             a = _arb_hypgeom_d_gamma(b, -1);
468             b = c;
469         }
470         else
471         {
472             a = _arb_hypgeom_d_gamma(a, 1);
473             b = _arb_hypgeom_d_gamma(b, 1);
474             b = FLINT_MAX(a, b);
475             a = GAMMA_MIN_Y * (1 - 1e-15);
476         }
477 
478         if (reciprocal)
479         {
480             c = (1.0 / b) * (1 - 1e-15);
481             b = (1.0 / a) * (1 + 1e-15);
482             a = c;
483         }
484 
485         arb_set_interval_d_fast(res, a, b, prec);
486         return 1;
487     }
488 
489     /* Propagated error. */
490     if (rad_exp == WORD_MIN)
491     {
492         dxerr = 0.0;
493         rad_exp = WORD_MIN;
494     }
495     else
496     {
497         /* First-order relative error estimate plus safety factor to guarantee
498            an upper bound. */
499         dxerr = MAG_MAN(arb_radref(x)) * ldexp(1.0, -MAG_BITS);
500         dxerr = dxerr * d_abs_digamma_ubound(dx) * 1.001;
501     }
502 
503 #if DEBUG
504     flint_printf("propagated error = %g x 2^%wd\n", dxerr, rad_exp);
505 #endif
506 
507     wp = prec + 6 + FLINT_BIT_COUNT(FLINT_ABS(r));
508 
509     if (wp > ARB_HYPGEOM_GAMMA_TAB_PREC)
510         return 0;
511 
512     success = 0;
513 
514     arf_init(s);
515     arf_init(u);
516     arf_init(v);
517 
518     /* u = x - r */
519     arf_sub_si(u, arb_midref(x), r, wp, ARF_RND_DOWN);
520 
521     /* du = dx - r; */
522     du = arf_get_d(u, ARF_RND_NEAR);
523 
524     /* bound log2(u) */
525     if (-0.0001 < du && du < 0.0001)
526         log2u = arf_is_zero(u) ? -wp : ARF_EXP(u);
527     else
528         log2u = mag_d_log_upper_bound(du < 0 ? -du : du) * 1.4426950408889634074 * (1 + 1e-14);
529 
530     term_prec[0] = wp;
531     n = 0;
532 
533     for (i = 1; i < ARB_HYPGEOM_GAMMA_TAB_NUM; i++)
534     {
535         tail_bound = arb_hypgeom_gamma_coeffs[i].exp + i * log2u + 5;
536 
537         if (tail_bound <= -wp)
538         {
539             n = i;
540             break;
541         }
542 
543         term_prec[i] = FLINT_MIN(FLINT_MAX(wp + tail_bound, 2), wp);
544     }
545 
546     if (n == 0)
547     {
548         flint_printf("warning: gamma_taylor: unexpected failure\n");
549         success = 0;
550         goto cleanup;
551     }
552 
553 #if DEBUG
554     printf("COMPUTATION: wp = %ld, du = %g, log2u = %g, n = %ld\n", wp, du, log2u, n);
555 #endif
556 
557     if (wp <= 512 && n <= 128)
558     {
559         ds = 0.0;
560         for (i = n - 1; i >= 1 && term_prec[i] <= 53; i--)
561         {
562 #if DEBUG
563             flint_printf("add term %wd with precision %wd (doubles)\n", i, term_prec[i]);
564 #endif
565 
566             ds = du * ds + arb_hypgeom_rgamma_d_tab[i];
567         }
568 
569         arf_set_d(s, ds);
570     }
571     else
572     {
573         i = n - 1;
574     }
575 
576     for ( ; i >= 1; i--)
577     {
578         arf_t c;
579 
580 #if DEBUG
581         flint_printf("add term %wd with precision %wd\n", i, term_prec[i]);
582 #endif
583 
584         if (!_arb_hypgeom_gamma_coeff_shallow(c, NULL, i, term_prec[i]))
585         {
586             flint_printf("arb_hypgeom_gamma_taylor: prec = %wd, du = %g, log2u = %d, term_prec[%wd] = %wd",
587                 prec, du, log2u, i, term_prec[i]);
588             flint_abort();
589         }
590 
591         if (term_prec[i] < wp - 128)
592         {
593             arf_set_round(v, u, term_prec[i], ARF_RND_DOWN);
594             arf_mul(s, s, v, term_prec[i], ARF_RND_DOWN);
595             arf_add(s, s, c, term_prec[i], ARF_RND_DOWN);
596         }
597         else
598         {
599             arf_mul(s, s, u, term_prec[i], ARF_RND_DOWN);
600             arf_add(s, s, c, term_prec[i], ARF_RND_DOWN);
601         }
602     }
603 
604     if (i == 0)
605     {
606 #if DEBUG
607         flint_printf("add term %wd with precision %wd\n", i, term_prec[i]);
608 #endif
609 
610         arf_mul(s, s, u, wp, ARF_RND_DOWN);
611         arf_add_ui(s, s, 1, wp, ARF_RND_DOWN);
612     }
613 
614     if (r == 0 || r == 1)
615     {
616         if (r == 0)
617             arf_mul(s, s, u, wp, ARF_RND_DOWN);
618 
619         if (reciprocal)
620         {
621             arf_set_round(arb_midref(res), s, prec, ARF_RND_DOWN);
622         }
623         else
624         {
625             arf_one(u);
626             arf_div(arb_midref(res), u, s, prec, ARF_RND_DOWN);
627         }
628         arf_mag_set_ulp(arb_radref(res), arb_midref(res), prec - 1);
629     }
630     else if (wp <= 320 || r <= 3)
631     {
632         _arf_increment_fast(u, wp);
633         arf_set(v, u);
634 
635         for (i = 2; i < r; i++)
636         {
637             _arf_increment_fast(u, wp);
638             arf_mul(v, v, u, wp, ARF_RND_DOWN);
639         }
640 
641         if (reciprocal)
642             arf_div(arb_midref(res), s, v, prec, ARF_RND_DOWN);
643         else
644             arf_div(arb_midref(res), v, s, prec, ARF_RND_DOWN);
645         arf_mag_set_ulp(arb_radref(res), arb_midref(res), prec - 1);
646     }
647     else
648     {
649         arb_t t;
650         arb_init(t);
651         _arf_increment_fast(u, wp);
652         arb_set_arf(t, u);
653         arb_hypgeom_rising_ui_rec(t, t, r - 1, wp);
654 
655         if (reciprocal)
656         {
657             arb_set_arf(res, s);
658             arb_div(res, res, t, prec);
659         }
660         else
661             arb_div_arf(res, t, s, prec);
662 
663         arf_mag_add_ulp(arb_radref(res), arb_radref(res), arb_midref(res), prec - 1);
664         arb_clear(t);
665     }
666 
667     /* Add propagated error. */
668     if (dxerr != 0)
669     {
670         mag_t err;
671         double dy;
672         dy = arf_get_d(arb_midref(res), ARF_RND_UP);
673         dxerr = dxerr * dy * (1 + 1e-15);
674         MAG_SET_D_2EXP(MAG_MAN(err), MAG_EXP(err), dxerr, rad_exp);
675         mag_add(arb_radref(res), arb_radref(res), err);
676     }
677 
678     success = 1;
679 
680 #if DEBUG
681     printf("OUTPUT: "); arb_printd(res, 200); printf("\n");
682 #endif
683 
684 cleanup:
685     arf_clear(s);
686     arf_clear(u);
687     arf_clear(v);
688 
689     return success;
690 }
691 
692