xref: /netbsd/external/lgpl3/mpfr/dist/src/cos.c (revision 606004a0)
115e9af00Smrg /* mpfr_cos -- cosine of a floating-point number
215e9af00Smrg 
3*606004a0Smrg Copyright 2001-2023 Free Software Foundation, Inc.
46c7ec94dSmrg Contributed by the AriC and Caramba projects, INRIA.
515e9af00Smrg 
615e9af00Smrg This file is part of the GNU MPFR Library.
715e9af00Smrg 
815e9af00Smrg The GNU MPFR Library is free software; you can redistribute it and/or modify
915e9af00Smrg it under the terms of the GNU Lesser General Public License as published by
1015e9af00Smrg the Free Software Foundation; either version 3 of the License, or (at your
1115e9af00Smrg option) any later version.
1215e9af00Smrg 
1315e9af00Smrg The GNU MPFR Library is distributed in the hope that it will be useful, but
1415e9af00Smrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1515e9af00Smrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
1615e9af00Smrg License for more details.
1715e9af00Smrg 
1815e9af00Smrg You should have received a copy of the GNU Lesser General Public License
1915e9af00Smrg along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
204da858a9Smrg https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
2115e9af00Smrg 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
2215e9af00Smrg 
2315e9af00Smrg #define MPFR_NEED_LONGLONG_H
2415e9af00Smrg #include "mpfr-impl.h"
2515e9af00Smrg 
2615e9af00Smrg static int
mpfr_cos_fast(mpfr_ptr y,mpfr_srcptr x,mpfr_rnd_t rnd_mode)2715e9af00Smrg mpfr_cos_fast (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
2815e9af00Smrg {
2915e9af00Smrg   int inex;
3015e9af00Smrg 
3115e9af00Smrg   inex = mpfr_sincos_fast (NULL, y, x, rnd_mode);
3215e9af00Smrg   inex = inex >> 2; /* 0: exact, 1: rounded up, 2: rounded down */
3315e9af00Smrg   return (inex == 2) ? -1 : inex;
3415e9af00Smrg }
3515e9af00Smrg 
3615e9af00Smrg /* f <- 1 - r/2! + r^2/4! + ... + (-1)^l r^l/(2l)! + ...
3715e9af00Smrg    Assumes |r| < 1/2, and f, r have the same precision.
3815e9af00Smrg    Returns e such that the error on f is bounded by 2^e ulps.
3915e9af00Smrg */
4015e9af00Smrg static int
mpfr_cos2_aux(mpfr_ptr f,mpfr_srcptr r)4115e9af00Smrg mpfr_cos2_aux (mpfr_ptr f, mpfr_srcptr r)
4215e9af00Smrg {
4315e9af00Smrg   mpz_t x, t, s;
4415e9af00Smrg   mpfr_exp_t ex, l, m;
4515e9af00Smrg   mpfr_prec_t p, q;
4615e9af00Smrg   unsigned long i, maxi, imax;
4715e9af00Smrg 
4815e9af00Smrg   MPFR_ASSERTD(mpfr_get_exp (r) <= -1);
4915e9af00Smrg 
5015e9af00Smrg   /* compute minimal i such that i*(i+1) does not fit in an unsigned long,
5115e9af00Smrg      assuming that there are no padding bits. */
5203f29264Smrg   maxi = 1UL << (sizeof(unsigned long) * CHAR_BIT / 2);
5315e9af00Smrg   if (maxi * (maxi / 2) == 0) /* test checked at compile time */
5415e9af00Smrg     {
5515e9af00Smrg       /* can occur only when there are padding bits. */
5615e9af00Smrg       /* maxi * (maxi-1) is representable iff maxi * (maxi / 2) != 0 */
5715e9af00Smrg       do
5815e9af00Smrg         maxi /= 2;
5915e9af00Smrg       while (maxi * (maxi / 2) == 0);
6015e9af00Smrg     }
6115e9af00Smrg 
6215e9af00Smrg   mpz_init (x);
6315e9af00Smrg   mpz_init (s);
6415e9af00Smrg   mpz_init (t);
6515e9af00Smrg   ex = mpfr_get_z_2exp (x, r); /* r = x*2^ex */
6615e9af00Smrg 
674da858a9Smrg   /* Remove trailing zeroes.
684da858a9Smrg      Since x comes from a regular MPFR number, due to the constraints on the
694da858a9Smrg      exponent and the precision, there can be no integer overflow below. */
7015e9af00Smrg   l = mpz_scan1 (x, 0);
7115e9af00Smrg   ex += l;
7215e9af00Smrg   mpz_fdiv_q_2exp (x, x, l);
7315e9af00Smrg 
7415e9af00Smrg   /* since |r| < 1, r = x*2^ex, and x is an integer, necessarily ex < 0 */
7515e9af00Smrg 
76*606004a0Smrg   p = mpfr_get_prec (f); /* same as r */
7715e9af00Smrg   /* bound for number of iterations */
7815e9af00Smrg   imax = p / (-mpfr_get_exp (r));
7915e9af00Smrg   imax += (imax == 0);
8015e9af00Smrg   q = 2 * MPFR_INT_CEIL_LOG2(imax) + 4; /* bound for (3l)^2 */
8115e9af00Smrg 
8215e9af00Smrg   mpz_set_ui (s, 1); /* initialize sum with 1 */
8315e9af00Smrg   mpz_mul_2exp (s, s, p + q); /* scale all values by 2^(p+q) */
8415e9af00Smrg   mpz_set (t, s); /* invariant: t is previous term */
8515e9af00Smrg   for (i = 1; (m = mpz_sizeinbase (t, 2)) >= q; i += 2)
8615e9af00Smrg     {
8715e9af00Smrg       /* adjust precision of x to that of t */
8815e9af00Smrg       l = mpz_sizeinbase (x, 2);
8915e9af00Smrg       if (l > m)
9015e9af00Smrg         {
9115e9af00Smrg           l -= m;
9215e9af00Smrg           mpz_fdiv_q_2exp (x, x, l);
9315e9af00Smrg           ex += l;
9415e9af00Smrg         }
9515e9af00Smrg       /* multiply t by r */
9615e9af00Smrg       mpz_mul (t, t, x);
9715e9af00Smrg       mpz_fdiv_q_2exp (t, t, -ex);
9815e9af00Smrg       /* divide t by i*(i+1) */
9915e9af00Smrg       if (i < maxi)
10015e9af00Smrg         mpz_fdiv_q_ui (t, t, i * (i + 1));
10115e9af00Smrg       else
10215e9af00Smrg         {
10315e9af00Smrg           mpz_fdiv_q_ui (t, t, i);
10415e9af00Smrg           mpz_fdiv_q_ui (t, t, i + 1);
10515e9af00Smrg         }
10615e9af00Smrg       /* if m is the (current) number of bits of t, we can consider that
10715e9af00Smrg          all operations on t so far had precision >= m, so we can prove
10815e9af00Smrg          by induction that the relative error on t is of the form
10915e9af00Smrg          (1+u)^(3l)-1, where |u| <= 2^(-m), and l=(i+1)/2 is the # of loops.
11015e9af00Smrg          Since |(1+x^2)^(1/x) - 1| <= 4x/3 for |x| <= 1/2,
11115e9af00Smrg          for |u| <= 1/(3l)^2, the absolute error is bounded by
11215e9af00Smrg          4/3*(3l)*2^(-m)*t <= 4*l since |t| < 2^m.
11315e9af00Smrg          Therefore the error on s is bounded by 2*l*(l+1). */
11415e9af00Smrg       /* add or subtract to s */
11515e9af00Smrg       if (i % 4 == 1)
11615e9af00Smrg         mpz_sub (s, s, t);
11715e9af00Smrg       else
11815e9af00Smrg         mpz_add (s, s, t);
11915e9af00Smrg     }
12015e9af00Smrg 
12115e9af00Smrg   mpfr_set_z (f, s, MPFR_RNDN);
12215e9af00Smrg   mpfr_div_2ui (f, f, p + q, MPFR_RNDN);
12315e9af00Smrg 
12415e9af00Smrg   mpz_clear (x);
12515e9af00Smrg   mpz_clear (s);
12615e9af00Smrg   mpz_clear (t);
12715e9af00Smrg 
12815e9af00Smrg   l = (i - 1) / 2; /* number of iterations */
12915e9af00Smrg   return 2 * MPFR_INT_CEIL_LOG2 (l + 1) + 1; /* bound is 2l(l+1) */
13015e9af00Smrg }
13115e9af00Smrg 
13215e9af00Smrg int
mpfr_cos(mpfr_ptr y,mpfr_srcptr x,mpfr_rnd_t rnd_mode)13315e9af00Smrg mpfr_cos (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
13415e9af00Smrg {
13515e9af00Smrg   mpfr_prec_t K0, K, precy, m, k, l;
13615e9af00Smrg   int inexact, reduce = 0;
13715e9af00Smrg   mpfr_t r, s, xr, c;
13815e9af00Smrg   mpfr_exp_t exps, cancel = 0, expx;
13915e9af00Smrg   MPFR_ZIV_DECL (loop);
14015e9af00Smrg   MPFR_SAVE_EXPO_DECL (expo);
14115e9af00Smrg   MPFR_GROUP_DECL (group);
14215e9af00Smrg 
14315e9af00Smrg   MPFR_LOG_FUNC (
144*606004a0Smrg     ("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd_mode),
145*606004a0Smrg     ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec (y), mpfr_log_prec, y,
14615e9af00Smrg      inexact));
14715e9af00Smrg 
14815e9af00Smrg   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
14915e9af00Smrg     {
15015e9af00Smrg       if (MPFR_IS_NAN (x) || MPFR_IS_INF (x))
15115e9af00Smrg         {
15215e9af00Smrg           MPFR_SET_NAN (y);
15315e9af00Smrg           MPFR_RET_NAN;
15415e9af00Smrg         }
15515e9af00Smrg       else
15615e9af00Smrg         {
15715e9af00Smrg           MPFR_ASSERTD (MPFR_IS_ZERO (x));
15815e9af00Smrg           return mpfr_set_ui (y, 1, rnd_mode);
15915e9af00Smrg         }
16015e9af00Smrg     }
16115e9af00Smrg 
16215e9af00Smrg   MPFR_SAVE_EXPO_MARK (expo);
16315e9af00Smrg 
16415e9af00Smrg   /* cos(x) = 1-x^2/2 + ..., so error < 2^(2*EXP(x)-1) */
16515e9af00Smrg   expx = MPFR_GET_EXP (x);
16615e9af00Smrg   MPFR_SMALL_INPUT_AFTER_SAVE_EXPO (y, __gmpfr_one, -2 * expx,
16715e9af00Smrg                                     1, 0, rnd_mode, expo, {});
16815e9af00Smrg 
16915e9af00Smrg   /* Compute initial precision */
17015e9af00Smrg   precy = MPFR_PREC (y);
17115e9af00Smrg 
17215e9af00Smrg   if (precy >= MPFR_SINCOS_THRESHOLD)
17315e9af00Smrg     {
1746c7ec94dSmrg       inexact = mpfr_cos_fast (y, x, rnd_mode);
1756c7ec94dSmrg       goto end;
17615e9af00Smrg     }
17715e9af00Smrg 
17815e9af00Smrg   K0 = __gmpfr_isqrt (precy / 3);
17903f29264Smrg   m = precy + 2 * MPFR_INT_CEIL_LOG2 (precy) + 2 * K0 + 4;
18015e9af00Smrg 
18115e9af00Smrg   if (expx >= 3)
18215e9af00Smrg     {
18315e9af00Smrg       reduce = 1;
18415e9af00Smrg       /* As expx + m - 1 will silently be converted into mpfr_prec_t
18515e9af00Smrg          in the mpfr_init2 call, the assert below may be useful to
18615e9af00Smrg          avoid undefined behavior. */
18715e9af00Smrg       MPFR_ASSERTN (expx + m - 1 <= MPFR_PREC_MAX);
18815e9af00Smrg       mpfr_init2 (c, expx + m - 1);
18915e9af00Smrg       mpfr_init2 (xr, m);
19015e9af00Smrg     }
19115e9af00Smrg 
19215e9af00Smrg   MPFR_GROUP_INIT_2 (group, m, r, s);
19315e9af00Smrg   MPFR_ZIV_INIT (loop, m);
19415e9af00Smrg   for (;;)
19515e9af00Smrg     {
19615e9af00Smrg       /* If |x| >= 4, first reduce x cmod (2*Pi) into xr, using mpfr_remainder:
19715e9af00Smrg          let e = EXP(x) >= 3, and m the target precision:
19815e9af00Smrg          (1) c <- 2*Pi              [precision e+m-1, nearest]
19915e9af00Smrg          (2) xr <- remainder (x, c) [precision m, nearest]
20015e9af00Smrg          We have |c - 2*Pi| <= 1/2ulp(c) = 2^(3-e-m)
20115e9af00Smrg                  |xr - x - k c| <= 1/2ulp(xr) <= 2^(1-m)
20215e9af00Smrg                  |k| <= |x|/(2*Pi) <= 2^(e-2)
20315e9af00Smrg          Thus |xr - x - 2kPi| <= |k| |c - 2Pi| + 2^(1-m) <= 2^(2-m).
20415e9af00Smrg          It follows |cos(xr) - cos(x)| <= 2^(2-m). */
20515e9af00Smrg       if (reduce)
20615e9af00Smrg         {
20715e9af00Smrg           mpfr_const_pi (c, MPFR_RNDN);
20815e9af00Smrg           mpfr_mul_2ui (c, c, 1, MPFR_RNDN); /* 2Pi */
20915e9af00Smrg           mpfr_remainder (xr, x, c, MPFR_RNDN);
21015e9af00Smrg           if (MPFR_IS_ZERO(xr))
21115e9af00Smrg             goto ziv_next;
21215e9af00Smrg           /* now |xr| <= 4, thus r <= 16 below */
2134da858a9Smrg           mpfr_sqr (r, xr, MPFR_RNDU); /* err <= 1 ulp */
21415e9af00Smrg         }
21515e9af00Smrg       else
2164da858a9Smrg         mpfr_sqr (r, x, MPFR_RNDU); /* err <= 1 ulp */
21715e9af00Smrg 
21815e9af00Smrg       /* now |x| < 4 (or xr if reduce = 1), thus |r| <= 16 */
21915e9af00Smrg 
22015e9af00Smrg       /* we need |r| < 1/2 for mpfr_cos2_aux, i.e., EXP(r) - 2K <= -1 */
22115e9af00Smrg       K = K0 + 1 + MAX(0, MPFR_GET_EXP(r)) / 2;
22215e9af00Smrg       /* since K0 >= 0, if EXP(r) < 0, then K >= 1, thus EXP(r) - 2K <= -3;
22315e9af00Smrg          otherwise if EXP(r) >= 0, then K >= 1/2 + EXP(r)/2, thus
22415e9af00Smrg          EXP(r) - 2K <= -1 */
22515e9af00Smrg 
22615e9af00Smrg       MPFR_SET_EXP (r, MPFR_GET_EXP (r) - 2 * K); /* Can't overflow! */
22715e9af00Smrg 
22815e9af00Smrg       /* s <- 1 - r/2! + ... + (-1)^l r^l/(2l)! */
22915e9af00Smrg       l = mpfr_cos2_aux (s, r);
23015e9af00Smrg       /* l is the error bound in ulps on s */
23115e9af00Smrg       MPFR_SET_ONE (r);
23215e9af00Smrg       for (k = 0; k < K; k++)
23315e9af00Smrg         {
23415e9af00Smrg           mpfr_sqr (s, s, MPFR_RNDU);            /* err <= 2*olderr */
23515e9af00Smrg           MPFR_SET_EXP (s, MPFR_GET_EXP (s) + 1); /* Can't overflow */
23615e9af00Smrg           mpfr_sub (s, s, r, MPFR_RNDN);         /* err <= 4*olderr */
23715e9af00Smrg           if (MPFR_IS_ZERO(s))
23815e9af00Smrg             goto ziv_next;
23915e9af00Smrg           MPFR_ASSERTD (MPFR_GET_EXP (s) <= 1);
24015e9af00Smrg         }
24115e9af00Smrg 
24215e9af00Smrg       /* The absolute error on s is bounded by (2l+1/3)*2^(2K-m)
24315e9af00Smrg          2l+1/3 <= 2l+1.
24415e9af00Smrg          If |x| >= 4, we need to add 2^(2-m) for the argument reduction
24515e9af00Smrg          by 2Pi: if K = 0, this amounts to add 4 to 2l+1/3, i.e., to add
24615e9af00Smrg          2 to l; if K >= 1, this amounts to add 1 to 2*l+1/3. */
24715e9af00Smrg       l = 2 * l + 1;
24815e9af00Smrg       if (reduce)
24915e9af00Smrg         l += (K == 0) ? 4 : 1;
25015e9af00Smrg       k = MPFR_INT_CEIL_LOG2 (l) + 2 * K;
25115e9af00Smrg       /* now the error is bounded by 2^(k-m) = 2^(EXP(s)-err) */
25215e9af00Smrg 
25315e9af00Smrg       exps = MPFR_GET_EXP (s);
25415e9af00Smrg       if (MPFR_LIKELY (MPFR_CAN_ROUND (s, exps + m - k, precy, rnd_mode)))
25515e9af00Smrg         break;
25615e9af00Smrg 
25715e9af00Smrg       if (MPFR_UNLIKELY (exps == 1))
25815e9af00Smrg         /* s = 1 or -1, and except x=0 which was already checked above,
25915e9af00Smrg            cos(x) cannot be 1 or -1, so we can round if the error is less
26015e9af00Smrg            than 2^(-precy) for directed rounding, or 2^(-precy-1) for rounding
26115e9af00Smrg            to nearest. */
26215e9af00Smrg         {
26315e9af00Smrg           if (m > k && (m - k >= precy + (rnd_mode == MPFR_RNDN)))
26415e9af00Smrg             {
26515e9af00Smrg               /* If round to nearest or away, result is s = 1 or -1,
266*606004a0Smrg                  otherwise it is round(nexttoward (s, 0)). However, in order
267*606004a0Smrg                  to have the inexact flag correctly set below, we set |s| to
26815e9af00Smrg                  1 - 2^(-m) in all cases. */
26915e9af00Smrg               mpfr_nexttozero (s);
27015e9af00Smrg               break;
27115e9af00Smrg             }
27215e9af00Smrg         }
27315e9af00Smrg 
27415e9af00Smrg       if (exps < cancel)
27515e9af00Smrg         {
27615e9af00Smrg           m += cancel - exps;
27715e9af00Smrg           cancel = exps;
27815e9af00Smrg         }
27915e9af00Smrg 
28015e9af00Smrg     ziv_next:
28115e9af00Smrg       MPFR_ZIV_NEXT (loop, m);
28215e9af00Smrg       MPFR_GROUP_REPREC_2 (group, m, r, s);
28315e9af00Smrg       if (reduce)
28415e9af00Smrg         {
28515e9af00Smrg           mpfr_set_prec (xr, m);
28615e9af00Smrg           mpfr_set_prec (c, expx + m - 1);
28715e9af00Smrg         }
28815e9af00Smrg     }
28915e9af00Smrg   MPFR_ZIV_FREE (loop);
29015e9af00Smrg   inexact = mpfr_set (y, s, rnd_mode);
29115e9af00Smrg   MPFR_GROUP_CLEAR (group);
29215e9af00Smrg   if (reduce)
29315e9af00Smrg     {
29415e9af00Smrg       mpfr_clear (xr);
29515e9af00Smrg       mpfr_clear (c);
29615e9af00Smrg     }
29715e9af00Smrg 
2986c7ec94dSmrg  end:
29915e9af00Smrg   MPFR_SAVE_EXPO_FREE (expo);
30015e9af00Smrg   return mpfr_check_range (y, inexact, rnd_mode);
30115e9af00Smrg }
302