dist/src/cos.c

15e9af00Smrg/* mpfr_cos -- cosine of a floating-point number
15e9af00Smrg
*606004a0SmrgCopyright 2001-2023 Free Software Foundation, Inc.
6c7ec94dSmrgContributed by the AriC and Caramba projects, INRIA.
15e9af00Smrg
15e9af00SmrgThis file is part of the GNU MPFR Library.
15e9af00Smrg
15e9af00SmrgThe GNU MPFR Library is free software; you can redistribute it and/or modify
15e9af00Smrgit under the terms of the GNU Lesser General Public License as published by
15e9af00Smrgthe Free Software Foundation; either version 3 of the License, or (at your
15e9af00Smrgoption) any later version.
15e9af00Smrg
15e9af00SmrgThe GNU MPFR Library is distributed in the hope that it will be useful, but
15e9af00SmrgWITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15e9af00Smrgor FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15e9af00SmrgLicense for more details.
15e9af00Smrg
15e9af00SmrgYou should have received a copy of the GNU Lesser General Public License
15e9af00Smrgalong with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
4da858a9Smrghttps://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
15e9af00Smrg51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
15e9af00Smrg
15e9af00Smrg#define MPFR_NEED_LONGLONG_H
15e9af00Smrg#include "mpfr-impl.h"
15e9af00Smrg
15e9af00Smrgstatic int
15e9af00Smrgmpfr_cos_fast (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
15e9af00Smrg{
15e9af00Smrg  int inex;
15e9af00Smrg
15e9af00Smrg  inex = mpfr_sincos_fast (NULL, y, x, rnd_mode);
15e9af00Smrg  inex = inex >> 2; /* 0: exact, 1: rounded up, 2: rounded down */
15e9af00Smrg  return (inex == 2) ? -1 : inex;
15e9af00Smrg}
15e9af00Smrg
15e9af00Smrg/* f <- 1 - r/2! + r^2/4! + ... + (-1)^l r^l/(2l)! + ...
15e9af00Smrg   Assumes |r| < 1/2, and f, r have the same precision.
15e9af00Smrg   Returns e such that the error on f is bounded by 2^e ulps.
15e9af00Smrg*/
15e9af00Smrgstatic int
15e9af00Smrgmpfr_cos2_aux (mpfr_ptr f, mpfr_srcptr r)
15e9af00Smrg{
15e9af00Smrg  mpz_t x, t, s;
15e9af00Smrg  mpfr_exp_t ex, l, m;
15e9af00Smrg  mpfr_prec_t p, q;
15e9af00Smrg  unsigned long i, maxi, imax;
15e9af00Smrg
15e9af00Smrg  MPFR_ASSERTD(mpfr_get_exp (r) <= -1);
15e9af00Smrg
15e9af00Smrg  /* compute minimal i such that i*(i+1) does not fit in an unsigned long,
15e9af00Smrg     assuming that there are no padding bits. */
03f29264Smrg  maxi = 1UL << (sizeof(unsigned long) * CHAR_BIT / 2);
15e9af00Smrg  if (maxi * (maxi / 2) == 0) /* test checked at compile time */
15e9af00Smrg    {
15e9af00Smrg      /* can occur only when there are padding bits. */
15e9af00Smrg      /* maxi * (maxi-1) is representable iff maxi * (maxi / 2) != 0 */
15e9af00Smrg      do
15e9af00Smrg        maxi /= 2;
15e9af00Smrg      while (maxi * (maxi / 2) == 0);
15e9af00Smrg    }
15e9af00Smrg
15e9af00Smrg  mpz_init (x);
15e9af00Smrg  mpz_init (s);
15e9af00Smrg  mpz_init (t);
15e9af00Smrg  ex = mpfr_get_z_2exp (x, r); /* r = x*2^ex */
15e9af00Smrg
4da858a9Smrg  /* Remove trailing zeroes.
4da858a9Smrg     Since x comes from a regular MPFR number, due to the constraints on the
4da858a9Smrg     exponent and the precision, there can be no integer overflow below. */
15e9af00Smrg  l = mpz_scan1 (x, 0);
15e9af00Smrg  ex += l;
15e9af00Smrg  mpz_fdiv_q_2exp (x, x, l);
15e9af00Smrg
15e9af00Smrg  /* since |r| < 1, r = x*2^ex, and x is an integer, necessarily ex < 0 */
15e9af00Smrg
*606004a0Smrg  p = mpfr_get_prec (f); /* same as r */
15e9af00Smrg  /* bound for number of iterations */
15e9af00Smrg  imax = p / (-mpfr_get_exp (r));
15e9af00Smrg  imax += (imax == 0);
15e9af00Smrg  q = 2 * MPFR_INT_CEIL_LOG2(imax) + 4; /* bound for (3l)^2 */
15e9af00Smrg
15e9af00Smrg  mpz_set_ui (s, 1); /* initialize sum with 1 */
15e9af00Smrg  mpz_mul_2exp (s, s, p + q); /* scale all values by 2^(p+q) */
15e9af00Smrg  mpz_set (t, s); /* invariant: t is previous term */
15e9af00Smrg  for (i = 1; (m = mpz_sizeinbase (t, 2)) >= q; i += 2)
15e9af00Smrg    {
15e9af00Smrg      /* adjust precision of x to that of t */
15e9af00Smrg      l = mpz_sizeinbase (x, 2);
15e9af00Smrg      if (l > m)
15e9af00Smrg        {
15e9af00Smrg          l -= m;
15e9af00Smrg          mpz_fdiv_q_2exp (x, x, l);
15e9af00Smrg          ex += l;
15e9af00Smrg        }
15e9af00Smrg      /* multiply t by r */
15e9af00Smrg      mpz_mul (t, t, x);
15e9af00Smrg      mpz_fdiv_q_2exp (t, t, -ex);
15e9af00Smrg      /* divide t by i*(i+1) */
15e9af00Smrg      if (i < maxi)
15e9af00Smrg        mpz_fdiv_q_ui (t, t, i * (i + 1));
15e9af00Smrg      else
15e9af00Smrg        {
15e9af00Smrg          mpz_fdiv_q_ui (t, t, i);
15e9af00Smrg          mpz_fdiv_q_ui (t, t, i + 1);
15e9af00Smrg        }
15e9af00Smrg      /* if m is the (current) number of bits of t, we can consider that
15e9af00Smrg         all operations on t so far had precision >= m, so we can prove
15e9af00Smrg         by induction that the relative error on t is of the form
15e9af00Smrg         (1+u)^(3l)-1, where |u| <= 2^(-m), and l=(i+1)/2 is the # of loops.
15e9af00Smrg         Since |(1+x^2)^(1/x) - 1| <= 4x/3 for |x| <= 1/2,
15e9af00Smrg         for |u| <= 1/(3l)^2, the absolute error is bounded by
15e9af00Smrg         4/3*(3l)*2^(-m)*t <= 4*l since |t| < 2^m.
15e9af00Smrg         Therefore the error on s is bounded by 2*l*(l+1). */
15e9af00Smrg      /* add or subtract to s */
15e9af00Smrg      if (i % 4 == 1)
15e9af00Smrg        mpz_sub (s, s, t);
15e9af00Smrg      else
15e9af00Smrg        mpz_add (s, s, t);
15e9af00Smrg    }
15e9af00Smrg
15e9af00Smrg  mpfr_set_z (f, s, MPFR_RNDN);
15e9af00Smrg  mpfr_div_2ui (f, f, p + q, MPFR_RNDN);
15e9af00Smrg
15e9af00Smrg  mpz_clear (x);
15e9af00Smrg  mpz_clear (s);
15e9af00Smrg  mpz_clear (t);
15e9af00Smrg
15e9af00Smrg  l = (i - 1) / 2; /* number of iterations */
15e9af00Smrg  return 2 * MPFR_INT_CEIL_LOG2 (l + 1) + 1; /* bound is 2l(l+1) */
15e9af00Smrg}
15e9af00Smrg
/* Set y to the cosine of x, rounded in the direction rnd_mode.

   Singular inputs: if x is NaN or an infinity, y is set to NaN and the
   function leaves through MPFR_RET_NAN; if x is zero, the result is that
   of mpfr_set_ui (y, 1, rnd_mode).

   Regular inputs:
   - MPFR_SMALL_INPUT_AFTER_SAVE_EXPO is first given 1 as an approximation,
     with an error bound derived from cos(x) = 1 - x^2/2 + ... (presumably
     it finishes the computation itself when that is enough to round;
     the macro is defined outside this file);
   - if PREC(y) >= MPFR_SINCOS_THRESHOLD, mpfr_cos_fast does the work;
   - otherwise a Ziv loop runs at working precision m:
       * if EXP(x) >= 3, x is reduced modulo 2*Pi into xr (mpfr_remainder);
       * r = x^2 (or xr^2) is divided by 2^(2K) by lowering its exponent,
         so that mpfr_cos2_aux can sum the series in r;
       * s <- 2*s^2 - 1 is then applied K times (this matches the identity
         cos(2t) = 2*cos(t)^2 - 1) to undo the scaling;
       * the loop ends when s can be rounded correctly to PREC(y) bits,
         otherwise m is increased and everything is recomputed.

   The return value is that of mpfr_check_range (y, inexact, rnd_mode),
   where inexact comes from mpfr_cos_fast or from the final mpfr_set. */
15e9af00Smrgint
15e9af00Smrgmpfr_cos (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
15e9af00Smrg{
15e9af00Smrg  mpfr_prec_t K0, K, precy, m, k, l;
15e9af00Smrg  int inexact, reduce = 0;
15e9af00Smrg  mpfr_t r, s, xr, c;
15e9af00Smrg  mpfr_exp_t exps, cancel = 0, expx;
15e9af00Smrg  MPFR_ZIV_DECL (loop);
15e9af00Smrg  MPFR_SAVE_EXPO_DECL (expo);
15e9af00Smrg  MPFR_GROUP_DECL (group);
15e9af00Smrg
15e9af00Smrg  MPFR_LOG_FUNC (
*606004a0Smrg    ("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd_mode),
*606004a0Smrg    ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec (y), mpfr_log_prec, y,
15e9af00Smrg     inexact));
15e9af00Smrg
  /* NaN, infinities and zero are dealt with before anything is saved or
     allocated, so these paths return directly. */
15e9af00Smrg  if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
15e9af00Smrg    {
15e9af00Smrg      if (MPFR_IS_NAN (x) || MPFR_IS_INF (x))
15e9af00Smrg        {
15e9af00Smrg          MPFR_SET_NAN (y);
15e9af00Smrg          MPFR_RET_NAN;
15e9af00Smrg        }
15e9af00Smrg      else
15e9af00Smrg        {
15e9af00Smrg          MPFR_ASSERTD (MPFR_IS_ZERO (x));
15e9af00Smrg          return mpfr_set_ui (y, 1, rnd_mode);
15e9af00Smrg        }
15e9af00Smrg    }
15e9af00Smrg
15e9af00Smrg  MPFR_SAVE_EXPO_MARK (expo);
15e9af00Smrg
15e9af00Smrg  /* cos(x) = 1-x^2/2 + ..., so error < 2^(2*EXP(x)-1) */
15e9af00Smrg  expx = MPFR_GET_EXP (x);
15e9af00Smrg  MPFR_SMALL_INPUT_AFTER_SAVE_EXPO (y, __gmpfr_one, -2 * expx,
15e9af00Smrg                                    1, 0, rnd_mode, expo, {});
15e9af00Smrg
15e9af00Smrg  /* Compute initial precision */
15e9af00Smrg  precy = MPFR_PREC (y);
15e9af00Smrg
  /* At or above the threshold the whole computation is delegated to
     mpfr_cos_fast; nothing has been allocated yet, so jumping to the
     final range check is enough. */
15e9af00Smrg  if (precy >= MPFR_SINCOS_THRESHOLD)
15e9af00Smrg    {
6c7ec94dSmrg      inexact = mpfr_cos_fast (y, x, rnd_mode);
6c7ec94dSmrg      goto end;
15e9af00Smrg    }
15e9af00Smrg
  /* K0 is the base term of K, the number of doubling steps chosen inside
     the loop; m is the initial working precision, i.e. the target
     precision plus guard bits that grow with log2(precy) and K0. */
15e9af00Smrg  K0 = __gmpfr_isqrt (precy / 3);
03f29264Smrg  m = precy + 2 * MPFR_INT_CEIL_LOG2 (precy) + 2 * K0 + 4;
15e9af00Smrg
  /* EXP(x) >= 3 means |x| >= 4: an argument reduction modulo 2*Pi is
     needed; c and xr are only initialized (and later cleared) in that
     case. */
15e9af00Smrg  if (expx >= 3)
15e9af00Smrg    {
15e9af00Smrg      reduce = 1;
15e9af00Smrg      /* As expx + m - 1 will silently be converted into mpfr_prec_t
15e9af00Smrg         in the mpfr_init2 call, the assert below may be useful to
15e9af00Smrg         avoid undefined behavior. */
15e9af00Smrg      MPFR_ASSERTN (expx + m - 1 <= MPFR_PREC_MAX);
15e9af00Smrg      mpfr_init2 (c, expx + m - 1);
15e9af00Smrg      mpfr_init2 (xr, m);
15e9af00Smrg    }
15e9af00Smrg
15e9af00Smrg  MPFR_GROUP_INIT_2 (group, m, r, s);
15e9af00Smrg  MPFR_ZIV_INIT (loop, m);
15e9af00Smrg  for (;;)
15e9af00Smrg    {
15e9af00Smrg      /* If |x| >= 4, first reduce x cmod (2*Pi) into xr, using mpfr_remainder:
15e9af00Smrg         let e = EXP(x) >= 3, and m the target precision:
15e9af00Smrg         (1) c <- 2*Pi              [precision e+m-1, nearest]
15e9af00Smrg         (2) xr <- remainder (x, c) [precision m, nearest]
15e9af00Smrg         We have |c - 2*Pi| <= 1/2ulp(c) = 2^(3-e-m)
15e9af00Smrg                 |xr - x - k c| <= 1/2ulp(xr) <= 2^(1-m)
15e9af00Smrg                 |k| <= |x|/(2*Pi) <= 2^(e-2)
15e9af00Smrg         Thus |xr - x - 2kPi| <= |k| |c - 2Pi| + 2^(1-m) <= 2^(2-m).
15e9af00Smrg         It follows |cos(xr) - cos(x)| <= 2^(2-m). */
15e9af00Smrg      if (reduce)
15e9af00Smrg        {
15e9af00Smrg          mpfr_const_pi (c, MPFR_RNDN);
15e9af00Smrg          mpfr_mul_2ui (c, c, 1, MPFR_RNDN); /* 2Pi */
15e9af00Smrg          mpfr_remainder (xr, x, c, MPFR_RNDN);
          /* a zero remainder at this precision cannot be used: retry with
             a larger working precision */
15e9af00Smrg          if (MPFR_IS_ZERO(xr))
15e9af00Smrg            goto ziv_next;
15e9af00Smrg          /* now |xr| <= 4, thus r <= 16 below */
4da858a9Smrg          mpfr_sqr (r, xr, MPFR_RNDU); /* err <= 1 ulp */
15e9af00Smrg        }
15e9af00Smrg      else
4da858a9Smrg        mpfr_sqr (r, x, MPFR_RNDU); /* err <= 1 ulp */
15e9af00Smrg
15e9af00Smrg      /* now |x| < 4 (or xr if reduce = 1), thus |r| <= 16 */
15e9af00Smrg
15e9af00Smrg      /* we need |r| < 1/2 for mpfr_cos2_aux, i.e., EXP(r) - 2K <= -1 */
15e9af00Smrg      K = K0 + 1 + MAX(0, MPFR_GET_EXP(r)) / 2;
15e9af00Smrg      /* since K0 >= 0, if EXP(r) < 0, then K >= 1, thus EXP(r) - 2K <= -3;
15e9af00Smrg         otherwise if EXP(r) >= 0, then K >= 1/2 + EXP(r)/2, thus
15e9af00Smrg         EXP(r) - 2K <= -1 */
15e9af00Smrg
      /* r <- r / 2^(2K), done exactly by lowering the exponent */
15e9af00Smrg      MPFR_SET_EXP (r, MPFR_GET_EXP (r) - 2 * K); /* Can't overflow! */
15e9af00Smrg
15e9af00Smrg      /* s <- 1 - r/2! + ... + (-1)^l r^l/(2l)! */
15e9af00Smrg      l = mpfr_cos2_aux (s, r);
15e9af00Smrg      /* l is the error bound in ulps on s */
      /* r is no longer needed as the scaled square: it now holds the
         constant 1 subtracted in each doubling step s <- 2*s^2 - 1 */
15e9af00Smrg      MPFR_SET_ONE (r);
15e9af00Smrg      for (k = 0; k < K; k++)
15e9af00Smrg        {
15e9af00Smrg          mpfr_sqr (s, s, MPFR_RNDU);            /* err <= 2*olderr */
15e9af00Smrg          MPFR_SET_EXP (s, MPFR_GET_EXP (s) + 1); /* Can't overflow */
15e9af00Smrg          mpfr_sub (s, s, r, MPFR_RNDN);         /* err <= 4*olderr */
          /* s became exactly zero: restart at a higher precision */
15e9af00Smrg          if (MPFR_IS_ZERO(s))
15e9af00Smrg            goto ziv_next;
15e9af00Smrg          MPFR_ASSERTD (MPFR_GET_EXP (s) <= 1);
15e9af00Smrg        }
15e9af00Smrg
15e9af00Smrg      /* The absolute error on s is bounded by (2l+1/3)*2^(2K-m)
15e9af00Smrg         2l+1/3 <= 2l+1.
15e9af00Smrg         If |x| >= 4, we need to add 2^(2-m) for the argument reduction
15e9af00Smrg         by 2Pi: if K = 0, this amounts to add 4 to 2l+1/3, i.e., to add
15e9af00Smrg         2 to l; if K >= 1, this amounts to add 1 to 2*l+1/3. */
      /* NOTE(review): K is computed above as K0 + 1 + (non-negative term),
         so with K0 >= 0 the K == 0 alternative below looks unreachable and
         only the "+ 1" case applies -- confirm before relying on it. */
15e9af00Smrg      l = 2 * l + 1;
15e9af00Smrg      if (reduce)
15e9af00Smrg        l += (K == 0) ? 4 : 1;
15e9af00Smrg      k = MPFR_INT_CEIL_LOG2 (l) + 2 * K;
15e9af00Smrg      /* now the error is bounded by 2^(k-m) = 2^(EXP(s)-err) */
15e9af00Smrg
15e9af00Smrg      exps = MPFR_GET_EXP (s);
15e9af00Smrg      if (MPFR_LIKELY (MPFR_CAN_ROUND (s, exps + m - k, precy, rnd_mode)))
15e9af00Smrg        break;
15e9af00Smrg
15e9af00Smrg      if (MPFR_UNLIKELY (exps == 1))
15e9af00Smrg        /* s = 1 or -1, and except x=0 which was already checked above,
15e9af00Smrg           cos(x) cannot be 1 or -1, so we can round if the error is less
15e9af00Smrg           than 2^(-precy) for directed rounding, or 2^(-precy-1) for rounding
15e9af00Smrg           to nearest. */
15e9af00Smrg        {
15e9af00Smrg          if (m > k && (m - k >= precy + (rnd_mode == MPFR_RNDN)))
15e9af00Smrg            {
15e9af00Smrg              /* If round to nearest or away, result is s = 1 or -1,
*606004a0Smrg                 otherwise it is round(nexttoward (s, 0)). However, in order
*606004a0Smrg                 to have the inexact flag correctly set below, we set |s| to
15e9af00Smrg                 1 - 2^(-m) in all cases. */
15e9af00Smrg              mpfr_nexttozero (s);
15e9af00Smrg              break;
15e9af00Smrg            }
15e9af00Smrg        }
15e9af00Smrg
      /* cancel records the smallest exponent of s seen so far (starting
         from 0); when s comes out with a smaller exponent, the difference
         is added to the working precision before the next Ziv step. */
15e9af00Smrg      if (exps < cancel)
15e9af00Smrg        {
15e9af00Smrg          m += cancel - exps;
15e9af00Smrg          cancel = exps;
15e9af00Smrg        }
15e9af00Smrg
      /* Common retry path, also reached by the two gotos above: update m
         through MPFR_ZIV_NEXT and give every temporary its new precision
         (r and s get m, xr gets m, c gets expx + m - 1). */
15e9af00Smrg    ziv_next:
15e9af00Smrg      MPFR_ZIV_NEXT (loop, m);
15e9af00Smrg      MPFR_GROUP_REPREC_2 (group, m, r, s);
15e9af00Smrg      if (reduce)
15e9af00Smrg        {
15e9af00Smrg          mpfr_set_prec (xr, m);
15e9af00Smrg          mpfr_set_prec (c, expx + m - 1);
15e9af00Smrg        }
15e9af00Smrg    }
15e9af00Smrg  MPFR_ZIV_FREE (loop);
  /* final rounding of the approximation into y; s must still be alive
     here, so the group is cleared only afterwards */
15e9af00Smrg  inexact = mpfr_set (y, s, rnd_mode);
15e9af00Smrg  MPFR_GROUP_CLEAR (group);
15e9af00Smrg  if (reduce)
15e9af00Smrg    {
15e9af00Smrg      mpfr_clear (xr);
15e9af00Smrg      mpfr_clear (c);
15e9af00Smrg    }
15e9af00Smrg
  /* shared exit for the mpfr_cos_fast path and the Ziv-loop path */
6c7ec94dSmrg end:
15e9af00Smrg  MPFR_SAVE_EXPO_FREE (expo);
15e9af00Smrg  return mpfr_check_range (y, inexact, rnd_mode);
15e9af00Smrg}