115e9af00Smrg /* mpfr_cos -- cosine of a floating-point number
215e9af00Smrg
3*606004a0Smrg Copyright 2001-2023 Free Software Foundation, Inc.
46c7ec94dSmrg Contributed by the AriC and Caramba projects, INRIA.
515e9af00Smrg
615e9af00Smrg This file is part of the GNU MPFR Library.
715e9af00Smrg
815e9af00Smrg The GNU MPFR Library is free software; you can redistribute it and/or modify
915e9af00Smrg it under the terms of the GNU Lesser General Public License as published by
1015e9af00Smrg the Free Software Foundation; either version 3 of the License, or (at your
1115e9af00Smrg option) any later version.
1215e9af00Smrg
1315e9af00Smrg The GNU MPFR Library is distributed in the hope that it will be useful, but
1415e9af00Smrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1515e9af00Smrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1615e9af00Smrg License for more details.
1715e9af00Smrg
1815e9af00Smrg You should have received a copy of the GNU Lesser General Public License
1915e9af00Smrg along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
204da858a9Smrg https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
2115e9af00Smrg 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
2215e9af00Smrg
2315e9af00Smrg #define MPFR_NEED_LONGLONG_H
2415e9af00Smrg #include "mpfr-impl.h"
2515e9af00Smrg
2615e9af00Smrg static int
mpfr_cos_fast(mpfr_ptr y,mpfr_srcptr x,mpfr_rnd_t rnd_mode)2715e9af00Smrg mpfr_cos_fast (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
2815e9af00Smrg {
2915e9af00Smrg int inex;
3015e9af00Smrg
3115e9af00Smrg inex = mpfr_sincos_fast (NULL, y, x, rnd_mode);
3215e9af00Smrg inex = inex >> 2; /* 0: exact, 1: rounded up, 2: rounded down */
3315e9af00Smrg return (inex == 2) ? -1 : inex;
3415e9af00Smrg }
3515e9af00Smrg
3615e9af00Smrg /* f <- 1 - r/2! + r^2/4! + ... + (-1)^l r^l/(2l)! + ...
3715e9af00Smrg Assumes |r| < 1/2, and f, r have the same precision.
3815e9af00Smrg Returns e such that the error on f is bounded by 2^e ulps.
3915e9af00Smrg */
4015e9af00Smrg static int
mpfr_cos2_aux(mpfr_ptr f,mpfr_srcptr r)4115e9af00Smrg mpfr_cos2_aux (mpfr_ptr f, mpfr_srcptr r)
4215e9af00Smrg {
4315e9af00Smrg mpz_t x, t, s;
4415e9af00Smrg mpfr_exp_t ex, l, m;
4515e9af00Smrg mpfr_prec_t p, q;
4615e9af00Smrg unsigned long i, maxi, imax;
4715e9af00Smrg
4815e9af00Smrg MPFR_ASSERTD(mpfr_get_exp (r) <= -1);
4915e9af00Smrg
5015e9af00Smrg /* compute minimal i such that i*(i+1) does not fit in an unsigned long,
5115e9af00Smrg assuming that there are no padding bits. */
5203f29264Smrg maxi = 1UL << (sizeof(unsigned long) * CHAR_BIT / 2);
5315e9af00Smrg if (maxi * (maxi / 2) == 0) /* test checked at compile time */
5415e9af00Smrg {
5515e9af00Smrg /* can occur only when there are padding bits. */
5615e9af00Smrg /* maxi * (maxi-1) is representable iff maxi * (maxi / 2) != 0 */
5715e9af00Smrg do
5815e9af00Smrg maxi /= 2;
5915e9af00Smrg while (maxi * (maxi / 2) == 0);
6015e9af00Smrg }
6115e9af00Smrg
6215e9af00Smrg mpz_init (x);
6315e9af00Smrg mpz_init (s);
6415e9af00Smrg mpz_init (t);
6515e9af00Smrg ex = mpfr_get_z_2exp (x, r); /* r = x*2^ex */
6615e9af00Smrg
674da858a9Smrg /* Remove trailing zeroes.
684da858a9Smrg Since x comes from a regular MPFR number, due to the constraints on the
694da858a9Smrg exponent and the precision, there can be no integer overflow below. */
7015e9af00Smrg l = mpz_scan1 (x, 0);
7115e9af00Smrg ex += l;
7215e9af00Smrg mpz_fdiv_q_2exp (x, x, l);
7315e9af00Smrg
7415e9af00Smrg /* since |r| < 1, r = x*2^ex, and x is an integer, necessarily ex < 0 */
7515e9af00Smrg
76*606004a0Smrg p = mpfr_get_prec (f); /* same as r */
7715e9af00Smrg /* bound for number of iterations */
7815e9af00Smrg imax = p / (-mpfr_get_exp (r));
7915e9af00Smrg imax += (imax == 0);
8015e9af00Smrg q = 2 * MPFR_INT_CEIL_LOG2(imax) + 4; /* bound for (3l)^2 */
8115e9af00Smrg
8215e9af00Smrg mpz_set_ui (s, 1); /* initialize sum with 1 */
8315e9af00Smrg mpz_mul_2exp (s, s, p + q); /* scale all values by 2^(p+q) */
8415e9af00Smrg mpz_set (t, s); /* invariant: t is previous term */
8515e9af00Smrg for (i = 1; (m = mpz_sizeinbase (t, 2)) >= q; i += 2)
8615e9af00Smrg {
8715e9af00Smrg /* adjust precision of x to that of t */
8815e9af00Smrg l = mpz_sizeinbase (x, 2);
8915e9af00Smrg if (l > m)
9015e9af00Smrg {
9115e9af00Smrg l -= m;
9215e9af00Smrg mpz_fdiv_q_2exp (x, x, l);
9315e9af00Smrg ex += l;
9415e9af00Smrg }
9515e9af00Smrg /* multiply t by r */
9615e9af00Smrg mpz_mul (t, t, x);
9715e9af00Smrg mpz_fdiv_q_2exp (t, t, -ex);
9815e9af00Smrg /* divide t by i*(i+1) */
9915e9af00Smrg if (i < maxi)
10015e9af00Smrg mpz_fdiv_q_ui (t, t, i * (i + 1));
10115e9af00Smrg else
10215e9af00Smrg {
10315e9af00Smrg mpz_fdiv_q_ui (t, t, i);
10415e9af00Smrg mpz_fdiv_q_ui (t, t, i + 1);
10515e9af00Smrg }
10615e9af00Smrg /* if m is the (current) number of bits of t, we can consider that
10715e9af00Smrg all operations on t so far had precision >= m, so we can prove
10815e9af00Smrg by induction that the relative error on t is of the form
10915e9af00Smrg (1+u)^(3l)-1, where |u| <= 2^(-m), and l=(i+1)/2 is the # of loops.
11015e9af00Smrg Since |(1+x^2)^(1/x) - 1| <= 4x/3 for |x| <= 1/2,
11115e9af00Smrg for |u| <= 1/(3l)^2, the absolute error is bounded by
11215e9af00Smrg 4/3*(3l)*2^(-m)*t <= 4*l since |t| < 2^m.
11315e9af00Smrg Therefore the error on s is bounded by 2*l*(l+1). */
11415e9af00Smrg /* add or subtract to s */
11515e9af00Smrg if (i % 4 == 1)
11615e9af00Smrg mpz_sub (s, s, t);
11715e9af00Smrg else
11815e9af00Smrg mpz_add (s, s, t);
11915e9af00Smrg }
12015e9af00Smrg
12115e9af00Smrg mpfr_set_z (f, s, MPFR_RNDN);
12215e9af00Smrg mpfr_div_2ui (f, f, p + q, MPFR_RNDN);
12315e9af00Smrg
12415e9af00Smrg mpz_clear (x);
12515e9af00Smrg mpz_clear (s);
12615e9af00Smrg mpz_clear (t);
12715e9af00Smrg
12815e9af00Smrg l = (i - 1) / 2; /* number of iterations */
12915e9af00Smrg return 2 * MPFR_INT_CEIL_LOG2 (l + 1) + 1; /* bound is 2l(l+1) */
13015e9af00Smrg }
13115e9af00Smrg
13215e9af00Smrg int
mpfr_cos(mpfr_ptr y,mpfr_srcptr x,mpfr_rnd_t rnd_mode)13315e9af00Smrg mpfr_cos (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
13415e9af00Smrg {
13515e9af00Smrg mpfr_prec_t K0, K, precy, m, k, l;
13615e9af00Smrg int inexact, reduce = 0;
13715e9af00Smrg mpfr_t r, s, xr, c;
13815e9af00Smrg mpfr_exp_t exps, cancel = 0, expx;
13915e9af00Smrg MPFR_ZIV_DECL (loop);
14015e9af00Smrg MPFR_SAVE_EXPO_DECL (expo);
14115e9af00Smrg MPFR_GROUP_DECL (group);
14215e9af00Smrg
14315e9af00Smrg MPFR_LOG_FUNC (
144*606004a0Smrg ("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd_mode),
145*606004a0Smrg ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec (y), mpfr_log_prec, y,
14615e9af00Smrg inexact));
14715e9af00Smrg
14815e9af00Smrg if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
14915e9af00Smrg {
15015e9af00Smrg if (MPFR_IS_NAN (x) || MPFR_IS_INF (x))
15115e9af00Smrg {
15215e9af00Smrg MPFR_SET_NAN (y);
15315e9af00Smrg MPFR_RET_NAN;
15415e9af00Smrg }
15515e9af00Smrg else
15615e9af00Smrg {
15715e9af00Smrg MPFR_ASSERTD (MPFR_IS_ZERO (x));
15815e9af00Smrg return mpfr_set_ui (y, 1, rnd_mode);
15915e9af00Smrg }
16015e9af00Smrg }
16115e9af00Smrg
16215e9af00Smrg MPFR_SAVE_EXPO_MARK (expo);
16315e9af00Smrg
16415e9af00Smrg /* cos(x) = 1-x^2/2 + ..., so error < 2^(2*EXP(x)-1) */
16515e9af00Smrg expx = MPFR_GET_EXP (x);
16615e9af00Smrg MPFR_SMALL_INPUT_AFTER_SAVE_EXPO (y, __gmpfr_one, -2 * expx,
16715e9af00Smrg 1, 0, rnd_mode, expo, {});
16815e9af00Smrg
16915e9af00Smrg /* Compute initial precision */
17015e9af00Smrg precy = MPFR_PREC (y);
17115e9af00Smrg
17215e9af00Smrg if (precy >= MPFR_SINCOS_THRESHOLD)
17315e9af00Smrg {
1746c7ec94dSmrg inexact = mpfr_cos_fast (y, x, rnd_mode);
1756c7ec94dSmrg goto end;
17615e9af00Smrg }
17715e9af00Smrg
17815e9af00Smrg K0 = __gmpfr_isqrt (precy / 3);
17903f29264Smrg m = precy + 2 * MPFR_INT_CEIL_LOG2 (precy) + 2 * K0 + 4;
18015e9af00Smrg
18115e9af00Smrg if (expx >= 3)
18215e9af00Smrg {
18315e9af00Smrg reduce = 1;
18415e9af00Smrg /* As expx + m - 1 will silently be converted into mpfr_prec_t
18515e9af00Smrg in the mpfr_init2 call, the assert below may be useful to
18615e9af00Smrg avoid undefined behavior. */
18715e9af00Smrg MPFR_ASSERTN (expx + m - 1 <= MPFR_PREC_MAX);
18815e9af00Smrg mpfr_init2 (c, expx + m - 1);
18915e9af00Smrg mpfr_init2 (xr, m);
19015e9af00Smrg }
19115e9af00Smrg
19215e9af00Smrg MPFR_GROUP_INIT_2 (group, m, r, s);
19315e9af00Smrg MPFR_ZIV_INIT (loop, m);
19415e9af00Smrg for (;;)
19515e9af00Smrg {
19615e9af00Smrg /* If |x| >= 4, first reduce x cmod (2*Pi) into xr, using mpfr_remainder:
19715e9af00Smrg let e = EXP(x) >= 3, and m the target precision:
19815e9af00Smrg (1) c <- 2*Pi [precision e+m-1, nearest]
19915e9af00Smrg (2) xr <- remainder (x, c) [precision m, nearest]
20015e9af00Smrg We have |c - 2*Pi| <= 1/2ulp(c) = 2^(3-e-m)
20115e9af00Smrg |xr - x - k c| <= 1/2ulp(xr) <= 2^(1-m)
20215e9af00Smrg |k| <= |x|/(2*Pi) <= 2^(e-2)
20315e9af00Smrg Thus |xr - x - 2kPi| <= |k| |c - 2Pi| + 2^(1-m) <= 2^(2-m).
20415e9af00Smrg It follows |cos(xr) - cos(x)| <= 2^(2-m). */
20515e9af00Smrg if (reduce)
20615e9af00Smrg {
20715e9af00Smrg mpfr_const_pi (c, MPFR_RNDN);
20815e9af00Smrg mpfr_mul_2ui (c, c, 1, MPFR_RNDN); /* 2Pi */
20915e9af00Smrg mpfr_remainder (xr, x, c, MPFR_RNDN);
21015e9af00Smrg if (MPFR_IS_ZERO(xr))
21115e9af00Smrg goto ziv_next;
21215e9af00Smrg /* now |xr| <= 4, thus r <= 16 below */
2134da858a9Smrg mpfr_sqr (r, xr, MPFR_RNDU); /* err <= 1 ulp */
21415e9af00Smrg }
21515e9af00Smrg else
2164da858a9Smrg mpfr_sqr (r, x, MPFR_RNDU); /* err <= 1 ulp */
21715e9af00Smrg
21815e9af00Smrg /* now |x| < 4 (or xr if reduce = 1), thus |r| <= 16 */
21915e9af00Smrg
22015e9af00Smrg /* we need |r| < 1/2 for mpfr_cos2_aux, i.e., EXP(r) - 2K <= -1 */
22115e9af00Smrg K = K0 + 1 + MAX(0, MPFR_GET_EXP(r)) / 2;
22215e9af00Smrg /* since K0 >= 0, if EXP(r) < 0, then K >= 1, thus EXP(r) - 2K <= -3;
22315e9af00Smrg otherwise if EXP(r) >= 0, then K >= 1/2 + EXP(r)/2, thus
22415e9af00Smrg EXP(r) - 2K <= -1 */
22515e9af00Smrg
22615e9af00Smrg MPFR_SET_EXP (r, MPFR_GET_EXP (r) - 2 * K); /* Can't overflow! */
22715e9af00Smrg
22815e9af00Smrg /* s <- 1 - r/2! + ... + (-1)^l r^l/(2l)! */
22915e9af00Smrg l = mpfr_cos2_aux (s, r);
23015e9af00Smrg /* l is the error bound in ulps on s */
23115e9af00Smrg MPFR_SET_ONE (r);
23215e9af00Smrg for (k = 0; k < K; k++)
23315e9af00Smrg {
23415e9af00Smrg mpfr_sqr (s, s, MPFR_RNDU); /* err <= 2*olderr */
23515e9af00Smrg MPFR_SET_EXP (s, MPFR_GET_EXP (s) + 1); /* Can't overflow */
23615e9af00Smrg mpfr_sub (s, s, r, MPFR_RNDN); /* err <= 4*olderr */
23715e9af00Smrg if (MPFR_IS_ZERO(s))
23815e9af00Smrg goto ziv_next;
23915e9af00Smrg MPFR_ASSERTD (MPFR_GET_EXP (s) <= 1);
24015e9af00Smrg }
24115e9af00Smrg
24215e9af00Smrg /* The absolute error on s is bounded by (2l+1/3)*2^(2K-m)
24315e9af00Smrg 2l+1/3 <= 2l+1.
24415e9af00Smrg If |x| >= 4, we need to add 2^(2-m) for the argument reduction
24515e9af00Smrg by 2Pi: if K = 0, this amounts to add 4 to 2l+1/3, i.e., to add
24615e9af00Smrg 2 to l; if K >= 1, this amounts to add 1 to 2*l+1/3. */
24715e9af00Smrg l = 2 * l + 1;
24815e9af00Smrg if (reduce)
24915e9af00Smrg l += (K == 0) ? 4 : 1;
25015e9af00Smrg k = MPFR_INT_CEIL_LOG2 (l) + 2 * K;
25115e9af00Smrg /* now the error is bounded by 2^(k-m) = 2^(EXP(s)-err) */
25215e9af00Smrg
25315e9af00Smrg exps = MPFR_GET_EXP (s);
25415e9af00Smrg if (MPFR_LIKELY (MPFR_CAN_ROUND (s, exps + m - k, precy, rnd_mode)))
25515e9af00Smrg break;
25615e9af00Smrg
25715e9af00Smrg if (MPFR_UNLIKELY (exps == 1))
25815e9af00Smrg /* s = 1 or -1, and except x=0 which was already checked above,
25915e9af00Smrg cos(x) cannot be 1 or -1, so we can round if the error is less
26015e9af00Smrg than 2^(-precy) for directed rounding, or 2^(-precy-1) for rounding
26115e9af00Smrg to nearest. */
26215e9af00Smrg {
26315e9af00Smrg if (m > k && (m - k >= precy + (rnd_mode == MPFR_RNDN)))
26415e9af00Smrg {
26515e9af00Smrg /* If round to nearest or away, result is s = 1 or -1,
266*606004a0Smrg otherwise it is round(nexttoward (s, 0)). However, in order
267*606004a0Smrg to have the inexact flag correctly set below, we set |s| to
26815e9af00Smrg 1 - 2^(-m) in all cases. */
26915e9af00Smrg mpfr_nexttozero (s);
27015e9af00Smrg break;
27115e9af00Smrg }
27215e9af00Smrg }
27315e9af00Smrg
27415e9af00Smrg if (exps < cancel)
27515e9af00Smrg {
27615e9af00Smrg m += cancel - exps;
27715e9af00Smrg cancel = exps;
27815e9af00Smrg }
27915e9af00Smrg
28015e9af00Smrg ziv_next:
28115e9af00Smrg MPFR_ZIV_NEXT (loop, m);
28215e9af00Smrg MPFR_GROUP_REPREC_2 (group, m, r, s);
28315e9af00Smrg if (reduce)
28415e9af00Smrg {
28515e9af00Smrg mpfr_set_prec (xr, m);
28615e9af00Smrg mpfr_set_prec (c, expx + m - 1);
28715e9af00Smrg }
28815e9af00Smrg }
28915e9af00Smrg MPFR_ZIV_FREE (loop);
29015e9af00Smrg inexact = mpfr_set (y, s, rnd_mode);
29115e9af00Smrg MPFR_GROUP_CLEAR (group);
29215e9af00Smrg if (reduce)
29315e9af00Smrg {
29415e9af00Smrg mpfr_clear (xr);
29515e9af00Smrg mpfr_clear (c);
29615e9af00Smrg }
29715e9af00Smrg
2986c7ec94dSmrg end:
29915e9af00Smrg MPFR_SAVE_EXPO_FREE (expo);
30015e9af00Smrg return mpfr_check_range (y, inexact, rnd_mode);
30115e9af00Smrg }
302