1 /* mpfr_sin_cos -- sine and cosine of a floating-point number
2 
3 Copyright 2002-2020 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25 
26 /* (y, z) <- (sin(x), cos(x)), return value is 0 iff both results are exact
27    ie, iff x = 0 */
28 int
mpfr_sin_cos(mpfr_ptr y,mpfr_ptr z,mpfr_srcptr x,mpfr_rnd_t rnd_mode)29 mpfr_sin_cos (mpfr_ptr y, mpfr_ptr z, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
30 {
31   mpfr_prec_t prec, m;
32   int neg, reduce;
33   mpfr_t c, xr;
34   mpfr_srcptr xx;
35   mpfr_exp_t err, expx;
36   int inexy, inexz;
37   MPFR_ZIV_DECL (loop);
38   MPFR_SAVE_EXPO_DECL (expo);
39 
40   MPFR_ASSERTN (y != z);
41 
42   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
43     {
44       if (MPFR_IS_NAN(x) || MPFR_IS_INF(x))
45         {
46           MPFR_SET_NAN (y);
47           MPFR_SET_NAN (z);
48           MPFR_RET_NAN;
49         }
50       else /* x is zero */
51         {
52           MPFR_ASSERTD (MPFR_IS_ZERO (x));
53           MPFR_SET_ZERO (y);
54           MPFR_SET_SAME_SIGN (y, x);
55           /* y = 0, thus exact, but z is inexact in case of underflow
56              or overflow */
57           inexy = 0; /* y is exact */
58           inexz = mpfr_set_ui (z, 1, rnd_mode);
59           return INEX(inexy,inexz);
60         }
61     }
62 
63   MPFR_LOG_FUNC
64     (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd_mode),
65      ("sin[%Pu]=%.*Rg cos[%Pu]=%.*Rg", mpfr_get_prec(y), mpfr_log_prec, y,
66       mpfr_get_prec (z), mpfr_log_prec, z));
67 
68   MPFR_SAVE_EXPO_MARK (expo);
69 
70   prec = MAX (MPFR_PREC (y), MPFR_PREC (z));
71   m = prec + MPFR_INT_CEIL_LOG2 (prec) + 13;
72   expx = MPFR_GET_EXP (x);
73 
74   /* When x is close to 0, say 2^(-k), then there is a cancellation of about
75      2k bits in 1-cos(x)^2. FIXME: in that case, it would be more efficient
76      to compute sin(x) directly. VL: This is partly done by using
77      MPFR_FAST_COMPUTE_IF_SMALL_INPUT from the mpfr_sin and mpfr_cos
78      functions. Moreover, any overflow on m is avoided. */
79   if (expx < 0)
80     {
81       /* Warning: in case y = x, and the first call to
82          MPFR_FAST_COMPUTE_IF_SMALL_INPUT succeeds but the second fails,
83          we will have clobbered the original value of x.
84          The workaround is to first compute z = cos(x) in that case, since
85          y and z are different. */
86       if (y != x)
87         /* y and x differ, thus we can safely try to compute y first */
88         {
89           MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
90             y, x, -2 * expx, 2, 0, rnd_mode,
91             { inexy = _inexact;
92               goto small_input; });
93           if (0)
94             {
95             small_input:
96               /* we can go here only if we can round sin(x) */
97               MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
98                 z, __gmpfr_one, -2 * expx, 1, 0, rnd_mode,
99                 { inexz = _inexact;
100                   MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
101                   goto end; });
102             }
103 
104           /* if we go here, one of the two MPFR_FAST_COMPUTE_IF_SMALL_INPUT
105              calls failed */
106         }
107       else /* y and x are the same variable: try to compute z first, which
108               necessarily differs */
109         {
110           MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
111             z, __gmpfr_one, -2 * expx, 1, 0, rnd_mode,
112             { inexz = _inexact;
113               goto small_input2; });
114           if (0)
115             {
116             small_input2:
117               /* we can go here only if we can round cos(x) */
118               MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
119                 y, x, -2 * expx, 2, 0, rnd_mode,
120                 { inexy = _inexact;
121                   MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
122                   goto end; });
123             }
124         }
125       m += 2 * (-expx);
126     }
127 
128   if (prec >= MPFR_SINCOS_THRESHOLD)
129     {
130       MPFR_SAVE_EXPO_FREE (expo);
131       return mpfr_sincos_fast (y, z, x, rnd_mode);
132     }
133 
134   mpfr_init (c);
135   mpfr_init (xr);
136 
137   MPFR_ZIV_INIT (loop, m);
138   for (;;)
139     {
140       /* the following is copied from sin.c */
141       if (expx >= 2) /* reduce the argument */
142         {
143           reduce = 1;
144           mpfr_set_prec (c, expx + m - 1);
145           mpfr_set_prec (xr, m);
146           mpfr_const_pi (c, MPFR_RNDN);
147           mpfr_mul_2ui (c, c, 1, MPFR_RNDN);
148           mpfr_remainder (xr, x, c, MPFR_RNDN);
149           mpfr_div_2ui (c, c, 1, MPFR_RNDN);
150           if (MPFR_IS_POS (xr))
151             mpfr_sub (c, c, xr, MPFR_RNDZ);
152           else
153             mpfr_add (c, c, xr, MPFR_RNDZ);
154           if (MPFR_IS_ZERO(xr)
155               || MPFR_EXP(xr) < (mpfr_exp_t) 3 - (mpfr_exp_t) m
156               || MPFR_EXP(c) < (mpfr_exp_t) 3 - (mpfr_exp_t) m)
157             goto next_step;
158           xx = xr;
159         }
160       else /* the input argument is already reduced */
161         {
162           reduce = 0;
163           xx = x;
164         }
165 
166       neg = MPFR_IS_NEG (xx); /* gives sign of sin(x) */
167       mpfr_set_prec (c, m);
168       mpfr_cos (c, xx, MPFR_RNDZ);
169       /* If no argument reduction was performed, the error is at most ulp(c),
170          otherwise it is at most ulp(c) + 2^(2-m). Since |c| < 1, we have
171          ulp(c) <= 2^(-m), thus the error is bounded by 2^(3-m) in that later
172          case. */
173       if (reduce == 0)
174         err = m;
175       else
176         err = MPFR_GET_EXP (c) + (mpfr_exp_t) (m - 3);
177       if (!MPFR_CAN_ROUND (c, err, MPFR_PREC (z), rnd_mode))
178         goto next_step;
179 
180       /* we can't set z now, because in case z = x, and the MPFR_CAN_ROUND()
181          call below fails, we will have clobbered the input */
182       mpfr_set_prec (xr, MPFR_PREC(c));
183       mpfr_swap (xr, c); /* save the approximation of the cosine in xr */
184       mpfr_sqr (c, xr, MPFR_RNDU); /* the absolute error is bounded by
185                                       2^(5-m) if reduce=1, and by 2^(2-m)
186                                       otherwise */
187       mpfr_ui_sub (c, 1, c, MPFR_RNDN); /* error bounded by 2^(6-m) if reduce
188                                            is 1, and 2^(3-m) otherwise */
189       mpfr_sqrt (c, c, MPFR_RNDN); /* the absolute error is bounded by
190                                       2^(6-m-Exp(c)) if reduce=1, and
191                                       2^(3-m-Exp(c)) otherwise */
192       err = 3 + 3 * reduce - MPFR_GET_EXP (c);
193       if (neg)
194         MPFR_CHANGE_SIGN (c);
195 
196       /* the absolute error on c is at most 2^(err-m), which we must put
197          in the form 2^(EXP(c)-err). */
198       err = MPFR_GET_EXP (c) + (mpfr_exp_t) m - err;
199       if (MPFR_CAN_ROUND (c, err, MPFR_PREC (y), rnd_mode))
200         break;
201       /* check for huge cancellation */
202       if (err < (mpfr_exp_t) MPFR_PREC (y))
203         m += MPFR_PREC (y) - err;
204       /* Check if near 1 */
205       if (MPFR_GET_EXP (c) == 1
206           && MPFR_MANT (c)[MPFR_LIMB_SIZE (c)-1] == MPFR_LIMB_HIGHBIT)
207         m += m;
208 
209     next_step:
210       MPFR_ZIV_NEXT (loop, m);
211       mpfr_set_prec (c, m);
212     }
213   MPFR_ZIV_FREE (loop);
214 
215   inexy = mpfr_set (y, c, rnd_mode);
216   inexz = mpfr_set (z, xr, rnd_mode);
217 
218   mpfr_clear (c);
219   mpfr_clear (xr);
220 
221  end:
222   MPFR_SAVE_EXPO_FREE (expo);
223   /* FIXME: add a test for bug before revision 7355 */
224   inexy = mpfr_check_range (y, inexy, rnd_mode);
225   inexz = mpfr_check_range (z, inexz, rnd_mode);
226   MPFR_RET (INEX(inexy,inexz));
227 }
228 
229 /*************** asymptotically fast implementation below ********************/
230 
231 /* truncate Q from R to at most prec bits.
232    Return the number of truncated bits.
233  */
234 static mpfr_prec_t
reduce(mpz_t Q,mpz_srcptr R,mpfr_prec_t prec)235 reduce (mpz_t Q, mpz_srcptr R, mpfr_prec_t prec)
236 {
237   mpfr_prec_t l;
238 
239   MPFR_MPZ_SIZEINBASE2(l, R);
240   l = (l > prec) ? l - prec : 0;
241   mpz_fdiv_q_2exp (Q, R, l);
242   return l;
243 }
244 
245 /* truncate S and C so that the smaller has prec bits.
246    Return the number of truncated bits.
247  */
248 static unsigned long
reduce2(mpz_t S,mpz_t C,mpfr_prec_t prec)249 reduce2 (mpz_t S, mpz_t C, mpfr_prec_t prec)
250 {
251   unsigned long ls;
252   unsigned long lc;
253   unsigned long l;
254 
255   MPFR_MPZ_SIZEINBASE2(ls, S);
256   MPFR_MPZ_SIZEINBASE2(lc, C);
257 
258   l = (ls < lc) ? ls : lc; /* smaller length */
259   l = (l > prec) ? l - prec : 0;
260   mpz_fdiv_q_2exp (S, S, l);
261   mpz_fdiv_q_2exp (C, C, l);
262   return l;
263 }
264 
265 /* return in S0/Q0 a rational approximation of sin(X) with absolute error
266                      bounded by 9*2^(-prec), where 0 <= X=p/2^r <= 1/2,
267    and in    C0/Q0 a rational approximation of cos(X), with relative error
268                      bounded by 9*2^(-prec) (and also absolute error, since
269                      |cos(X)| <= 1).
270    We have sin(X)/X = sum((-1)^i*(p/2^r)^i/(2i+1)!, i=0..infinity).
271    We use the following binary splitting formula:
272    P(a,b) = (-p)^(b-a)
273    Q(a,b) = (2a)*(2a+1)*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
274    T(a,b) = 1 if a+1=b, Q(c,b)*T(a,c)+P(a,c)*T(c,b) otherwise.
275 
276    Since we use P(a,b) for b-a=2^k only, we compute only p^(2^k).
277    We do not store the factor 2^r in Q().
278 
279    Then sin(X)/X ~ T(0,i)/Q(0,i) for i so that (p/2^r)^i/i! is small enough.
280 
281    Return l such that Q0 has to be multiplied by 2^l.
282 
283    Assumes prec >= 10.
284 */
285 
286 #define KMAX 64
287 static unsigned long
sin_bs_aux(mpz_t Q0,mpz_t S0,mpz_t C0,mpz_srcptr p,mpfr_prec_t r,mpfr_prec_t prec)288 sin_bs_aux (mpz_t Q0, mpz_t S0, mpz_t C0, mpz_srcptr p, mpfr_prec_t r,
289             mpfr_prec_t prec)
290 {
291   mpz_t T[KMAX], Q[KMAX], ptoj[KMAX], pp;
292   mpfr_prec_t log2_nb_terms[KMAX], mult[KMAX];
293   mpfr_prec_t accu[KMAX], size_ptoj[KMAX];
294   mpfr_prec_t prec_i_have, h, r0 = r, pp_s, p_s;
295   unsigned long i, j, m;
296   int alloc, k, l;
297 
298   if (MPFR_UNLIKELY(mpz_cmp_ui (p, 0) == 0)) /* sin(x)/x -> 1 */
299     {
300       mpz_set_ui (Q0, 1);
301       mpz_set_ui (S0, 1);
302       mpz_set_ui (C0, 1);
303       return 0;
304     }
305 
306   /* check that X=p/2^r <= 1/2 */
307   MPFR_ASSERTN(mpz_sizeinbase (p, 2) - (mpfr_exp_t) r <= -1);
308 
309   mpz_init (pp);
310 
311   /* normalize p (non-zero here) */
312   h = mpz_scan1 (p, 0);
313   mpz_fdiv_q_2exp (pp, p, h); /* p = pp * 2^h */
314   mpz_mul (pp, pp, pp);
315   r = 2 * (r - h);            /* x^2 = (p/2^r0)^2 = pp / 2^r */
316 
317   /* now p is odd */
318   alloc = 2;
319   mpz_init_set_ui (T[0], 6);
320   mpz_init_set_ui (Q[0], 6);
321   mpz_init_set (ptoj[0], pp); /* ptoj[i] = pp^(2^i) */
322   mpz_init (T[1]);
323   mpz_init (Q[1]);
324   mpz_init (ptoj[1]);
325   mpz_mul (ptoj[1], pp, pp);  /* ptoj[1] = pp^2 */
326   MPFR_MPZ_SIZEINBASE2(size_ptoj[1], ptoj[1]);
327 
328   mpz_mul_2exp (T[0], T[0], r);
329   mpz_sub (T[0], T[0], pp);      /* 6*2^r - pp = 6*2^r*(1 - x^2/6) */
330   log2_nb_terms[0] = 1;
331 
332   /* already take into account the factor x=p/2^r in sin(x) = x * (...) */
333   MPFR_MPZ_SIZEINBASE2(pp_s, pp);
334   MPFR_MPZ_SIZEINBASE2(p_s, p);
335   mult[0] = r - pp_s + r0 - p_s;
336   /* we have x^3 < 1/2^mult[0] */
337 
338   for (i = 2, k = 0, prec_i_have = mult[0]; prec_i_have < prec; i += 2)
339     {
340       /* i is even here */
341       /* invariant: Q[0]*Q[1]*...*Q[k] equals (2i-1)!,
342          we have already summed terms of index < i
343          in S[0]/Q[0], ..., S[k]/Q[k] */
344       k ++;
345       if (k + 1 >= alloc) /* necessarily k + 1 = alloc */
346         {
347           MPFR_ASSERTD (k + 1 == alloc);
348           alloc ++;
349           MPFR_ASSERTN (k + 1 < KMAX);
350           mpz_init (T[k+1]);
351           mpz_init (Q[k+1]);
352           mpz_init (ptoj[k+1]);
353           mpz_mul (ptoj[k+1], ptoj[k], ptoj[k]); /* pp^(2^(k+1)) */
354           MPFR_MPZ_SIZEINBASE2(size_ptoj[k+1], ptoj[k+1]);
355         }
356       /* for i even, we have Q[k] = (2*i)*(2*i+1), T[k] = 1,
357          then                Q[k+1] = (2*i+2)*(2*i+3), T[k+1] = 1,
358          which reduces to T[k] = (2*i+2)*(2*i+3)*2^r-pp,
359          Q[k] = (2*i)*(2*i+1)*(2*i+2)*(2*i+3). */
360       MPFR_ASSERTN (k < KMAX);
361       log2_nb_terms[k] = 1;
362       mpz_set_ui (Q[k], 2 * i + 2);
363       mpz_mul_ui (Q[k], Q[k], 2 * i + 3);
364       mpz_mul_2exp (T[k], Q[k], r);
365       mpz_sub (T[k], T[k], pp);
366       mpz_mul_ui (Q[k], Q[k], 2 * i);
367       mpz_mul_ui (Q[k], Q[k], 2 * i + 1);
368       /* the next term of the series is divided by Q[k] and multiplied
369          by pp^2/2^(2r), thus the mult. factor < 1/2^mult[k] */
370       MPFR_MPZ_SIZEINBASE2(mult[k], Q[k]);
371       mult[k] += 2 * r - size_ptoj[1] - 1;
372       /* the absolute contribution of the next term is 1/2^accu[k] */
373       accu[k] = (k == 0) ? mult[k] : mult[k] + accu[k-1];
374       prec_i_have = accu[k]; /* the current term is < 1/2^accu[k] */
375       j = (i + 2) / 2;
376       l = 1;
377       while ((j & 1) == 0) /* combine and reduce */
378         {
379           MPFR_ASSERTN (k >= 1);
380           mpz_mul (T[k], T[k], ptoj[l]);
381           mpz_mul (T[k-1], T[k-1], Q[k]);
382           mpz_mul_2exp (T[k-1], T[k-1], r << l);
383           mpz_add (T[k-1], T[k-1], T[k]);
384           mpz_mul (Q[k-1], Q[k-1], Q[k]);
385           log2_nb_terms[k-1] ++; /* number of terms in S[k-1]
386                                     is a power of 2 by construction */
387           MPFR_MPZ_SIZEINBASE2(prec_i_have, Q[k]);
388           mult[k-1] += prec_i_have + (r << l) - size_ptoj[l] - 1;
389           accu[k-1] = (k == 1) ? mult[k-1] : mult[k-1] + accu[k-2];
390           prec_i_have = accu[k-1];
391           l ++;
392           j >>= 1;
393           k --;
394         }
395     }
396 
397   /* accumulate all products in T[0] and Q[0]. Warning: contrary to above,
398      here we do not have log2_nb_terms[k-1] = log2_nb_terms[k]+1. */
399   h = 0; /* number of accumulated terms in the right part T[k]/Q[k] */
400   while (k > 0)
401     {
402       mpz_mul (T[k], T[k], ptoj[log2_nb_terms[k-1]]);
403       mpz_mul (T[k-1], T[k-1], Q[k]);
404       h += (mpfr_prec_t) 1 << log2_nb_terms[k];
405       mpz_mul_2exp (T[k-1], T[k-1], r * h);
406       mpz_add (T[k-1], T[k-1], T[k]);
407       mpz_mul (Q[k-1], Q[k-1], Q[k]);
408       k--;
409     }
410 
411   m = r0 + r * (i - 1); /* implicit multiplier 2^r for Q0 */
412   /* at this point T[0]/(2^m*Q[0]) is an approximation of sin(x) where the 1st
413      neglected term has contribution < 1/2^prec, thus since the series has
414      alternate signs, the error is < 1/2^prec */
415 
416   /* we truncate Q0 to prec bits: the relative error is at most 2^(1-prec),
417      which means that Q0 = Q[0] * (1+theta) with |theta| <= 2^(1-prec)
418      [up to a power of two] */
419   m += reduce (Q0, Q[0], prec);
420   m -= reduce (T[0], T[0], prec);
421   /* multiply by x = p/2^m */
422   mpz_mul (S0, T[0], p);
423   m -= reduce (S0, S0, prec); /* S0 = T[0] * (1 + theta)^2 up to power of 2 */
424   /* sin(X) ~ S0/Q0*(1 + theta)^3 + err with |theta| <= 2^(1-prec) and
425               |err| <= 2^(-prec), thus since |S0/Q0| <= 1:
426      |sin(X) - S0/Q0| <= 4*|theta*S0/Q0| + |err| <= 9*2^(-prec) */
427 
428   mpz_clear (pp);
429   for (k = 0; k < alloc; k ++)
430     {
431       mpz_clear (T[k]);
432       mpz_clear (Q[k]);
433       mpz_clear (ptoj[k]);
434     }
435 
436   /* compute cos(X) from sin(X): sqrt(1-(S/Q)^2) = sqrt(Q^2-S^2)/Q
437      = sqrt(Q0^2*2^(2m)-S0^2)/Q0.
438      Write S/Q = sin(X) + eps with |eps| <= 9*2^(-prec),
439      then sqrt(Q^2-S^2) = sqrt(Q^2-Q^2*(sin(X)+eps)^2)
440                         = sqrt(Q^2*cos(X)^2-Q^2*(2*sin(X)*eps+eps^2))
441                         = sqrt(Q^2*cos(X)^2-Q^2*eps1) with |eps1|<=9*2^(-prec)
442                           [using X<=1/2 and eps<=9*2^(-prec) and prec>=10]
443 
444                         Since we truncate the square root, we get:
445                           sqrt(Q^2*cos(X)^2-Q^2*eps1)+eps2 with |eps2|<1
446                         = Q*sqrt(cos(X)^2-eps1)+eps2
447                         = Q*cos(X)*(1+eps3)+eps2 with |eps3| <= 6*2^(-prec)
448                         = Q*cos(X)*(1+eps3+eps2/(Q*cos(X)))
449                         = Q*cos(X)*(1+eps4) with |eps4| <= 9*2^(-prec)
450                           since |Q| >= 2^(prec-1) */
451   /* we assume that Q0*2^m >= 2^(prec-1) */
452   MPFR_ASSERTN(m + mpz_sizeinbase (Q0, 2) >= prec);
453   mpz_mul (C0, Q0, Q0);
454   mpz_mul_2exp (C0, C0, 2 * m);
455   mpz_submul (C0, S0, S0);
456   mpz_sqrt (C0, C0);
457 
458   return m;
459 }
460 
461 /* Put in s and c approximations of sin(x) and cos(x) respectively.
462    Assumes 0 < x < Pi/4 and PREC(s) = PREC(c) >= 10.
463    Return err such that the relative error is bounded by 2^err ulps.
464 */
465 static int
sincos_aux(mpfr_ptr s,mpfr_ptr c,mpfr_srcptr x,mpfr_rnd_t rnd_mode)466 sincos_aux (mpfr_ptr s, mpfr_ptr c, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
467 {
468   mpfr_prec_t prec_s, sh;
469   mpz_t Q, S, C, Q2, S2, C2, y;
470   mpfr_t x2;
471   unsigned long l, l2, j, err;
472 
473   MPFR_ASSERTD(MPFR_PREC(s) == MPFR_PREC(c));
474 
475   prec_s = MPFR_PREC(s);
476 
477   mpfr_init2 (x2, MPFR_PREC(x));
478   mpz_init (Q);
479   mpz_init (S);
480   mpz_init (C);
481   mpz_init (Q2);
482   mpz_init (S2);
483   mpz_init (C2);
484   mpz_init (y);
485 
486   mpfr_set (x2, x, MPFR_RNDN); /* exact */
487   mpz_set_ui (Q, 1);
488   l = 0;
489   mpz_set_ui (S, 0); /* sin(0) = S/(2^l*Q), exact */
490   mpz_set_ui (C, 1); /* cos(0) = C/(2^l*Q), exact */
491 
492   /* Invariant: x = X + x2/2^(sh-1), where the part X was already treated,
493      S/(2^l*Q) ~ sin(X), C/(2^l*Q) ~ cos(X), and x2/2^(sh-1) < Pi/4.
494      'sh-1' is the number of already shifted bits in x2.
495   */
496 
497   for (sh = 1, j = 0; mpfr_cmp_ui (x2, 0) != 0 && sh <= prec_s; sh <<= 1, j++)
498     {
499       if (sh > prec_s / 2) /* sin(x) = x + O(x^3), cos(x) = 1 + O(x^2) */
500         {
501           l2 = -mpfr_get_z_2exp (S2, x2); /* S2/2^l2 = x2 */
502           l2 += sh - 1;
503           mpz_set_ui (Q2, 1);
504           mpz_set_ui (C2, 1);
505           mpz_mul_2exp (C2, C2, l2);
506           mpfr_set_ui (x2, 0, MPFR_RNDN);
507         }
508       else
509         {
510           /* y <- trunc(x2 * 2^sh) = trunc(x * 2^(2*sh-1)) */
511           mpfr_mul_2ui (x2, x2, sh, MPFR_RNDN); /* exact */
512           mpfr_get_z (y, x2, MPFR_RNDZ); /* round toward zero: now
513                                            0 <= x2 < 2^sh, thus
514                                            0 <= x2/2^(sh-1) < 2^(1-sh) */
515           if (mpz_cmp_ui (y, 0) == 0)
516             continue;
517           mpfr_sub_z (x2, x2, y, MPFR_RNDN); /* should be exact */
518           l2 = sin_bs_aux (Q2, S2, C2, y, 2 * sh - 1, prec_s);
519           /* we now have |S2/Q2/2^l2 - sin(X)| <= 9*2^(prec_s)
520              and |C2/Q2/2^l2 - cos(X)| <= 6*2^(prec_s), with X=y/2^(2sh-1) */
521         }
522       if (sh == 1) /* S=0, C=1 */
523         {
524           l = l2;
525           mpz_swap (Q, Q2);
526           mpz_swap (S, S2);
527           mpz_swap (C, C2);
528         }
529       else
530         {
531           /* s <- s*c2+c*s2, c <- c*c2-s*s2, using Karatsuba:
532              a = s+c, b = s2+c2, t = a*b, d = s*s2, e = c*c2,
533              s <- t - d - e, c <- e - d */
534           mpz_add (y, S, C); /* a */
535           mpz_mul (C, C, C2); /* e */
536           mpz_add (C2, C2, S2); /* b */
537           mpz_mul (S2, S, S2); /* d */
538           mpz_mul (y, y, C2); /* a*b */
539           mpz_sub (S, y, S2); /* t - d */
540           mpz_sub (S, S, C); /* t - d - e */
541           mpz_sub (C, C, S2); /* e - d */
542           mpz_mul (Q, Q, Q2);
543           /* after j loops, the error is <= (11j-2)*2^(prec_s) */
544           l += l2;
545           /* reduce Q to prec_s bits */
546           l += reduce (Q, Q, prec_s);
547           /* reduce S,C to prec_s bits, error <= 11*j*2^(prec_s) */
548           l -= reduce2 (S, C, prec_s);
549         }
550     }
551 
552   j = 11 * j;
553   for (err = 0; j > 1; j = (j + 1) / 2, err ++);
554 
555   mpfr_set_z (s, S, MPFR_RNDN);
556   mpfr_div_z (s, s, Q, MPFR_RNDN);
557   mpfr_div_2ui (s, s, l, MPFR_RNDN);
558 
559   mpfr_set_z (c, C, MPFR_RNDN);
560   mpfr_div_z (c, c, Q, MPFR_RNDN);
561   mpfr_div_2ui (c, c, l, MPFR_RNDN);
562 
563   mpz_clear (Q);
564   mpz_clear (S);
565   mpz_clear (C);
566   mpz_clear (Q2);
567   mpz_clear (S2);
568   mpz_clear (C2);
569   mpz_clear (y);
570   mpfr_clear (x2);
571   return err;
572 }
573 
574 /* Assumes x is neither NaN, +/-Inf, nor +/- 0.
575    One of s and c might be NULL, in which case the corresponding value is
576    not computed.
577    Assumes s differs from c.
578  */
579 int
mpfr_sincos_fast(mpfr_ptr s,mpfr_ptr c,mpfr_srcptr x,mpfr_rnd_t rnd)580 mpfr_sincos_fast (mpfr_ptr s, mpfr_ptr c, mpfr_srcptr x, mpfr_rnd_t rnd)
581 {
582   int inexs, inexc;
583   mpfr_t x_red, ts, tc;
584   mpfr_prec_t w;
585   mpfr_exp_t err, errs, errc;
586   MPFR_GROUP_DECL (group);
587   MPFR_ZIV_DECL (loop);
588 
589   MPFR_ASSERTN(s != c);
590   if (s == NULL)
591     w = MPFR_PREC(c);
592   else if (c == NULL)
593     w = MPFR_PREC(s);
594   else
595     w = MPFR_PREC(s) >= MPFR_PREC(c) ? MPFR_PREC(s) : MPFR_PREC(c);
596   w += MPFR_INT_CEIL_LOG2(w) + 9; /* ensures w >= 10 (needed by sincos_aux) */
597 
598   MPFR_GROUP_INIT_2(group, w, ts, tc);
599 
600   MPFR_ZIV_INIT (loop, w);
601   for (;;)
602     {
603       /* if 0 < x <= Pi/4, we can call sincos_aux directly */
604       if (MPFR_IS_POS(x) && mpfr_cmp_ui_2exp (x, 1686629713, -31) <= 0)
605         {
606           err = sincos_aux (ts, tc, x, MPFR_RNDN);
607         }
608       /* if -Pi/4 <= x < 0, use sin(-x)=-sin(x) */
609       else if (MPFR_IS_NEG(x) && mpfr_cmp_si_2exp (x, -1686629713, -31) >= 0)
610         {
611           MPFR_ALIAS(x_red, x, MPFR_SIGN_POS, MPFR_GET_EXP(x));
612           err = sincos_aux (ts, tc, x_red, MPFR_RNDN);
613           MPFR_CHANGE_SIGN(ts);
614         }
615       else /* argument reduction is needed */
616         {
617           long q;
618           mpfr_t pi;
619           int neg = 0;
620 
621           mpfr_init2 (x_red, w);
622           mpfr_init2 (pi, (MPFR_EXP(x) > 0) ? w + MPFR_EXP(x) : w);
623           mpfr_const_pi (pi, MPFR_RNDN);
624           mpfr_div_2ui (pi, pi, 1, MPFR_RNDN); /* Pi/2 */
625           mpfr_remquo (x_red, &q, x, pi, MPFR_RNDN);
626           /* x = q * (Pi/2 + eps1) + x_red + eps2,
627              where |eps1| <= 1/2*ulp(Pi/2) = 2^(-w-MAX(0,EXP(x))),
628              and eps2 <= 1/2*ulp(x_red) <= 1/2*ulp(Pi/2) = 2^(-w)
629              Since |q| <= x/(Pi/2) <= |x|, we have
630              q*|eps1| <= 2^(-w), thus
631              |x - q * Pi/2 - x_red| <= 2^(1-w) */
632           /* now -Pi/4 <= x_red <= Pi/4: if x_red < 0, consider -x_red */
633           if (MPFR_IS_NEG(x_red))
634             {
635               mpfr_neg (x_red, x_red, MPFR_RNDN);
636               neg = 1;
637             }
638           err = sincos_aux (ts, tc, x_red, MPFR_RNDN);
639           err ++; /* to take into account the argument reduction */
640           if (neg) /* sin(-x) = -sin(x), cos(-x) = cos(x) */
641             mpfr_neg (ts, ts, MPFR_RNDN);
642           if (q & 2) /* sin(x+Pi) = -sin(x), cos(x+Pi) = -cos(x) */
643             {
644               mpfr_neg (ts, ts, MPFR_RNDN);
645               mpfr_neg (tc, tc, MPFR_RNDN);
646             }
647           if (q & 1) /* sin(x+Pi/2) = cos(x), cos(x+Pi/2) = -sin(x) */
648             {
649               mpfr_neg (ts, ts, MPFR_RNDN);
650               mpfr_swap (ts, tc);
651             }
652           mpfr_clear (x_red);
653           mpfr_clear (pi);
654         }
655       /* adjust errors with respect to absolute values */
656       errs = err - MPFR_EXP(ts);
657       errc = err - MPFR_EXP(tc);
658       if ((s == NULL || MPFR_CAN_ROUND (ts, w - errs, MPFR_PREC(s), rnd)) &&
659           (c == NULL || MPFR_CAN_ROUND (tc, w - errc, MPFR_PREC(c), rnd)))
660         break;
661       MPFR_ZIV_NEXT (loop, w);
662       MPFR_GROUP_REPREC_2(group, w, ts, tc);
663     }
664   MPFR_ZIV_FREE (loop);
665 
666   inexs = (s == NULL) ? 0 : mpfr_set (s, ts, rnd);
667   inexc = (c == NULL) ? 0 : mpfr_set (c, tc, rnd);
668 
669   MPFR_GROUP_CLEAR (group);
670   return INEX(inexs,inexc);
671 }
672