1 /* mpfr_sin_cos -- sine and cosine of a floating-point number
2
3 Copyright 2002-2020 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5
6 This file is part of the GNU MPFR Library.
7
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25
26 /* (y, z) <- (sin(x), cos(x)), return value is 0 iff both results are exact
27 ie, iff x = 0 */
28 int
mpfr_sin_cos(mpfr_ptr y,mpfr_ptr z,mpfr_srcptr x,mpfr_rnd_t rnd_mode)29 mpfr_sin_cos (mpfr_ptr y, mpfr_ptr z, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
30 {
31 mpfr_prec_t prec, m;
32 int neg, reduce;
33 mpfr_t c, xr;
34 mpfr_srcptr xx;
35 mpfr_exp_t err, expx;
36 int inexy, inexz;
37 MPFR_ZIV_DECL (loop);
38 MPFR_SAVE_EXPO_DECL (expo);
39
40 MPFR_ASSERTN (y != z);
41
42 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
43 {
44 if (MPFR_IS_NAN(x) || MPFR_IS_INF(x))
45 {
46 MPFR_SET_NAN (y);
47 MPFR_SET_NAN (z);
48 MPFR_RET_NAN;
49 }
50 else /* x is zero */
51 {
52 MPFR_ASSERTD (MPFR_IS_ZERO (x));
53 MPFR_SET_ZERO (y);
54 MPFR_SET_SAME_SIGN (y, x);
55 /* y = 0, thus exact, but z is inexact in case of underflow
56 or overflow */
57 inexy = 0; /* y is exact */
58 inexz = mpfr_set_ui (z, 1, rnd_mode);
59 return INEX(inexy,inexz);
60 }
61 }
62
63 MPFR_LOG_FUNC
64 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd_mode),
65 ("sin[%Pu]=%.*Rg cos[%Pu]=%.*Rg", mpfr_get_prec(y), mpfr_log_prec, y,
66 mpfr_get_prec (z), mpfr_log_prec, z));
67
68 MPFR_SAVE_EXPO_MARK (expo);
69
70 prec = MAX (MPFR_PREC (y), MPFR_PREC (z));
71 m = prec + MPFR_INT_CEIL_LOG2 (prec) + 13;
72 expx = MPFR_GET_EXP (x);
73
74 /* When x is close to 0, say 2^(-k), then there is a cancellation of about
75 2k bits in 1-cos(x)^2. FIXME: in that case, it would be more efficient
76 to compute sin(x) directly. VL: This is partly done by using
77 MPFR_FAST_COMPUTE_IF_SMALL_INPUT from the mpfr_sin and mpfr_cos
78 functions. Moreover, any overflow on m is avoided. */
79 if (expx < 0)
80 {
81 /* Warning: in case y = x, and the first call to
82 MPFR_FAST_COMPUTE_IF_SMALL_INPUT succeeds but the second fails,
83 we will have clobbered the original value of x.
84 The workaround is to first compute z = cos(x) in that case, since
85 y and z are different. */
86 if (y != x)
87 /* y and x differ, thus we can safely try to compute y first */
88 {
89 MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
90 y, x, -2 * expx, 2, 0, rnd_mode,
91 { inexy = _inexact;
92 goto small_input; });
93 if (0)
94 {
95 small_input:
96 /* we can go here only if we can round sin(x) */
97 MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
98 z, __gmpfr_one, -2 * expx, 1, 0, rnd_mode,
99 { inexz = _inexact;
100 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
101 goto end; });
102 }
103
104 /* if we go here, one of the two MPFR_FAST_COMPUTE_IF_SMALL_INPUT
105 calls failed */
106 }
107 else /* y and x are the same variable: try to compute z first, which
108 necessarily differs */
109 {
110 MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
111 z, __gmpfr_one, -2 * expx, 1, 0, rnd_mode,
112 { inexz = _inexact;
113 goto small_input2; });
114 if (0)
115 {
116 small_input2:
117 /* we can go here only if we can round cos(x) */
118 MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
119 y, x, -2 * expx, 2, 0, rnd_mode,
120 { inexy = _inexact;
121 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
122 goto end; });
123 }
124 }
125 m += 2 * (-expx);
126 }
127
128 if (prec >= MPFR_SINCOS_THRESHOLD)
129 {
130 MPFR_SAVE_EXPO_FREE (expo);
131 return mpfr_sincos_fast (y, z, x, rnd_mode);
132 }
133
134 mpfr_init (c);
135 mpfr_init (xr);
136
137 MPFR_ZIV_INIT (loop, m);
138 for (;;)
139 {
140 /* the following is copied from sin.c */
141 if (expx >= 2) /* reduce the argument */
142 {
143 reduce = 1;
144 mpfr_set_prec (c, expx + m - 1);
145 mpfr_set_prec (xr, m);
146 mpfr_const_pi (c, MPFR_RNDN);
147 mpfr_mul_2ui (c, c, 1, MPFR_RNDN);
148 mpfr_remainder (xr, x, c, MPFR_RNDN);
149 mpfr_div_2ui (c, c, 1, MPFR_RNDN);
150 if (MPFR_IS_POS (xr))
151 mpfr_sub (c, c, xr, MPFR_RNDZ);
152 else
153 mpfr_add (c, c, xr, MPFR_RNDZ);
154 if (MPFR_IS_ZERO(xr)
155 || MPFR_EXP(xr) < (mpfr_exp_t) 3 - (mpfr_exp_t) m
156 || MPFR_EXP(c) < (mpfr_exp_t) 3 - (mpfr_exp_t) m)
157 goto next_step;
158 xx = xr;
159 }
160 else /* the input argument is already reduced */
161 {
162 reduce = 0;
163 xx = x;
164 }
165
166 neg = MPFR_IS_NEG (xx); /* gives sign of sin(x) */
167 mpfr_set_prec (c, m);
168 mpfr_cos (c, xx, MPFR_RNDZ);
169 /* If no argument reduction was performed, the error is at most ulp(c),
170 otherwise it is at most ulp(c) + 2^(2-m). Since |c| < 1, we have
171 ulp(c) <= 2^(-m), thus the error is bounded by 2^(3-m) in that later
172 case. */
173 if (reduce == 0)
174 err = m;
175 else
176 err = MPFR_GET_EXP (c) + (mpfr_exp_t) (m - 3);
177 if (!MPFR_CAN_ROUND (c, err, MPFR_PREC (z), rnd_mode))
178 goto next_step;
179
180 /* we can't set z now, because in case z = x, and the MPFR_CAN_ROUND()
181 call below fails, we will have clobbered the input */
182 mpfr_set_prec (xr, MPFR_PREC(c));
183 mpfr_swap (xr, c); /* save the approximation of the cosine in xr */
184 mpfr_sqr (c, xr, MPFR_RNDU); /* the absolute error is bounded by
185 2^(5-m) if reduce=1, and by 2^(2-m)
186 otherwise */
187 mpfr_ui_sub (c, 1, c, MPFR_RNDN); /* error bounded by 2^(6-m) if reduce
188 is 1, and 2^(3-m) otherwise */
189 mpfr_sqrt (c, c, MPFR_RNDN); /* the absolute error is bounded by
190 2^(6-m-Exp(c)) if reduce=1, and
191 2^(3-m-Exp(c)) otherwise */
192 err = 3 + 3 * reduce - MPFR_GET_EXP (c);
193 if (neg)
194 MPFR_CHANGE_SIGN (c);
195
196 /* the absolute error on c is at most 2^(err-m), which we must put
197 in the form 2^(EXP(c)-err). */
198 err = MPFR_GET_EXP (c) + (mpfr_exp_t) m - err;
199 if (MPFR_CAN_ROUND (c, err, MPFR_PREC (y), rnd_mode))
200 break;
201 /* check for huge cancellation */
202 if (err < (mpfr_exp_t) MPFR_PREC (y))
203 m += MPFR_PREC (y) - err;
204 /* Check if near 1 */
205 if (MPFR_GET_EXP (c) == 1
206 && MPFR_MANT (c)[MPFR_LIMB_SIZE (c)-1] == MPFR_LIMB_HIGHBIT)
207 m += m;
208
209 next_step:
210 MPFR_ZIV_NEXT (loop, m);
211 mpfr_set_prec (c, m);
212 }
213 MPFR_ZIV_FREE (loop);
214
215 inexy = mpfr_set (y, c, rnd_mode);
216 inexz = mpfr_set (z, xr, rnd_mode);
217
218 mpfr_clear (c);
219 mpfr_clear (xr);
220
221 end:
222 MPFR_SAVE_EXPO_FREE (expo);
223 /* FIXME: add a test for bug before revision 7355 */
224 inexy = mpfr_check_range (y, inexy, rnd_mode);
225 inexz = mpfr_check_range (z, inexz, rnd_mode);
226 MPFR_RET (INEX(inexy,inexz));
227 }
228
229 /*************** asymptotically fast implementation below ********************/
230
231 /* truncate Q from R to at most prec bits.
232 Return the number of truncated bits.
233 */
234 static mpfr_prec_t
reduce(mpz_t Q,mpz_srcptr R,mpfr_prec_t prec)235 reduce (mpz_t Q, mpz_srcptr R, mpfr_prec_t prec)
236 {
237 mpfr_prec_t l;
238
239 MPFR_MPZ_SIZEINBASE2(l, R);
240 l = (l > prec) ? l - prec : 0;
241 mpz_fdiv_q_2exp (Q, R, l);
242 return l;
243 }
244
245 /* truncate S and C so that the smaller has prec bits.
246 Return the number of truncated bits.
247 */
248 static unsigned long
reduce2(mpz_t S,mpz_t C,mpfr_prec_t prec)249 reduce2 (mpz_t S, mpz_t C, mpfr_prec_t prec)
250 {
251 unsigned long ls;
252 unsigned long lc;
253 unsigned long l;
254
255 MPFR_MPZ_SIZEINBASE2(ls, S);
256 MPFR_MPZ_SIZEINBASE2(lc, C);
257
258 l = (ls < lc) ? ls : lc; /* smaller length */
259 l = (l > prec) ? l - prec : 0;
260 mpz_fdiv_q_2exp (S, S, l);
261 mpz_fdiv_q_2exp (C, C, l);
262 return l;
263 }
264
265 /* return in S0/Q0 a rational approximation of sin(X) with absolute error
266 bounded by 9*2^(-prec), where 0 <= X=p/2^r <= 1/2,
267 and in C0/Q0 a rational approximation of cos(X), with relative error
268 bounded by 9*2^(-prec) (and also absolute error, since
269 |cos(X)| <= 1).
270 We have sin(X)/X = sum((-1)^i*(p/2^r)^i/(2i+1)!, i=0..infinity).
271 We use the following binary splitting formula:
272 P(a,b) = (-p)^(b-a)
273 Q(a,b) = (2a)*(2a+1)*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
274 T(a,b) = 1 if a+1=b, Q(c,b)*T(a,c)+P(a,c)*T(c,b) otherwise.
275
276 Since we use P(a,b) for b-a=2^k only, we compute only p^(2^k).
277 We do not store the factor 2^r in Q().
278
279 Then sin(X)/X ~ T(0,i)/Q(0,i) for i so that (p/2^r)^i/i! is small enough.
280
281 Return l such that Q0 has to be multiplied by 2^l.
282
283 Assumes prec >= 10.
284 */
285
286 #define KMAX 64
287 static unsigned long
sin_bs_aux(mpz_t Q0,mpz_t S0,mpz_t C0,mpz_srcptr p,mpfr_prec_t r,mpfr_prec_t prec)288 sin_bs_aux (mpz_t Q0, mpz_t S0, mpz_t C0, mpz_srcptr p, mpfr_prec_t r,
289 mpfr_prec_t prec)
290 {
291 mpz_t T[KMAX], Q[KMAX], ptoj[KMAX], pp;
292 mpfr_prec_t log2_nb_terms[KMAX], mult[KMAX];
293 mpfr_prec_t accu[KMAX], size_ptoj[KMAX];
294 mpfr_prec_t prec_i_have, h, r0 = r, pp_s, p_s;
295 unsigned long i, j, m;
296 int alloc, k, l;
297
298 if (MPFR_UNLIKELY(mpz_cmp_ui (p, 0) == 0)) /* sin(x)/x -> 1 */
299 {
300 mpz_set_ui (Q0, 1);
301 mpz_set_ui (S0, 1);
302 mpz_set_ui (C0, 1);
303 return 0;
304 }
305
306 /* check that X=p/2^r <= 1/2 */
307 MPFR_ASSERTN(mpz_sizeinbase (p, 2) - (mpfr_exp_t) r <= -1);
308
309 mpz_init (pp);
310
311 /* normalize p (non-zero here) */
312 h = mpz_scan1 (p, 0);
313 mpz_fdiv_q_2exp (pp, p, h); /* p = pp * 2^h */
314 mpz_mul (pp, pp, pp);
315 r = 2 * (r - h); /* x^2 = (p/2^r0)^2 = pp / 2^r */
316
317 /* now p is odd */
318 alloc = 2;
319 mpz_init_set_ui (T[0], 6);
320 mpz_init_set_ui (Q[0], 6);
321 mpz_init_set (ptoj[0], pp); /* ptoj[i] = pp^(2^i) */
322 mpz_init (T[1]);
323 mpz_init (Q[1]);
324 mpz_init (ptoj[1]);
325 mpz_mul (ptoj[1], pp, pp); /* ptoj[1] = pp^2 */
326 MPFR_MPZ_SIZEINBASE2(size_ptoj[1], ptoj[1]);
327
328 mpz_mul_2exp (T[0], T[0], r);
329 mpz_sub (T[0], T[0], pp); /* 6*2^r - pp = 6*2^r*(1 - x^2/6) */
330 log2_nb_terms[0] = 1;
331
332 /* already take into account the factor x=p/2^r in sin(x) = x * (...) */
333 MPFR_MPZ_SIZEINBASE2(pp_s, pp);
334 MPFR_MPZ_SIZEINBASE2(p_s, p);
335 mult[0] = r - pp_s + r0 - p_s;
336 /* we have x^3 < 1/2^mult[0] */
337
338 for (i = 2, k = 0, prec_i_have = mult[0]; prec_i_have < prec; i += 2)
339 {
340 /* i is even here */
341 /* invariant: Q[0]*Q[1]*...*Q[k] equals (2i-1)!,
342 we have already summed terms of index < i
343 in S[0]/Q[0], ..., S[k]/Q[k] */
344 k ++;
345 if (k + 1 >= alloc) /* necessarily k + 1 = alloc */
346 {
347 MPFR_ASSERTD (k + 1 == alloc);
348 alloc ++;
349 MPFR_ASSERTN (k + 1 < KMAX);
350 mpz_init (T[k+1]);
351 mpz_init (Q[k+1]);
352 mpz_init (ptoj[k+1]);
353 mpz_mul (ptoj[k+1], ptoj[k], ptoj[k]); /* pp^(2^(k+1)) */
354 MPFR_MPZ_SIZEINBASE2(size_ptoj[k+1], ptoj[k+1]);
355 }
356 /* for i even, we have Q[k] = (2*i)*(2*i+1), T[k] = 1,
357 then Q[k+1] = (2*i+2)*(2*i+3), T[k+1] = 1,
358 which reduces to T[k] = (2*i+2)*(2*i+3)*2^r-pp,
359 Q[k] = (2*i)*(2*i+1)*(2*i+2)*(2*i+3). */
360 MPFR_ASSERTN (k < KMAX);
361 log2_nb_terms[k] = 1;
362 mpz_set_ui (Q[k], 2 * i + 2);
363 mpz_mul_ui (Q[k], Q[k], 2 * i + 3);
364 mpz_mul_2exp (T[k], Q[k], r);
365 mpz_sub (T[k], T[k], pp);
366 mpz_mul_ui (Q[k], Q[k], 2 * i);
367 mpz_mul_ui (Q[k], Q[k], 2 * i + 1);
368 /* the next term of the series is divided by Q[k] and multiplied
369 by pp^2/2^(2r), thus the mult. factor < 1/2^mult[k] */
370 MPFR_MPZ_SIZEINBASE2(mult[k], Q[k]);
371 mult[k] += 2 * r - size_ptoj[1] - 1;
372 /* the absolute contribution of the next term is 1/2^accu[k] */
373 accu[k] = (k == 0) ? mult[k] : mult[k] + accu[k-1];
374 prec_i_have = accu[k]; /* the current term is < 1/2^accu[k] */
375 j = (i + 2) / 2;
376 l = 1;
377 while ((j & 1) == 0) /* combine and reduce */
378 {
379 MPFR_ASSERTN (k >= 1);
380 mpz_mul (T[k], T[k], ptoj[l]);
381 mpz_mul (T[k-1], T[k-1], Q[k]);
382 mpz_mul_2exp (T[k-1], T[k-1], r << l);
383 mpz_add (T[k-1], T[k-1], T[k]);
384 mpz_mul (Q[k-1], Q[k-1], Q[k]);
385 log2_nb_terms[k-1] ++; /* number of terms in S[k-1]
386 is a power of 2 by construction */
387 MPFR_MPZ_SIZEINBASE2(prec_i_have, Q[k]);
388 mult[k-1] += prec_i_have + (r << l) - size_ptoj[l] - 1;
389 accu[k-1] = (k == 1) ? mult[k-1] : mult[k-1] + accu[k-2];
390 prec_i_have = accu[k-1];
391 l ++;
392 j >>= 1;
393 k --;
394 }
395 }
396
397 /* accumulate all products in T[0] and Q[0]. Warning: contrary to above,
398 here we do not have log2_nb_terms[k-1] = log2_nb_terms[k]+1. */
399 h = 0; /* number of accumulated terms in the right part T[k]/Q[k] */
400 while (k > 0)
401 {
402 mpz_mul (T[k], T[k], ptoj[log2_nb_terms[k-1]]);
403 mpz_mul (T[k-1], T[k-1], Q[k]);
404 h += (mpfr_prec_t) 1 << log2_nb_terms[k];
405 mpz_mul_2exp (T[k-1], T[k-1], r * h);
406 mpz_add (T[k-1], T[k-1], T[k]);
407 mpz_mul (Q[k-1], Q[k-1], Q[k]);
408 k--;
409 }
410
411 m = r0 + r * (i - 1); /* implicit multiplier 2^r for Q0 */
412 /* at this point T[0]/(2^m*Q[0]) is an approximation of sin(x) where the 1st
413 neglected term has contribution < 1/2^prec, thus since the series has
414 alternate signs, the error is < 1/2^prec */
415
416 /* we truncate Q0 to prec bits: the relative error is at most 2^(1-prec),
417 which means that Q0 = Q[0] * (1+theta) with |theta| <= 2^(1-prec)
418 [up to a power of two] */
419 m += reduce (Q0, Q[0], prec);
420 m -= reduce (T[0], T[0], prec);
421 /* multiply by x = p/2^m */
422 mpz_mul (S0, T[0], p);
423 m -= reduce (S0, S0, prec); /* S0 = T[0] * (1 + theta)^2 up to power of 2 */
424 /* sin(X) ~ S0/Q0*(1 + theta)^3 + err with |theta| <= 2^(1-prec) and
425 |err| <= 2^(-prec), thus since |S0/Q0| <= 1:
426 |sin(X) - S0/Q0| <= 4*|theta*S0/Q0| + |err| <= 9*2^(-prec) */
427
428 mpz_clear (pp);
429 for (k = 0; k < alloc; k ++)
430 {
431 mpz_clear (T[k]);
432 mpz_clear (Q[k]);
433 mpz_clear (ptoj[k]);
434 }
435
436 /* compute cos(X) from sin(X): sqrt(1-(S/Q)^2) = sqrt(Q^2-S^2)/Q
437 = sqrt(Q0^2*2^(2m)-S0^2)/Q0.
438 Write S/Q = sin(X) + eps with |eps| <= 9*2^(-prec),
439 then sqrt(Q^2-S^2) = sqrt(Q^2-Q^2*(sin(X)+eps)^2)
440 = sqrt(Q^2*cos(X)^2-Q^2*(2*sin(X)*eps+eps^2))
441 = sqrt(Q^2*cos(X)^2-Q^2*eps1) with |eps1|<=9*2^(-prec)
442 [using X<=1/2 and eps<=9*2^(-prec) and prec>=10]
443
444 Since we truncate the square root, we get:
445 sqrt(Q^2*cos(X)^2-Q^2*eps1)+eps2 with |eps2|<1
446 = Q*sqrt(cos(X)^2-eps1)+eps2
447 = Q*cos(X)*(1+eps3)+eps2 with |eps3| <= 6*2^(-prec)
448 = Q*cos(X)*(1+eps3+eps2/(Q*cos(X)))
449 = Q*cos(X)*(1+eps4) with |eps4| <= 9*2^(-prec)
450 since |Q| >= 2^(prec-1) */
451 /* we assume that Q0*2^m >= 2^(prec-1) */
452 MPFR_ASSERTN(m + mpz_sizeinbase (Q0, 2) >= prec);
453 mpz_mul (C0, Q0, Q0);
454 mpz_mul_2exp (C0, C0, 2 * m);
455 mpz_submul (C0, S0, S0);
456 mpz_sqrt (C0, C0);
457
458 return m;
459 }
460
461 /* Put in s and c approximations of sin(x) and cos(x) respectively.
462 Assumes 0 < x < Pi/4 and PREC(s) = PREC(c) >= 10.
463 Return err such that the relative error is bounded by 2^err ulps.
464 */
465 static int
sincos_aux(mpfr_ptr s,mpfr_ptr c,mpfr_srcptr x,mpfr_rnd_t rnd_mode)466 sincos_aux (mpfr_ptr s, mpfr_ptr c, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
467 {
468 mpfr_prec_t prec_s, sh;
469 mpz_t Q, S, C, Q2, S2, C2, y;
470 mpfr_t x2;
471 unsigned long l, l2, j, err;
472
473 MPFR_ASSERTD(MPFR_PREC(s) == MPFR_PREC(c));
474
475 prec_s = MPFR_PREC(s);
476
477 mpfr_init2 (x2, MPFR_PREC(x));
478 mpz_init (Q);
479 mpz_init (S);
480 mpz_init (C);
481 mpz_init (Q2);
482 mpz_init (S2);
483 mpz_init (C2);
484 mpz_init (y);
485
486 mpfr_set (x2, x, MPFR_RNDN); /* exact */
487 mpz_set_ui (Q, 1);
488 l = 0;
489 mpz_set_ui (S, 0); /* sin(0) = S/(2^l*Q), exact */
490 mpz_set_ui (C, 1); /* cos(0) = C/(2^l*Q), exact */
491
492 /* Invariant: x = X + x2/2^(sh-1), where the part X was already treated,
493 S/(2^l*Q) ~ sin(X), C/(2^l*Q) ~ cos(X), and x2/2^(sh-1) < Pi/4.
494 'sh-1' is the number of already shifted bits in x2.
495 */
496
497 for (sh = 1, j = 0; mpfr_cmp_ui (x2, 0) != 0 && sh <= prec_s; sh <<= 1, j++)
498 {
499 if (sh > prec_s / 2) /* sin(x) = x + O(x^3), cos(x) = 1 + O(x^2) */
500 {
501 l2 = -mpfr_get_z_2exp (S2, x2); /* S2/2^l2 = x2 */
502 l2 += sh - 1;
503 mpz_set_ui (Q2, 1);
504 mpz_set_ui (C2, 1);
505 mpz_mul_2exp (C2, C2, l2);
506 mpfr_set_ui (x2, 0, MPFR_RNDN);
507 }
508 else
509 {
510 /* y <- trunc(x2 * 2^sh) = trunc(x * 2^(2*sh-1)) */
511 mpfr_mul_2ui (x2, x2, sh, MPFR_RNDN); /* exact */
512 mpfr_get_z (y, x2, MPFR_RNDZ); /* round toward zero: now
513 0 <= x2 < 2^sh, thus
514 0 <= x2/2^(sh-1) < 2^(1-sh) */
515 if (mpz_cmp_ui (y, 0) == 0)
516 continue;
517 mpfr_sub_z (x2, x2, y, MPFR_RNDN); /* should be exact */
518 l2 = sin_bs_aux (Q2, S2, C2, y, 2 * sh - 1, prec_s);
519 /* we now have |S2/Q2/2^l2 - sin(X)| <= 9*2^(prec_s)
520 and |C2/Q2/2^l2 - cos(X)| <= 6*2^(prec_s), with X=y/2^(2sh-1) */
521 }
522 if (sh == 1) /* S=0, C=1 */
523 {
524 l = l2;
525 mpz_swap (Q, Q2);
526 mpz_swap (S, S2);
527 mpz_swap (C, C2);
528 }
529 else
530 {
531 /* s <- s*c2+c*s2, c <- c*c2-s*s2, using Karatsuba:
532 a = s+c, b = s2+c2, t = a*b, d = s*s2, e = c*c2,
533 s <- t - d - e, c <- e - d */
534 mpz_add (y, S, C); /* a */
535 mpz_mul (C, C, C2); /* e */
536 mpz_add (C2, C2, S2); /* b */
537 mpz_mul (S2, S, S2); /* d */
538 mpz_mul (y, y, C2); /* a*b */
539 mpz_sub (S, y, S2); /* t - d */
540 mpz_sub (S, S, C); /* t - d - e */
541 mpz_sub (C, C, S2); /* e - d */
542 mpz_mul (Q, Q, Q2);
543 /* after j loops, the error is <= (11j-2)*2^(prec_s) */
544 l += l2;
545 /* reduce Q to prec_s bits */
546 l += reduce (Q, Q, prec_s);
547 /* reduce S,C to prec_s bits, error <= 11*j*2^(prec_s) */
548 l -= reduce2 (S, C, prec_s);
549 }
550 }
551
552 j = 11 * j;
553 for (err = 0; j > 1; j = (j + 1) / 2, err ++);
554
555 mpfr_set_z (s, S, MPFR_RNDN);
556 mpfr_div_z (s, s, Q, MPFR_RNDN);
557 mpfr_div_2ui (s, s, l, MPFR_RNDN);
558
559 mpfr_set_z (c, C, MPFR_RNDN);
560 mpfr_div_z (c, c, Q, MPFR_RNDN);
561 mpfr_div_2ui (c, c, l, MPFR_RNDN);
562
563 mpz_clear (Q);
564 mpz_clear (S);
565 mpz_clear (C);
566 mpz_clear (Q2);
567 mpz_clear (S2);
568 mpz_clear (C2);
569 mpz_clear (y);
570 mpfr_clear (x2);
571 return err;
572 }
573
574 /* Assumes x is neither NaN, +/-Inf, nor +/- 0.
575 One of s and c might be NULL, in which case the corresponding value is
576 not computed.
577 Assumes s differs from c.
578 */
579 int
mpfr_sincos_fast(mpfr_ptr s,mpfr_ptr c,mpfr_srcptr x,mpfr_rnd_t rnd)580 mpfr_sincos_fast (mpfr_ptr s, mpfr_ptr c, mpfr_srcptr x, mpfr_rnd_t rnd)
581 {
582 int inexs, inexc;
583 mpfr_t x_red, ts, tc;
584 mpfr_prec_t w;
585 mpfr_exp_t err, errs, errc;
586 MPFR_GROUP_DECL (group);
587 MPFR_ZIV_DECL (loop);
588
589 MPFR_ASSERTN(s != c);
590 if (s == NULL)
591 w = MPFR_PREC(c);
592 else if (c == NULL)
593 w = MPFR_PREC(s);
594 else
595 w = MPFR_PREC(s) >= MPFR_PREC(c) ? MPFR_PREC(s) : MPFR_PREC(c);
596 w += MPFR_INT_CEIL_LOG2(w) + 9; /* ensures w >= 10 (needed by sincos_aux) */
597
598 MPFR_GROUP_INIT_2(group, w, ts, tc);
599
600 MPFR_ZIV_INIT (loop, w);
601 for (;;)
602 {
603 /* if 0 < x <= Pi/4, we can call sincos_aux directly */
604 if (MPFR_IS_POS(x) && mpfr_cmp_ui_2exp (x, 1686629713, -31) <= 0)
605 {
606 err = sincos_aux (ts, tc, x, MPFR_RNDN);
607 }
608 /* if -Pi/4 <= x < 0, use sin(-x)=-sin(x) */
609 else if (MPFR_IS_NEG(x) && mpfr_cmp_si_2exp (x, -1686629713, -31) >= 0)
610 {
611 MPFR_ALIAS(x_red, x, MPFR_SIGN_POS, MPFR_GET_EXP(x));
612 err = sincos_aux (ts, tc, x_red, MPFR_RNDN);
613 MPFR_CHANGE_SIGN(ts);
614 }
615 else /* argument reduction is needed */
616 {
617 long q;
618 mpfr_t pi;
619 int neg = 0;
620
621 mpfr_init2 (x_red, w);
622 mpfr_init2 (pi, (MPFR_EXP(x) > 0) ? w + MPFR_EXP(x) : w);
623 mpfr_const_pi (pi, MPFR_RNDN);
624 mpfr_div_2ui (pi, pi, 1, MPFR_RNDN); /* Pi/2 */
625 mpfr_remquo (x_red, &q, x, pi, MPFR_RNDN);
626 /* x = q * (Pi/2 + eps1) + x_red + eps2,
627 where |eps1| <= 1/2*ulp(Pi/2) = 2^(-w-MAX(0,EXP(x))),
628 and eps2 <= 1/2*ulp(x_red) <= 1/2*ulp(Pi/2) = 2^(-w)
629 Since |q| <= x/(Pi/2) <= |x|, we have
630 q*|eps1| <= 2^(-w), thus
631 |x - q * Pi/2 - x_red| <= 2^(1-w) */
632 /* now -Pi/4 <= x_red <= Pi/4: if x_red < 0, consider -x_red */
633 if (MPFR_IS_NEG(x_red))
634 {
635 mpfr_neg (x_red, x_red, MPFR_RNDN);
636 neg = 1;
637 }
638 err = sincos_aux (ts, tc, x_red, MPFR_RNDN);
639 err ++; /* to take into account the argument reduction */
640 if (neg) /* sin(-x) = -sin(x), cos(-x) = cos(x) */
641 mpfr_neg (ts, ts, MPFR_RNDN);
642 if (q & 2) /* sin(x+Pi) = -sin(x), cos(x+Pi) = -cos(x) */
643 {
644 mpfr_neg (ts, ts, MPFR_RNDN);
645 mpfr_neg (tc, tc, MPFR_RNDN);
646 }
647 if (q & 1) /* sin(x+Pi/2) = cos(x), cos(x+Pi/2) = -sin(x) */
648 {
649 mpfr_neg (ts, ts, MPFR_RNDN);
650 mpfr_swap (ts, tc);
651 }
652 mpfr_clear (x_red);
653 mpfr_clear (pi);
654 }
655 /* adjust errors with respect to absolute values */
656 errs = err - MPFR_EXP(ts);
657 errc = err - MPFR_EXP(tc);
658 if ((s == NULL || MPFR_CAN_ROUND (ts, w - errs, MPFR_PREC(s), rnd)) &&
659 (c == NULL || MPFR_CAN_ROUND (tc, w - errc, MPFR_PREC(c), rnd)))
660 break;
661 MPFR_ZIV_NEXT (loop, w);
662 MPFR_GROUP_REPREC_2(group, w, ts, tc);
663 }
664 MPFR_ZIV_FREE (loop);
665
666 inexs = (s == NULL) ? 0 : mpfr_set (s, ts, rnd);
667 inexc = (c == NULL) ? 0 : mpfr_set (c, tc, rnd);
668
669 MPFR_GROUP_CLEAR (group);
670 return INEX(inexs,inexc);
671 }
672