1 /* mpfr_sin_cos -- sine and cosine of a floating-point number
2
3 Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramel projects, INRIA.
5
6 This file is part of the GNU MPFR Library.
7
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25
26 /* (y, z) <- (sin(x), cos(x)), return value is 0 iff both results are exact
27 ie, iff x = 0 */
28 int
mpfr_sin_cos(mpfr_ptr y,mpfr_ptr z,mpfr_srcptr x,mpfr_rnd_t rnd_mode)29 mpfr_sin_cos (mpfr_ptr y, mpfr_ptr z, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
30 {
31 mpfr_prec_t prec, m;
32 int neg, reduce;
33 mpfr_t c, xr;
34 mpfr_srcptr xx;
35 mpfr_exp_t err, expx;
36 int inexy, inexz;
37 MPFR_ZIV_DECL (loop);
38 MPFR_SAVE_EXPO_DECL (expo);
39
40 MPFR_ASSERTN (y != z);
41
42 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
43 {
44 if (MPFR_IS_NAN(x) || MPFR_IS_INF(x))
45 {
46 MPFR_SET_NAN (y);
47 MPFR_SET_NAN (z);
48 MPFR_RET_NAN;
49 }
50 else /* x is zero */
51 {
52 MPFR_ASSERTD (MPFR_IS_ZERO (x));
53 MPFR_SET_ZERO (y);
54 MPFR_SET_SAME_SIGN (y, x);
55 /* y = 0, thus exact, but z is inexact in case of underflow
56 or overflow */
57 inexy = 0; /* y is exact */
58 inexz = mpfr_set_ui (z, 1, rnd_mode);
59 return INEX(inexy,inexz);
60 }
61 }
62
63 MPFR_LOG_FUNC
64 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd_mode),
65 ("sin[%Pu]=%.*Rg cos[%Pu]=%.*Rg", mpfr_get_prec(y), mpfr_log_prec, y,
66 mpfr_get_prec (z), mpfr_log_prec, z));
67
68 MPFR_SAVE_EXPO_MARK (expo);
69
70 prec = MAX (MPFR_PREC (y), MPFR_PREC (z));
71 m = prec + MPFR_INT_CEIL_LOG2 (prec) + 13;
72 expx = MPFR_GET_EXP (x);
73
74 /* When x is close to 0, say 2^(-k), then there is a cancellation of about
75 2k bits in 1-cos(x)^2. FIXME: in that case, it would be more efficient
76 to compute sin(x) directly. VL: This is partly done by using
77 MPFR_FAST_COMPUTE_IF_SMALL_INPUT from the mpfr_sin and mpfr_cos
78 functions. Moreover, any overflow on m is avoided. */
79 if (expx < 0)
80 {
81 /* Warning: in case y = x, and the first call to
82 MPFR_FAST_COMPUTE_IF_SMALL_INPUT succeeds but the second fails,
83 we will have clobbered the original value of x.
84 The workaround is to first compute z = cos(x) in that case, since
85 y and z are different. */
86 if (y != x)
87 /* y and x differ, thus we can safely try to compute y first */
88 {
89 MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
90 y, x, -2 * expx, 2, 0, rnd_mode,
91 { inexy = _inexact;
92 goto small_input; });
93 if (0)
94 {
95 small_input:
96 /* we can go here only if we can round sin(x) */
97 MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
98 z, __gmpfr_one, -2 * expx, 1, 0, rnd_mode,
99 { inexz = _inexact;
100 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
101 goto end; });
102 }
103
104 /* if we go here, one of the two MPFR_FAST_COMPUTE_IF_SMALL_INPUT
105 calls failed */
106 }
107 else /* y and x are the same variable: try to compute z first, which
108 necessarily differs */
109 {
110 MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
111 z, __gmpfr_one, -2 * expx, 1, 0, rnd_mode,
112 { inexz = _inexact;
113 goto small_input2; });
114 if (0)
115 {
116 small_input2:
117 /* we can go here only if we can round cos(x) */
118 MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
119 y, x, -2 * expx, 2, 0, rnd_mode,
120 { inexy = _inexact;
121 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
122 goto end; });
123 }
124 }
125 m += 2 * (-expx);
126 }
127
128 if (prec >= MPFR_SINCOS_THRESHOLD)
129 {
130 MPFR_SAVE_EXPO_FREE (expo);
131 return mpfr_sincos_fast (y, z, x, rnd_mode);
132 }
133
134 mpfr_init (c);
135 mpfr_init (xr);
136
137 MPFR_ZIV_INIT (loop, m);
138 for (;;)
139 {
140 /* the following is copied from sin.c */
141 if (expx >= 2) /* reduce the argument */
142 {
143 reduce = 1;
144 mpfr_set_prec (c, expx + m - 1);
145 mpfr_set_prec (xr, m);
146 mpfr_const_pi (c, MPFR_RNDN);
147 mpfr_mul_2ui (c, c, 1, MPFR_RNDN);
148 mpfr_remainder (xr, x, c, MPFR_RNDN);
149 mpfr_div_2ui (c, c, 1, MPFR_RNDN);
150 if (MPFR_SIGN (xr) > 0)
151 mpfr_sub (c, c, xr, MPFR_RNDZ);
152 else
153 mpfr_add (c, c, xr, MPFR_RNDZ);
154 if (MPFR_IS_ZERO(xr)
155 || MPFR_EXP(xr) < (mpfr_exp_t) 3 - (mpfr_exp_t) m
156 || MPFR_EXP(c) < (mpfr_exp_t) 3 - (mpfr_exp_t) m)
157 goto next_step;
158 xx = xr;
159 }
160 else /* the input argument is already reduced */
161 {
162 reduce = 0;
163 xx = x;
164 }
165
166 neg = MPFR_IS_NEG (xx); /* gives sign of sin(x) */
167 mpfr_set_prec (c, m);
168 mpfr_cos (c, xx, MPFR_RNDZ);
169 /* If no argument reduction was performed, the error is at most ulp(c),
170 otherwise it is at most ulp(c) + 2^(2-m). Since |c| < 1, we have
171 ulp(c) <= 2^(-m), thus the error is bounded by 2^(3-m) in that later
172 case. */
173 if (reduce == 0)
174 err = m;
175 else
176 err = MPFR_GET_EXP (c) + (mpfr_exp_t) (m - 3);
177 if (!mpfr_can_round (c, err, MPFR_RNDN, MPFR_RNDZ,
178 MPFR_PREC (z) + (rnd_mode == MPFR_RNDN)))
179 goto next_step;
180
181 /* we can't set z now, because in case z = x, and the mpfr_can_round()
182 call below fails, we will have clobbered the input */
183 mpfr_set_prec (xr, MPFR_PREC(c));
184 mpfr_swap (xr, c); /* save the approximation of the cosine in xr */
185 mpfr_sqr (c, xr, MPFR_RNDU); /* the absolute error is bounded by
186 2^(5-m) if reduce=1, and by 2^(2-m)
187 otherwise */
188 mpfr_ui_sub (c, 1, c, MPFR_RNDN); /* error bounded by 2^(6-m) if reduce
189 is 1, and 2^(3-m) otherwise */
190 mpfr_sqrt (c, c, MPFR_RNDN); /* the absolute error is bounded by
191 2^(6-m-Exp(c)) if reduce=1, and
192 2^(3-m-Exp(c)) otherwise */
193 err = 3 + 3 * reduce - MPFR_GET_EXP (c);
194 if (neg)
195 MPFR_CHANGE_SIGN (c);
196
197 /* the absolute error on c is at most 2^(err-m), which we must put
198 in the form 2^(EXP(c)-err). */
199 err = MPFR_GET_EXP (c) + (mpfr_exp_t) m - err;
200 if (mpfr_can_round (c, err, MPFR_RNDN, MPFR_RNDZ,
201 MPFR_PREC (y) + (rnd_mode == MPFR_RNDN)))
202 break;
203 /* check for huge cancellation */
204 if (err < (mpfr_exp_t) MPFR_PREC (y))
205 m += MPFR_PREC (y) - err;
206 /* Check if near 1 */
207 if (MPFR_GET_EXP (c) == 1
208 && MPFR_MANT (c)[MPFR_LIMB_SIZE (c)-1] == MPFR_LIMB_HIGHBIT)
209 m += m;
210
211 next_step:
212 MPFR_ZIV_NEXT (loop, m);
213 mpfr_set_prec (c, m);
214 }
215 MPFR_ZIV_FREE (loop);
216
217 inexy = mpfr_set (y, c, rnd_mode);
218 inexz = mpfr_set (z, xr, rnd_mode);
219
220 mpfr_clear (c);
221 mpfr_clear (xr);
222
223 end:
224 MPFR_SAVE_EXPO_FREE (expo);
225 /* FIXME: add a test for bug before revision 7355 */
226 inexy = mpfr_check_range (y, inexy, rnd_mode);
227 inexz = mpfr_check_range (z, inexz, rnd_mode);
228 MPFR_RET (INEX(inexy,inexz));
229 }
230
231 /*************** asymptotically fast implementation below ********************/
232
233 /* truncate Q from R to at most prec bits.
234 Return the number of truncated bits.
235 */
236 static mpfr_prec_t
reduce(mpz_t Q,mpz_srcptr R,mpfr_prec_t prec)237 reduce (mpz_t Q, mpz_srcptr R, mpfr_prec_t prec)
238 {
239 mpfr_prec_t l = mpz_sizeinbase (R, 2);
240
241 l = (l > prec) ? l - prec : 0;
242 mpz_fdiv_q_2exp (Q, R, l);
243 return l;
244 }
245
246 /* truncate S and C so that the smaller has prec bits.
247 Return the number of truncated bits.
248 */
249 static unsigned long
reduce2(mpz_t S,mpz_t C,mpfr_prec_t prec)250 reduce2 (mpz_t S, mpz_t C, mpfr_prec_t prec)
251 {
252 unsigned long ls = mpz_sizeinbase (S, 2);
253 unsigned long lc = mpz_sizeinbase (C, 2);
254 unsigned long l;
255
256 l = (ls < lc) ? ls : lc; /* smaller length */
257 l = (l > prec) ? l - prec : 0;
258 mpz_fdiv_q_2exp (S, S, l);
259 mpz_fdiv_q_2exp (C, C, l);
260 return l;
261 }
262
263 /* return in S0/Q0 a rational approximation of sin(X) with absolute error
264 bounded by 9*2^(-prec), where 0 <= X=p/2^r <= 1/2,
265 and in C0/Q0 a rational approximation of cos(X), with relative error
266 bounded by 9*2^(-prec) (and also absolute error, since
267 |cos(X)| <= 1).
268 We have sin(X)/X = sum((-1)^i*(p/2^r)^i/(2i+1)!, i=0..infinity).
269 We use the following binary splitting formula:
270 P(a,b) = (-p)^(b-a)
271 Q(a,b) = (2a)*(2a+1)*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
272 T(a,b) = 1 if a+1=b, Q(c,b)*T(a,c)+P(a,c)*T(c,b) otherwise.
273
274 Since we use P(a,b) for b-a=2^k only, we compute only p^(2^k).
275 We do not store the factor 2^r in Q().
276
277 Then sin(X)/X ~ T(0,i)/Q(0,i) for i so that (p/2^r)^i/i! is small enough.
278
279 Return l such that Q0 has to be multiplied by 2^l.
280
281 Assumes prec >= 10.
282 */
283 static unsigned long
sin_bs_aux(mpz_t Q0,mpz_t S0,mpz_t C0,mpz_srcptr p,mpfr_prec_t r,mpfr_prec_t prec)284 sin_bs_aux (mpz_t Q0, mpz_t S0, mpz_t C0, mpz_srcptr p, mpfr_prec_t r,
285 mpfr_prec_t prec)
286 {
287 mpz_t T[GMP_NUMB_BITS], Q[GMP_NUMB_BITS], ptoj[GMP_NUMB_BITS], pp;
288 mpfr_prec_t log2_nb_terms[GMP_NUMB_BITS], mult[GMP_NUMB_BITS];
289 mpfr_prec_t accu[GMP_NUMB_BITS], size_ptoj[GMP_NUMB_BITS];
290 mpfr_prec_t prec_i_have, r0 = r;
291 unsigned long alloc, i, j, k;
292 mpfr_prec_t l;
293
294 if (MPFR_UNLIKELY(mpz_cmp_ui (p, 0) == 0)) /* sin(x)/x -> 1 */
295 {
296 mpz_set_ui (Q0, 1);
297 mpz_set_ui (S0, 1);
298 mpz_set_ui (C0, 1);
299 return 0;
300 }
301
302 /* check that X=p/2^r <= 1/2 */
303 MPFR_ASSERTN(mpz_sizeinbase (p, 2) - (mpfr_exp_t) r <= -1);
304
305 mpz_init (pp);
306
307 /* normalize p (non-zero here) */
308 l = mpz_scan1 (p, 0);
309 mpz_fdiv_q_2exp (pp, p, l); /* p = pp * 2^l */
310 mpz_mul (pp, pp, pp);
311 r = 2 * (r - l); /* x^2 = (p/2^r0)^2 = pp / 2^r */
312
313 /* now p is odd */
314 alloc = 2;
315 mpz_init_set_ui (T[0], 6);
316 mpz_init_set_ui (Q[0], 6);
317 mpz_init_set (ptoj[0], pp); /* ptoj[i] = pp^(2^i) */
318 mpz_init (T[1]);
319 mpz_init (Q[1]);
320 mpz_init (ptoj[1]);
321 mpz_mul (ptoj[1], pp, pp); /* ptoj[1] = pp^2 */
322 size_ptoj[1] = mpz_sizeinbase (ptoj[1], 2);
323
324 mpz_mul_2exp (T[0], T[0], r);
325 mpz_sub (T[0], T[0], pp); /* 6*2^r - pp = 6*2^r*(1 - x^2/6) */
326 log2_nb_terms[0] = 1;
327
328 /* already take into account the factor x=p/2^r in sin(x) = x * (...) */
329 mult[0] = r - mpz_sizeinbase (pp, 2) + r0 - mpz_sizeinbase (p, 2);
330 /* we have x^3 < 1/2^mult[0] */
331
332 for (i = 2, k = 0, prec_i_have = mult[0]; prec_i_have < prec; i += 2)
333 {
334 /* i is even here */
335 /* invariant: Q[0]*Q[1]*...*Q[k] equals (2i-1)!,
336 we have already summed terms of index < i
337 in S[0]/Q[0], ..., S[k]/Q[k] */
338 k ++;
339 if (k + 1 >= alloc) /* necessarily k + 1 = alloc */
340 {
341 alloc ++;
342 mpz_init (T[k+1]);
343 mpz_init (Q[k+1]);
344 mpz_init (ptoj[k+1]);
345 mpz_mul (ptoj[k+1], ptoj[k], ptoj[k]); /* pp^(2^(k+1)) */
346 size_ptoj[k+1] = mpz_sizeinbase (ptoj[k+1], 2);
347 }
348 /* for i even, we have Q[k] = (2*i)*(2*i+1), T[k] = 1,
349 then Q[k+1] = (2*i+2)*(2*i+3), T[k+1] = 1,
350 which reduces to T[k] = (2*i+2)*(2*i+3)*2^r-pp,
351 Q[k] = (2*i)*(2*i+1)*(2*i+2)*(2*i+3). */
352 log2_nb_terms[k] = 1;
353 mpz_set_ui (Q[k], (2 * i + 2) * (2 * i + 3));
354 mpz_mul_2exp (T[k], Q[k], r);
355 mpz_sub (T[k], T[k], pp);
356 mpz_mul_ui (Q[k], Q[k], (2 * i) * (2 * i + 1));
357 /* the next term of the series is divided by Q[k] and multiplied
358 by pp^2/2^(2r), thus the mult. factor < 1/2^mult[k] */
359 mult[k] = mpz_sizeinbase (Q[k], 2) + 2 * r - size_ptoj[1] - 1;
360 /* the absolute contribution of the next term is 1/2^accu[k] */
361 accu[k] = (k == 0) ? mult[k] : mult[k] + accu[k-1];
362 prec_i_have = accu[k]; /* the current term is < 1/2^accu[k] */
363 j = (i + 2) / 2;
364 l = 1;
365 while ((j & 1) == 0) /* combine and reduce */
366 {
367 mpz_mul (T[k], T[k], ptoj[l]);
368 mpz_mul (T[k-1], T[k-1], Q[k]);
369 mpz_mul_2exp (T[k-1], T[k-1], r << l);
370 mpz_add (T[k-1], T[k-1], T[k]);
371 mpz_mul (Q[k-1], Q[k-1], Q[k]);
372 log2_nb_terms[k-1] ++; /* number of terms in S[k-1]
373 is a power of 2 by construction */
374 prec_i_have = mpz_sizeinbase (Q[k], 2);
375 mult[k-1] += prec_i_have + (r << l) - size_ptoj[l] - 1;
376 accu[k-1] = (k == 1) ? mult[k-1] : mult[k-1] + accu[k-2];
377 prec_i_have = accu[k-1];
378 l ++;
379 j >>= 1;
380 k --;
381 }
382 }
383
384 /* accumulate all products in T[0] and Q[0]. Warning: contrary to above,
385 here we do not have log2_nb_terms[k-1] = log2_nb_terms[k]+1. */
386 l = 0; /* number of accumulated terms in the right part T[k]/Q[k] */
387 while (k > 0)
388 {
389 j = log2_nb_terms[k-1];
390 mpz_mul (T[k], T[k], ptoj[j]);
391 mpz_mul (T[k-1], T[k-1], Q[k]);
392 l += 1 << log2_nb_terms[k];
393 mpz_mul_2exp (T[k-1], T[k-1], r * l);
394 mpz_add (T[k-1], T[k-1], T[k]);
395 mpz_mul (Q[k-1], Q[k-1], Q[k]);
396 k--;
397 }
398
399 l = r0 + r * (i - 1); /* implicit multiplier 2^r for Q0 */
400 /* at this point T[0]/(2^l*Q[0]) is an approximation of sin(x) where the 1st
401 neglected term has contribution < 1/2^prec, thus since the series has
402 alternate signs, the error is < 1/2^prec */
403
404 /* we truncate Q0 to prec bits: the relative error is at most 2^(1-prec),
405 which means that Q0 = Q[0] * (1+theta) with |theta| <= 2^(1-prec)
406 [up to a power of two] */
407 l += reduce (Q0, Q[0], prec);
408 l -= reduce (T[0], T[0], prec);
409 /* multiply by x = p/2^l */
410 mpz_mul (S0, T[0], p);
411 l -= reduce (S0, S0, prec); /* S0 = T[0] * (1 + theta)^2 up to power of 2 */
412 /* sin(X) ~ S0/Q0*(1 + theta)^3 + err with |theta| <= 2^(1-prec) and
413 |err| <= 2^(-prec), thus since |S0/Q0| <= 1:
414 |sin(X) - S0/Q0| <= 4*|theta*S0/Q0| + |err| <= 9*2^(-prec) */
415
416 mpz_clear (pp);
417 for (j = 0; j < alloc; j ++)
418 {
419 mpz_clear (T[j]);
420 mpz_clear (Q[j]);
421 mpz_clear (ptoj[j]);
422 }
423
424 /* compute cos(X) from sin(X): sqrt(1-(S/Q)^2) = sqrt(Q^2-S^2)/Q
425 = sqrt(Q0^2*2^(2l)-S0^2)/Q0.
426 Write S/Q = sin(X) + eps with |eps| <= 9*2^(-prec),
427 then sqrt(Q^2-S^2) = sqrt(Q^2-Q^2*(sin(X)+eps)^2)
428 = sqrt(Q^2*cos(X)^2-Q^2*(2*sin(X)*eps+eps^2))
429 = sqrt(Q^2*cos(X)^2-Q^2*eps1) with |eps1|<=9*2^(-prec)
430 [using X<=1/2 and eps<=9*2^(-prec) and prec>=10]
431
432 Since we truncate the square root, we get:
433 sqrt(Q^2*cos(X)^2-Q^2*eps1)+eps2 with |eps2|<1
434 = Q*sqrt(cos(X)^2-eps1)+eps2
435 = Q*cos(X)*(1+eps3)+eps2 with |eps3| <= 6*2^(-prec)
436 = Q*cos(X)*(1+eps3+eps2/(Q*cos(X)))
437 = Q*cos(X)*(1+eps4) with |eps4| <= 9*2^(-prec)
438 since |Q| >= 2^(prec-1) */
439 /* we assume that Q0*2^l >= 2^(prec-1) */
440 MPFR_ASSERTN(l + mpz_sizeinbase (Q0, 2) >= prec);
441 mpz_mul (C0, Q0, Q0);
442 mpz_mul_2exp (C0, C0, 2 * l);
443 mpz_submul (C0, S0, S0);
444 mpz_sqrt (C0, C0);
445
446 return l;
447 }
448
449 /* Put in s and c approximations of sin(x) and cos(x) respectively.
450 Assumes 0 < x < Pi/4 and PREC(s) = PREC(c) >= 10.
451 Return err such that the relative error is bounded by 2^err ulps.
452 */
453 static int
sincos_aux(mpfr_t s,mpfr_t c,mpfr_srcptr x,mpfr_rnd_t rnd_mode)454 sincos_aux (mpfr_t s, mpfr_t c, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
455 {
456 mpfr_prec_t prec_s, sh;
457 mpz_t Q, S, C, Q2, S2, C2, y;
458 mpfr_t x2;
459 unsigned long l, l2, j, err;
460
461 MPFR_ASSERTD(MPFR_PREC(s) == MPFR_PREC(c));
462
463 prec_s = MPFR_PREC(s);
464
465 mpfr_init2 (x2, MPFR_PREC(x));
466 mpz_init (Q);
467 mpz_init (S);
468 mpz_init (C);
469 mpz_init (Q2);
470 mpz_init (S2);
471 mpz_init (C2);
472 mpz_init (y);
473
474 mpfr_set (x2, x, MPFR_RNDN); /* exact */
475 mpz_set_ui (Q, 1);
476 l = 0;
477 mpz_set_ui (S, 0); /* sin(0) = S/(2^l*Q), exact */
478 mpz_set_ui (C, 1); /* cos(0) = C/(2^l*Q), exact */
479
480 /* Invariant: x = X + x2/2^(sh-1), where the part X was already treated,
481 S/(2^l*Q) ~ sin(X), C/(2^l*Q) ~ cos(X), and x2/2^(sh-1) < Pi/4.
482 'sh-1' is the number of already shifted bits in x2.
483 */
484
485 for (sh = 1, j = 0; mpfr_cmp_ui (x2, 0) != 0 && sh <= prec_s; sh <<= 1, j++)
486 {
487 if (sh > prec_s / 2) /* sin(x) = x + O(x^3), cos(x) = 1 + O(x^2) */
488 {
489 l2 = -mpfr_get_z_2exp (S2, x2); /* S2/2^l2 = x2 */
490 l2 += sh - 1;
491 mpz_set_ui (Q2, 1);
492 mpz_set_ui (C2, 1);
493 mpz_mul_2exp (C2, C2, l2);
494 mpfr_set_ui (x2, 0, MPFR_RNDN);
495 }
496 else
497 {
498 /* y <- trunc(x2 * 2^sh) = trunc(x * 2^(2*sh-1)) */
499 mpfr_mul_2exp (x2, x2, sh, MPFR_RNDN); /* exact */
500 mpfr_get_z (y, x2, MPFR_RNDZ); /* round towards zero: now
501 0 <= x2 < 2^sh, thus
502 0 <= x2/2^(sh-1) < 2^(1-sh) */
503 if (mpz_cmp_ui (y, 0) == 0)
504 continue;
505 mpfr_sub_z (x2, x2, y, MPFR_RNDN); /* should be exact */
506 l2 = sin_bs_aux (Q2, S2, C2, y, 2 * sh - 1, prec_s);
507 /* we now have |S2/Q2/2^l2 - sin(X)| <= 9*2^(prec_s)
508 and |C2/Q2/2^l2 - cos(X)| <= 6*2^(prec_s), with X=y/2^(2sh-1) */
509 }
510 if (sh == 1) /* S=0, C=1 */
511 {
512 l = l2;
513 mpz_swap (Q, Q2);
514 mpz_swap (S, S2);
515 mpz_swap (C, C2);
516 }
517 else
518 {
519 /* s <- s*c2+c*s2, c <- c*c2-s*s2, using Karatsuba:
520 a = s+c, b = s2+c2, t = a*b, d = s*s2, e = c*c2,
521 s <- t - d - e, c <- e - d */
522 mpz_add (y, S, C); /* a */
523 mpz_mul (C, C, C2); /* e */
524 mpz_add (C2, C2, S2); /* b */
525 mpz_mul (S2, S, S2); /* d */
526 mpz_mul (y, y, C2); /* a*b */
527 mpz_sub (S, y, S2); /* t - d */
528 mpz_sub (S, S, C); /* t - d - e */
529 mpz_sub (C, C, S2); /* e - d */
530 mpz_mul (Q, Q, Q2);
531 /* after j loops, the error is <= (11j-2)*2^(prec_s) */
532 l += l2;
533 /* reduce Q to prec_s bits */
534 l += reduce (Q, Q, prec_s);
535 /* reduce S,C to prec_s bits, error <= 11*j*2^(prec_s) */
536 l -= reduce2 (S, C, prec_s);
537 }
538 }
539
540 j = 11 * j;
541 for (err = 0; j > 1; j = (j + 1) / 2, err ++);
542
543 mpfr_set_z (s, S, MPFR_RNDN);
544 mpfr_div_z (s, s, Q, MPFR_RNDN);
545 mpfr_div_2exp (s, s, l, MPFR_RNDN);
546
547 mpfr_set_z (c, C, MPFR_RNDN);
548 mpfr_div_z (c, c, Q, MPFR_RNDN);
549 mpfr_div_2exp (c, c, l, MPFR_RNDN);
550
551 mpz_clear (Q);
552 mpz_clear (S);
553 mpz_clear (C);
554 mpz_clear (Q2);
555 mpz_clear (S2);
556 mpz_clear (C2);
557 mpz_clear (y);
558 mpfr_clear (x2);
559 return err;
560 }
561
562 /* Assumes x is neither NaN, +/-Inf, nor +/- 0.
563 One of s and c might be NULL, in which case the corresponding value is
564 not computed.
565 Assumes s differs from c.
566 */
567 int
mpfr_sincos_fast(mpfr_t s,mpfr_t c,mpfr_srcptr x,mpfr_rnd_t rnd)568 mpfr_sincos_fast (mpfr_t s, mpfr_t c, mpfr_srcptr x, mpfr_rnd_t rnd)
569 {
570 int inexs, inexc;
571 mpfr_t x_red, ts, tc;
572 mpfr_prec_t w;
573 mpfr_exp_t err, errs, errc;
574 MPFR_ZIV_DECL (loop);
575
576 MPFR_ASSERTN(s != c);
577 if (s == NULL)
578 w = MPFR_PREC(c);
579 else if (c == NULL)
580 w = MPFR_PREC(s);
581 else
582 w = MPFR_PREC(s) >= MPFR_PREC(c) ? MPFR_PREC(s) : MPFR_PREC(c);
583 w += MPFR_INT_CEIL_LOG2(w) + 9; /* ensures w >= 10 (needed by sincos_aux) */
584 mpfr_init2 (ts, w);
585 mpfr_init2 (tc, w);
586
587 MPFR_ZIV_INIT (loop, w);
588 for (;;)
589 {
590 /* if 0 < x <= Pi/4, we can call sincos_aux directly */
591 if (MPFR_IS_POS(x) && mpfr_cmp_ui_2exp (x, 1686629713, -31) <= 0)
592 {
593 err = sincos_aux (ts, tc, x, MPFR_RNDN);
594 }
595 /* if -Pi/4 <= x < 0, use sin(-x)=-sin(x) */
596 else if (MPFR_IS_NEG(x) && mpfr_cmp_si_2exp (x, -1686629713, -31) >= 0)
597 {
598 mpfr_init2 (x_red, MPFR_PREC(x));
599 mpfr_neg (x_red, x, rnd); /* exact */
600 err = sincos_aux (ts, tc, x_red, MPFR_RNDN);
601 mpfr_neg (ts, ts, MPFR_RNDN);
602 mpfr_clear (x_red);
603 }
604 else /* argument reduction is needed */
605 {
606 long q;
607 mpfr_t pi;
608 int neg = 0;
609
610 mpfr_init2 (x_red, w);
611 mpfr_init2 (pi, (MPFR_EXP(x) > 0) ? w + MPFR_EXP(x) : w);
612 mpfr_const_pi (pi, MPFR_RNDN);
613 mpfr_div_2exp (pi, pi, 1, MPFR_RNDN); /* Pi/2 */
614 mpfr_remquo (x_red, &q, x, pi, MPFR_RNDN);
615 /* x = q * (Pi/2 + eps1) + x_red + eps2,
616 where |eps1| <= 1/2*ulp(Pi/2) = 2^(-w-MAX(0,EXP(x))),
617 and eps2 <= 1/2*ulp(x_red) <= 1/2*ulp(Pi/2) = 2^(-w)
618 Since |q| <= x/(Pi/2) <= |x|, we have
619 q*|eps1| <= 2^(-w), thus
620 |x - q * Pi/2 - x_red| <= 2^(1-w) */
621 /* now -Pi/4 <= x_red <= Pi/4: if x_red < 0, consider -x_red */
622 if (MPFR_IS_NEG(x_red))
623 {
624 mpfr_neg (x_red, x_red, MPFR_RNDN);
625 neg = 1;
626 }
627 err = sincos_aux (ts, tc, x_red, MPFR_RNDN);
628 err ++; /* to take into account the argument reduction */
629 if (neg) /* sin(-x) = -sin(x), cos(-x) = cos(x) */
630 mpfr_neg (ts, ts, MPFR_RNDN);
631 if (q & 2) /* sin(x+Pi) = -sin(x), cos(x+Pi) = -cos(x) */
632 {
633 mpfr_neg (ts, ts, MPFR_RNDN);
634 mpfr_neg (tc, tc, MPFR_RNDN);
635 }
636 if (q & 1) /* sin(x+Pi/2) = cos(x), cos(x+Pi/2) = -sin(x) */
637 {
638 mpfr_neg (ts, ts, MPFR_RNDN);
639 mpfr_swap (ts, tc);
640 }
641 mpfr_clear (x_red);
642 mpfr_clear (pi);
643 }
644 /* adjust errors with respect to absolute values */
645 errs = err - MPFR_EXP(ts);
646 errc = err - MPFR_EXP(tc);
647 if ((s == NULL || MPFR_CAN_ROUND (ts, w - errs, MPFR_PREC(s), rnd)) &&
648 (c == NULL || MPFR_CAN_ROUND (tc, w - errc, MPFR_PREC(c), rnd)))
649 break;
650 MPFR_ZIV_NEXT (loop, w);
651 mpfr_set_prec (ts, w);
652 mpfr_set_prec (tc, w);
653 }
654 MPFR_ZIV_FREE (loop);
655
656 inexs = (s == NULL) ? 0 : mpfr_set (s, ts, rnd);
657 inexc = (c == NULL) ? 0 : mpfr_set (c, tc, rnd);
658
659 mpfr_clear (ts);
660 mpfr_clear (tc);
661 return INEX(inexs,inexc);
662 }
663