1 /*
2 Copyright (C) 2012-2014 Fredrik Johansson
3
4 This file is part of Arb.
5
6 Arb is free software: you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License (LGPL) as published
8 by the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version. See <http://www.gnu.org/licenses/>.
10 */
11
12 #include "flint/mpn_extras.h"
13 #include "arb.h"
14
15 #define TMP_ALLOC_LIMBS(__n) TMP_ALLOC((__n) * sizeof(mp_limb_t))
16 #define MAGLIM(prec) FLINT_MAX(65536, (4*prec))
17
18 static void
mag_nonzero_fast_mul(mag_t z,const mag_t x,const mag_t y)19 mag_nonzero_fast_mul(mag_t z, const mag_t x, const mag_t y)
20 {
21 MAG_MAN(z) = MAG_FIXMUL(MAG_MAN(x), MAG_MAN(y)) + LIMB_ONE;
22 MAG_EXP(z) = MAG_EXP(x) + MAG_EXP(y);
23 MAG_FAST_ADJUST_ONE_TOO_SMALL(z);
24 }
25
26 static void
mag_nonzero_fast_add(mag_t z,const mag_t x,const mag_t y)27 mag_nonzero_fast_add(mag_t z, const mag_t x, const mag_t y)
28 {
29 slong shift = MAG_EXP(x) - MAG_EXP(y);
30
31 if (shift == 0)
32 {
33 MAG_EXP(z) = MAG_EXP(x);
34 MAG_MAN(z) = MAG_MAN(x) + MAG_MAN(y);
35 MAG_FAST_ADJUST_ONE_TOO_LARGE(z); /* may need two adjustments */
36 }
37 else if (shift > 0)
38 {
39 MAG_EXP(z) = MAG_EXP(x);
40
41 if (shift >= MAG_BITS)
42 MAG_MAN(z) = MAG_MAN(x) + LIMB_ONE;
43 else
44 MAG_MAN(z) = MAG_MAN(x) + (MAG_MAN(y) >> shift) + LIMB_ONE;
45 }
46 else
47 {
48 shift = -shift;
49 MAG_EXP(z) = MAG_EXP(y);
50
51 if (shift >= MAG_BITS)
52 MAG_MAN(z) = MAG_MAN(y) + LIMB_ONE;
53 else
54 MAG_MAN(z) = MAG_MAN(y) + (MAG_MAN(x) >> shift) + LIMB_ONE;
55 }
56
57 MAG_FAST_ADJUST_ONE_TOO_LARGE(z);
58 }
59
60 static int
mag_nonzero_fast_cmp(const mag_t x,const mag_t y)61 mag_nonzero_fast_cmp(const mag_t x, const mag_t y)
62 {
63 if (MAG_EXP(x) == MAG_EXP(y))
64 return (MAG_MAN(x) < MAG_MAN(y)) ? -1 : 1;
65 else
66 return (MAG_EXP(x) < MAG_EXP(y)) ? -1 : 1;
67 }
68
69 static void
mag_fast_set(mag_t x,const mag_t y)70 mag_fast_set(mag_t x, const mag_t y)
71 {
72 MAG_EXP(x) = MAG_EXP(y);
73 MAG_MAN(x) = MAG_MAN(y);
74 }
75
76 void
_arb_sin_cos(arb_t zsin,arb_t zcos,const arf_t x,const mag_t xrad,slong prec)77 _arb_sin_cos(arb_t zsin, arb_t zcos, const arf_t x, const mag_t xrad, slong prec)
78 {
79 int want_sin, want_cos;
80 slong radexp, exp, wp, wn, N, r, wprounded, maglim, orig_prec;
81 mp_ptr tmp, w, sina, cosa, sinb, cosb, ta, tb;
82 mp_ptr sinptr, cosptr;
83 mp_limb_t p1, q1bits, p2, q2bits, error, error2, p1_tab1, radman;
84 int negative, inexact, octant;
85 int sinnegative, cosnegative, swapsincos;
86 TMP_INIT;
87
88 /* PART 1: special cases and setup. */
89 orig_prec = prec;
90
91 /* Below, both x and xrad will be finite, and x will be nonzero. */
92 if (mag_is_inf(xrad) || arf_is_special(x))
93 {
94 _arb_sin_cos_generic(zsin, zcos, x, xrad, prec);
95 return;
96 }
97
98 exp = ARF_EXP(x);
99 maglim = MAGLIM(prec);
100 negative = ARF_SGNBIT(x);
101
102 /* Unlikely: tiny or huge midpoint. As written, this test also
103 catches any bignum exponents. */
104 if (exp < -(prec/2) - 2 || exp > maglim)
105 {
106 _arb_sin_cos_generic(zsin, zcos, x, xrad, prec);
107 return;
108 }
109
110 want_sin = (zsin != NULL);
111 want_cos = (zcos != NULL);
112
113 /* Copy the radius data. */
114 radexp = MAG_EXP(xrad);
115 radman = MAG_MAN(xrad);
116
117 if (radman != 0)
118 {
119 /* Clamp the radius exponent to a safe range. */
120 if (radexp < MAG_MIN_LAGOM_EXP || radexp > MAG_MAX_LAGOM_EXP)
121 {
122 /* Very wide... */
123 if (fmpz_sgn(MAG_EXPREF(xrad)) > 0)
124 {
125 _arb_sin_cos_wide(zsin, zcos, x, xrad, prec);
126 return;
127 }
128
129 radman = MAG_ONE_HALF;
130 radexp = MAG_MIN_LAGOM_EXP + 1;
131 }
132
133 /* Use wide algorithm. */
134 if (radexp >= -24)
135 {
136 _arb_sin_cos_wide(zsin, zcos, x, xrad, prec);
137 return;
138 }
139
140 /* Regular case: decrease precision to match generic max. accuracy. */
141 /* Note: near x=0, the error can be quadratic for cos. */
142 if (want_cos && exp < -2)
143 prec = FLINT_MIN(prec, 20 - FLINT_MAX(exp, radexp) - radexp);
144 else
145 prec = FLINT_MIN(prec, 20 - radexp);
146 }
147
148 /* Absolute working precision (NOT rounded to a limb multiple) */
149 wp = prec + 8;
150 if (want_sin && exp <= 0)
151 wp += (-exp);
152 /* Number of limbs */
153 wn = (wp + FLINT_BITS - 1) / FLINT_BITS;
154 /* Precision rounded to a number of bits */
155 wprounded = FLINT_BITS * wn;
156 /* Don't be close to the boundary (to allow adding adding the
157 Taylor series truncation error without overflow) */
158 wp = FLINT_MAX(wp, wprounded - (FLINT_BITS - 4));
159
160 /* Too high precision to use table -- use generic algorithm */
161 if (wp > ARB_SIN_COS_TAB2_PREC)
162 {
163 _arb_sin_cos_generic(zsin, zcos, x, xrad, orig_prec);
164 return;
165 }
166
167 /* PART 2: the actual computation. */
168
169 TMP_START;
170 tmp = TMP_ALLOC_LIMBS(9 * wn);
171 w = tmp; /* requires wn limbs */
172 sina = w + wn; /* requires wn limbs */
173 cosa = sina + wn; /* requires wn limbs */
174 sinb = cosa + wn; /* requires wn limbs */
175 cosb = sinb + wn; /* requires wn limbs */
176 ta = cosb + wn; /* requires 2*wn limbs */
177 tb = ta + 2*wn; /* requires 2*wn limbs */
178
179 /* reduce modulo pi/4 */
180 if (_arb_get_mpn_fixed_mod_pi4(w, NULL, &octant, &error, x, wn) == 0)
181 {
182 /* may run out of precision for pi/4 */
183 _arb_sin_cos_generic(zsin, zcos, x, xrad, orig_prec);
184 TMP_END;
185 return;
186 }
187
188 sinnegative = (octant >= 4) ^ negative;
189 cosnegative = (octant >= 2 && octant <= 5);
190 swapsincos = (octant == 1 || octant == 2 || octant == 5 || octant == 6);
191
192 /* Table-based argument reduction (1 or 2 steps) */
193 if (wp <= ARB_SIN_COS_TAB1_PREC)
194 {
195 q1bits = ARB_SIN_COS_TAB1_BITS;
196 q2bits = 0;
197
198 p1 = p1_tab1 = w[wn-1] >> (FLINT_BITS - q1bits);
199 w[wn-1] -= (p1 << (FLINT_BITS - q1bits));
200 p2 = 0;
201
202 /* p1_tab1 will be used for the error bounds at the end. */
203 p1_tab1 = p1;
204 }
205 else
206 {
207 q1bits = ARB_SIN_COS_TAB21_BITS;
208 q2bits = ARB_SIN_COS_TAB21_BITS + ARB_SIN_COS_TAB22_BITS;
209
210 /* p1_tab1 will be used for the error bounds at the end. */
211 p1_tab1 = w[wn-1] >> (FLINT_BITS - ARB_SIN_COS_TAB1_BITS);
212
213 p1 = w[wn-1] >> (FLINT_BITS - q1bits);
214 w[wn-1] -= (p1 << (FLINT_BITS - q1bits));
215 p2 = w[wn-1] >> (FLINT_BITS - q2bits);
216 w[wn-1] -= (p2 << (FLINT_BITS - q2bits));
217 }
218
219 /* |w| <= 2^-r */
220 r = _arb_mpn_leading_zeros(w, wn);
221
222 /* Choose number of terms N such that Taylor series truncation
223 error is <= 2^-wp */
224 N = _arb_exp_taylor_bound(-r, wp);
225
226 /* the summation for sin/cos is actually done to (2N-1)! */
227 N = (N + 1) / 2;
228
229 if (N < 14)
230 {
231 /* Evaluate Taylor series */
232 _arb_sin_cos_taylor_rs(sina, cosa, &error2, w, wn, N, 0, 1);
233 /* Taylor series evaluation error */
234 error += error2;
235 /* Taylor series truncation error */
236 error += UWORD(1) << (wprounded-wp);
237 }
238 else /* Compute cos(a) from sin(a) using a square root. */
239 {
240 /* Evaluate Taylor series */
241 _arb_sin_cos_taylor_rs(sina, cosa, &error2, w, wn, N, 1, 1);
242 error += error2;
243 error += UWORD(1) << (wprounded-wp);
244
245 if (flint_mpn_zero_p(sina, wn))
246 {
247 flint_mpn_store(cosa, wn, LIMB_ONES);
248 error = FLINT_MAX(error, 1);
249 }
250 else
251 {
252 mpn_sqr(ta, sina, wn);
253 /* 1 - s^2 (negation guaranteed to have borrow) */
254 mpn_neg(ta, ta, 2 * wn);
255 /* top limb of ta must be nonzero, so no need to normalize
256 before calling mpn_sqrtrem */
257 mpn_sqrtrem(cosa, ta, ta, 2 * wn);
258
259 /* When converting sin to cos, the error for cos must be
260 smaller than the error for sin; but we also get 1 ulp
261 extra error from the square root. */
262 error += 1;
263 }
264 }
265
266 /*
267 sin(a+b) = sin(a)*cos(b) + cos(a)*sin(b)
268 cos(a+b) = cos(a)*cos(b) - sin(a)*sin(b)
269
270 [F1+e1]*[F2+e2] + [F3+e3]*[F4+e4] - F1 F2 + F3 F4 =
271
272 e1 e2 + e3 e4 + e2 F1 + e1 F2 + e4 F3 + e3 F4
273
274 <= (e1 + e2 + e3 + e4) + 1 (ulp)
275
276 <= 2*left_err + 2*right_err + 1 (ulp)
277
278 Truncating both terms before adding adds another 2 ulp, so the error
279 is bounded by
280
281 <= 2*left_err + 2*right_err + 3 (ulp)
282 */
283
284 if (p1 == 0 && p2 == 0) /* no table lookups */
285 {
286 sinptr = sina;
287 cosptr = cosa;
288 }
289 else if (p1 == 0 || p2 == 0) /* only one table lookup */
290 {
291 mp_srcptr sinc, cosc;
292
293 if (wp <= ARB_SIN_COS_TAB1_PREC) /* must be in table 1 */
294 {
295 sinc = arb_sin_cos_tab1[2 * p1] + ARB_SIN_COS_TAB1_LIMBS - wn;
296 cosc = arb_sin_cos_tab1[2 * p1 + 1] + ARB_SIN_COS_TAB1_LIMBS - wn;
297 }
298 else if (p1 != 0)
299 {
300 sinc = arb_sin_cos_tab21[2 * p1] + ARB_SIN_COS_TAB2_LIMBS - wn;
301 cosc = arb_sin_cos_tab21[2 * p1 + 1] + ARB_SIN_COS_TAB2_LIMBS - wn;
302 }
303 else
304 {
305 sinc = arb_sin_cos_tab22[2 * p2] + ARB_SIN_COS_TAB2_LIMBS - wn;
306 cosc = arb_sin_cos_tab22[2 * p2 + 1] + ARB_SIN_COS_TAB2_LIMBS - wn;
307 }
308
309 if ((want_sin && !swapsincos) || (want_cos && swapsincos))
310 {
311 mpn_mul_n(ta, sina, cosc, wn);
312 mpn_mul_n(tb, cosa, sinc, wn);
313 mpn_add_n(w, ta + wn, tb + wn, wn);
314 }
315
316 if ((want_cos && !swapsincos) || (want_sin && swapsincos))
317 {
318 mpn_mul_n(ta, cosa, cosc, wn);
319 mpn_mul_n(tb, sina, sinc, wn);
320 mpn_sub_n(ta, ta + wn, tb + wn, wn);
321 }
322
323 sinptr = w;
324 cosptr = ta;
325
326 error = 2 * error + 2 * 1 + 3;
327 }
328 else /* two table lookups, must be in table 2 */
329 {
330 mp_srcptr sinc, cosc, sind, cosd;
331
332 sinc = arb_sin_cos_tab21[2 * p1] + ARB_SIN_COS_TAB2_LIMBS - wn;
333 cosc = arb_sin_cos_tab21[2 * p1 + 1] + ARB_SIN_COS_TAB2_LIMBS - wn;
334 sind = arb_sin_cos_tab22[2 * p2] + ARB_SIN_COS_TAB2_LIMBS - wn;
335 cosd = arb_sin_cos_tab22[2 * p2 + 1] + ARB_SIN_COS_TAB2_LIMBS - wn;
336
337 mpn_mul_n(ta, sinc, cosd, wn);
338 mpn_mul_n(tb, cosc, sind, wn);
339 mpn_add_n(sinb, ta + wn, tb + wn, wn);
340
341 mpn_mul_n(ta, cosc, cosd, wn);
342 mpn_mul_n(tb, sinc, sind, wn);
343 mpn_sub_n(cosb, ta + wn, tb + wn, wn);
344
345 error2 = 2 * 1 + 2 * 1 + 3;
346
347 if ((want_sin && !swapsincos) || (want_cos && swapsincos))
348 {
349 mpn_mul_n(ta, sina, cosb, wn);
350 mpn_mul_n(tb, cosa, sinb, wn);
351 mpn_add_n(w, ta + wn, tb + wn, wn);
352 }
353
354 if ((want_cos && !swapsincos) || (want_sin && swapsincos))
355 {
356 mpn_mul_n(ta, cosa, cosb, wn);
357 mpn_mul_n(tb, sina, sinb, wn);
358 mpn_sub_n(ta, ta + wn, tb + wn, wn);
359 }
360
361 error = 2 * error + 2 * error2 + 3;
362
363 sinptr = w;
364 cosptr = ta;
365 }
366
367 /* PART 3: compute propagated error and write output */
368
369 if (swapsincos)
370 {
371 mp_ptr tmptr = sinptr;
372 sinptr = cosptr;
373 cosptr = tmptr;
374 }
375
376 /*
377 We have two sources of error.
378
379 1. Computation error:
380 error * 2^(-wprounded)
381
382 2. With input radius r != 0, the propagated error bound:
383 sin(x): min(2, r, |cos(x)|*r + 0.5*r^2)
384 cos(x): min(2, r, |sin(x)|*r + 0.5*r^2)
385
386 We skip the min by 2 since this is unnecessary with the
387 separate code for wide intervals.
388 */
389 if (radman == 0)
390 {
391 if (want_sin)
392 {
393 mag_set_ui_2exp_si(arb_radref(zsin), error, -wprounded);
394 if (want_cos)
395 mag_set(arb_radref(zcos), arb_radref(zsin));
396 }
397 else
398 {
399 mag_set_ui_2exp_si(arb_radref(zcos), error, -wprounded);
400 }
401 }
402 else
403 {
404 mag_t sin_err, cos_err, quadratic, comp_err, xrad_copy;
405 mp_limb_t A_sin, A_cos, A_exp;
406
407 /* Copy xrad to support aliasing (note: the exponent has
408 also been clamped earlier). */
409 MAG_MAN(xrad_copy) = radman;
410 MAG_EXP(xrad_copy) = radexp;
411
412 /* Bound computed error. */
413 if (error != 0)
414 {
415 mag_init(comp_err); /* no need to free */
416 mag_set_ui_2exp_si(comp_err, error, -wprounded);
417 }
418
419 /* Bound quadratic term for propagated error: 0.5*r^2 */
420 mag_init(quadratic); /* no need to free */
421 mag_nonzero_fast_mul(quadratic, xrad_copy, xrad_copy);
422 MAG_EXP(quadratic) -= 1;
423
424 /* Bound linear term for propagated error: cos(x)*r, sin(x)*r. */
425 /* Note: we could have used the computed values, but then we would
426 need to incorporate the computed error which would be slightly
427 messier, and we would also need extra cases when only computing
428 one of the functions. */
429 A_cos = arb_sin_cos_tab1[2 * p1_tab1][ARB_SIN_COS_TAB1_LIMBS - 1];
430 A_sin = arb_sin_cos_tab1[2 * p1_tab1 + 1][ARB_SIN_COS_TAB1_LIMBS - 1];
431
432 /* Note: ARB_SIN_COS_TAB1_BITS == 8 */
433 /* Adding 2 ulps (here ulp = 2^-8) gives an upper bound.
434 The truncated table entry underestimates the sine or
435 cosine of x by at most 1 ulp, and the top bits of x
436 underestimate x by at most 1 ulp. */
437 A_sin = (A_sin >> (FLINT_BITS - ARB_SIN_COS_TAB1_BITS)) + 2;
438 A_cos = (A_cos >> (FLINT_BITS - ARB_SIN_COS_TAB1_BITS)) + 2;
439 A_exp = -ARB_SIN_COS_TAB1_BITS;
440 if (swapsincos)
441 {
442 mp_limb_t tt = A_sin;
443 A_sin = A_cos;
444 A_cos = tt;
445 }
446 A_sin *= ((MAG_MAN(xrad_copy) >> (MAG_BITS - ARB_SIN_COS_TAB1_BITS)) + LIMB_ONE);
447 A_cos *= ((MAG_MAN(xrad_copy) >> (MAG_BITS - ARB_SIN_COS_TAB1_BITS)) + LIMB_ONE);
448 A_exp -= ARB_SIN_COS_TAB1_BITS;
449 A_exp += radexp;
450
451 if (want_sin)
452 {
453 mag_init(sin_err);
454 mag_set_ui_2exp_si(sin_err, A_sin, A_exp);
455 mag_nonzero_fast_add(sin_err, sin_err, quadratic);
456
457 /* The propagated error is certainly at most r */
458 if (mag_nonzero_fast_cmp(sin_err, xrad_copy) > 0)
459 mag_fast_set(sin_err, xrad_copy);
460
461 /* Add the computed error. */
462 if (error != 0)
463 mag_nonzero_fast_add(sin_err, sin_err, comp_err);
464
465 /* Set it, and clear the original output variable which could
466 have a bignum exponent. */
467 mag_swap(arb_radref(zsin), sin_err);
468 mag_clear(sin_err);
469 }
470
471 /* The same as above. */
472 if (want_cos)
473 {
474 mag_init(cos_err);
475 mag_set_ui_2exp_si(cos_err, A_cos, A_exp);
476 mag_nonzero_fast_add(cos_err, cos_err, quadratic);
477
478 if (mag_nonzero_fast_cmp(cos_err, xrad_copy) > 0)
479 mag_fast_set(cos_err, xrad_copy);
480
481 if (error != 0)
482 mag_nonzero_fast_add(cos_err, cos_err, comp_err);
483 mag_swap(arb_radref(zcos), cos_err);
484 mag_clear(cos_err);
485 }
486 }
487
488 /* Set the midpoints. */
489 if (want_sin)
490 {
491 inexact = _arf_set_mpn_fixed(arb_midref(zsin), sinptr,
492 wn, wn, sinnegative, prec, ARB_RND);
493 if (inexact)
494 arf_mag_add_ulp(arb_radref(zsin),
495 arb_radref(zsin), arb_midref(zsin), prec);
496 }
497
498 if (want_cos)
499 {
500 inexact = _arf_set_mpn_fixed(arb_midref(zcos), cosptr,
501 wn, wn, cosnegative, prec, ARB_RND);
502 if (inexact)
503 arf_mag_add_ulp(arb_radref(zcos),
504 arb_radref(zcos), arb_midref(zcos), prec);
505 }
506
507 TMP_END;
508 }
509
510 void
arb_sin_cos(arb_t s,arb_t c,const arb_t x,slong prec)511 arb_sin_cos(arb_t s, arb_t c, const arb_t x, slong prec)
512 {
513 _arb_sin_cos(s, c, arb_midref(x), arb_radref(x), prec);
514 }
515
516 void
arb_sin(arb_t s,const arb_t x,slong prec)517 arb_sin(arb_t s, const arb_t x, slong prec)
518 {
519 _arb_sin_cos(s, NULL, arb_midref(x), arb_radref(x), prec);
520 }
521
522 void
arb_cos(arb_t c,const arb_t x,slong prec)523 arb_cos(arb_t c, const arb_t x, slong prec)
524 {
525 _arb_sin_cos(NULL, c, arb_midref(x), arb_radref(x), prec);
526 }
527