1 /*
2 Copyright (C) 2017 Fredrik Johansson
3
4 This file is part of Arb.
5
6 Arb is free software: you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License (LGPL) as published
8 by the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version. See <http://www.gnu.org/licenses/>.
10 */
11
12 #include "arb.h"
13
14 slong _arb_compute_bs_exponents(slong * tab, slong n);
15 slong _arb_get_exp_pos(const slong * tab, slong step);
16
17 static void
bsplit(fmpz_t T,fmpz_t Q,flint_bitcnt_t * Qexp,const slong * xexp,const fmpz * xpow,flint_bitcnt_t r,slong a,slong b)18 bsplit(fmpz_t T, fmpz_t Q, flint_bitcnt_t * Qexp,
19 const slong * xexp,
20 const fmpz * xpow, flint_bitcnt_t r, slong a, slong b)
21 {
22 int cc;
23
24 if (b - a == 1)
25 {
26 count_trailing_zeros(cc, (2 * a + 2));
27 fmpz_neg_ui(Q, (2 * a + 2) >> cc);
28 fmpz_mul_ui(Q, Q, 2 * a + 3);
29 *Qexp = 2 * r + cc;
30
31 fmpz_set(T, xpow);
32 }
33 else if (b - a == 2)
34 {
35 fmpz_mul2_uiui(T, xpow, (2 * a + 4), (2 * a + 5));
36 fmpz_mul_2exp(T, T, 2 * r);
37 fmpz_neg(T, T);
38 fmpz_add(T, T, xpow + 1);
39
40 count_trailing_zeros(cc, (2 * a + 4));
41 fmpz_neg_ui(Q, (2 * a + 4) >> cc);
42 fmpz_mul_ui(Q, Q, 2 * a + 5);
43 *Qexp = 2 * r + cc;
44
45 count_trailing_zeros(cc, (2 * a + 2));
46 fmpz_mul2_uiui(Q, Q, (2 * a + 2) >> cc, (2 * a + 3));
47 fmpz_neg(Q, Q);
48 *Qexp += 2 * r + cc;
49 }
50 else
51 {
52 slong step, m, i;
53 flint_bitcnt_t Q2exp[1];
54 fmpz_t Q2, T2;
55
56 step = (b - a) / 2;
57 m = a + step;
58
59 fmpz_init(Q2);
60 fmpz_init(T2);
61
62 bsplit(T, Q, Qexp, xexp, xpow, r, a, m);
63 bsplit(T2, Q2, Q2exp, xexp, xpow, r, m, b);
64
65 fmpz_mul(T, T, Q2);
66 fmpz_mul_2exp(T, T, *Q2exp);
67
68 /* find x^step in table */
69 i = _arb_get_exp_pos(xexp, step);
70 fmpz_addmul(T, xpow + i, T2);
71 fmpz_clear(T2);
72
73 fmpz_mul(Q, Q, Q2);
74 *Qexp = *Qexp + *Q2exp;
75 fmpz_clear(Q2);
76 }
77 }
78
79 /* todo: also allow computing cos, using the same table... */
80 void
_arb_sin_sum_bs_powtab(fmpz_t T,fmpz_t Q,flint_bitcnt_t * Qexp,const fmpz_t x,flint_bitcnt_t r,slong N)81 _arb_sin_sum_bs_powtab(fmpz_t T, fmpz_t Q, flint_bitcnt_t * Qexp,
82 const fmpz_t x, flint_bitcnt_t r, slong N)
83 {
84 slong * xexp;
85 slong length, i;
86 fmpz * xpow;
87
88 /* compute the powers of x^2 that will appear (at least x^2) */
89 xexp = flint_calloc(2 * FLINT_BITS, sizeof(slong));
90 length = _arb_compute_bs_exponents(xexp, N);
91
92 xpow = _fmpz_vec_init(length);
93 fmpz_mul(xpow, x, x);
94
95 /* build x^i table */
96 for (i = 1; i < length; i++)
97 {
98 if (xexp[i] == 2 * xexp[i-1])
99 {
100 fmpz_mul(xpow + i, xpow + i - 1, xpow + i - 1);
101 }
102 else if (xexp[i] == 2 * xexp[i-2]) /* prefer squaring if possible */
103 {
104 fmpz_mul(xpow + i, xpow + i - 2, xpow + i - 2);
105 }
106 else if (xexp[i] == 2 * xexp[i-1] + 1)
107 {
108 fmpz_mul(xpow + i, xpow + i - 1, xpow + i - 1);
109 fmpz_mul(xpow + i, xpow + i, xpow);
110 }
111 else if (xexp[i] == 2 * xexp[i-2] + 1)
112 {
113 fmpz_mul(xpow + i, xpow + i - 2, xpow + i - 2);
114 fmpz_mul(xpow + i, xpow + i, xpow);
115 }
116 else
117 {
118 flint_printf("power table has the wrong structure!\n");
119 flint_abort();
120 }
121 }
122
123 bsplit(T, Q, Qexp, xexp, xpow, r, 0, N);
124 _fmpz_vec_clear(xpow, length);
125 flint_free(xexp);
126 }
127
128 /*
129 Determine N such that the error is bounded by 2^-prec when summing the
130 Taylor series of sin(x) up to term x^(2N+1) inclusive. We choose an N with
131 many trailing zeros to improve efficiency of the binary splitting.
132 */
133 static slong
bs_num_terms(slong mag,slong prec)134 bs_num_terms(slong mag, slong prec)
135 {
136 slong N;
137
138 N = _arb_exp_taylor_bound(mag, prec);
139 N = N / 2 - 1;
140 N = FLINT_MAX(N, 1);
141
142 if (N > 10000)
143 while (N % 128 != 0)
144 N++;
145
146 if (N > 1000)
147 while (N % 16 != 0)
148 N++;
149
150 if (N > 100)
151 while (N % 2 != 0)
152 N++;
153
154 return N;
155 }
156
157 void
arb_sin_cos_fmpz_div_2exp_bsplit(arb_t wsin,arb_t wcos,const fmpz_t x,flint_bitcnt_t r,slong prec)158 arb_sin_cos_fmpz_div_2exp_bsplit(arb_t wsin, arb_t wcos, const fmpz_t x, flint_bitcnt_t r, slong prec)
159 {
160 fmpz_t T, Q;
161 slong N, xmag;
162 flint_bitcnt_t Qexp[1];
163
164 /* slightly reduce memory usage at very high precision */
165 arb_zero(wsin);
166 arb_zero(wcos);
167
168 fmpz_init(T);
169 fmpz_init(Q);
170
171 if (r > prec)
172 flint_abort();
173
174 /* Binary splitting (+1 fixed-point ulp truncation error). */
175 xmag = fmpz_bits(x) - r;
176 N = bs_num_terms(xmag, prec);
177 _arb_sin_sum_bs_powtab(T, Q, Qexp, x, r, N);
178
179 /* we still need to multiply and add x/2^r to get sine */
180 fmpz_mul(T, T, x);
181 Qexp[0] += r;
182
183 /* T = T / Q (+1 fixed-point ulp error). */
184 if (Qexp[0] >= prec)
185 {
186 fmpz_tdiv_q_2exp(T, T, Qexp[0] - prec);
187 fmpz_tdiv_q(T, T, Q);
188 }
189 else
190 {
191 fmpz_mul_2exp(T, T, prec - Qexp[0]);
192 fmpz_tdiv_q(T, T, Q);
193 }
194
195 fmpz_mul_2exp(Q, x, prec - r);
196 fmpz_add(T, T, Q);
197
198 /* T = sin(u) with at most 2 fixed-point ulp error. */
199 arf_set_fmpz(arb_midref(wsin), T);
200 arf_mul_2exp_si(arb_midref(wsin), arb_midref(wsin), -prec);
201 mag_set_ui_2exp_si(arb_radref(wsin), 2, -prec);
202
203 /* compute cos from sin */
204 arb_mul(wcos, wsin, wsin, prec);
205 arb_sub_ui(wcos, wcos, 1, prec);
206 arb_neg(wcos, wcos);
207 arb_sqrt(wcos, wcos, prec);
208
209 fmpz_clear(T);
210 fmpz_clear(Q);
211 }
212
213 void
arb_sin_cos_arf_bb(arb_t zsin,arb_t zcos,const arf_t x,slong prec)214 arb_sin_cos_arf_bb(arb_t zsin, arb_t zcos, const arf_t x, slong prec)
215 {
216 slong k, iter, bits, r, xmag, q, wp;
217 slong argred_bits, start_bits;
218 int inexact, negative;
219 fmpz_t t, u;
220 arb_t wcos, wsin, tmp1;
221
222 if (zsin == NULL)
223 {
224 arb_init(tmp1);
225 arb_sin_cos_arf_bb(tmp1, zcos, x, prec);
226 arb_clear(tmp1);
227 return;
228 }
229
230 if (zcos == NULL)
231 {
232 arb_init(tmp1);
233 arb_sin_cos_arf_bb(zsin, tmp1, x, prec);
234 arb_clear(tmp1);
235 return;
236 }
237
238 if (arf_is_zero(x))
239 {
240 arb_zero(zsin);
241 arb_one(zcos);
242 return;
243 }
244
245 xmag = arf_abs_bound_lt_2exp_si(x);
246 negative = arf_sgn(x) < 0;
247
248 /* We assume that this function only gets called with something
249 reasonable as input (huge/tiny input will be handled by
250 the main sin/cos wrapper). */
251 if (arf_is_special(x) || arf_cmpabs_d(x, 3.15) > 0 || xmag < -2 * prec - 100)
252 {
253 flint_printf("arb_sin_cos_arf_bb: unexpectedly large/small input\n");
254 flint_abort();
255 }
256
257 argred_bits = 24;
258 start_bits = argred_bits * 3;
259
260 q = FLINT_MAX(0, xmag + argred_bits);
261 if (q <= 2)
262 q = 0;
263
264 wp = prec + 10 + 2 * (q - xmag) + 2 * FLINT_BIT_COUNT(prec);
265
266 fmpz_init(t);
267 fmpz_init(u);
268 arb_init(wcos);
269 arb_init(wsin);
270 arb_init(tmp1);
271
272 /* Convert x/2^q to a fixed-point number. */
273 inexact = arf_get_fmpz_fixed_si(t, x, -wp + q);
274 fmpz_abs(t, t);
275
276 /* Aliasing of z and x is safe now that only use t. */
277 /* Start with z = 1. */
278 arb_one(zcos);
279 arb_zero(zsin);
280
281 /* Bit-burst loop. */
282 for (iter = 0, bits = start_bits; !fmpz_is_zero(t); iter++, bits *= 3)
283 {
284 /* Extract bits. */
285 r = FLINT_MIN(bits, wp);
286 fmpz_tdiv_q_2exp(u, t, wp - r);
287
288 arb_sin_cos_fmpz_div_2exp_bsplit(wsin, wcos, u, r, wp);
289
290 /* Remove used bits. */
291 fmpz_mul_2exp(u, u, wp - r);
292 fmpz_sub(t, t, u);
293
294 /* zsin, zcos = zsin wcos + zcos wsin, zcos wcos - zsin wsin */
295 /* using karatsuba */
296 arb_add(tmp1, zsin, zcos, wp);
297 arb_mul(zcos, zcos, wcos, wp);
298 arb_add(wcos, wcos, wsin, wp);
299 arb_mul(wsin, wsin, zsin, wp);
300 arb_mul(tmp1, tmp1, wcos, wp);
301 arb_sub(zsin, tmp1, wsin, wp);
302 arb_sub(zsin, zsin, zcos, wp);
303 arb_sub(zcos, zcos, wsin, wp);
304 arb_zero(tmp1); /* slightly reduce memory usage */
305 }
306
307 /* Initial fixed-point truncation error. */
308 if (inexact)
309 {
310 arb_add_error_2exp_si(zcos, -wp);
311 arb_add_error_2exp_si(zsin, -wp);
312 }
313
314 if (q != 0)
315 {
316 /* cos(x) = 2 cos(x/2)^2 - 1 */
317 for (k = 0; k < q; k++)
318 {
319 arb_mul(zcos, zcos, zcos, wp);
320 arb_mul_2exp_si(zcos, zcos, 1);
321 arb_sub_ui(zcos, zcos, 1, wp);
322 }
323
324 arb_mul(tmp1, zcos, zcos, wp);
325 arb_sub_ui(tmp1, tmp1, 1, wp);
326 arb_neg(tmp1, tmp1);
327 arb_sqrt(zsin, tmp1, wp);
328 }
329
330 if (negative)
331 arb_neg(zsin, zsin);
332
333 arb_set_round(zsin, zsin, prec);
334 arb_set_round(zcos, zcos, prec);
335
336 fmpz_clear(t);
337 fmpz_clear(u);
338 arb_clear(wcos);
339 arb_clear(wsin);
340 arb_clear(tmp1);
341 }
342
343