1 /*
2 Copyright (C) 2014 Fredrik Johansson
3
4 This file is part of Arb.
5
6 Arb is free software: you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License (LGPL) as published
8 by the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version. See <http://www.gnu.org/licenses/>.
10 */
11
12 #include "arb.h"
13
14 /*
15 Determine N such that the error is bounded by 2^-prec.
16 We choose an N with many trailing zeros to improve efficiency
17 of the binary splitting.
18
19 With N = 0, 1, 2, ... the highest included term is x, x^3, x^5, ...
20 so the error is bounded by x^3, x^5, x^7, ... = x^(2N+3)
21 */
22 static slong
bs_num_terms(slong mag,slong prec)23 bs_num_terms(slong mag, slong prec)
24 {
25 slong N;
26
27 if (mag >= 0)
28 flint_abort();
29
30 N = 0;
31
32 while (mag * (2 * N + 3) > -prec)
33 N++;
34
35 if (N > 10000)
36 while (N % 128 != 0)
37 N++;
38
39 if (N > 1000)
40 while (N % 16 != 0)
41 N++;
42
43 if (N > 100)
44 while (N % 2 != 0)
45 N++;
46
47 return N;
48 }
49
50 /* Argument reduction: apply atan(x) = 2 atan(x/(1+sqrt(1+x^2))) a total
51 of r times, and convert result to a fixed-point number res
52 together with absolute error err. With an initial inversion
53 if xmag > 0.
54 */
55 void
arb_atan_bb_reduce(fmpz_t res,mag_t err,const arf_t x,slong xmag,slong r,slong prec)56 arb_atan_bb_reduce(fmpz_t res, mag_t err, const arf_t x, slong xmag, slong r, slong prec)
57 {
58 int inexact;
59
60 if (r == 0)
61 {
62 if (xmag <= 0)
63 {
64 inexact = arf_get_fmpz_fixed_si(res, x, -prec);
65 mag_set_ui_2exp_si(err, inexact, -prec);
66 }
67 else
68 {
69 slong wp;
70 arb_t t;
71
72 wp = FLINT_MAX(8, prec - xmag);
73
74 arb_init(t);
75 arb_set_arf(t, x);
76 arb_set_round(t, t, wp);
77 arb_ui_div(t, 1, t, wp);
78
79 mag_set(err, arb_radref(t));
80 inexact = arf_get_fmpz_fixed_si(res, arb_midref(t), -prec);
81 mag_add_ui_2exp_si(err, err, inexact, -prec);
82
83 arb_clear(t);
84 }
85 }
86 else
87 {
88 slong k;
89 arb_t p, p2, q, q2;
90
91 arb_init(p);
92 arb_init(p2);
93 arb_init(q);
94 arb_init(q2);
95
96 if (xmag <= 0)
97 {
98 arb_set_arf(p, x);
99 arb_set_round(p, p, prec);
100 arb_mul(p2, p, p, prec);
101 arb_add_ui(q, p2, 1, prec);
102 arb_sqrt(q, q, prec);
103 arb_add_ui(q, q, 1, prec);
104
105 for (k = 1; k < r; k++)
106 {
107 if (k == 1)
108 {
109 arb_mul_2exp_si(q2, q, 1);
110 arb_add(q2, q2, p2, prec);
111 }
112 else
113 {
114 arb_mul(q2, q, q, prec);
115 }
116
117 arb_add(q2, p2, q2, prec);
118 arb_sqrt(q2, q2, prec);
119 arb_add(q, q, q2, prec);
120 }
121 }
122 else
123 {
124 arb_one(p);
125 arb_one(p2);
126 arb_set_arf(q, x);
127 arb_set_round(q, q, prec);
128
129 for (k = 0; k < r; k++)
130 {
131 arb_mul(q2, q, q, prec);
132 arb_add(q2, p2, q2, prec);
133 arb_sqrt(q2, q2, prec);
134 arb_add(q, q, q2, prec);
135 }
136 }
137
138 arb_div(p, p, q, prec);
139
140 mag_set(err, arb_radref(p));
141 inexact = arf_get_fmpz_fixed_si(res, arb_midref(p), -prec);
142 mag_add_ui_2exp_si(err, err, inexact, -prec);
143
144 arb_clear(p);
145 arb_clear(p2);
146 arb_clear(q);
147 arb_clear(q2);
148 }
149 }
150
151 void
arb_atan_arf_bb(arb_t z,const arf_t x,slong prec)152 arb_atan_arf_bb(arb_t z, const arf_t x, slong prec)
153 {
154 slong iter, bits, r, mag, q, wp, N;
155 slong argred_bits, start_bits;
156 flint_bitcnt_t Qexp[1];
157 int inverse;
158 mag_t inp_err;
159 fmpz_t s, t, u, P, Q, err;
160
161 if (arf_is_zero(x))
162 {
163 arb_zero(z);
164 return;
165 }
166
167 if (arf_is_special(x))
168 {
169 flint_abort();
170 }
171
172 if (ARF_SGNBIT(x))
173 {
174 arf_t y;
175 arf_init_neg_shallow(y, x);
176 arb_atan_arf_bb(z, y, prec);
177 arb_neg(z, z);
178 return;
179 }
180
181 mag = arf_abs_bound_lt_2exp_si(x);
182
183 /* We assume that this function only gets called with something
184 reasonable as input (huge/tiny input will be handled by
185 the main atan wrapper). */
186 if (FLINT_ABS(mag) > 2 * prec + 100)
187 {
188 flint_printf("arb_atan_arf_bb: unexpectedly large/small input\n");
189 flint_abort();
190 }
191
192 /* approximate by x - x^3 / 3 or pi/2 - 1/x + (1/3)/x^3 */
193 if (mag < -prec / 4 - 2 || (mag-1) > prec / 5 + 3)
194 {
195 arb_t t;
196 arb_init(t);
197 arb_set_arf(t, x);
198
199 if (mag < 0)
200 {
201 arb_mul(t, t, t, prec);
202 arb_mul_arf(t, t, x, prec);
203 arb_div_ui(t, t, 3, prec);
204 arb_sub_arf(t, t, x, prec);
205 arb_neg(z, t);
206 /* error is bounded by x^5 */
207 mag_add_ui_2exp_si(arb_radref(z), arb_radref(z), 1, 5 * mag);
208 }
209 else
210 {
211 arb_ui_div(t, 1, t, prec);
212 arb_mul(z, t, t, prec);
213 arb_mul(z, z, t, prec);
214 arb_div_ui(z, z, 3, prec);
215 arb_sub(z, t, z, prec);
216
217 arb_const_pi(t, prec + 2);
218 arb_mul_2exp_si(t, t, -1);
219
220 arb_sub(z, t, z, prec);
221 /* error is bounded by 1/x^5, and 1/x <= 2^(1-mag) */
222 mag_add_ui_2exp_si(arb_radref(z), arb_radref(z), 1, 5 * (1-mag));
223 }
224
225 arb_clear(t);
226 return;
227 }
228
229 argred_bits = 8;
230 start_bits = 16;
231
232 /* Argument reduction q times. */
233 q = FLINT_MAX(0, mag + argred_bits);
234
235 /* Determine working precision. */
236 wp = prec + 10 + 2 * q + 2 * FLINT_BIT_COUNT(prec);
237 if (mag < 0)
238 wp += (-mag);
239
240 fmpz_init(s);
241 fmpz_init(t);
242 fmpz_init(u);
243 fmpz_init(Q);
244 fmpz_init(P);
245 fmpz_init(err); /* in fixed-point ulp */
246 mag_init(inp_err); /* absolute error */
247
248 arb_atan_bb_reduce(t, inp_err, x, mag, q, wp);
249 inverse = mag > 0; /* todo: compute in function, or pass to it */
250
251 /* s = 0, t = x */
252
253 for (iter = 0, bits = start_bits; !fmpz_is_zero(t);
254 iter++, bits *= 2)
255 {
256 /* Extract bits. */
257 r = FLINT_MIN(bits, wp);
258 fmpz_tdiv_q_2exp(u, t, wp - r);
259
260 if (!fmpz_is_zero(u))
261 {
262 /* Binary splitting (+1 fixed-point ulp truncation error). */
263 mag = fmpz_bits(u) - r;
264
265 N = bs_num_terms(mag, wp);
266
267 if (N != 0)
268 {
269 _arb_atan_sum_bs_powtab(P, Q, Qexp, u, r, N);
270
271 /* multiply by u/2^r */
272 fmpz_mul(P, P, u);
273 *Qexp += r;
274
275 /* T = T / Q (+1 fixed-point ulp error). */
276 if (*Qexp >= wp)
277 {
278 fmpz_tdiv_q_2exp(P, P, *Qexp - wp);
279 fmpz_tdiv_q(P, P, Q);
280 }
281 else
282 {
283 fmpz_mul_2exp(P, P, wp - *Qexp);
284 fmpz_tdiv_q(P, P, Q);
285 }
286
287 fmpz_add(s, s, P);
288 }
289
290 /* add u/2^r */
291 fmpz_mul_2exp(Q, u, wp - r);
292 fmpz_add(s, s, Q);
293
294 /* 1 ulp from the division,
295 1 ulp from truncating the Taylor series */
296 fmpz_add_ui(err, err, 2);
297 }
298
299 /* atan(t) = atan(u/2^r) + atan((t 2^r - u)/(2^r + u t)) */
300 fmpz_mul_2exp(P, t, r);
301 fmpz_mul_2exp(Q, u, wp);
302 fmpz_sub(P, P, Q);
303
304 fmpz_one(Q);
305 fmpz_mul_2exp(Q, Q, r + wp);
306 fmpz_addmul(Q, t, u);
307
308 fmpz_mul_2exp(P, P, wp);
309 fmpz_tdiv_q(t, P, Q);
310
311 /* 1 ulp error from the division */
312 fmpz_add_ui(err, err, 1);
313 }
314
315 /* add both err and inp_err */
316 arf_set_fmpz(arb_midref(z), s);
317 mag_set_fmpz(arb_radref(z), err);
318 arb_mul_2exp_si(z, z, -wp);
319 mag_add(arb_radref(z), arb_radref(z), inp_err);
320
321 /* argument reduction: atan(x) = 2^q atan(x') */
322 arb_mul_2exp_si(z, z, q);
323
324 /* outmost argument reduction: atan(x) = +/-pi/2 - atan(1/x) */
325 if (inverse)
326 {
327 arb_t pi2;
328 arb_init(pi2);
329 arb_const_pi(pi2, wp);
330 arb_mul_2exp_si(pi2, pi2, -1);
331 arb_sub(z, pi2, z, wp);
332 arb_clear(pi2);
333 }
334
335 arb_set_round(z, z, prec);
336
337 fmpz_clear(s);
338 fmpz_clear(t);
339 fmpz_clear(u);
340 fmpz_clear(Q);
341 fmpz_clear(P);
342 fmpz_clear(err);
343 mag_clear(inp_err);
344 }
345
346