1 /*
2     Copyright (C) 2014 Fredrik Johansson
3 
4     This file is part of Arb.
5 
6     Arb is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <http://www.gnu.org/licenses/>.
10 */
11 
12 #include "arb.h"
13 
14 /*
15 Determine N such that the error is bounded by 2^-prec.
16 We choose an N with many trailing zeros to improve efficiency
17 of the binary splitting.
18 
19 With N = 0, 1, 2, ... the highest included term is x, x^3, x^5, ...
20 so the error is bounded by x^3, x^5, x^7, ... = x^(2N+3)
21 */
22 static slong
bs_num_terms(slong mag,slong prec)23 bs_num_terms(slong mag, slong prec)
24 {
25     slong N;
26 
27     if (mag >= 0)
28         flint_abort();
29 
30     N = 0;
31 
32     while (mag * (2 * N + 3) > -prec)
33         N++;
34 
35     if (N > 10000)
36         while (N % 128 != 0)
37             N++;
38 
39     if (N > 1000)
40         while (N % 16 != 0)
41             N++;
42 
43     if (N > 100)
44         while (N % 2 != 0)
45             N++;
46 
47     return N;
48 }
49 
50 /* Argument reduction: apply atan(x) = 2 atan(x/(1+sqrt(1+x^2))) a total
51    of r times, and convert result to a fixed-point number res
52    together with absolute error err. With an initial inversion
53    if xmag > 0.
54 */
55 void
arb_atan_bb_reduce(fmpz_t res,mag_t err,const arf_t x,slong xmag,slong r,slong prec)56 arb_atan_bb_reduce(fmpz_t res, mag_t err, const arf_t x, slong xmag, slong r, slong prec)
57 {
58     int inexact;
59 
60     if (r == 0)
61     {
62         if (xmag <= 0)
63         {
64             inexact = arf_get_fmpz_fixed_si(res, x, -prec);
65             mag_set_ui_2exp_si(err, inexact, -prec);
66         }
67         else
68         {
69             slong wp;
70             arb_t t;
71 
72             wp = FLINT_MAX(8, prec - xmag);
73 
74             arb_init(t);
75             arb_set_arf(t, x);
76             arb_set_round(t, t, wp);
77             arb_ui_div(t, 1, t, wp);
78 
79             mag_set(err, arb_radref(t));
80             inexact = arf_get_fmpz_fixed_si(res, arb_midref(t), -prec);
81             mag_add_ui_2exp_si(err, err, inexact, -prec);
82 
83             arb_clear(t);
84         }
85     }
86     else
87     {
88         slong k;
89         arb_t p, p2, q, q2;
90 
91         arb_init(p);
92         arb_init(p2);
93         arb_init(q);
94         arb_init(q2);
95 
96         if (xmag <= 0)
97         {
98             arb_set_arf(p, x);
99             arb_set_round(p, p, prec);
100             arb_mul(p2, p, p, prec);
101             arb_add_ui(q, p2, 1, prec);
102             arb_sqrt(q, q, prec);
103             arb_add_ui(q, q, 1, prec);
104 
105             for (k = 1; k < r; k++)
106             {
107                 if (k == 1)
108                 {
109                     arb_mul_2exp_si(q2, q, 1);
110                     arb_add(q2, q2, p2, prec);
111                 }
112                 else
113                 {
114                     arb_mul(q2, q, q, prec);
115                 }
116 
117                 arb_add(q2, p2, q2, prec);
118                 arb_sqrt(q2, q2, prec);
119                 arb_add(q, q, q2, prec);
120             }
121         }
122         else
123         {
124             arb_one(p);
125             arb_one(p2);
126             arb_set_arf(q, x);
127             arb_set_round(q, q, prec);
128 
129             for (k = 0; k < r; k++)
130             {
131                 arb_mul(q2, q, q, prec);
132                 arb_add(q2, p2, q2, prec);
133                 arb_sqrt(q2, q2, prec);
134                 arb_add(q, q, q2, prec);
135             }
136         }
137 
138         arb_div(p, p, q, prec);
139 
140         mag_set(err, arb_radref(p));
141         inexact = arf_get_fmpz_fixed_si(res, arb_midref(p), -prec);
142         mag_add_ui_2exp_si(err, err, inexact, -prec);
143 
144         arb_clear(p);
145         arb_clear(p2);
146         arb_clear(q);
147         arb_clear(q2);
148     }
149 }
150 
151 void
arb_atan_arf_bb(arb_t z,const arf_t x,slong prec)152 arb_atan_arf_bb(arb_t z, const arf_t x, slong prec)
153 {
154     slong iter, bits, r, mag, q, wp, N;
155     slong argred_bits, start_bits;
156     flint_bitcnt_t Qexp[1];
157     int inverse;
158     mag_t inp_err;
159     fmpz_t s, t, u, P, Q, err;
160 
161     if (arf_is_zero(x))
162     {
163         arb_zero(z);
164         return;
165     }
166 
167     if (arf_is_special(x))
168     {
169         flint_abort();
170     }
171 
172     if (ARF_SGNBIT(x))
173     {
174         arf_t y;
175         arf_init_neg_shallow(y, x);
176         arb_atan_arf_bb(z, y, prec);
177         arb_neg(z, z);
178         return;
179     }
180 
181     mag = arf_abs_bound_lt_2exp_si(x);
182 
183     /* We assume that this function only gets called with something
184        reasonable as input (huge/tiny input will be handled by
185        the main atan wrapper). */
186     if (FLINT_ABS(mag) > 2 * prec + 100)
187     {
188         flint_printf("arb_atan_arf_bb: unexpectedly large/small input\n");
189         flint_abort();
190     }
191 
192     /* approximate by x - x^3 / 3 or pi/2 - 1/x + (1/3)/x^3 */
193     if (mag < -prec / 4 - 2 || (mag-1) > prec / 5 + 3)
194     {
195         arb_t t;
196         arb_init(t);
197         arb_set_arf(t, x);
198 
199         if (mag < 0)
200         {
201             arb_mul(t, t, t, prec);
202             arb_mul_arf(t, t, x, prec);
203             arb_div_ui(t, t, 3, prec);
204             arb_sub_arf(t, t, x, prec);
205             arb_neg(z, t);
206             /* error is bounded by x^5 */
207             mag_add_ui_2exp_si(arb_radref(z), arb_radref(z), 1, 5 * mag);
208         }
209         else
210         {
211             arb_ui_div(t, 1, t, prec);
212             arb_mul(z, t, t, prec);
213             arb_mul(z, z, t, prec);
214             arb_div_ui(z, z, 3, prec);
215             arb_sub(z, t, z, prec);
216 
217             arb_const_pi(t, prec + 2);
218             arb_mul_2exp_si(t, t, -1);
219 
220             arb_sub(z, t, z, prec);
221             /* error is bounded by 1/x^5, and 1/x <= 2^(1-mag) */
222             mag_add_ui_2exp_si(arb_radref(z), arb_radref(z), 1, 5 * (1-mag));
223         }
224 
225         arb_clear(t);
226         return;
227     }
228 
229     argred_bits = 8;
230     start_bits = 16;
231 
232     /* Argument reduction q times. */
233     q = FLINT_MAX(0, mag + argred_bits);
234 
235     /* Determine working precision. */
236     wp = prec + 10 + 2 * q + 2 * FLINT_BIT_COUNT(prec);
237     if (mag < 0)
238         wp += (-mag);
239 
240     fmpz_init(s);
241     fmpz_init(t);
242     fmpz_init(u);
243     fmpz_init(Q);
244     fmpz_init(P);
245     fmpz_init(err);  /* in fixed-point ulp */
246     mag_init(inp_err); /* absolute error */
247 
248     arb_atan_bb_reduce(t, inp_err, x, mag, q, wp);
249     inverse = mag > 0; /* todo: compute in function, or pass to it */
250 
251     /* s = 0, t = x */
252 
253     for (iter = 0, bits = start_bits; !fmpz_is_zero(t);
254         iter++, bits *= 2)
255     {
256         /* Extract bits. */
257         r = FLINT_MIN(bits, wp);
258         fmpz_tdiv_q_2exp(u, t, wp - r);
259 
260         if (!fmpz_is_zero(u))
261         {
262             /* Binary splitting (+1 fixed-point ulp truncation error). */
263             mag = fmpz_bits(u) - r;
264 
265             N = bs_num_terms(mag, wp);
266 
267             if (N != 0)
268             {
269                 _arb_atan_sum_bs_powtab(P, Q, Qexp, u, r, N);
270 
271                 /* multiply by u/2^r */
272                 fmpz_mul(P, P, u);
273                 *Qexp += r;
274 
275                 /* T = T / Q  (+1 fixed-point ulp error). */
276                 if (*Qexp >= wp)
277                 {
278                     fmpz_tdiv_q_2exp(P, P, *Qexp - wp);
279                     fmpz_tdiv_q(P, P, Q);
280                 }
281                 else
282                 {
283                     fmpz_mul_2exp(P, P, wp - *Qexp);
284                     fmpz_tdiv_q(P, P, Q);
285                 }
286 
287                 fmpz_add(s, s, P);
288             }
289 
290             /* add u/2^r */
291             fmpz_mul_2exp(Q, u, wp - r);
292             fmpz_add(s, s, Q);
293 
294             /* 1 ulp from the division,
295                1 ulp from truncating the Taylor series */
296             fmpz_add_ui(err, err, 2);
297         }
298 
299         /* atan(t) = atan(u/2^r) + atan((t 2^r - u)/(2^r + u t)) */
300         fmpz_mul_2exp(P, t, r);
301         fmpz_mul_2exp(Q, u, wp);
302         fmpz_sub(P, P, Q);
303 
304         fmpz_one(Q);
305         fmpz_mul_2exp(Q, Q, r + wp);
306         fmpz_addmul(Q, t, u);
307 
308         fmpz_mul_2exp(P, P, wp);
309         fmpz_tdiv_q(t, P, Q);
310 
311         /* 1 ulp error from the division */
312         fmpz_add_ui(err, err, 1);
313     }
314 
315     /* add both err and inp_err */
316     arf_set_fmpz(arb_midref(z), s);
317     mag_set_fmpz(arb_radref(z), err);
318     arb_mul_2exp_si(z, z, -wp);
319     mag_add(arb_radref(z), arb_radref(z), inp_err);
320 
321     /* argument reduction: atan(x) = 2^q atan(x') */
322     arb_mul_2exp_si(z, z, q);
323 
324     /* outmost argument reduction: atan(x) = +/-pi/2 - atan(1/x) */
325     if (inverse)
326     {
327         arb_t pi2;
328         arb_init(pi2);
329         arb_const_pi(pi2, wp);
330         arb_mul_2exp_si(pi2, pi2, -1);
331         arb_sub(z, pi2, z, wp);
332         arb_clear(pi2);
333     }
334 
335     arb_set_round(z, z, prec);
336 
337     fmpz_clear(s);
338     fmpz_clear(t);
339     fmpz_clear(u);
340     fmpz_clear(Q);
341     fmpz_clear(P);
342     fmpz_clear(err);
343     mag_clear(inp_err);
344 }
345 
346