1 /* mpfr_atan -- arc-tangent of a floating-point number
2
3 Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramel projects, INRIA.
5
6 This file is part of the GNU MPFR Library.
7
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25
26 /* If x = p/2^r, put in y an approximation of atan(x)/x using 2^m terms
27 for the series expansion, with an error of at most 1 ulp.
28 Assumes |x| < 1.
29
30 If X=x^2, we want 1 - X/3 + X^2/5 - ... + (-1)^k*X^k/(2k+1) + ...
31
32 Assume p is non-zero.
33
34 When we sum terms up to x^k/(2k+1), the denominator Q[0] is
35 3*5*7*...*(2k+1) ~ (2k/e)^k.
36 */
37 static void
mpfr_atan_aux(mpfr_ptr y,mpz_ptr p,long r,int m,mpz_t * tab)38 mpfr_atan_aux (mpfr_ptr y, mpz_ptr p, long r, int m, mpz_t *tab)
39 {
40 mpz_t *S, *Q, *ptoj;
41 unsigned long n, i, k, j, l;
42 mpfr_exp_t diff, expo;
43 int im, done;
44 mpfr_prec_t mult, *accu, *log2_nb_terms;
45 mpfr_prec_t precy = MPFR_PREC(y);
46
47 MPFR_ASSERTD(mpz_cmp_ui (p, 0) != 0);
48
49 accu = (mpfr_prec_t*) (*__gmp_allocate_func) ((2 * m + 2) * sizeof (mpfr_prec_t));
50 log2_nb_terms = accu + m + 1;
51
52 /* Set Tables */
53 S = tab; /* S */
54 ptoj = S + 1*(m+1); /* p^2^j Precomputed table */
55 Q = S + 2*(m+1); /* Product of Odd integer table */
56
57 /* From p to p^2, and r to 2r */
58 mpz_mul (p, p, p);
59 MPFR_ASSERTD (2 * r > r);
60 r = 2 * r;
61
62 /* Normalize p */
63 n = mpz_scan1 (p, 0);
64 mpz_tdiv_q_2exp (p, p, n); /* exact */
65 MPFR_ASSERTD (r > n);
66 r -= n;
67 /* since |p/2^r| < 1, and p is a non-zero integer, necessarily r > 0 */
68
69 MPFR_ASSERTD (mpz_sgn (p) > 0);
70 MPFR_ASSERTD (m > 0);
71
72 /* check if p=1 (special case) */
73 l = 0;
74 /*
75 We compute by binary splitting, with X = x^2 = p/2^r:
76 P(a,b) = p if a+1=b, P(a,c)*P(c,b) otherwise
77 Q(a,b) = (2a+1)*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
78 S(a,b) = p*(2a+1) if a+1=b, Q(c,b)*S(a,c)+Q(a,c)*P(a,c)*S(c,b) otherwise
79 Then atan(x)/x ~ S(0,i)/Q(0,i) for i so that (p/2^r)^i/i is small enough.
80 The factor 2^(r*(b-a)) in Q(a,b) is implicit, thus we have to take it
81 into account when we compute with Q.
82 */
83 accu[0] = 0; /* accu[k] = Mult[0] + ... + Mult[k], where Mult[j] is the
84 number of bits of the corresponding term S[j]/Q[j] */
85 if (mpz_cmp_ui (p, 1) != 0)
86 {
87 /* p <> 1: precompute ptoj table */
88 mpz_set (ptoj[0], p);
89 for (im = 1 ; im <= m ; im ++)
90 mpz_mul (ptoj[im], ptoj[im - 1], ptoj[im - 1]);
91 /* main loop */
92 n = 1UL << m;
93 /* the ith term being X^i/(2i+1) with X=p/2^r, we can stop when
94 p^i/2^(r*i) < 2^(-precy), i.e. r*i > precy + log2(p^i) */
95 for (i = k = done = 0; (i < n) && (done == 0); i += 2, k ++)
96 {
97 /* initialize both S[k],Q[k] and S[k+1],Q[k+1] */
98 mpz_set_ui (Q[k+1], 2 * i + 3); /* Q(i+1,i+2) */
99 mpz_mul_ui (S[k+1], p, 2 * i + 1); /* S(i+1,i+2) */
100 mpz_mul_2exp (S[k], Q[k+1], r);
101 mpz_sub (S[k], S[k], S[k+1]); /* S(i,i+2) */
102 mpz_mul_ui (Q[k], Q[k+1], 2 * i + 1); /* Q(i,i+2) */
103 log2_nb_terms[k] = 1; /* S[k]/Q[k] corresponds to 2 terms */
104 for (j = (i + 2) >> 1, l = 1; (j & 1) == 0; l ++, j >>= 1, k --)
105 {
106 /* invariant: S[k-1]/Q[k-1] and S[k]/Q[k] correspond
107 to 2^l terms each. We combine them into S[k-1]/Q[k-1] */
108 MPFR_ASSERTD (k > 0);
109 mpz_mul (S[k], S[k], Q[k-1]);
110 mpz_mul (S[k], S[k], ptoj[l]);
111 mpz_mul (S[k-1], S[k-1], Q[k]);
112 mpz_mul_2exp (S[k-1], S[k-1], r << l);
113 mpz_add (S[k-1], S[k-1], S[k]);
114 mpz_mul (Q[k-1], Q[k-1], Q[k]);
115 log2_nb_terms[k-1] = l + 1;
116 /* now S[k-1]/Q[k-1] corresponds to 2^(l+1) terms */
117 MPFR_MPZ_SIZEINBASE2(mult, ptoj[l+1]);
118 /* FIXME: precompute bits(ptoj[l+1]) outside the loop? */
119 mult = (r << (l + 1)) - mult - 1;
120 accu[k-1] = (k == 1) ? mult : accu[k-2] + mult;
121 if (accu[k-1] > precy)
122 done = 1;
123 }
124 }
125 }
126 else /* special case p=1: the ith term being X^i/(2i+1) with X=1/2^r,
127 we can stop when r*i > precy i.e. i > precy/r */
128 {
129 n = 1UL << m;
130 for (i = k = 0; (i < n) && (i <= precy / r); i += 2, k ++)
131 {
132 mpz_set_ui (Q[k + 1], 2 * i + 3);
133 mpz_mul_2exp (S[k], Q[k+1], r);
134 mpz_sub_ui (S[k], S[k], 1 + 2 * i);
135 mpz_mul_ui (Q[k], Q[k + 1], 1 + 2 * i);
136 log2_nb_terms[k] = 1; /* S[k]/Q[k] corresponds to 2 terms */
137 for (j = (i + 2) >> 1, l = 1; (j & 1) == 0; l++, j >>= 1, k --)
138 {
139 MPFR_ASSERTD (k > 0);
140 mpz_mul (S[k], S[k], Q[k-1]);
141 mpz_mul (S[k-1], S[k-1], Q[k]);
142 mpz_mul_2exp (S[k-1], S[k-1], r << l);
143 mpz_add (S[k-1], S[k-1], S[k]);
144 mpz_mul (Q[k-1], Q[k-1], Q[k]);
145 log2_nb_terms[k-1] = l + 1;
146 }
147 }
148 }
149
150 /* we need to combine S[0]/Q[0]...S[k-1]/Q[k-1] */
151 l = 0; /* number of terms accumulated in S[k]/Q[k] */
152 while (k > 1)
153 {
154 k --;
155 /* combine S[k-1]/Q[k-1] and S[k]/Q[k] */
156 j = log2_nb_terms[k-1];
157 mpz_mul (S[k], S[k], Q[k-1]);
158 if (mpz_cmp_ui (p, 1) != 0)
159 mpz_mul (S[k], S[k], ptoj[j]);
160 mpz_mul (S[k-1], S[k-1], Q[k]);
161 l += 1 << log2_nb_terms[k];
162 mpz_mul_2exp (S[k-1], S[k-1], r * l);
163 mpz_add (S[k-1], S[k-1], S[k]);
164 mpz_mul (Q[k-1], Q[k-1], Q[k]);
165 }
166 (*__gmp_free_func) (accu, (2 * m + 2) * sizeof (mpfr_prec_t));
167
168 MPFR_MPZ_SIZEINBASE2 (diff, S[0]);
169 diff -= 2 * precy;
170 expo = diff;
171 if (diff >= 0)
172 mpz_tdiv_q_2exp (S[0], S[0], diff);
173 else
174 mpz_mul_2exp (S[0], S[0], -diff);
175
176 MPFR_MPZ_SIZEINBASE2 (diff, Q[0]);
177 diff -= precy;
178 expo -= diff;
179 if (diff >= 0)
180 mpz_tdiv_q_2exp (Q[0], Q[0], diff);
181 else
182 mpz_mul_2exp (Q[0], Q[0], -diff);
183
184 mpz_tdiv_q (S[0], S[0], Q[0]);
185 mpfr_set_z (y, S[0], MPFR_RNDD);
186 MPFR_SET_EXP (y, MPFR_EXP(y) + expo - r * (i - 1));
187 }
188
189 int
mpfr_atan(mpfr_ptr atan,mpfr_srcptr x,mpfr_rnd_t rnd_mode)190 mpfr_atan (mpfr_ptr atan, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
191 {
192 mpfr_t xp, arctgt, sk, tmp, tmp2;
193 mpz_t ukz;
194 mpz_t *tabz;
195 mpfr_exp_t exptol;
196 mpfr_prec_t prec, realprec, est_lost, lost;
197 unsigned long twopoweri, log2p, red;
198 int comparaison, inexact;
199 int i, n0, oldn0;
200 MPFR_GROUP_DECL (group);
201 MPFR_SAVE_EXPO_DECL (expo);
202 MPFR_ZIV_DECL (loop);
203
204 MPFR_LOG_FUNC
205 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd_mode),
206 ("atan[%Pu]=%.*Rg inexact=%d",
207 mpfr_get_prec (atan), mpfr_log_prec, atan, inexact));
208
209 /* Singular cases */
210 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
211 {
212 if (MPFR_IS_NAN (x))
213 {
214 MPFR_SET_NAN (atan);
215 MPFR_RET_NAN;
216 }
217 else if (MPFR_IS_INF (x))
218 {
219 MPFR_SAVE_EXPO_MARK (expo);
220 if (MPFR_IS_POS (x)) /* arctan(+inf) = Pi/2 */
221 inexact = mpfr_const_pi (atan, rnd_mode);
222 else /* arctan(-inf) = -Pi/2 */
223 {
224 inexact = -mpfr_const_pi (atan,
225 MPFR_INVERT_RND (rnd_mode));
226 MPFR_CHANGE_SIGN (atan);
227 }
228 mpfr_div_2ui (atan, atan, 1, rnd_mode); /* exact (no exceptions) */
229 MPFR_SAVE_EXPO_FREE (expo);
230 return mpfr_check_range (atan, inexact, rnd_mode);
231 }
232 else /* x is necessarily 0 */
233 {
234 MPFR_ASSERTD (MPFR_IS_ZERO (x));
235 MPFR_SET_ZERO (atan);
236 MPFR_SET_SAME_SIGN (atan, x);
237 MPFR_RET (0);
238 }
239 }
240
241 /* atan(x) = x - x^3/3 + x^5/5...
242 so the error is < 2^(3*EXP(x)-1)
243 so `EXP(x)-(3*EXP(x)-1)` = -2*EXP(x)+1 */
244 MPFR_FAST_COMPUTE_IF_SMALL_INPUT (atan, x, -2 * MPFR_GET_EXP (x), 1, 0,
245 rnd_mode, {});
246
247 /* Set x_p=|x| */
248 MPFR_TMP_INIT_ABS (xp, x);
249
250 MPFR_SAVE_EXPO_MARK (expo);
251
252 /* Other simple case arctan(-+1)=-+pi/4 */
253 comparaison = mpfr_cmp_ui (xp, 1);
254 if (MPFR_UNLIKELY (comparaison == 0))
255 {
256 int neg = MPFR_IS_NEG (x);
257 inexact = mpfr_const_pi (atan, MPFR_IS_POS (x) ? rnd_mode
258 : MPFR_INVERT_RND (rnd_mode));
259 if (neg)
260 {
261 inexact = -inexact;
262 MPFR_CHANGE_SIGN (atan);
263 }
264 mpfr_div_2ui (atan, atan, 2, rnd_mode); /* exact (no exceptions) */
265 MPFR_SAVE_EXPO_FREE (expo);
266 return mpfr_check_range (atan, inexact, rnd_mode);
267 }
268
269 realprec = MPFR_PREC (atan) + MPFR_INT_CEIL_LOG2 (MPFR_PREC (atan)) + 4;
270 prec = realprec + GMP_NUMB_BITS;
271
272 /* Initialisation */
273 mpz_init (ukz);
274 MPFR_GROUP_INIT_4 (group, prec, sk, tmp, tmp2, arctgt);
275 oldn0 = 0;
276 tabz = (mpz_t *) 0;
277
278 MPFR_ZIV_INIT (loop, prec);
279 for (;;)
280 {
281 /* First, if |x| < 1, we need to have more prec to be able to round (sup)
282 n0 = ceil(log(prec_requested + 2 + 1+ln(2.4)/ln(2))/log(2)) */
283 mpfr_prec_t sup;
284 sup = MPFR_GET_EXP (xp) < 0 ? 2 - MPFR_GET_EXP (xp) : 1; /* sup >= 1 */
285
286 n0 = MPFR_INT_CEIL_LOG2 ((realprec + sup) + 3);
287 /* since realprec >= 4, n0 >= ceil(log2(8)) >= 3, thus 3*n0 > 2 */
288 prec = (realprec + sup) + 1 + MPFR_INT_CEIL_LOG2 (3*n0-2);
289
290 /* the number of lost bits due to argument reduction is
291 9 - 2 * EXP(sk), which we estimate by 9 + 2*ceil(log2(p))
292 since we manage that sk < 1/p */
293 if (MPFR_PREC (atan) > 100)
294 {
295 log2p = MPFR_INT_CEIL_LOG2(prec) / 2 - 3;
296 est_lost = 9 + 2 * log2p;
297 prec += est_lost;
298 }
299 else
300 log2p = est_lost = 0; /* don't reduce the argument */
301
302 /* Initialisation */
303 MPFR_GROUP_REPREC_4 (group, prec, sk, tmp, tmp2, arctgt);
304 if (MPFR_LIKELY (oldn0 == 0))
305 {
306 oldn0 = 3 * (n0 + 1);
307 tabz = (mpz_t *) (*__gmp_allocate_func) (oldn0 * sizeof (mpz_t));
308 for (i = 0; i < oldn0; i++)
309 mpz_init (tabz[i]);
310 }
311 else if (MPFR_UNLIKELY (oldn0 < 3 * (n0 + 1)))
312 {
313 tabz = (mpz_t *) (*__gmp_reallocate_func)
314 (tabz, oldn0 * sizeof (mpz_t), 3 * (n0 + 1)*sizeof (mpz_t));
315 for (i = oldn0; i < 3 * (n0 + 1); i++)
316 mpz_init (tabz[i]);
317 oldn0 = 3 * (n0 + 1);
318 }
319
320 /* The mpfr_ui_div below mustn't underflow. This is guaranteed by
321 MPFR_SAVE_EXPO_MARK, but let's check that for maintainability. */
322 MPFR_ASSERTD (__gmpfr_emax <= 1 - __gmpfr_emin);
323
324 if (comparaison > 0) /* use atan(xp) = Pi/2 - atan(1/xp) */
325 mpfr_ui_div (sk, 1, xp, MPFR_RNDN);
326 else
327 mpfr_set (sk, xp, MPFR_RNDN);
328
329 /* now 0 < sk <= 1 */
330
331 /* Argument reduction: atan(x) = 2 atan((sqrt(1+x^2)-1)/x).
332 We want |sk| < k/sqrt(p) where p is the target precision. */
333 lost = 0;
334 for (red = 0; MPFR_GET_EXP(sk) > - (mpfr_exp_t) log2p; red ++)
335 {
336 lost = 9 - 2 * MPFR_EXP(sk);
337 mpfr_mul (tmp, sk, sk, MPFR_RNDN);
338 mpfr_add_ui (tmp, tmp, 1, MPFR_RNDN);
339 mpfr_sqrt (tmp, tmp, MPFR_RNDN);
340 mpfr_sub_ui (tmp, tmp, 1, MPFR_RNDN);
341 if (red == 0 && comparaison > 0)
342 /* use xp = 1/sk */
343 mpfr_mul (sk, tmp, xp, MPFR_RNDN);
344 else
345 mpfr_div (sk, tmp, sk, MPFR_RNDN);
346 }
347
348 /* we started from x0 = 1/|x| if |x| > 1, and |x| otherwise, thus
349 we had x0 = min(|x|, 1/|x|) <= 1, and applied 'red' times the
350 argument reduction x -> (sqrt(1+x^2)-1)/x, which keeps 0 < x < 1,
351 thus 0 < sk <= 1, and sk=1 can occur only if red=0 */
352
353 /* If sk=1, then if |x| < 1, we have 1 - 2^(-prec-1) <= |x| < 1,
354 or if |x| > 1, we have 1 - 2^(-prec-1) <= 1/|x| < 1, thus in all
355 cases ||x| - 1| <= 2^(-prec), from which it follows
356 |atan|x| - Pi/4| <= 2^(-prec), given the Taylor expansion
357 atan(1+x) = Pi/4 + x/2 - x^2/4 + ...
358 Since Pi/4 = 0.785..., the error is at most one ulp.
359 */
360 if (MPFR_UNLIKELY(mpfr_cmp_ui (sk, 1) == 0))
361 {
362 mpfr_const_pi (arctgt, MPFR_RNDN); /* 1/2 ulp extra error */
363 mpfr_div_2ui (arctgt, arctgt, 2, MPFR_RNDN); /* exact */
364 realprec = prec - 2;
365 goto can_round;
366 }
367
368 /* Assignation */
369 MPFR_SET_ZERO (arctgt);
370 twopoweri = 1 << 0;
371 MPFR_ASSERTD (n0 >= 4);
372 for (i = 0 ; i < n0; i++)
373 {
374 if (MPFR_UNLIKELY (MPFR_IS_ZERO (sk)))
375 break;
376 /* Calculation of trunc(tmp) --> mpz */
377 mpfr_mul_2ui (tmp, sk, twopoweri, MPFR_RNDN);
378 mpfr_trunc (tmp, tmp);
379 if (!MPFR_IS_ZERO (tmp))
380 {
381 /* tmp = ukz*2^exptol */
382 exptol = mpfr_get_z_2exp (ukz, tmp);
383 /* since the s_k are decreasing (see algorithms.tex),
384 and s_0 = min(|x|, 1/|x|) < 1, we have sk < 1,
385 thus exptol < 0 */
386 MPFR_ASSERTD (exptol < 0);
387 mpz_tdiv_q_2exp (ukz, ukz, (unsigned long int) (-exptol));
388 /* since tmp is a non-zero integer, and tmp = ukzold*2^exptol,
389 we now have ukz = tmp, thus ukz is non-zero */
390 /* Calculation of arctan(Ak) */
391 mpfr_set_z (tmp, ukz, MPFR_RNDN);
392 mpfr_div_2ui (tmp, tmp, twopoweri, MPFR_RNDN);
393 mpfr_atan_aux (tmp2, ukz, twopoweri, n0 - i, tabz);
394 mpfr_mul (tmp2, tmp2, tmp, MPFR_RNDN);
395 /* Addition */
396 mpfr_add (arctgt, arctgt, tmp2, MPFR_RNDN);
397 /* Next iteration */
398 mpfr_sub (tmp2, sk, tmp, MPFR_RNDN);
399 mpfr_mul (sk, sk, tmp, MPFR_RNDN);
400 mpfr_add_ui (sk, sk, 1, MPFR_RNDN);
401 mpfr_div (sk, tmp2, sk, MPFR_RNDN);
402 }
403 twopoweri <<= 1;
404 }
405 /* Add last step (Arctan(sk) ~= sk */
406 mpfr_add (arctgt, arctgt, sk, MPFR_RNDN);
407
408 /* argument reduction */
409 mpfr_mul_2exp (arctgt, arctgt, red, MPFR_RNDN);
410
411 if (comparaison > 0)
412 { /* atan(x) = Pi/2-atan(1/x) for x > 0 */
413 mpfr_const_pi (tmp, MPFR_RNDN);
414 mpfr_div_2ui (tmp, tmp, 1, MPFR_RNDN);
415 mpfr_sub (arctgt, tmp, arctgt, MPFR_RNDN);
416 }
417 MPFR_SET_POS (arctgt);
418
419 can_round:
420 if (MPFR_LIKELY (MPFR_CAN_ROUND (arctgt, realprec + est_lost - lost,
421 MPFR_PREC (atan), rnd_mode)))
422 break;
423 MPFR_ZIV_NEXT (loop, realprec);
424 }
425 MPFR_ZIV_FREE (loop);
426
427 inexact = mpfr_set4 (atan, arctgt, rnd_mode, MPFR_SIGN (x));
428
429 for (i = 0 ; i < oldn0 ; i++)
430 mpz_clear (tabz[i]);
431 mpz_clear (ukz);
432 (*__gmp_free_func) (tabz, oldn0 * sizeof (mpz_t));
433 MPFR_GROUP_CLEAR (group);
434
435 MPFR_SAVE_EXPO_FREE (expo);
436 return mpfr_check_range (atan, inexact, rnd_mode);
437 }
438