1 /* mpfr_atan -- arc-tangent of a floating-point number 2 3 Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. 4 Contributed by the AriC and Caramel projects, INRIA. 5 6 This file is part of the GNU MPFR Library. 7 8 The GNU MPFR Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MPFR Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23 #define MPFR_NEED_LONGLONG_H 24 #include "mpfr-impl.h" 25 26 /* If x = p/2^r, put in y an approximation of atan(x)/x using 2^m terms 27 for the series expansion, with an error of at most 1 ulp. 28 Assumes |x| < 1. 29 30 If X=x^2, we want 1 - X/3 + X^2/5 - ... + (-1)^k*X^k/(2k+1) + ... 31 32 Assume p is non-zero. 33 34 When we sum terms up to x^k/(2k+1), the denominator Q[0] is 35 3*5*7*...*(2k+1) ~ (2k/e)^k. 36 */ 37 static void 38 mpfr_atan_aux (mpfr_ptr y, mpz_ptr p, long r, int m, mpz_t *tab) 39 { 40 mpz_t *S, *Q, *ptoj; 41 unsigned long n, i, k, j, l; 42 mpfr_exp_t diff, expo; 43 int im, done; 44 mpfr_prec_t mult, *accu, *log2_nb_terms; 45 mpfr_prec_t precy = MPFR_PREC(y); 46 47 MPFR_ASSERTD(mpz_cmp_ui (p, 0) != 0); 48 49 accu = (mpfr_prec_t*) (*__gmp_allocate_func) ((2 * m + 2) * sizeof (mpfr_prec_t)); 50 log2_nb_terms = accu + m + 1; 51 52 /* Set Tables */ 53 S = tab; /* S */ 54 ptoj = S + 1*(m+1); /* p^2^j Precomputed table */ 55 Q = S + 2*(m+1); /* Product of Odd integer table */ 56 57 /* From p to p^2, and r to 2r */ 58 mpz_mul (p, p, p); 59 MPFR_ASSERTD (2 * r > r); 60 r = 2 * r; 61 62 /* Normalize p */ 63 n = mpz_scan1 (p, 0); 64 mpz_tdiv_q_2exp (p, p, n); /* exact */ 65 MPFR_ASSERTD (r > n); 66 r -= n; 67 /* since |p/2^r| < 1, and p is a non-zero integer, necessarily r > 0 */ 68 69 MPFR_ASSERTD (mpz_sgn (p) > 0); 70 MPFR_ASSERTD (m > 0); 71 72 /* check if p=1 (special case) */ 73 l = 0; 74 /* 75 We compute by binary splitting, with X = x^2 = p/2^r: 76 P(a,b) = p if a+1=b, P(a,c)*P(c,b) otherwise 77 Q(a,b) = (2a+1)*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise 78 S(a,b) = p*(2a+1) if a+1=b, Q(c,b)*S(a,c)+Q(a,c)*P(a,c)*S(c,b) otherwise 79 Then atan(x)/x ~ S(0,i)/Q(0,i) for i so that (p/2^r)^i/i is small enough. 80 The factor 2^(r*(b-a)) in Q(a,b) is implicit, thus we have to take it 81 into account when we compute with Q. 82 */ 83 accu[0] = 0; /* accu[k] = Mult[0] + ... + Mult[k], where Mult[j] is the 84 number of bits of the corresponding term S[j]/Q[j] */ 85 if (mpz_cmp_ui (p, 1) != 0) 86 { 87 /* p <> 1: precompute ptoj table */ 88 mpz_set (ptoj[0], p); 89 for (im = 1 ; im <= m ; im ++) 90 mpz_mul (ptoj[im], ptoj[im - 1], ptoj[im - 1]); 91 /* main loop */ 92 n = 1UL << m; 93 /* the ith term being X^i/(2i+1) with X=p/2^r, we can stop when 94 p^i/2^(r*i) < 2^(-precy), i.e. r*i > precy + log2(p^i) */ 95 for (i = k = done = 0; (i < n) && (done == 0); i += 2, k ++) 96 { 97 /* initialize both S[k],Q[k] and S[k+1],Q[k+1] */ 98 mpz_set_ui (Q[k+1], 2 * i + 3); /* Q(i+1,i+2) */ 99 mpz_mul_ui (S[k+1], p, 2 * i + 1); /* S(i+1,i+2) */ 100 mpz_mul_2exp (S[k], Q[k+1], r); 101 mpz_sub (S[k], S[k], S[k+1]); /* S(i,i+2) */ 102 mpz_mul_ui (Q[k], Q[k+1], 2 * i + 1); /* Q(i,i+2) */ 103 log2_nb_terms[k] = 1; /* S[k]/Q[k] corresponds to 2 terms */ 104 for (j = (i + 2) >> 1, l = 1; (j & 1) == 0; l ++, j >>= 1, k --) 105 { 106 /* invariant: S[k-1]/Q[k-1] and S[k]/Q[k] correspond 107 to 2^l terms each. We combine them into S[k-1]/Q[k-1] */ 108 MPFR_ASSERTD (k > 0); 109 mpz_mul (S[k], S[k], Q[k-1]); 110 mpz_mul (S[k], S[k], ptoj[l]); 111 mpz_mul (S[k-1], S[k-1], Q[k]); 112 mpz_mul_2exp (S[k-1], S[k-1], r << l); 113 mpz_add (S[k-1], S[k-1], S[k]); 114 mpz_mul (Q[k-1], Q[k-1], Q[k]); 115 log2_nb_terms[k-1] = l + 1; 116 /* now S[k-1]/Q[k-1] corresponds to 2^(l+1) terms */ 117 MPFR_MPZ_SIZEINBASE2(mult, ptoj[l+1]); 118 /* FIXME: precompute bits(ptoj[l+1]) outside the loop? */ 119 mult = (r << (l + 1)) - mult - 1; 120 accu[k-1] = (k == 1) ? mult : accu[k-2] + mult; 121 if (accu[k-1] > precy) 122 done = 1; 123 } 124 } 125 } 126 else /* special case p=1: the ith term being X^i/(2i+1) with X=1/2^r, 127 we can stop when r*i > precy i.e. i > precy/r */ 128 { 129 n = 1UL << m; 130 for (i = k = 0; (i < n) && (i <= precy / r); i += 2, k ++) 131 { 132 mpz_set_ui (Q[k + 1], 2 * i + 3); 133 mpz_mul_2exp (S[k], Q[k+1], r); 134 mpz_sub_ui (S[k], S[k], 1 + 2 * i); 135 mpz_mul_ui (Q[k], Q[k + 1], 1 + 2 * i); 136 log2_nb_terms[k] = 1; /* S[k]/Q[k] corresponds to 2 terms */ 137 for (j = (i + 2) >> 1, l = 1; (j & 1) == 0; l++, j >>= 1, k --) 138 { 139 MPFR_ASSERTD (k > 0); 140 mpz_mul (S[k], S[k], Q[k-1]); 141 mpz_mul (S[k-1], S[k-1], Q[k]); 142 mpz_mul_2exp (S[k-1], S[k-1], r << l); 143 mpz_add (S[k-1], S[k-1], S[k]); 144 mpz_mul (Q[k-1], Q[k-1], Q[k]); 145 log2_nb_terms[k-1] = l + 1; 146 } 147 } 148 } 149 150 /* we need to combine S[0]/Q[0]...S[k-1]/Q[k-1] */ 151 l = 0; /* number of terms accumulated in S[k]/Q[k] */ 152 while (k > 1) 153 { 154 k --; 155 /* combine S[k-1]/Q[k-1] and S[k]/Q[k] */ 156 j = log2_nb_terms[k-1]; 157 mpz_mul (S[k], S[k], Q[k-1]); 158 if (mpz_cmp_ui (p, 1) != 0) 159 mpz_mul (S[k], S[k], ptoj[j]); 160 mpz_mul (S[k-1], S[k-1], Q[k]); 161 l += 1 << log2_nb_terms[k]; 162 mpz_mul_2exp (S[k-1], S[k-1], r * l); 163 mpz_add (S[k-1], S[k-1], S[k]); 164 mpz_mul (Q[k-1], Q[k-1], Q[k]); 165 } 166 (*__gmp_free_func) (accu, (2 * m + 2) * sizeof (mpfr_prec_t)); 167 168 MPFR_MPZ_SIZEINBASE2 (diff, S[0]); 169 diff -= 2 * precy; 170 expo = diff; 171 if (diff >= 0) 172 mpz_tdiv_q_2exp (S[0], S[0], diff); 173 else 174 mpz_mul_2exp (S[0], S[0], -diff); 175 176 MPFR_MPZ_SIZEINBASE2 (diff, Q[0]); 177 diff -= precy; 178 expo -= diff; 179 if (diff >= 0) 180 mpz_tdiv_q_2exp (Q[0], Q[0], diff); 181 else 182 mpz_mul_2exp (Q[0], Q[0], -diff); 183 184 mpz_tdiv_q (S[0], S[0], Q[0]); 185 mpfr_set_z (y, S[0], MPFR_RNDD); 186 MPFR_SET_EXP (y, MPFR_EXP(y) + expo - r * (i - 1)); 187 } 188 189 int 190 mpfr_atan (mpfr_ptr atan, mpfr_srcptr x, mpfr_rnd_t rnd_mode) 191 { 192 mpfr_t xp, arctgt, sk, tmp, tmp2; 193 mpz_t ukz; 194 mpz_t *tabz; 195 mpfr_exp_t exptol; 196 mpfr_prec_t prec, realprec, est_lost, lost; 197 unsigned long twopoweri, log2p, red; 198 int comparaison, inexact; 199 int i, n0, oldn0; 200 MPFR_GROUP_DECL (group); 201 MPFR_SAVE_EXPO_DECL (expo); 202 MPFR_ZIV_DECL (loop); 203 204 MPFR_LOG_FUNC 205 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd_mode), 206 ("atan[%Pu]=%.*Rg inexact=%d", 207 mpfr_get_prec (atan), mpfr_log_prec, atan, inexact)); 208 209 /* Singular cases */ 210 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x))) 211 { 212 if (MPFR_IS_NAN (x)) 213 { 214 MPFR_SET_NAN (atan); 215 MPFR_RET_NAN; 216 } 217 else if (MPFR_IS_INF (x)) 218 { 219 MPFR_SAVE_EXPO_MARK (expo); 220 if (MPFR_IS_POS (x)) /* arctan(+inf) = Pi/2 */ 221 inexact = mpfr_const_pi (atan, rnd_mode); 222 else /* arctan(-inf) = -Pi/2 */ 223 { 224 inexact = -mpfr_const_pi (atan, 225 MPFR_INVERT_RND (rnd_mode)); 226 MPFR_CHANGE_SIGN (atan); 227 } 228 mpfr_div_2ui (atan, atan, 1, rnd_mode); /* exact (no exceptions) */ 229 MPFR_SAVE_EXPO_FREE (expo); 230 return mpfr_check_range (atan, inexact, rnd_mode); 231 } 232 else /* x is necessarily 0 */ 233 { 234 MPFR_ASSERTD (MPFR_IS_ZERO (x)); 235 MPFR_SET_ZERO (atan); 236 MPFR_SET_SAME_SIGN (atan, x); 237 MPFR_RET (0); 238 } 239 } 240 241 /* atan(x) = x - x^3/3 + x^5/5... 242 so the error is < 2^(3*EXP(x)-1) 243 so `EXP(x)-(3*EXP(x)-1)` = -2*EXP(x)+1 */ 244 MPFR_FAST_COMPUTE_IF_SMALL_INPUT (atan, x, -2 * MPFR_GET_EXP (x), 1, 0, 245 rnd_mode, {}); 246 247 /* Set x_p=|x| */ 248 MPFR_TMP_INIT_ABS (xp, x); 249 250 MPFR_SAVE_EXPO_MARK (expo); 251 252 /* Other simple case arctan(-+1)=-+pi/4 */ 253 comparaison = mpfr_cmp_ui (xp, 1); 254 if (MPFR_UNLIKELY (comparaison == 0)) 255 { 256 int neg = MPFR_IS_NEG (x); 257 inexact = mpfr_const_pi (atan, MPFR_IS_POS (x) ? rnd_mode 258 : MPFR_INVERT_RND (rnd_mode)); 259 if (neg) 260 { 261 inexact = -inexact; 262 MPFR_CHANGE_SIGN (atan); 263 } 264 mpfr_div_2ui (atan, atan, 2, rnd_mode); /* exact (no exceptions) */ 265 MPFR_SAVE_EXPO_FREE (expo); 266 return mpfr_check_range (atan, inexact, rnd_mode); 267 } 268 269 realprec = MPFR_PREC (atan) + MPFR_INT_CEIL_LOG2 (MPFR_PREC (atan)) + 4; 270 prec = realprec + GMP_NUMB_BITS; 271 272 /* Initialisation */ 273 mpz_init (ukz); 274 MPFR_GROUP_INIT_4 (group, prec, sk, tmp, tmp2, arctgt); 275 oldn0 = 0; 276 tabz = (mpz_t *) 0; 277 278 MPFR_ZIV_INIT (loop, prec); 279 for (;;) 280 { 281 /* First, if |x| < 1, we need to have more prec to be able to round (sup) 282 n0 = ceil(log(prec_requested + 2 + 1+ln(2.4)/ln(2))/log(2)) */ 283 mpfr_prec_t sup; 284 sup = MPFR_GET_EXP (xp) < 0 ? 2 - MPFR_GET_EXP (xp) : 1; /* sup >= 1 */ 285 286 n0 = MPFR_INT_CEIL_LOG2 ((realprec + sup) + 3); 287 /* since realprec >= 4, n0 >= ceil(log2(8)) >= 3, thus 3*n0 > 2 */ 288 prec = (realprec + sup) + 1 + MPFR_INT_CEIL_LOG2 (3*n0-2); 289 290 /* the number of lost bits due to argument reduction is 291 9 - 2 * EXP(sk), which we estimate by 9 + 2*ceil(log2(p)) 292 since we manage that sk < 1/p */ 293 if (MPFR_PREC (atan) > 100) 294 { 295 log2p = MPFR_INT_CEIL_LOG2(prec) / 2 - 3; 296 est_lost = 9 + 2 * log2p; 297 prec += est_lost; 298 } 299 else 300 log2p = est_lost = 0; /* don't reduce the argument */ 301 302 /* Initialisation */ 303 MPFR_GROUP_REPREC_4 (group, prec, sk, tmp, tmp2, arctgt); 304 if (MPFR_LIKELY (oldn0 == 0)) 305 { 306 oldn0 = 3 * (n0 + 1); 307 tabz = (mpz_t *) (*__gmp_allocate_func) (oldn0 * sizeof (mpz_t)); 308 for (i = 0; i < oldn0; i++) 309 mpz_init (tabz[i]); 310 } 311 else if (MPFR_UNLIKELY (oldn0 < 3 * (n0 + 1))) 312 { 313 tabz = (mpz_t *) (*__gmp_reallocate_func) 314 (tabz, oldn0 * sizeof (mpz_t), 3 * (n0 + 1)*sizeof (mpz_t)); 315 for (i = oldn0; i < 3 * (n0 + 1); i++) 316 mpz_init (tabz[i]); 317 oldn0 = 3 * (n0 + 1); 318 } 319 320 /* The mpfr_ui_div below mustn't underflow. This is guaranteed by 321 MPFR_SAVE_EXPO_MARK, but let's check that for maintainability. */ 322 MPFR_ASSERTD (__gmpfr_emax <= 1 - __gmpfr_emin); 323 324 if (comparaison > 0) /* use atan(xp) = Pi/2 - atan(1/xp) */ 325 mpfr_ui_div (sk, 1, xp, MPFR_RNDN); 326 else 327 mpfr_set (sk, xp, MPFR_RNDN); 328 329 /* now 0 < sk <= 1 */ 330 331 /* Argument reduction: atan(x) = 2 atan((sqrt(1+x^2)-1)/x). 332 We want |sk| < k/sqrt(p) where p is the target precision. */ 333 lost = 0; 334 for (red = 0; MPFR_GET_EXP(sk) > - (mpfr_exp_t) log2p; red ++) 335 { 336 lost = 9 - 2 * MPFR_EXP(sk); 337 mpfr_mul (tmp, sk, sk, MPFR_RNDN); 338 mpfr_add_ui (tmp, tmp, 1, MPFR_RNDN); 339 mpfr_sqrt (tmp, tmp, MPFR_RNDN); 340 mpfr_sub_ui (tmp, tmp, 1, MPFR_RNDN); 341 if (red == 0 && comparaison > 0) 342 /* use xp = 1/sk */ 343 mpfr_mul (sk, tmp, xp, MPFR_RNDN); 344 else 345 mpfr_div (sk, tmp, sk, MPFR_RNDN); 346 } 347 348 /* we started from x0 = 1/|x| if |x| > 1, and |x| otherwise, thus 349 we had x0 = min(|x|, 1/|x|) <= 1, and applied 'red' times the 350 argument reduction x -> (sqrt(1+x^2)-1)/x, which keeps 0 < x < 1, 351 thus 0 < sk <= 1, and sk=1 can occur only if red=0 */ 352 353 /* If sk=1, then if |x| < 1, we have 1 - 2^(-prec-1) <= |x| < 1, 354 or if |x| > 1, we have 1 - 2^(-prec-1) <= 1/|x| < 1, thus in all 355 cases ||x| - 1| <= 2^(-prec), from which it follows 356 |atan|x| - Pi/4| <= 2^(-prec), given the Taylor expansion 357 atan(1+x) = Pi/4 + x/2 - x^2/4 + ... 358 Since Pi/4 = 0.785..., the error is at most one ulp. 359 */ 360 if (MPFR_UNLIKELY(mpfr_cmp_ui (sk, 1) == 0)) 361 { 362 mpfr_const_pi (arctgt, MPFR_RNDN); /* 1/2 ulp extra error */ 363 mpfr_div_2ui (arctgt, arctgt, 2, MPFR_RNDN); /* exact */ 364 realprec = prec - 2; 365 goto can_round; 366 } 367 368 /* Assignation */ 369 MPFR_SET_ZERO (arctgt); 370 twopoweri = 1 << 0; 371 MPFR_ASSERTD (n0 >= 4); 372 for (i = 0 ; i < n0; i++) 373 { 374 if (MPFR_UNLIKELY (MPFR_IS_ZERO (sk))) 375 break; 376 /* Calculation of trunc(tmp) --> mpz */ 377 mpfr_mul_2ui (tmp, sk, twopoweri, MPFR_RNDN); 378 mpfr_trunc (tmp, tmp); 379 if (!MPFR_IS_ZERO (tmp)) 380 { 381 /* tmp = ukz*2^exptol */ 382 exptol = mpfr_get_z_2exp (ukz, tmp); 383 /* since the s_k are decreasing (see algorithms.tex), 384 and s_0 = min(|x|, 1/|x|) < 1, we have sk < 1, 385 thus exptol < 0 */ 386 MPFR_ASSERTD (exptol < 0); 387 mpz_tdiv_q_2exp (ukz, ukz, (unsigned long int) (-exptol)); 388 /* since tmp is a non-zero integer, and tmp = ukzold*2^exptol, 389 we now have ukz = tmp, thus ukz is non-zero */ 390 /* Calculation of arctan(Ak) */ 391 mpfr_set_z (tmp, ukz, MPFR_RNDN); 392 mpfr_div_2ui (tmp, tmp, twopoweri, MPFR_RNDN); 393 mpfr_atan_aux (tmp2, ukz, twopoweri, n0 - i, tabz); 394 mpfr_mul (tmp2, tmp2, tmp, MPFR_RNDN); 395 /* Addition */ 396 mpfr_add (arctgt, arctgt, tmp2, MPFR_RNDN); 397 /* Next iteration */ 398 mpfr_sub (tmp2, sk, tmp, MPFR_RNDN); 399 mpfr_mul (sk, sk, tmp, MPFR_RNDN); 400 mpfr_add_ui (sk, sk, 1, MPFR_RNDN); 401 mpfr_div (sk, tmp2, sk, MPFR_RNDN); 402 } 403 twopoweri <<= 1; 404 } 405 /* Add last step (Arctan(sk) ~= sk */ 406 mpfr_add (arctgt, arctgt, sk, MPFR_RNDN); 407 408 /* argument reduction */ 409 mpfr_mul_2exp (arctgt, arctgt, red, MPFR_RNDN); 410 411 if (comparaison > 0) 412 { /* atan(x) = Pi/2-atan(1/x) for x > 0 */ 413 mpfr_const_pi (tmp, MPFR_RNDN); 414 mpfr_div_2ui (tmp, tmp, 1, MPFR_RNDN); 415 mpfr_sub (arctgt, tmp, arctgt, MPFR_RNDN); 416 } 417 MPFR_SET_POS (arctgt); 418 419 can_round: 420 if (MPFR_LIKELY (MPFR_CAN_ROUND (arctgt, realprec + est_lost - lost, 421 MPFR_PREC (atan), rnd_mode))) 422 break; 423 MPFR_ZIV_NEXT (loop, realprec); 424 } 425 MPFR_ZIV_FREE (loop); 426 427 inexact = mpfr_set4 (atan, arctgt, rnd_mode, MPFR_SIGN (x)); 428 429 for (i = 0 ; i < oldn0 ; i++) 430 mpz_clear (tabz[i]); 431 mpz_clear (ukz); 432 (*__gmp_free_func) (tabz, oldn0 * sizeof (mpz_t)); 433 MPFR_GROUP_CLEAR (group); 434 435 MPFR_SAVE_EXPO_FREE (expo); 436 return mpfr_check_range (atan, inexact, rnd_mode); 437 } 438