/* mpfr_agm -- arithmetic-geometric mean of two floating-point numbers

Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
Contributed by the Arenaire and Caramel projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */

#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"

/* agm(x,y) is between x and y, so we don't need to save exponent range */

/* mpfr_agm: set r to the arithmetic-geometric mean of op1 and op2,
   rounded in direction rnd_mode.

   Parameters:
     r        - destination (precision q = PREC(r))
     op2, op1 - the two operands; the AGM is symmetric and the code below
                swaps them if needed so that op2 >= op1, hence the unusual
                parameter order in the signature is harmless
     rnd_mode - rounding mode

   Returns the ternary inexact value (0 when the result is exact, as in the
   special cases below; otherwise the value produced by mpfr_set /
   mpfr_check_range at the end).

   Special values: NaN if either operand is NaN, is negative, or is -Inf;
   +Inf if both are +Inf; +0 if either is zero (and neither is NaN/Inf).

   Working precision is p = q + ceil(log2(q)) + 15, increased by the Ziv
   loop on failure of the rounding test.  Intermediate overflow/underflow
   is handled by scaling the operands' exponents (scaleop, applied before
   the first iteration) and the iterates' exponents (scaleit, applied
   during iterations); both scalings are undone on the final result. */
int
mpfr_agm (mpfr_ptr r, mpfr_srcptr op2, mpfr_srcptr op1, mpfr_rnd_t rnd_mode)
{
  int compare, inexact;
  mp_size_t s;                  /* limb count for working precision p */
  mpfr_prec_t p, q;             /* working precision / target precision */
  mp_limb_t *up, *vp, *ufp, *vfp;
  mpfr_t u, v, uf, vf, sc1, sc2;
  mpfr_exp_t scaleop = 0, scaleit;
  unsigned long n; /* number of iterations */
  MPFR_ZIV_DECL (loop);
  MPFR_TMP_DECL(marker);
  MPFR_SAVE_EXPO_DECL (expo);

  MPFR_LOG_FUNC
    (("op2[%Pu]=%.*Rg op1[%Pu]=%.*Rg rnd=%d",
      mpfr_get_prec (op2), mpfr_log_prec, op2,
      mpfr_get_prec (op1), mpfr_log_prec, op1, rnd_mode),
     ("r[%Pu]=%.*Rg inexact=%d",
      mpfr_get_prec (r), mpfr_log_prec, r, inexact));

  /* Deal with special values */
  if (MPFR_ARE_SINGULAR (op1, op2))
    {
      /* If a or b is NaN, the result is NaN */
      if (MPFR_IS_NAN(op1) || MPFR_IS_NAN(op2))
        {
          MPFR_SET_NAN(r);
          MPFR_RET_NAN;
        }
      /* now one of a or b is Inf or 0 */
      /* If a and b is +Inf, the result is +Inf.
         Otherwise if a or b is -Inf or 0, the result is NaN */
      else if (MPFR_IS_INF(op1) || MPFR_IS_INF(op2))
        {
          if (MPFR_IS_STRICTPOS(op1) && MPFR_IS_STRICTPOS(op2))
            {
              MPFR_SET_INF(r);
              MPFR_SET_SAME_SIGN(r, op1);
              MPFR_RET(0); /* exact */
            }
          else
            {
              MPFR_SET_NAN(r);
              MPFR_RET_NAN;
            }
        }
      else /* a and b are neither NaN nor Inf, and one is zero */
        {  /* If a or b is 0, the result is +0 since a sqrt is positive */
          MPFR_ASSERTD (MPFR_IS_ZERO (op1) || MPFR_IS_ZERO (op2));
          MPFR_SET_POS (r);
          MPFR_SET_ZERO (r);
          MPFR_RET (0); /* exact */
        }
    }

  /* If a or b is negative (excluding -Infinity), the result is NaN */
  if (MPFR_UNLIKELY(MPFR_IS_NEG(op1) || MPFR_IS_NEG(op2)))
    {
      MPFR_SET_NAN(r);
      MPFR_RET_NAN;
    }

  /* Precision of the following calculus */
  q = MPFR_PREC(r);
  p = q + MPFR_INT_CEIL_LOG2(q) + 15;
  MPFR_ASSERTD (p >= 7); /* see algorithms.tex */
  s = (p - 1) / GMP_NUMB_BITS + 1;

  /* b (op2) and a (op1) are the 2 operands but we want b >= a */
  compare = mpfr_cmp (op1, op2);
  if (MPFR_UNLIKELY( compare == 0 ))
    {
      /* AGM(a,a) = a exactly, so just copy (possibly rounding to PREC(r)) */
      mpfr_set (r, op1, rnd_mode);
      MPFR_RET (0); /* exact */
    }
  else if (compare > 0)
    {
      mpfr_srcptr t = op1;
      op1 = op2;
      op2 = t;
    }

  /* Now b (=op2) > a (=op1) */

  MPFR_SAVE_EXPO_MARK (expo);

  MPFR_TMP_MARK(marker);

  /* Main loop */
  MPFR_ZIV_INIT (loop, p);
  for (;;)
    {
      mpfr_prec_t eq;
      unsigned long err = 0;  /* must be set to 0 at each Ziv iteration */
      MPFR_BLOCK_DECL (flags);

      /* Init temporary vars */
      MPFR_TMP_INIT (up, u, p, s);
      MPFR_TMP_INIT (vp, v, p, s);
      MPFR_TMP_INIT (ufp, uf, p, s);
      MPFR_TMP_INIT (vfp, vf, p, s);

      /* Calculus of un and vn */
    retry:
      /* u = a*b (geometric part before sqrt), v = a+b (arithmetic part
         before halving); flags records any overflow/underflow */
      MPFR_BLOCK (flags,
                  mpfr_mul (u, op1, op2, MPFR_RNDN);
                  /* mpfr_mul(...): faster since PREC(op) < PREC(u) */
                  mpfr_add (v, op1, op2, MPFR_RNDN);
                  /* mpfr_add with !=prec is still good */);
      if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags)))
        {
          mpfr_exp_t e1 , e2;

          /* scaling is attempted only once per call; a second exception
             here would be a logic error */
          MPFR_ASSERTN (scaleop == 0);
          e1 = MPFR_GET_EXP (op1);
          e2 = MPFR_GET_EXP (op2);

          /* Let's determine scaleop to avoid an overflow/underflow. */
          if (MPFR_OVERFLOW (flags))
            {
              /* Let's recall that emin <= e1 <= e2 <= emax.
                 There has been an overflow. Thus e2 >= emax/2.
                 If the mpfr_mul overflowed, then e1 + e2 > emax.
                 If the mpfr_add overflowed, then e2 = emax.
                 We want: (e1 + scale) + (e2 + scale) <= emax,
                 i.e. scale <= (emax - e1 - e2) / 2. Let's take
                 scale = min(floor((emax - e1 - e2) / 2), -1).
                 This is OK, as:
                 1. emin <= scale <= -1.
                 2. e1 + scale >= emin. Indeed:
                    * If e1 + e2 > emax, then
                      e1 + scale >= e1 + (emax - e1 - e2) / 2 - 1
                                 >= (emax + e1 - emax) / 2 - 1
                                 >= e1 / 2 - 1 >= emin.
                    * Otherwise, mpfr_mul didn't overflow, therefore
                      mpfr_add overflowed and e2 = emax, so that
                      e1 > emin (see restriction below).
                      e1 + scale > emin - 1, thus e1 + scale >= emin.
                 3. e2 + scale <= emax, since scale < 0. */
              if (e1 + e2 > MPFR_EXT_EMAX)
                {
                  scaleop = - (((e1 + e2) - MPFR_EXT_EMAX + 1) / 2);
                  MPFR_ASSERTN (scaleop < 0);
                }
              else
                {
                  /* The addition necessarily overflowed. */
                  MPFR_ASSERTN (e2 == MPFR_EXT_EMAX);
                  /* The case where e1 = emin and e2 = emax is not supported
                     here. This would mean that the precision of e2 would be
                     huge (and possibly not supported in practice anyway). */
                  MPFR_ASSERTN (e1 > MPFR_EXT_EMIN);
                  scaleop = -1;
                }

            }
          else  /* underflow only (in the multiplication) */
            {
              /* We have e1 + e2 <= emin (so, e1 <= e2 <= 0).
                 We want: (e1 + scale) + (e2 + scale) >= emin + 1,
                 i.e. scale >= (emin + 1 - e1 - e2) / 2. let's take
                 scale = ceil((emin + 1 - e1 - e2) / 2). This is OK, as:
                 1. 1 <= scale <= emax.
                 2. e1 + scale >= emin + 1 >= emin.
                 3. e2 + scale <= scale <= emax. */
              MPFR_ASSERTN (e1 <= e2 && e2 <= 0);
              /* (emin + 2 - e1 - e2) / 2 is the ceiling of
                 (emin + 1 - e1 - e2) / 2 for the values involved */
              scaleop = (MPFR_EXT_EMIN + 2 - e1 - e2) / 2;
              MPFR_ASSERTN (scaleop > 0);
            }

          /* Make sc1/sc2 read-only aliases of op1/op2 with shifted
             exponents (no copy of the significands) and retry */
          MPFR_ALIAS (sc1, op1, MPFR_SIGN (op1), e1 + scaleop);
          MPFR_ALIAS (sc2, op2, MPFR_SIGN (op2), e2 + scaleop);
          op1 = sc1;
          op2 = sc2;
          MPFR_LOG_MSG (("Exception in pre-iteration, scale = %"
                         MPFR_EXP_FSPEC "d\n", scaleop));
          goto retry;
        }

      mpfr_clear_flags ();
      /* u = U(1) = sqrt(a*b), v = V(1) = (a+b)/2 */
      mpfr_sqrt (u, u, MPFR_RNDN);
      mpfr_div_2ui (v, v, 1, MPFR_RNDN);

      scaleit = 0;
      n = 1;
      /* Iterate while u and v still differ in the first p-2 bits;
         eq receives the number of identical leading bits */
      while (mpfr_cmp2 (u, v, &eq) != 0 && eq <= p - 2)
        {
          MPFR_BLOCK_DECL (flags2);

          MPFR_LOG_MSG (("Iteration n = %lu\n", n));

        retry2:
          mpfr_add (vf, u, v, MPFR_RNDN);  /* No overflow? */
          mpfr_div_2ui (vf, vf, 1, MPFR_RNDN);
          /* See proof in algorithms.tex */
          if (4*eq > p)
            {
              /* u and v are already very close: finish with one corrected
                 step instead of continuing the plain iteration */
              mpfr_t w;
              MPFR_BLOCK_DECL (flags3);

              MPFR_LOG_MSG (("4*eq > p\n", 0));

              /* vf = V(k) */
              mpfr_init2 (w, (p + 1) / 2);
              MPFR_BLOCK
                (flags3,
                 mpfr_sub (w, v, u, MPFR_RNDN);       /* e = V(k-1)-U(k-1) */
                 mpfr_sqr (w, w, MPFR_RNDN);          /* e = e^2 */
                 mpfr_div_2ui (w, w, 4, MPFR_RNDN);   /* e*= (1/2)^2*1/4  */
                 mpfr_div (w, w, vf, MPFR_RNDN);      /* 1/4*e^2/V(k) */
                 );
              if (MPFR_LIKELY (! MPFR_UNDERFLOW (flags3)))
                {
                  mpfr_sub (v, vf, w, MPFR_RNDN);
                  err = MPFR_GET_EXP (vf) - MPFR_GET_EXP (v); /* 0 or 1 */
                  mpfr_clear (w);
                  break;
                }
              /* There has been an underflow because of the cancellation
                 between V(k-1) and U(k-1). Let's use the conventional
                 method. */
              MPFR_LOG_MSG (("4*eq > p -> underflow\n", 0));
              mpfr_clear (w);
              mpfr_clear_underflow ();
            }
          /* U(k) increases, so that U.V can overflow (but not underflow). */
          MPFR_BLOCK (flags2, mpfr_mul (uf, u, v, MPFR_RNDN););
          if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags2)))
            {
              mpfr_exp_t scale2;

              /* shift both iterates down just enough that u*v fits;
                 accumulate the shift in scaleit to undo it at the end */
              scale2 = - (((MPFR_GET_EXP (u) + MPFR_GET_EXP (v))
                           - MPFR_EXT_EMAX + 1) / 2);
              MPFR_EXP (u) += scale2;
              MPFR_EXP (v) += scale2;
              scaleit += scale2;
              MPFR_LOG_MSG (("Overflow in iteration n = %lu, scaleit = %"
                             MPFR_EXP_FSPEC "d (%" MPFR_EXP_FSPEC "d)\n",
                             n, scaleit, scale2));
              mpfr_clear_overflow ();
              goto retry2;
            }
          /* U(k+1) = sqrt(U(k)*V(k)), V(k+1) = (U(k)+V(k))/2 (in vf) */
          mpfr_sqrt (u, uf, MPFR_RNDN);
          mpfr_swap (v, vf);
          n ++;
        }

      MPFR_LOG_MSG (("End of iterations (n = %lu)\n", n));

      /* the error on v is bounded by (18n+51) ulps, or twice if there
         was an exponent loss in the final subtraction */
      err += MPFR_INT_CEIL_LOG2(18 * n + 51); /* 18n+51 should not overflow
                                                 since n is about log(p) */
      /* we should have n+2 <= 2^(p/4) [see algorithms.tex] */
      if (MPFR_LIKELY (MPFR_INT_CEIL_LOG2(n + 2) <= p / 4 &&
                       MPFR_CAN_ROUND (v, p - err, q, rnd_mode)))
        break; /* Stop the loop */

      /* Next iteration */
      MPFR_ZIV_NEXT (loop, p);
      s = (p - 1) / GMP_NUMB_BITS + 1;
    }
  MPFR_ZIV_FREE (loop);

  /* No exception flag other than "inexact" should remain set here,
     since mpfr_clear_flags() was called before the iterations and the
     scaling logic cleared any overflow/underflow it handled */
  if (MPFR_UNLIKELY ((__gmpfr_flags & (MPFR_FLAGS_ALL ^ MPFR_FLAGS_INEXACT))
                     != 0))
    {
      MPFR_ASSERTN (! mpfr_overflow_p ());   /* since mpfr_clear_flags */
      MPFR_ASSERTN (! mpfr_underflow_p ());  /* since mpfr_clear_flags */
      MPFR_ASSERTN (! mpfr_divby0_p ());     /* since mpfr_clear_flags */
      MPFR_ASSERTN (! mpfr_nanflag_p ());    /* since mpfr_clear_flags */
    }

  /* Setting of the result */
  inexact = mpfr_set (r, v, rnd_mode);
  /* undo the pre-iteration and in-iteration exponent scalings */
  MPFR_EXP (r) -= scaleop + scaleit;

  /* Let's clean */
  MPFR_TMP_FREE(marker);

  MPFR_SAVE_EXPO_FREE (expo);
  /* From the definition of the AGM, underflow and overflow
     are not possible. */
  return mpfr_check_range (r, inexact, rnd_mode);
  /* agm(u,v) can be exact for u, v rational only for u=v.
     Proof (due to Nicolas Brisebarre): it suffices to consider
     u=1 and v<1. Then 1/AGM(1,v) = 2F1(1/2,1/2,1;1-v^2),
     and a theorem due to G.V. Chudnovsky states that for x a
     non-zero algebraic number with |x|<1, then
     2F1(1/2,1/2,1;x) and 2F1(-1/2,1/2,1;x) are algebraically
     independent over Q. */
}