1 /* mpfr_pow_z -- power function x^z with z a MPZ 2 3 Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. 4 Contributed by the Arenaire and Caramel projects, INRIA. 5 6 This file is part of the GNU MPFR Library. 7 8 The GNU MPFR Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MPFR Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23 #define MPFR_NEED_LONGLONG_H 24 #include "mpfr-impl.h" 25 26 /* y <- x^|z| with z != 0 27 if cr=1: ensures correct rounding of y 28 if cr=0: does not ensure correct rounding, but avoid spurious overflow 29 or underflow, and uses the precision of y as working precision (warning, 30 y and x might be the same variable). */ 31 static int 32 mpfr_pow_pos_z (mpfr_ptr y, mpfr_srcptr x, mpz_srcptr z, mpfr_rnd_t rnd, int cr) 33 { 34 mpfr_t res; 35 mpfr_prec_t prec, err; 36 int inexact; 37 mpfr_rnd_t rnd1, rnd2; 38 mpz_t absz; 39 mp_size_t size_z; 40 MPFR_ZIV_DECL (loop); 41 MPFR_BLOCK_DECL (flags); 42 43 MPFR_LOG_FUNC 44 (("x[%Pu]=%.*Rg z=%Zd rnd=%d cr=%d", 45 mpfr_get_prec (x), mpfr_log_prec, x, z, rnd, cr), 46 ("y[%Pu]=%.*Rg inexact=%d", 47 mpfr_get_prec (y), mpfr_log_prec, y, inexact)); 48 49 MPFR_ASSERTD (mpz_sgn (z) != 0); 50 51 if (MPFR_UNLIKELY (mpz_cmpabs_ui (z, 1) == 0)) 52 return mpfr_set (y, x, rnd); 53 54 absz[0] = z[0]; 55 SIZ (absz) = ABS(SIZ(absz)); /* Hack to get abs(z) */ 56 MPFR_MPZ_SIZEINBASE2 (size_z, z); 57 58 /* round toward 1 (or -1) to avoid spurious overflow or underflow, 59 i.e. if an overflow or underflow occurs, it is a real exception 60 and is not just due to the rounding error. */ 61 rnd1 = (MPFR_EXP(x) >= 1) ? MPFR_RNDZ 62 : (MPFR_IS_POS(x) ? MPFR_RNDU : MPFR_RNDD); 63 rnd2 = (MPFR_EXP(x) >= 1) ? MPFR_RNDD : MPFR_RNDU; 64 65 if (cr != 0) 66 prec = MPFR_PREC (y) + 3 + size_z + MPFR_INT_CEIL_LOG2 (MPFR_PREC (y)); 67 else 68 prec = MPFR_PREC (y); 69 mpfr_init2 (res, prec); 70 71 MPFR_ZIV_INIT (loop, prec); 72 for (;;) 73 { 74 unsigned int inexmul; /* will be non-zero if res may be inexact */ 75 mp_size_t i = size_z; 76 77 /* now 2^(i-1) <= z < 2^i */ 78 /* see below (case z < 0) for the error analysis, which is identical, 79 except if z=n, the maximal relative error is here 2(n-1)2^(-prec) 80 instead of 2(2n-1)2^(-prec) for z<0. */ 81 MPFR_ASSERTD (prec > (mpfr_prec_t) i); 82 err = prec - 1 - (mpfr_prec_t) i; 83 84 MPFR_BLOCK (flags, 85 inexmul = mpfr_mul (res, x, x, rnd2); 86 MPFR_ASSERTD (i >= 2); 87 if (mpz_tstbit (absz, i - 2)) 88 inexmul |= mpfr_mul (res, res, x, rnd1); 89 for (i -= 3; i >= 0 && !MPFR_BLOCK_EXCEP; i--) 90 { 91 inexmul |= mpfr_mul (res, res, res, rnd2); 92 if (mpz_tstbit (absz, i)) 93 inexmul |= mpfr_mul (res, res, x, rnd1); 94 }); 95 if (MPFR_LIKELY (inexmul == 0 || cr == 0 96 || MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags) 97 || MPFR_CAN_ROUND (res, err, MPFR_PREC (y), rnd))) 98 break; 99 /* Can't decide correct rounding, increase the precision */ 100 MPFR_ZIV_NEXT (loop, prec); 101 mpfr_set_prec (res, prec); 102 } 103 MPFR_ZIV_FREE (loop); 104 105 /* Check Overflow */ 106 if (MPFR_OVERFLOW (flags)) 107 { 108 MPFR_LOG_MSG (("overflow\n", 0)); 109 inexact = mpfr_overflow (y, rnd, mpz_odd_p (absz) ? 110 MPFR_SIGN (x) : MPFR_SIGN_POS); 111 } 112 /* Check Underflow */ 113 else if (MPFR_UNDERFLOW (flags)) 114 { 115 MPFR_LOG_MSG (("underflow\n", 0)); 116 if (rnd == MPFR_RNDN) 117 { 118 mpfr_t y2, zz; 119 120 /* We cannot decide now whether the result should be rounded 121 toward zero or +Inf. So, let's use the general case of 122 mpfr_pow, which can do that. But the problem is that the 123 result can be exact! However, it is sufficient to try to 124 round on 2 bits (the precision does not matter in case of 125 underflow, since MPFR does not have subnormals), in which 126 case, the result cannot be exact due to previous filtering 127 of trivial cases. */ 128 MPFR_ASSERTD (mpfr_cmp_si_2exp (x, MPFR_SIGN (x), 129 MPFR_EXP (x) - 1) != 0); 130 mpfr_init2 (y2, 2); 131 mpfr_init2 (zz, ABS (SIZ (z)) * GMP_NUMB_BITS); 132 inexact = mpfr_set_z (zz, z, MPFR_RNDN); 133 MPFR_ASSERTN (inexact == 0); 134 inexact = mpfr_pow_general (y2, x, zz, rnd, 1, 135 (mpfr_save_expo_t *) NULL); 136 mpfr_clear (zz); 137 mpfr_set (y, y2, MPFR_RNDN); 138 mpfr_clear (y2); 139 __gmpfr_flags = MPFR_FLAGS_INEXACT | MPFR_FLAGS_UNDERFLOW; 140 } 141 else 142 { 143 inexact = mpfr_underflow (y, rnd, mpz_odd_p (absz) ? 144 MPFR_SIGN (x) : MPFR_SIGN_POS); 145 } 146 } 147 else 148 inexact = mpfr_set (y, res, rnd); 149 150 mpfr_clear (res); 151 return inexact; 152 } 153 154 /* The computation of y = pow(x,z) is done by 155 * y = set_ui(1) if z = 0 156 * y = pow_ui(x,z) if z > 0 157 * y = pow_ui(1/x,-z) if z < 0 158 * 159 * Note: in case z < 0, we could also compute 1/pow_ui(x,-z). However, in 160 * case MAX < 1/MIN, where MAX is the largest positive value, i.e., 161 * MAX = nextbelow(+Inf), and MIN is the smallest positive value, i.e., 162 * MIN = nextabove(+0), then x^(-z) might produce an overflow, whereas 163 * x^z is representable. 164 */ 165 166 int 167 mpfr_pow_z (mpfr_ptr y, mpfr_srcptr x, mpz_srcptr z, mpfr_rnd_t rnd) 168 { 169 int inexact; 170 mpz_t tmp; 171 MPFR_SAVE_EXPO_DECL (expo); 172 173 MPFR_LOG_FUNC 174 (("x[%Pu]=%.*Rg z=%Zd rnd=%d", 175 mpfr_get_prec (x), mpfr_log_prec, x, z, rnd), 176 ("y[%Pu]=%.*Rg inexact=%d", 177 mpfr_get_prec (y), mpfr_log_prec, y, inexact)); 178 179 /* x^0 = 1 for any x, even a NaN */ 180 if (MPFR_UNLIKELY (mpz_sgn (z) == 0)) 181 return mpfr_set_ui (y, 1, rnd); 182 183 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x))) 184 { 185 if (MPFR_IS_NAN (x)) 186 { 187 MPFR_SET_NAN (y); 188 MPFR_RET_NAN; 189 } 190 else if (MPFR_IS_INF (x)) 191 { 192 /* Inf^n = Inf, (-Inf)^n = Inf for n even, -Inf for n odd */ 193 /* Inf ^(-n) = 0, sign = + if x>0 or z even */ 194 if (mpz_sgn (z) > 0) 195 MPFR_SET_INF (y); 196 else 197 MPFR_SET_ZERO (y); 198 if (MPFR_UNLIKELY (MPFR_IS_NEG (x) && mpz_odd_p (z))) 199 MPFR_SET_NEG (y); 200 else 201 MPFR_SET_POS (y); 202 MPFR_RET (0); 203 } 204 else /* x is zero */ 205 { 206 MPFR_ASSERTD (MPFR_IS_ZERO(x)); 207 if (mpz_sgn (z) > 0) 208 /* 0^n = +/-0 for any n */ 209 MPFR_SET_ZERO (y); 210 else 211 { 212 /* 0^(-n) if +/- INF */ 213 MPFR_SET_INF (y); 214 mpfr_set_divby0 (); 215 } 216 if (MPFR_LIKELY (MPFR_IS_POS (x) || mpz_even_p (z))) 217 MPFR_SET_POS (y); 218 else 219 MPFR_SET_NEG (y); 220 MPFR_RET(0); 221 } 222 } 223 224 MPFR_SAVE_EXPO_MARK (expo); 225 226 /* detect exact powers: x^-n is exact iff x is a power of 2 227 Do it if n > 0 too as this is faster and this filtering is 228 needed in case of underflow. */ 229 if (MPFR_UNLIKELY (mpfr_cmp_si_2exp (x, MPFR_SIGN (x), 230 MPFR_EXP (x) - 1) == 0)) 231 { 232 mpfr_exp_t expx = MPFR_EXP (x); /* warning: x and y may be the same 233 variable */ 234 235 MPFR_LOG_MSG (("x^n with x power of two\n", 0)); 236 mpfr_set_si (y, mpz_odd_p (z) ? MPFR_INT_SIGN(x) : 1, rnd); 237 MPFR_ASSERTD (MPFR_IS_FP (y)); 238 mpz_init (tmp); 239 mpz_mul_si (tmp, z, expx - 1); 240 MPFR_ASSERTD (MPFR_GET_EXP (y) == 1); 241 mpz_add_ui (tmp, tmp, 1); 242 inexact = 0; 243 if (MPFR_UNLIKELY (mpz_cmp_si (tmp, __gmpfr_emin) < 0)) 244 { 245 MPFR_LOG_MSG (("underflow\n", 0)); 246 /* |y| is a power of two, thus |y| <= 2^(emin-2), and in 247 rounding to nearest, the value must be rounded to 0. */ 248 if (rnd == MPFR_RNDN) 249 rnd = MPFR_RNDZ; 250 inexact = mpfr_underflow (y, rnd, MPFR_SIGN (y)); 251 } 252 else if (MPFR_UNLIKELY (mpz_cmp_si (tmp, __gmpfr_emax) > 0)) 253 { 254 MPFR_LOG_MSG (("overflow\n", 0)); 255 inexact = mpfr_overflow (y, rnd, MPFR_SIGN (y)); 256 } 257 else 258 MPFR_SET_EXP (y, mpz_get_si (tmp)); 259 mpz_clear (tmp); 260 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags); 261 } 262 else if (mpz_sgn (z) > 0) 263 { 264 inexact = mpfr_pow_pos_z (y, x, z, rnd, 1); 265 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags); 266 } 267 else 268 { 269 /* Declaration of the intermediary variable */ 270 mpfr_t t; 271 mpfr_prec_t Nt; /* Precision of the intermediary variable */ 272 mpfr_rnd_t rnd1; 273 mp_size_t size_z; 274 MPFR_ZIV_DECL (loop); 275 276 MPFR_MPZ_SIZEINBASE2 (size_z, z); 277 278 /* initial working precision */ 279 Nt = MPFR_PREC (y); 280 Nt = Nt + size_z + 3 + MPFR_INT_CEIL_LOG2 (Nt); 281 /* ensures Nt >= bits(z)+2 */ 282 283 /* initialise of intermediary variable */ 284 mpfr_init2 (t, Nt); 285 286 /* We will compute rnd(rnd1(1/x) ^ (-z)), where rnd1 is the rounding 287 toward sign(x), to avoid spurious overflow or underflow. */ 288 rnd1 = MPFR_EXP (x) < 1 ? MPFR_RNDZ : 289 (MPFR_SIGN (x) > 0 ? MPFR_RNDU : MPFR_RNDD); 290 291 MPFR_ZIV_INIT (loop, Nt); 292 for (;;) 293 { 294 MPFR_BLOCK_DECL (flags); 295 296 /* compute (1/x)^(-z), -z>0 */ 297 /* As emin = -emax, an underflow cannot occur in the division. 298 And if an overflow occurs, then this means that x^z overflows 299 too (since we have rounded toward 1 or -1). */ 300 MPFR_BLOCK (flags, mpfr_ui_div (t, 1, x, rnd1)); 301 MPFR_ASSERTD (! MPFR_UNDERFLOW (flags)); 302 /* t = (1/x)*(1+theta) where |theta| <= 2^(-Nt) */ 303 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags))) 304 goto overflow; 305 MPFR_BLOCK (flags, mpfr_pow_pos_z (t, t, z, rnd, 0)); 306 /* Now if z=-n, t = x^z*(1+theta)^(2n-1) where |theta| <= 2^(-Nt), 307 with theta maybe different from above. If (2n-1)*2^(-Nt) <= 1/2, 308 which is satisfied as soon as Nt >= bits(z)+2, then we can use 309 Lemma \ref{lemma_graillat} from algorithms.tex, which yields 310 t = x^z*(1+theta) with |theta| <= 2(2n-1)*2^(-Nt), thus the 311 error is bounded by 2(2n-1) ulps <= 2^(bits(z)+2) ulps. */ 312 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags))) 313 { 314 overflow: 315 MPFR_ZIV_FREE (loop); 316 mpfr_clear (t); 317 MPFR_SAVE_EXPO_FREE (expo); 318 MPFR_LOG_MSG (("overflow\n", 0)); 319 return mpfr_overflow (y, rnd, 320 mpz_odd_p (z) ? MPFR_SIGN (x) : 321 MPFR_SIGN_POS); 322 } 323 if (MPFR_UNLIKELY (MPFR_UNDERFLOW (flags))) 324 { 325 MPFR_ZIV_FREE (loop); 326 mpfr_clear (t); 327 MPFR_LOG_MSG (("underflow\n", 0)); 328 if (rnd == MPFR_RNDN) 329 { 330 mpfr_t y2, zz; 331 332 /* We cannot decide now whether the result should be 333 rounded toward zero or away from zero. So, like 334 in mpfr_pow_pos_z, let's use the general case of 335 mpfr_pow in precision 2. */ 336 MPFR_ASSERTD (mpfr_cmp_si_2exp (x, MPFR_SIGN (x), 337 MPFR_EXP (x) - 1) != 0); 338 mpfr_init2 (y2, 2); 339 mpfr_init2 (zz, ABS (SIZ (z)) * GMP_NUMB_BITS); 340 inexact = mpfr_set_z (zz, z, MPFR_RNDN); 341 MPFR_ASSERTN (inexact == 0); 342 inexact = mpfr_pow_general (y2, x, zz, rnd, 1, 343 (mpfr_save_expo_t *) NULL); 344 mpfr_clear (zz); 345 mpfr_set (y, y2, MPFR_RNDN); 346 mpfr_clear (y2); 347 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW); 348 goto end; 349 } 350 else 351 { 352 MPFR_SAVE_EXPO_FREE (expo); 353 return mpfr_underflow (y, rnd, mpz_odd_p (z) ? 354 MPFR_SIGN (x) : MPFR_SIGN_POS); 355 } 356 } 357 if (MPFR_LIKELY (MPFR_CAN_ROUND (t, Nt - size_z - 2, MPFR_PREC (y), 358 rnd))) 359 break; 360 /* actualisation of the precision */ 361 MPFR_ZIV_NEXT (loop, Nt); 362 mpfr_set_prec (t, Nt); 363 } 364 MPFR_ZIV_FREE (loop); 365 366 inexact = mpfr_set (y, t, rnd); 367 mpfr_clear (t); 368 } 369 370 end: 371 MPFR_SAVE_EXPO_FREE (expo); 372 return mpfr_check_range (y, inexact, rnd); 373 } 374