/* mpfr_fma -- Floating multiply-add

Copyright 2001, 2002, 2004, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
Contributed by the Arenaire and Caramel projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */

#include "mpfr-impl.h"

/* The fused-multiply-add (fma) of x, y and z is defined by:
   fma(x,y,z)= x*y + z

   Strategy: compute the product x*y exactly in a temporary u of
   precision prec(x)+prec(y), then perform a single correctly-rounded
   addition s = u + z.  The only complication is that the exact product
   may overflow or underflow the exponent range, in which case the
   operands are rescaled by a power of two before the addition and the
   result is scaled back afterwards (see the two branches below).

   Returns the usual MPFR ternary value (0 if s is exactly x*y + z,
   positive/negative if s was rounded up/down), filtered through
   mpfr_check_range.  s must not alias x, y or z in any way that MPFR
   forbids for other functions -- NOTE(review): aliasing rules are
   inherited from mpfr_mul/mpfr_add; confirm against the MPFR manual. */

int
mpfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
          mpfr_rnd_t rnd_mode)
{
  int inexact;               /* ternary value to return */
  mpfr_t u;                  /* holds the exact product x*y */
  MPFR_SAVE_EXPO_DECL (expo);
  MPFR_GROUP_DECL(group);

  MPFR_LOG_FUNC
    (("x[%Pu]=%.*Rg y[%Pu]=%.*Rg z[%Pu]=%.*Rg rnd=%d",
      mpfr_get_prec (x), mpfr_log_prec, x,
      mpfr_get_prec (y), mpfr_log_prec, y,
      mpfr_get_prec (z), mpfr_log_prec, z, rnd_mode),
     ("s[%Pu]=%.*Rg inexact=%d",
      mpfr_get_prec (s), mpfr_log_prec, s, inexact));

  /* particular cases: handle NaN, infinities and zeros first so that
     the generic path below only ever sees three finite nonzero
     operands. */
  if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
                     MPFR_IS_SINGULAR(y) ||
                     MPFR_IS_SINGULAR(z) ))
    {
      if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
        {
          MPFR_SET_NAN(s);
          MPFR_RET_NAN;
        }
      /* now neither x, y or z is NaN */
      else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
        {
          /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
          if ((MPFR_IS_ZERO(y)) ||
              (MPFR_IS_ZERO(x)) ||
              (MPFR_IS_INF(z) &&
               ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) != MPFR_SIGN(z))))
            {
              MPFR_SET_NAN(s);
              MPFR_RET_NAN;
            }
          else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
            {
              /* x*y and z are infinities of the same sign: result is
                 that same infinity, exactly. */
              MPFR_SET_INF(s);
              MPFR_SET_SAME_SIGN(s, z);
              MPFR_RET(0);
            }
          else /* z is finite */
            {
              /* Inf * finite-nonzero + finite = Inf with the sign of
                 the product. */
              MPFR_SET_INF(s);
              MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y)));
              MPFR_RET(0);
            }
        }
      /* now x and y are finite */
      else if (MPFR_IS_INF(z))
        {
          /* finite*finite + Inf = Inf, exactly. */
          MPFR_SET_INF(s);
          MPFR_SET_SAME_SIGN(s, z);
          MPFR_RET(0);
        }
      else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
        {
          if (MPFR_IS_ZERO(z))
            {
              /* 0 + 0: the sign of the zero result follows the IEEE 754
                 rule for the sum of two zeros -- (+0) + (-0) is +0 in
                 every mode except rounding toward -infinity, where it
                 is -0. */
              int sign_p;
              sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
              MPFR_SET_SIGN(s,(rnd_mode != MPFR_RNDD ?
                               ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_NEG(z))
                                ? -1 : 1) :
                               ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_POS(z))
                                ? 1 : -1)));
              MPFR_SET_ZERO(s);
              MPFR_RET(0);
            }
          else
            /* 0 + z = z (rounded to the precision of s). */
            return mpfr_set (s, z, rnd_mode);
        }
      else /* necessarily z is zero here */
        {
          /* x*y + 0 = x*y; mpfr_mul also gets the sign of a zero
             result right. */
          MPFR_ASSERTD(MPFR_IS_ZERO(z));
          return mpfr_mul (s, x, y, rnd_mode);
        }
    }

  /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
     is exact, except in case of overflow or underflow. */
  MPFR_SAVE_EXPO_MARK (expo);
  MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u);

  /* A nonzero ternary value from this exact-precision multiplication
     can only mean that the exponent range was exceeded. */
  if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
    {
      /* overflow or underflow - this case is regarded as rare, thus
         does not need to be very efficient (even if some tests below
         could have been done earlier).
         It is an overflow iff u is an infinity (since MPFR_RNDN was used).
         Alternatively, we could test the overflow flag, but in this case,
         mpfr_clear_flags would have been necessary. */
      if (MPFR_IS_INF (u)) /* overflow */
        {
          /* Let's eliminate the obvious case where x*y and z have the
             same sign. No possible cancellation -> real overflow.
             Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
             then |x*y| >= 2^(emax+1), and |x*y + z| >= 2^emax. This case
             is also an overflow. */
          if (MPFR_SIGN (u) == MPFR_SIGN (z) ||
              MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
            {
              MPFR_GROUP_CLEAR (group);
              MPFR_SAVE_EXPO_FREE (expo);
              return mpfr_overflow (s, rnd_mode, MPFR_SIGN (z));
            }

          /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
             (x/4)*y does not overflow (let's recall that the result
             is exact with an unbounded exponent range). It does not
             underflow either, because x*y overflows and the exponent
             range is large enough.
             So: recompute the product scaled down by 4, add z/4, then
             scale the sum back up by 4 at the end. */
          inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN);
          MPFR_ASSERTN (inexact == 0);
          inexact = mpfr_mul (u, u, y, MPFR_RNDN);
          MPFR_ASSERTN (inexact == 0);

          /* Now, we need to add z/4... But it may underflow! */
          {
            mpfr_t zo4;       /* holds z/4 when actually needed */
            mpfr_srcptr zz;   /* operand used in the addition: z or z/4 */
            MPFR_BLOCK_DECL (flags);

            if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
                MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
              {
                /* |z| < ulp(u)/2, therefore one can use z instead of z/4:
                   both yield the same correctly-rounded sum, and this
                   avoids the potential underflow of z/4. */
                zz = z;
              }
            else
              {
                mpfr_init2 (zo4, MPFR_PREC (z));
                if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ))
                  {
                    /* The division by 4 underflowed! */
                    MPFR_ASSERTN (0); /* TODO... */
                  }
                zz = zo4;
              }

            /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
               following addition would give the same result). */
            MPFR_BLOCK (flags, inexact = mpfr_add (s, u, zz, rnd_mode));
            /* u and zz have different signs, so that an overflow
               is not possible. But an underflow is theoretically
               possible! */
            if (MPFR_UNDERFLOW (flags))
              {
                MPFR_ASSERTN (zz != z);
                MPFR_ASSERTN (0); /* TODO... */
                mpfr_clears (zo4, u, (mpfr_ptr) 0);
              }
            else
              {
                int inex2;

                if (zz != z)
                  mpfr_clear (zo4);
                MPFR_GROUP_CLEAR (group);
                MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
                /* Undo the scaling: s = (x*y + z)/4 so far.  This
                   multiplication by 4 may itself overflow, in which
                   case its ternary value replaces the previous one. */
                inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
                if (inex2) /* overflow */
                  {
                    inexact = inex2;
                    MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
                  }
                goto end;
              }
          }
        }
      else /* underflow: one has |xy| < 2^(emin-1). */
        {
          unsigned long scale = 0;  /* power of two applied to z and xy */
          mpfr_t scaled_z;
          mpfr_srcptr new_z;        /* operand used in the addition: z or scaled_z */
          mpfr_exp_t diffexp;
          mpfr_prec_t pzs;
          int xy_underflows;

          /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
             (the + 1 on MPFR_PREC (s) is necessary because the exponent
             of the result can be EXP(z) - 1). */
          diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
          pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
          if (diffexp <= pzs)
            {
              mpfr_uexp_t uscale;
              mpfr_t scaled_v;
              MPFR_BLOCK_DECL (flags);

              uscale = (mpfr_uexp_t) pzs - diffexp + 1;
              MPFR_ASSERTN (uscale > 0);
              MPFR_ASSERTN (uscale <= ULONG_MAX);
              scale = uscale;
              mpfr_init2 (scaled_z, MPFR_PREC (z));
              inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN);
              MPFR_ASSERTN (inexact == 0); /* TODO: overflow case */
              new_z = scaled_z;
              /* Now we need to recompute u = xy * 2^scale.  The scaling
                 is applied to the operand with the smaller exponent so
                 that the intermediate scaled value cannot overflow. */
              MPFR_BLOCK (flags,
                          if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
                            {
                              mpfr_init2 (scaled_v, MPFR_PREC (x));
                              mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN);
                              mpfr_mul (u, scaled_v, y, MPFR_RNDN);
                            }
                          else
                            {
                              mpfr_init2 (scaled_v, MPFR_PREC (y));
                              mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN);
                              mpfr_mul (u, x, scaled_v, MPFR_RNDN);
                            });
              mpfr_clear (scaled_v);
              MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
              xy_underflows = MPFR_UNDERFLOW (flags);
            }
          else
            {
              /* z is so large compared with 2^emin that the underflowing
                 product cannot affect more than the rounding of z: no
                 scaling of z is needed, and xy is treated as a minimal
                 nonzero magnitude below. */
              new_z = z;
              xy_underflows = 1;
            }

          if (xy_underflows)
            {
              /* Let's replace xy by sign(xy) * 2^(emin-1).  This keeps
                 the sign information and a nonzero magnitude smaller
                 than every representable nonzero number, which is all
                 the following addition needs to round correctly. */
              MPFR_PREC (u) = MPFR_PREC_MIN;
              mpfr_setmin (u, __gmpfr_emin);
              MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
                                                MPFR_SIGN (y)));
            }

          {
            MPFR_BLOCK_DECL (flags);

            MPFR_BLOCK (flags, inexact = mpfr_add (s, u, new_z, rnd_mode));
            MPFR_GROUP_CLEAR (group);
            if (scale != 0)
              {
                int inex2;

                mpfr_clear (scaled_z);
                /* Here an overflow is theoretically possible, in which case
                   the result may be wrong, hence the assert. An underflow
                   is not possible, but let's check that anyway. */
                MPFR_ASSERTN (! MPFR_OVERFLOW (flags)); /* TODO... */
                MPFR_ASSERTN (! MPFR_UNDERFLOW (flags)); /* not possible */
                /* Undo the scaling: s currently holds (x*y + z) * 2^scale;
                   dividing it back down may underflow, and that ternary
                   value then replaces the previous one. */
                inex2 = mpfr_div_2ui (s, s, scale, MPFR_RNDN);
                /* FIXME: this seems incorrect. MPFR_RNDN -> rnd_mode?
                   Also, handle the double rounding case:
                   s / 2^scale = 2^(emin - 2) in MPFR_RNDN. */
                if (inex2) /* underflow */
                  inexact = inex2;
              }
          }

          /* FIXME/TODO: I'm not sure that the following is correct.
             Check for possible spurious exceptions due to intermediate
             computations. */
          MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
          goto end;
        }
    }

  /* Generic path: u holds the exact product, so this single addition
     produces the correctly rounded result and its ternary value. */
  inexact = mpfr_add (s, u, z, rnd_mode);
  MPFR_GROUP_CLEAR (group);
  MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 end:
  MPFR_SAVE_EXPO_FREE (expo);
  return mpfr_check_range (s, inexact, rnd_mode);
}