1 /* mpc_fma -- Fused multiply-add of three complex numbers 2 3 Copyright (C) 2011, 2012 INRIA 4 5 This file is part of GNU MPC. 6 7 GNU MPC is free software; you can redistribute it and/or modify it under 8 the terms of the GNU Lesser General Public License as published by the 9 Free Software Foundation; either version 3 of the License, or (at your 10 option) any later version. 11 12 GNU MPC is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 14 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for 15 more details. 16 17 You should have received a copy of the GNU Lesser General Public License 18 along with this program. If not, see http://www.gnu.org/licenses/ . 19 */ 20 21 #include "mpc-impl.h" 22 23 /* return a bound on the precision needed to add or subtract x and y exactly */ 24 static mpfr_prec_t 25 bound_prec_addsub (mpfr_srcptr x, mpfr_srcptr y) 26 { 27 if (!mpfr_regular_p (x)) 28 return mpfr_get_prec (y); 29 else if (!mpfr_regular_p (y)) 30 return mpfr_get_prec (x); 31 else /* neither x nor y are NaN, Inf or zero */ 32 { 33 mpfr_exp_t ex = mpfr_get_exp (x); 34 mpfr_exp_t ey = mpfr_get_exp (y); 35 mpfr_exp_t ulpx = ex - mpfr_get_prec (x); 36 mpfr_exp_t ulpy = ey - mpfr_get_prec (y); 37 return ((ex >= ey) ? ex : ey) + 1 - ((ulpx <= ulpy) ? ulpx : ulpy); 38 } 39 } 40 41 /* r <- a*b+c */ 42 int 43 mpc_fma_naive (mpc_ptr r, mpc_srcptr a, mpc_srcptr b, mpc_srcptr c, mpc_rnd_t rnd) 44 { 45 mpfr_t rea_reb, rea_imb, ima_reb, ima_imb, tmp; 46 mpfr_prec_t pre12, pre13, pre23, pim12, pim13, pim23; 47 int inex_re, inex_im; 48 49 mpfr_init2 (rea_reb, mpfr_get_prec (mpc_realref(a)) + mpfr_get_prec (mpc_realref(b))); 50 mpfr_init2 (rea_imb, mpfr_get_prec (mpc_realref(a)) + mpfr_get_prec (mpc_imagref(b))); 51 mpfr_init2 (ima_reb, mpfr_get_prec (mpc_imagref(a)) + mpfr_get_prec (mpc_realref(b))); 52 mpfr_init2 (ima_imb, mpfr_get_prec (mpc_imagref(a)) + mpfr_get_prec (mpc_imagref(b))); 53 54 mpfr_mul (rea_reb, mpc_realref(a), mpc_realref(b), GMP_RNDZ); /* exact */ 55 mpfr_mul (rea_imb, mpc_realref(a), mpc_imagref(b), GMP_RNDZ); /* exact */ 56 mpfr_mul (ima_reb, mpc_imagref(a), mpc_realref(b), GMP_RNDZ); /* exact */ 57 mpfr_mul (ima_imb, mpc_imagref(a), mpc_imagref(b), GMP_RNDZ); /* exact */ 58 59 /* Re(r) <- rea_reb - ima_imb + Re(c) */ 60 61 pre12 = bound_prec_addsub (rea_reb, ima_imb); /* bound on exact precision for 62 rea_reb - ima_imb */ 63 pre13 = bound_prec_addsub (rea_reb, mpc_realref(c)); 64 /* bound for rea_reb + Re(c) */ 65 pre23 = bound_prec_addsub (ima_imb, mpc_realref(c)); 66 /* bound for ima_imb - Re(c) */ 67 if (pre12 <= pre13 && pre12 <= pre23) /* (rea_reb - ima_imb) + Re(c) */ 68 { 69 mpfr_init2 (tmp, pre12); 70 mpfr_sub (tmp, rea_reb, ima_imb, GMP_RNDZ); /* exact */ 71 inex_re = mpfr_add (mpc_realref(r), tmp, mpc_realref(c), MPC_RND_RE(rnd)); 72 /* the only possible bad overlap is between r and c, but since we are 73 only touching the real part of both, it is ok */ 74 } 75 else if (pre13 <= pre23) /* (rea_reb + Re(c)) - ima_imb */ 76 { 77 mpfr_init2 (tmp, pre13); 78 mpfr_add (tmp, rea_reb, mpc_realref(c), GMP_RNDZ); /* exact */ 79 inex_re = mpfr_sub (mpc_realref(r), tmp, ima_imb, MPC_RND_RE(rnd)); 80 /* the only possible bad overlap is between r and c, but since we are 81 only touching the real part of both, it is ok */ 82 } 83 else /* rea_reb + (Re(c) - ima_imb) */ 84 { 85 mpfr_init2 (tmp, pre23); 86 mpfr_sub (tmp, mpc_realref(c), ima_imb, GMP_RNDZ); /* exact */ 87 inex_re = mpfr_add (mpc_realref(r), tmp, rea_reb, MPC_RND_RE(rnd)); 88 /* the only possible bad overlap is between r and c, but since we are 89 only touching the real part of both, it is ok */ 90 } 91 92 /* Im(r) <- rea_imb + ima_reb + Im(c) */ 93 pim12 = bound_prec_addsub (rea_imb, ima_reb); /* bound on exact precision for 94 rea_imb + ima_reb */ 95 pim13 = bound_prec_addsub (rea_imb, mpc_imagref(c)); 96 /* bound for rea_imb + Im(c) */ 97 pim23 = bound_prec_addsub (ima_reb, mpc_imagref(c)); 98 /* bound for ima_reb + Im(c) */ 99 if (pim12 <= pim13 && pim12 <= pim23) /* (rea_imb + ima_reb) + Im(c) */ 100 { 101 mpfr_set_prec (tmp, pim12); 102 mpfr_add (tmp, rea_imb, ima_reb, GMP_RNDZ); /* exact */ 103 inex_im = mpfr_add (mpc_imagref(r), tmp, mpc_imagref(c), MPC_RND_IM(rnd)); 104 /* the only possible bad overlap is between r and c, but since we are 105 only touching the imaginary part of both, it is ok */ 106 } 107 else if (pim13 <= pim23) /* (rea_imb + Im(c)) + ima_reb */ 108 { 109 mpfr_set_prec (tmp, pim13); 110 mpfr_add (tmp, rea_imb, mpc_imagref(c), GMP_RNDZ); /* exact */ 111 inex_im = mpfr_add (mpc_imagref(r), tmp, ima_reb, MPC_RND_IM(rnd)); 112 /* the only possible bad overlap is between r and c, but since we are 113 only touching the imaginary part of both, it is ok */ 114 } 115 else /* rea_imb + (Im(c) + ima_reb) */ 116 { 117 mpfr_set_prec (tmp, pre23); 118 mpfr_add (tmp, mpc_imagref(c), ima_reb, GMP_RNDZ); /* exact */ 119 inex_im = mpfr_add (mpc_imagref(r), tmp, rea_imb, MPC_RND_IM(rnd)); 120 /* the only possible bad overlap is between r and c, but since we are 121 only touching the imaginary part of both, it is ok */ 122 } 123 124 mpfr_clear (rea_reb); 125 mpfr_clear (rea_imb); 126 mpfr_clear (ima_reb); 127 mpfr_clear (ima_imb); 128 mpfr_clear (tmp); 129 130 return MPC_INEX(inex_re, inex_im); 131 } 132 133 /* The algorithm is as follows: 134 - in a first pass, we use the target precision + some extra bits 135 - if it fails, we add the number of cancelled bits when adding 136 Re(a*b) and Re(c) [similarly for the imaginary part] 137 - it is fails again, we call the mpc_fma_naive function, which also 138 deals with the special cases */ 139 int 140 mpc_fma (mpc_ptr r, mpc_srcptr a, mpc_srcptr b, mpc_srcptr c, mpc_rnd_t rnd) 141 { 142 mpc_t ab; 143 mpfr_prec_t pre, pim, wpre, wpim; 144 mpfr_exp_t diffre, diffim; 145 int i, inex = 0, okre = 0, okim = 0; 146 147 if (mpc_fin_p (a) == 0 || mpc_fin_p (b) == 0 || mpc_fin_p (c) == 0) 148 return mpc_fma_naive (r, a, b, c, rnd); 149 150 pre = mpfr_get_prec (mpc_realref(r)); 151 pim = mpfr_get_prec (mpc_imagref(r)); 152 wpre = pre + mpc_ceil_log2 (pre) + 10; 153 wpim = pim + mpc_ceil_log2 (pim) + 10; 154 mpc_init3 (ab, wpre, wpim); 155 for (i = 0; i < 2; ++i) 156 { 157 mpc_mul (ab, a, b, MPC_RNDZZ); 158 if (mpfr_zero_p (mpc_realref(ab)) || mpfr_zero_p (mpc_imagref(ab))) 159 break; 160 diffre = mpfr_get_exp (mpc_realref(ab)); 161 diffim = mpfr_get_exp (mpc_imagref(ab)); 162 mpc_add (ab, ab, c, MPC_RNDZZ); 163 if (mpfr_zero_p (mpc_realref(ab)) || mpfr_zero_p (mpc_imagref(ab))) 164 break; 165 diffre -= mpfr_get_exp (mpc_realref(ab)); 166 diffim -= mpfr_get_exp (mpc_imagref(ab)); 167 diffre = (diffre > 0 ? diffre + 1 : 1); 168 diffim = (diffim > 0 ? diffim + 1 : 1); 169 okre = diffre > (mpfr_exp_t) wpre ? 0 : mpfr_can_round (mpc_realref(ab), 170 wpre - diffre, GMP_RNDN, GMP_RNDZ, 171 pre + (MPC_RND_RE (rnd) == GMP_RNDN)); 172 okim = diffim > (mpfr_exp_t) wpim ? 0 : mpfr_can_round (mpc_imagref(ab), 173 wpim - diffim, GMP_RNDN, GMP_RNDZ, 174 pim + (MPC_RND_IM (rnd) == GMP_RNDN)); 175 if (okre && okim) 176 { 177 inex = mpc_set (r, ab, rnd); 178 break; 179 } 180 if (i == 1) 181 break; 182 if (okre == 0 && diffre > 1) 183 wpre += diffre; 184 if (okim == 0 && diffim > 1) 185 wpim += diffim; 186 mpfr_set_prec (mpc_realref(ab), wpre); 187 mpfr_set_prec (mpc_imagref(ab), wpim); 188 } 189 mpc_clear (ab); 190 return okre && okim ? inex : mpc_fma_naive (r, a, b, c, rnd); 191 } 192