1/****************************************************************************** 2 Copyright (c) 2007-2011, Intel Corp. 3 All rights reserved. 4 5 Redistribution and use in source and binary forms, with or without 6 modification, are permitted provided that the following conditions are met: 7 8 * Redistributions of source code must retain the above copyright notice, 9 this list of conditions and the following disclaimer. 10 * Redistributions in binary form must reproduce the above copyright 11 notice, this list of conditions and the following disclaimer in the 12 documentation and/or other materials provided with the distribution. 13 * Neither the name of Intel Corporation nor the names of its contributors 14 may be used to endorse or promote products derived from this software 15 without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 THE POSSIBILITY OF SUCH DAMAGE. 
******************************************************************************/

/*****************************************************************************
 *
 *  BID128 fma   x * y + z
 *
 ****************************************************************************/

#include "bid_internal.h"

// Correct a result that was first rounded to nearest (RN) with unbounded
// exponent so that it is rounded according to rnd_mode instead: bump the
// coefficient by +/-1 when the directed/ties-away rounding disagrees with RN,
// handle the coefficient crossing a decade boundary (10^34 up, 10^33 - 1
// down), and saturate to +/-inf or +/-MAXFP on exponent overflow.
// The four is_* indicators describe how the RN rounding related to the exact
// value; unbexp is the unbiased exponent of *ptrres (its own exponent field
// is ignored).  Status flags are accumulated into *ptrfpsf.
static void
bid_rounding_correction (unsigned int rnd_mode,
			 unsigned int is_inexact_lt_midpoint,
			 unsigned int is_inexact_gt_midpoint,
			 unsigned int is_midpoint_lt_even,
			 unsigned int is_midpoint_gt_even,
			 int unbexp,
			 BID_UINT128 * ptrres, _IDEC_flags * ptrfpsf) {
  // unbiased true exponent unbexp may be larger than emax

  BID_UINT128 res = *ptrres; // expected to have the correct sign and coefficient
  // (the exponent field is ignored, as unbexp is used instead)
  BID_UINT64 sign, exp;
  BID_UINT64 C_hi, C_lo;

  // general correction from RN to RA, RM, RP, RZ
  // Note: if the result is negative, then is_inexact_lt_midpoint,
  // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even
  // have to be considered as if determined for the absolute value of the
  // result (so they seem to be reversed)

  if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
      is_midpoint_lt_even || is_midpoint_gt_even) {
    *ptrfpsf |= BID_INEXACT_EXCEPTION;
  }
  // apply correction to result calculated with unbounded exponent
  sign = res.w[1] & MASK_SIGN;
  exp = (BID_UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax
  C_hi = res.w[1] & MASK_COEFF;
  C_lo = res.w[0];
  // increment the coefficient when the RN result was rounded down relative
  // to the direction requested by rnd_mode (mirrored tests for sign < 0)
  if ((!sign && ((rnd_mode == BID_ROUNDING_UP && is_inexact_lt_midpoint) ||
      ((rnd_mode == BID_ROUNDING_TIES_AWAY || rnd_mode == BID_ROUNDING_UP) &&
      is_midpoint_gt_even))) ||
      (sign && ((rnd_mode == BID_ROUNDING_DOWN && is_inexact_lt_midpoint) ||
      ((rnd_mode == BID_ROUNDING_TIES_AWAY || rnd_mode == BID_ROUNDING_DOWN) &&
      is_midpoint_gt_even)))) {
    // C = C + 1
    C_lo = C_lo + 1;
    if (C_lo == 0)
      C_hi = C_hi + 1;
    if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) {
      // C = 10^34 => rounding overflow
      C_hi = 0x0000314dc6448d93ull;
      C_lo = 0x38c15b0a00000000ull; // 10^33
      // exp = exp + EXP_P1;
      unbexp = unbexp + 1;
      exp = (BID_UINT64) (unbexp + 6176) << 49;
    }
  } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
      ((sign && (rnd_mode == BID_ROUNDING_UP || rnd_mode == BID_ROUNDING_TO_ZERO)) ||
      (!sign && (rnd_mode == BID_ROUNDING_DOWN || rnd_mode == BID_ROUNDING_TO_ZERO)))) {
    // the RN result was rounded up relative to rnd_mode's direction:
    // C = C - 1
    C_lo = C_lo - 1;
    if (C_lo == 0xffffffffffffffffull)
      C_hi--;
    // check if we crossed into the lower decade
    if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) {
      // C = 10^33 - 1
      if (exp > 0) {
	C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
	C_lo = 0x378d8e63ffffffffull;
	// exp = exp - EXP_P1;
	unbexp = unbexp - 1;
	exp = (BID_UINT64) (unbexp + 6176) << 49;
      } else { // if exp = 0 the result is tiny & inexact
	*ptrfpsf |= BID_UNDERFLOW_EXCEPTION;
      }
    }
  } else {
    ; // the result is already correct
  }
  if (unbexp > expmax) { // 6111
    *ptrfpsf |= (BID_INEXACT_EXCEPTION | BID_OVERFLOW_EXCEPTION);
    exp = 0;
    if (!sign) { // result is positive
      if (rnd_mode == BID_ROUNDING_UP || rnd_mode == BID_ROUNDING_TIES_AWAY) { // +inf
	C_hi = 0x7800000000000000ull;
	C_lo = 0x0000000000000000ull;
      } else { // res = +MAXFP = (10^34-1) * 10^emax
	C_hi = 0x5fffed09bead87c0ull;
	C_lo = 0x378d8e63ffffffffull;
      }
    } else { // result is negative
      if (rnd_mode == BID_ROUNDING_DOWN || rnd_mode == BID_ROUNDING_TIES_AWAY) { // -inf
	C_hi = 0xf800000000000000ull;
	C_lo = 0x0000000000000000ull;
      } else { // res = -MAXFP = -(10^34-1) * 10^emax
	C_hi = 0xdfffed09bead87c0ull;
	C_lo = 0x378d8e63ffffffffull;
      }
    }
  }
  // assemble the result
  res.w[1] = sign | exp | C_hi;
  res.w[0] = C_lo;
  *ptrres = res;
}

static void
bid_add256 (BID_UINT256 x, BID_UINT256 y, BID_UINT256 * pz) {
  // *pz = x + y; assume the sum fits in 256 bits
  // (carries are propagated into the local copy of x's upper words before
  // each word-wise add, so the final word addition cannot carry out)
  BID_UINT256 z;
  z.w[0] = x.w[0] + y.w[0];
  if (z.w[0] < x.w[0]) { // carry out of word 0
    x.w[1]++;
    if (x.w[1] == 0x0000000000000000ull) {
      x.w[2]++;
      if (x.w[2] == 0x0000000000000000ull) {
	x.w[3]++;
      }
    }
  }
  z.w[1] = x.w[1] + y.w[1];
  if (z.w[1] < x.w[1]) { // carry out of word 1
    x.w[2]++;
    if (x.w[2] == 0x0000000000000000ull) {
      x.w[3]++;
    }
  }
  z.w[2] = x.w[2] + y.w[2];
  if (z.w[2] < x.w[2]) { // carry out of word 2
    x.w[3]++;
  }
  z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible
  *pz = z;
}

// 256-bit unsigned subtraction; the caller guarantees x >= y so the
// difference is non-negative and no borrow leaves the top word.
static void
bid_sub256 (BID_UINT256 x, BID_UINT256 y, BID_UINT256 * pz) {
  // *pz = x - y; assume x >= y
  BID_UINT256 z;
  z.w[0] = x.w[0] - y.w[0];
  if (z.w[0] > x.w[0]) { // borrow out of word 0
    x.w[1]--;
    if (x.w[1] == 0xffffffffffffffffull) {
      x.w[2]--;
      if (x.w[2] == 0xffffffffffffffffull) {
	x.w[3]--;
      }
    }
  }
  z.w[1] = x.w[1] - y.w[1];
  if (z.w[1] > x.w[1]) { // borrow out of word 1
    x.w[2]--;
    if (x.w[2] == 0xffffffffffffffffull) {
      x.w[3]--;
    }
  }
  z.w[2] = x.w[2] - y.w[2];
  if (z.w[2] > x.w[2]) { // borrow out of word 2
    x.w[3]--;
  }
  z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y
  *pz = z;
}


// Return the number of decimal digits (1..68) in the 256-bit unsigned
// integer R256, by comparing against the bid_ten2k64/128/256 power-of-ten
// tables.
static int
bid_bid_nr_digits256 (BID_UINT256 R256) {
  int ind;
  // determine the number of decimal digits in R256
  if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) {
    // between 1 and 19 digits
    for (ind = 1; ind <= 19; ind++) {
      if (R256.w[0] < bid_ten2k64[ind]) {
	break;
      }
    }
    // ind digits
  } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 &&
      (R256.w[1] < bid_ten2k128[0].w[1] ||
      (R256.w[1] == bid_ten2k128[0].w[1]
      && R256.w[0] < bid_ten2k128[0].w[0]))) {
    // 20 digits
    ind = 20;
  } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) {
    // between 21 and 38 digits
    for (ind = 1; ind <= 18; ind++) {
      if (R256.w[1] < bid_ten2k128[ind].w[1] ||
	  (R256.w[1] == bid_ten2k128[ind].w[1] &&
	  R256.w[0] < bid_ten2k128[ind].w[0])) {
	break;
      }
    }
    // ind + 20 digits
    ind = ind + 20;
  } else if (R256.w[3] == 0x0 &&
      (R256.w[2] < bid_ten2k256[0].w[2] ||
      (R256.w[2] == bid_ten2k256[0].w[2] &&
      R256.w[1] < bid_ten2k256[0].w[1]) ||
      (R256.w[2] == bid_ten2k256[0].w[2] &&
      R256.w[1] == bid_ten2k256[0].w[1] &&
      R256.w[0] < bid_ten2k256[0].w[0]))) {
    // 39 digits
    ind = 39;
  } else {
    // between 40 and 68 digits
    for (ind = 1; ind <= 29; ind++) {
      if (R256.w[3] < bid_ten2k256[ind].w[3] ||
	  (R256.w[3] == bid_ten2k256[ind].w[3] &&
	  R256.w[2] < bid_ten2k256[ind].w[2]) ||
	  (R256.w[3] == bid_ten2k256[ind].w[3] &&
	  R256.w[2] == bid_ten2k256[ind].w[2] &&
	  R256.w[1] < bid_ten2k256[ind].w[1]) ||
	  (R256.w[3] == bid_ten2k256[ind].w[3] &&
	  R256.w[2] == bid_ten2k256[ind].w[2] &&
	  R256.w[1] == bid_ten2k256[ind].w[1] &&
	  R256.w[0] < bid_ten2k256[ind].w[0])) {
	break;
      }
    }
    // ind + 39 digits
    ind = ind + 39;
  }
  return (ind);
}

// add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
// use the rounding information from ptr_is_* to avoid a double rounding error
static void
bid_add_and_round (int q3,
		   int q4,
		   int e4,
		   int delta,
		   int p34,
		   BID_UINT64 z_sign,
		   BID_UINT64 p_sign,
		   BID_UINT128 C3,
		   BID_UINT256 C4,
		   int rnd_mode,
		   int *ptr_is_midpoint_lt_even,
		   int *ptr_is_midpoint_gt_even,
		   int *ptr_is_inexact_lt_midpoint,
		   int *ptr_is_inexact_gt_midpoint,
		   _IDEC_flags * ptrfpsf, BID_UINT128 * ptrres) {

  int scale;
  int x0;
  int ind;
  BID_UINT64 R64;
  BID_UINT128 P128, R128;
  BID_UINT192 P192, R192;
  BID_UINT256 R256;
  int is_midpoint_lt_even = 0;
  int is_midpoint_gt_even = 0;
  int is_inexact_lt_midpoint = 0;
  int is_inexact_gt_midpoint = 0;
  int is_midpoint_lt_even0 = 0;
  int
is_midpoint_gt_even0 = 0;
  int is_inexact_lt_midpoint0 = 0;
  int is_inexact_gt_midpoint0 = 0;
  int incr_exp = 0;
  int is_tiny = 0;
  int lt_half_ulp = 0;
  int eq_half_ulp = 0;
  // int gt_half_ulp = 0;
  BID_UINT128 res = *ptrres;

  // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
  scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
  // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1

  // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
  // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
  if (scale == 0) {
    R256.w[3] = 0x0ull;
    R256.w[2] = 0x0ull;
    R256.w[1] = C3.w[1];
    R256.w[0] = C3.w[0];
  } else if (scale <= 19) { // 10^scale fits in 64 bits
    P128.w[1] = 0;
    P128.w[0] = bid_ten2k64[scale];
    __mul_128x128_to_256 (R256, P128, C3);
  } else if (scale <= 38) { // 10^scale fits in 128 bits
    __mul_128x128_to_256 (R256, bid_ten2k128[scale - 20], C3);
  } else if (scale <= 57) { // 39 <= scale <= 57
    // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
    // (10^67 has 223 bits; 10^69 has 230 bits);
    // must split the computation:
    // 10^scale * C3 = 10^38 * 10^(scale-38) * C3 where 10^38 takes 127
    // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
    // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
    __mul_64x128_to_128 (R128, bid_ten2k64[scale - 38], C3);
    // now multiply R128 by 10^38
    __mul_128x128_to_256 (R256, R128, bid_ten2k128[18]);
  } else { // 58 <= scale <= 66
    // 10^scale takes between 193 and 220 bits,
    // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
    // must split the computation:
    // 10^scale * C3 = 10^38 * 10^(scale-38) * C3 where 10^38 takes 127
    // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
    // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
    // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
    // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
    __mul_64x128_to_128 (R128, C3.w[0], bid_ten2k128[scale - 58]);
    // now calculate 10^38 * 10^(scale-38) * C3
    __mul_128x128_to_256 (R256, R128, bid_ten2k128[18]);
  }
  // C3 * 10^scale is now in R256

  // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least
  // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is
  // possible
  // add/subtract C4 and C3 * 10^scale; the exponent is e4
  if (p_sign == z_sign) { // R256 = C4 + R256
    // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
    // but may require rounding
    bid_add256 (C4, R256, &R256);
  } else { // if (p_sign != z_sign) { // R256 = C4 - R256
    // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
    // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
    // but may require rounding

    // compare first R256 = C3 * 10^scale and C4
    if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) ||
	(R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) ||
	(R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] &&
	R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4
      // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
      // but may require rounding
      bid_sub256 (R256, C4, &R256);
      // flip p_sign too, because the result has the sign of z
      p_sign = z_sign;
    } else { // if C4 > C3 * 10^scale
      // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
      // but may require rounding
      bid_sub256 (C4, R256, &R256);
    }
    // if the result is pure zero, the sign depends on the rounding mode
    // (x*y and z had opposite signs)
    if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull &&
	R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) {
      if (rnd_mode != BID_ROUNDING_DOWN)
	p_sign = 0x0000000000000000ull;
      else
	p_sign = 0x8000000000000000ull;
      // the exponent is max (e4, expmin)
      if (e4 < -6176)
	e4 = expmin;
      // assemble result
      res.w[1] = p_sign | ((BID_UINT64) (e4 + 6176) << 49);
      res.w[0] = 0x0;
      *ptrres = res;
      return;
    }
  }

  // determine the number of decimal digits in R256
  ind = bid_bid_nr_digits256 (R256);

  // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
  // round to the destination precision, with unbounded exponent

  if (ind <= p34) {
    // result rounded to the destination precision with unbounded exponent
    // is exact
    if (ind + e4 < p34 + expmin) {
      is_tiny = 1; // applies to all rounding modes
      // (regardless of the tininess detection method)
    }
    res.w[1] = p_sign | ((BID_UINT64) (e4 + 6176) << 49) | R256.w[1];
    res.w[0] = R256.w[0];
    // Note: res is correct only if expmin <= e4 <= expmax
  } else { // if (ind > p34)
    // if more than P digits, round to nearest to P digits
    // round R256 to p34 digits
    x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
    if (ind <= 38) {
      P128.w[1] = R256.w[1];
      P128.w[0] = R256.w[0];
      bid_round128_19_38 (ind, x0, P128, &R128, &incr_exp,
			  &is_midpoint_lt_even, &is_midpoint_gt_even,
			  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
    } else if (ind <= 57) {
      P192.w[2] = R256.w[2];
      P192.w[1] = R256.w[1];
      P192.w[0] = R256.w[0];
      bid_round192_39_57 (ind, x0, P192, &R192, &incr_exp,
			  &is_midpoint_lt_even, &is_midpoint_gt_even,
			  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
      R128.w[1] = R192.w[1];
      R128.w[0] = R192.w[0];
    } else { // if (ind <= 68)
      bid_round256_58_76 (ind, x0, R256, &R256, &incr_exp,
			  &is_midpoint_lt_even, &is_midpoint_gt_even,
			  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
      R128.w[1] = R256.w[1];
      R128.w[0] = R256.w[0];
    }
#if !DECIMAL_TINY_DETECTION_AFTER_ROUNDING
    if (e4 + x0 < expmin) { // for all rounding modes
      is_tiny = 1;
    }
#endif
    // the rounded result has p34 = 34 digits
    e4 = e4 + x0 + incr_exp;
    if (rnd_mode == BID_ROUNDING_TO_NEAREST) {
#if DECIMAL_TINY_DETECTION_AFTER_ROUNDING
      if (e4 < expmin) {
	is_tiny = 1; // for other rounding modes apply correction
      }
#endif
    } else {
      // for RM, RP, RZ, RA apply correction in order to determine tininess
      // but do not save the result; apply the correction to
      // (-1)^p_sign * significand * 10^0
      P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1];
      P128.w[0] = R128.w[0];
      bid_rounding_correction (rnd_mode,
			       is_inexact_lt_midpoint,
			       is_inexact_gt_midpoint, is_midpoint_lt_even,
			       is_midpoint_gt_even, 0, &P128, ptrfpsf);
      scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
      // the number of digits in the significand is p34 = 34
#if DECIMAL_TINY_DETECTION_AFTER_ROUNDING
      if (e4 + scale < expmin) {
	is_tiny = 1;
      }
#endif
    }
    ind = p34; // the number of decimal digits in the significand of res
    res.w[1] = p_sign | ((BID_UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN
    res.w[0] = R128.w[0];
    // Note: res is correct only if expmin <= e4 <= expmax
    // set the inexact flag after rounding with bounded exponent, if any
  }
  // at this point we have the result rounded with unbounded exponent in
  // res and we know its tininess:
  // res = (-1)^p_sign * significand * 10^e4,
  // where q (significand) = ind <= p34
  // Note: res is correct only if expmin <= e4 <= expmax

  // check for overflow if RN
  if (rnd_mode == BID_ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) {
    res.w[1] = p_sign | 0x7800000000000000ull;
    res.w[0] = 0x0000000000000000ull;
    *ptrres = res;
    *ptrfpsf |= (BID_INEXACT_EXCEPTION | BID_OVERFLOW_EXCEPTION);
    return; // BID_RETURN (res)
  } // else not overflow or not RN, so continue

  // if (e4 >= expmin) we have the result rounded with bounded exponent
  if (e4 < expmin) {
    x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
    // where the result rounded [at most] once is
    // (-1)^p_sign * significand_res * 10^e4

    // avoid double rounding error
    is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
    is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
    is_midpoint_lt_even0 = is_midpoint_lt_even;
    is_midpoint_gt_even0 = is_midpoint_gt_even;
    is_inexact_lt_midpoint = 0;
    is_inexact_gt_midpoint = 0;
    is_midpoint_lt_even = 0;
    is_midpoint_gt_even = 0;

    if (x0 > ind) {
      // nothing is left of res when moving the decimal point left x0 digits
      is_inexact_lt_midpoint = 1;
      res.w[1] = p_sign | 0x0000000000000000ull;
      res.w[0] = 0x0000000000000000ull;
      e4 = expmin;
    } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
      // this is <, =, or > 1/2 ulp
      // compare the ind-digit value in the significand of res with
      // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
      // less than, equal to, or greater than 1/2 ulp (significand of res)
      R128.w[1] = res.w[1] & MASK_COEFF;
      R128.w[0] = res.w[0];
      if (ind <= 19) {
	if (R128.w[0] < bid_midpoint64[ind - 1]) { // < 1/2 ulp
	  lt_half_ulp = 1;
	  is_inexact_lt_midpoint = 1;
	} else if (R128.w[0] == bid_midpoint64[ind - 1]) { // = 1/2 ulp
	  eq_half_ulp = 1;
	  is_midpoint_gt_even = 1;
	} else { // > 1/2 ulp
	  // gt_half_ulp = 1;
	  is_inexact_gt_midpoint = 1;
	}
      } else { // if (ind <= 38) {
	if (R128.w[1] < bid_midpoint128[ind - 20].w[1] ||
	    (R128.w[1] == bid_midpoint128[ind - 20].w[1] &&
	    R128.w[0] < bid_midpoint128[ind - 20].w[0])) { // < 1/2 ulp
	  lt_half_ulp = 1;
	  is_inexact_lt_midpoint = 1;
	} else if (R128.w[1] == bid_midpoint128[ind - 20].w[1] &&
	    R128.w[0] == bid_midpoint128[ind - 20].w[0]) { // = 1/2 ulp
	  eq_half_ulp = 1;
	  is_midpoint_gt_even = 1;
	} else { // > 1/2 ulp
	  // gt_half_ulp = 1;
	  is_inexact_gt_midpoint = 1;
	}
      }
      if (lt_half_ulp || eq_half_ulp) {
	// res = +0.0 * 10^expmin
	res.w[1] = 0x0000000000000000ull;
	res.w[0] = 0x0000000000000000ull;
      } else { // if (gt_half_ulp)
	// res = +1 * 10^expmin
	res.w[1] = 0x0000000000000000ull;
	res.w[0] = 0x0000000000000001ull;
      }
      res.w[1] = p_sign | res.w[1];
      e4 = expmin;
    } else { // if (1 <= x0 <= ind - 1 <= 33)
      // round the ind-digit result to ind - x0 digits

      if (ind <= 18) { // 2 <= ind <= 18
	bid_round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
			  &is_midpoint_lt_even, &is_midpoint_gt_even,
			  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
	res.w[1] = 0x0;
	res.w[0] = R64;
      } else if (ind <= 38) {
	P128.w[1] = res.w[1] & MASK_COEFF;
	P128.w[0] = res.w[0];
	bid_round128_19_38 (ind, x0, P128, &res, &incr_exp,
			    &is_midpoint_lt_even, &is_midpoint_gt_even,
			    &is_inexact_lt_midpoint,
			    &is_inexact_gt_midpoint);
      }
      e4 = e4 + x0; // expmin
      // we want the exponent to be expmin, so if incr_exp = 1 then
      // multiply the rounded result by 10 - it will still fit in 113 bits
      if (incr_exp) {
	// 64 x 128 -> 128
	P128.w[1] = res.w[1] & MASK_COEFF;
	P128.w[0] = res.w[0];
	__mul_64x128_to_128 (res, bid_ten2k64[1], P128);
      }
      res.w[1] =
	p_sign | ((BID_UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
      // avoid a double rounding error
      if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
	  is_midpoint_lt_even) { // double rounding error upward
	// res = res - 1
	res.w[0]--;
	if (res.w[0] == 0xffffffffffffffffull)
	  res.w[1]--;
	// Note: a double rounding error upward is not possible; for this
	// the result after the first rounding would have to be 99...95
	// (35 digits in all), possibly followed by a number of zeros; this
	// is not possible in Cases (2)-(6) or (15)-(17) which may get here
	is_midpoint_lt_even = 0;
	is_inexact_lt_midpoint = 1;
      } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
	  is_midpoint_gt_even) { // double rounding error downward
	// res = res + 1
	res.w[0]++;
	if (res.w[0] == 0)
	  res.w[1]++;
	is_midpoint_gt_even = 0;
	is_inexact_gt_midpoint = 1;
      } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
	  !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
	// if this second rounding was exact the result may still be
	// inexact because of the first rounding
	if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
	  is_inexact_gt_midpoint = 1;
	}
	if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
	  is_inexact_lt_midpoint = 1;
	}
      } else if (is_midpoint_gt_even &&
	  (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
	// pulled up to a midpoint
	is_inexact_lt_midpoint = 1;
	is_inexact_gt_midpoint = 0;
	is_midpoint_lt_even = 0;
	is_midpoint_gt_even = 0;
      } else if (is_midpoint_lt_even &&
	  (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
	// pulled down to a midpoint
	is_inexact_lt_midpoint = 0;
	is_inexact_gt_midpoint = 1;
	is_midpoint_lt_even = 0;
	is_midpoint_gt_even = 0;
      } else {
	; // no double-rounding correction needed
      }
    }
  }
  // res contains the correct result
  // apply correction if not rounding to nearest
  if (rnd_mode != BID_ROUNDING_TO_NEAREST) {
    bid_rounding_correction (rnd_mode,
			     is_inexact_lt_midpoint, is_inexact_gt_midpoint,
			     is_midpoint_lt_even, is_midpoint_gt_even,
			     e4, &res, ptrfpsf);
  }
  if (is_midpoint_lt_even || is_midpoint_gt_even ||
      is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
    // set the inexact flag
    *ptrfpsf |= BID_INEXACT_EXCEPTION;
    if (is_tiny)
      *ptrfpsf |= BID_UNDERFLOW_EXCEPTION;
  }

  *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
  *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
  *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
  *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
  *ptrres = res;
  return;
}


#if DECIMAL_CALL_BY_REFERENCE
static void
bid128_ext_fma (int *ptr_is_midpoint_lt_even,
		int *ptr_is_midpoint_gt_even,
		int *ptr_is_inexact_lt_midpoint,
		int *ptr_is_inexact_gt_midpoint, BID_UINT128 * pres,
		BID_UINT128 * px, BID_UINT128 * py,
		BID_UINT128 *
		pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		_EXC_INFO_PARAM) {
  BID_UINT128 x = *px, y = *py, z = *pz;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
static BID_UINT128
bid128_ext_fma (int *ptr_is_midpoint_lt_even,
		int *ptr_is_midpoint_gt_even,
		int *ptr_is_inexact_lt_midpoint,
		int *ptr_is_inexact_gt_midpoint, BID_UINT128 x, BID_UINT128 y,
		BID_UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
		_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
#endif

  BID_UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  BID_UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign;
  BID_UINT64 x_exp = 0,
y_exp = 0, z_exp = 0, p_exp; 675 int true_p_exp; 676 BID_UINT128 C1, C2, C3; 677 BID_UINT256 C4; 678 int q1 = 0, q2 = 0, q3 = 0, q4; 679 int e1, e2, e3, e4; 680 int scale, ind, delta, x0; 681 int p34 = P34; // used to modify the limit on the number of digits 682 BID_UI64DOUBLE tmp; 683 int x_nr_bits, y_nr_bits, z_nr_bits; 684 unsigned int save_fpsf; 685 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0; 686 int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 687 int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0; 688 int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0; 689 int incr_exp = 0; 690 int lsb; 691 int lt_half_ulp = 0; 692 int eq_half_ulp = 0; 693 int gt_half_ulp = 0; 694 int is_tiny = 0; 695 BID_UINT64 R64, tmp64; 696 BID_UINT128 P128, R128; 697 BID_UINT192 P192, R192; 698 BID_UINT256 R256; 699#if DECIMAL_TINY_DETECTION_AFTER_ROUNDING 700 unsigned int C4gt5toq4m1; 701#endif 702 703 // the following are based on the table of special cases for fma; the NaN 704 // behavior is similar to that of the IA-64 Architecture fma 705 706 // identify cases where at least one operand is NaN 707 708 BID_SWAP128 (x); 709 BID_SWAP128 (y); 710 BID_SWAP128 (z); 711 if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN 712 // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y) 713 // check first for non-canonical NaN payload 714 if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || 715 (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && 716 (y.w[0] > 0x38c15b09ffffffffull))) { 717 y.w[1] = y.w[1] & 0xffffc00000000000ull; 718 y.w[0] = 0x0ull; 719 } 720 if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN 721 // set invalid flag 722 *pfpsf |= BID_INVALID_EXCEPTION; 723 // return quiet (y) 724 res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] 725 res.w[0] = y.w[0]; 726 } else { // y is QNaN 727 // return y 728 res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] 729 res.w[0] = 
y.w[0]; 730 // if z = SNaN or x = SNaN signal invalid exception 731 if ((z.w[1] & MASK_SNAN) == MASK_SNAN || 732 (x.w[1] & MASK_SNAN) == MASK_SNAN) { 733 // set invalid flag 734 *pfpsf |= BID_INVALID_EXCEPTION; 735 } 736 } 737 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 738 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 739 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 740 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 741 BID_SWAP128 (res); 742 BID_RETURN (res) 743 } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN 744 // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z) 745 // check first for non-canonical NaN payload 746 if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || 747 (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && 748 (z.w[0] > 0x38c15b09ffffffffull))) { 749 z.w[1] = z.w[1] & 0xffffc00000000000ull; 750 z.w[0] = 0x0ull; 751 } 752 if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN 753 // set invalid flag 754 *pfpsf |= BID_INVALID_EXCEPTION; 755 // return quiet (z) 756 res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] 757 res.w[0] = z.w[0]; 758 } else { // z is QNaN 759 // return z 760 res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] 761 res.w[0] = z.w[0]; 762 // if x = SNaN signal invalid exception 763 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { 764 // set invalid flag 765 *pfpsf |= BID_INVALID_EXCEPTION; 766 } 767 } 768 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 769 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 770 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 771 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 772 BID_SWAP128 (res); 773 BID_RETURN (res) 774 } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN 775 // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x) 776 // check first for non-canonical NaN payload 777 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || 778 (((x.w[1] & 
0x00003fffffffffffull) == 0x0000314dc6448d93ull) && 779 (x.w[0] > 0x38c15b09ffffffffull))) { 780 x.w[1] = x.w[1] & 0xffffc00000000000ull; 781 x.w[0] = 0x0ull; 782 } 783 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN 784 // set invalid flag 785 *pfpsf |= BID_INVALID_EXCEPTION; 786 // return quiet (x) 787 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] 788 res.w[0] = x.w[0]; 789 } else { // x is QNaN 790 // return x 791 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] 792 res.w[0] = x.w[0]; 793 } 794 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 795 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 796 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 797 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 798 BID_SWAP128 (res); 799 BID_RETURN (res) 800 } 801 // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check 802 // for non-canonical values 803 804 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative 805 C1.w[1] = x.w[1] & MASK_COEFF; 806 C1.w[0] = x.w[0]; 807 if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf 808 // if x is not infinity check for non-canonical values - treated as zero 809 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 810 // non-canonical 811 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits 812 C1.w[1] = 0; // significand high 813 C1.w[0] = 0; // significand low 814 } else { // G0_G1 != 11 815 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits 816 if (C1.w[1] > 0x0001ed09bead87c0ull || 817 (C1.w[1] == 0x0001ed09bead87c0ull && 818 C1.w[0] > 0x378d8e63ffffffffull)) { 819 // x is non-canonical if coefficient is larger than 10^34 -1 820 C1.w[1] = 0; 821 C1.w[0] = 0; 822 } else { // canonical 823 ; 824 } 825 } 826 } 827 y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative 828 C2.w[1] = y.w[1] & MASK_COEFF; 829 C2.w[0] = y.w[0]; 830 if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y 
!= inf 831 // if y is not infinity check for non-canonical values - treated as zero 832 if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 833 // non-canonical 834 y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits 835 C2.w[1] = 0; // significand high 836 C2.w[0] = 0; // significand low 837 } else { // G0_G1 != 11 838 y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits 839 if (C2.w[1] > 0x0001ed09bead87c0ull || 840 (C2.w[1] == 0x0001ed09bead87c0ull && 841 C2.w[0] > 0x378d8e63ffffffffull)) { 842 // y is non-canonical if coefficient is larger than 10^34 -1 843 C2.w[1] = 0; 844 C2.w[0] = 0; 845 } else { // canonical 846 ; 847 } 848 } 849 } 850 z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative 851 C3.w[1] = z.w[1] & MASK_COEFF; 852 C3.w[0] = z.w[0]; 853 if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf 854 // if z is not infinity check for non-canonical values - treated as zero 855 if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 856 // non-canonical 857 z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits 858 C3.w[1] = 0; // significand high 859 C3.w[0] = 0; // significand low 860 } else { // G0_G1 != 11 861 z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits 862 if (C3.w[1] > 0x0001ed09bead87c0ull || 863 (C3.w[1] == 0x0001ed09bead87c0ull && 864 C3.w[0] > 0x378d8e63ffffffffull)) { 865 // z is non-canonical if coefficient is larger than 10^34 -1 866 C3.w[1] = 0; 867 C3.w[0] = 0; 868 } else { // canonical 869 ; 870 } 871 } 872 } 873 874 p_sign = x_sign ^ y_sign; // sign of the product 875 876 // identify cases where at least one operand is infinity 877 878 if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf 879 if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf 880 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf 881 if (p_sign == z_sign) { 882 res.w[1] = z_sign | MASK_INF; 883 res.w[0] = 0x0; 884 } else { 885 // return QNaN 
Indefinite 886 res.w[1] = 0x7c00000000000000ull; 887 res.w[0] = 0x0000000000000000ull; 888 // set invalid flag 889 *pfpsf |= BID_INVALID_EXCEPTION; 890 } 891 } else { // z = 0 or z = f 892 res.w[1] = p_sign | MASK_INF; 893 res.w[0] = 0x0; 894 } 895 } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f 896 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf 897 if (p_sign == z_sign) { 898 res.w[1] = z_sign | MASK_INF; 899 res.w[0] = 0x0; 900 } else { 901 // return QNaN Indefinite 902 res.w[1] = 0x7c00000000000000ull; 903 res.w[0] = 0x0000000000000000ull; 904 // set invalid flag 905 *pfpsf |= BID_INVALID_EXCEPTION; 906 } 907 } else { // z = 0 or z = f 908 res.w[1] = p_sign | MASK_INF; 909 res.w[0] = 0x0; 910 } 911 } else { // y = 0 912 // return QNaN Indefinite 913 res.w[1] = 0x7c00000000000000ull; 914 res.w[0] = 0x0000000000000000ull; 915 // set invalid flag 916 *pfpsf |= BID_INVALID_EXCEPTION; 917 } 918 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 919 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 920 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 921 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 922 BID_SWAP128 (res); 923 BID_RETURN (res) 924 } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf 925 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf 926 // x = f, necessarily 927 if ((p_sign != z_sign) 928 || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) { 929 // return QNaN Indefinite 930 res.w[1] = 0x7c00000000000000ull; 931 res.w[0] = 0x0000000000000000ull; 932 // set invalid flag 933 *pfpsf |= BID_INVALID_EXCEPTION; 934 } else { 935 res.w[1] = z_sign | MASK_INF; 936 res.w[0] = 0x0; 937 } 938 } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0 939 // z = 0, f, inf 940 // return QNaN Indefinite 941 res.w[1] = 0x7c00000000000000ull; 942 res.w[0] = 0x0000000000000000ull; 943 // set invalid flag 944 *pfpsf |= BID_INVALID_EXCEPTION; 945 } else { 946 // x = f and z = 0, f, necessarily 947 res.w[1] = p_sign | MASK_INF; 948 res.w[0] = 0x0; 
949 } 950 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 951 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 952 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 953 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 954 BID_SWAP128 (res); 955 BID_RETURN (res) 956 } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf 957 // x = 0, f and y = 0, f, necessarily 958 res.w[1] = z_sign | MASK_INF; 959 res.w[0] = 0x0; 960 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 961 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 962 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 963 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 964 BID_SWAP128 (res); 965 BID_RETURN (res) 966 } 967 968 true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176; 969 if (true_p_exp < -6176) 970 p_exp = 0; // cannot be less than EXP_MIN 971 else 972 p_exp = (BID_UINT64) (true_p_exp + 6176) << 49; 973 974 if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0 975 // the result is 0 976 if (p_exp < z_exp) 977 res.w[1] = p_exp; // preferred exponent 978 else 979 res.w[1] = z_exp; // preferred exponent 980 if (p_sign == z_sign) { 981 res.w[1] |= z_sign; 982 res.w[0] = 0x0; 983 } else { // x * y and z have opposite signs 984 if (rnd_mode == BID_ROUNDING_DOWN) { 985 // res = -0.0 986 res.w[1] |= MASK_SIGN; 987 res.w[0] = 0x0; 988 } else { 989 // res = +0.0 990 // res.w[1] |= 0x0; 991 res.w[0] = 0x0; 992 } 993 } 994 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 995 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 996 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 997 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 998 BID_SWAP128 (res); 999 BID_RETURN (res) 1000 } 1001 // from this point on, we may need to know the number of decimal digits 1002 // in the significands of x, y, z when x, y, z != 0 1003 1004 if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite) 1005 // 
q1 = nr. of decimal digits in x 1006 // determine first the nr. of bits in x 1007 if (C1.w[1] == 0) { 1008 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 1009 // split the 64-bit value in two 32-bit halves to avoid rounding errors 1010 tmp.d = (double) (C1.w[0] >> 32); // exact conversion 1011 x_nr_bits = 1012 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1013 } else { // if x < 2^53 1014 tmp.d = (double) C1.w[0]; // exact conversion 1015 x_nr_bits = 1016 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1017 } 1018 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) 1019 tmp.d = (double) C1.w[1]; // exact conversion 1020 x_nr_bits = 1021 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1022 } 1023 q1 = bid_nr_digits[x_nr_bits - 1].digits; 1024 if (q1 == 0) { 1025 q1 = bid_nr_digits[x_nr_bits - 1].digits1; 1026 if (C1.w[1] > bid_nr_digits[x_nr_bits - 1].threshold_hi || 1027 (C1.w[1] == bid_nr_digits[x_nr_bits - 1].threshold_hi && 1028 C1.w[0] >= bid_nr_digits[x_nr_bits - 1].threshold_lo)) 1029 q1++; 1030 } 1031 } 1032 1033 if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite) 1034 // q2 = nr. of decimal digits in y 1035 // determine first the nr. of bits in y 1036 if (C2.w[1] == 0) { 1037 if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53 1038 // split the 64-bit value in two 32-bit halves to avoid rounding errors 1039 tmp.d = (double) (C2.w[0] >> 32); // exact conversion 1040 y_nr_bits = 1041 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1042 } else { // if y < 2^53 1043 tmp.d = (double) C2.w[0]; // exact conversion 1044 y_nr_bits = 1045 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1046 } 1047 } else { // C2.w[1] != 0 => nr. 
bits = 64 + nr_bits (C2.w[1]) 1048 tmp.d = (double) C2.w[1]; // exact conversion 1049 y_nr_bits = 1050 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1051 } 1052 q2 = bid_nr_digits[y_nr_bits - 1].digits; 1053 if (q2 == 0) { 1054 q2 = bid_nr_digits[y_nr_bits - 1].digits1; 1055 if (C2.w[1] > bid_nr_digits[y_nr_bits - 1].threshold_hi || 1056 (C2.w[1] == bid_nr_digits[y_nr_bits - 1].threshold_hi && 1057 C2.w[0] >= bid_nr_digits[y_nr_bits - 1].threshold_lo)) 1058 q2++; 1059 } 1060 } 1061 1062 if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite) 1063 // q3 = nr. of decimal digits in z 1064 // determine first the nr. of bits in z 1065 if (C3.w[1] == 0) { 1066 if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53 1067 // split the 64-bit value in two 32-bit halves to avoid rounding errors 1068 tmp.d = (double) (C3.w[0] >> 32); // exact conversion 1069 z_nr_bits = 1070 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1071 } else { // if z < 2^53 1072 tmp.d = (double) C3.w[0]; // exact conversion 1073 z_nr_bits = 1074 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1075 } 1076 } else { // C3.w[1] != 0 => nr. 
bits = 64 + nr_bits (C3.w[1]) 1077 tmp.d = (double) C3.w[1]; // exact conversion 1078 z_nr_bits = 1079 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 1080 } 1081 q3 = bid_nr_digits[z_nr_bits - 1].digits; 1082 if (q3 == 0) { 1083 q3 = bid_nr_digits[z_nr_bits - 1].digits1; 1084 if (C3.w[1] > bid_nr_digits[z_nr_bits - 1].threshold_hi || 1085 (C3.w[1] == bid_nr_digits[z_nr_bits - 1].threshold_hi && 1086 C3.w[0] >= bid_nr_digits[z_nr_bits - 1].threshold_lo)) 1087 q3++; 1088 } 1089 } 1090 1091 if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) || 1092 (C2.w[1] == 0x0 && C2.w[0] == 0x0)) { 1093 // x = 0 or y = 0 1094 // z = f, necessarily; for 0 + z return z, with the preferred exponent 1095 // the result is z, but need to get the preferred exponent 1096 if (z_exp <= p_exp) { // the preferred exponent is z_exp 1097 res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1]; 1098 res.w[0] = C3.w[0]; 1099 } else { // if (p_exp < z_exp) the preferred exponent is p_exp 1100 // return (C3 * 10^scale) * 10^(z_exp - scale) 1101 // where scale = min (p34-q3, (z_exp-p_exp) >> 49) 1102 scale = p34 - q3; 1103 ind = (z_exp - p_exp) >> 49; 1104 if (ind < scale) 1105 scale = ind; 1106 if (scale == 0) { 1107 res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant 1108 res.w[0] = z.w[0]; 1109 } else if (q3 <= 19) { // z fits in 64 bits 1110 if (scale <= 19) { // 10^scale fits in 64 bits 1111 // 64 x 64 C3.w[0] * bid_ten2k64[scale] 1112 __mul_64x64_to_128MACH (res, C3.w[0], bid_ten2k64[scale]); 1113 } else { // 10^scale fits in 128 bits 1114 // 64 x 128 C3.w[0] * bid_ten2k128[scale - 20] 1115 __mul_128x64_to_128 (res, C3.w[0], bid_ten2k128[scale - 20]); 1116 } 1117 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 1118 // 64 x 128 bid_ten2k64[scale] * C3 1119 __mul_128x64_to_128 (res, bid_ten2k64[scale], C3); 1120 } 1121 // subtract scale from the exponent 1122 z_exp = z_exp - ((BID_UINT64) scale << 49); 1123 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 1124 } 1125 
*ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1126 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1127 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1128 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1129 BID_SWAP128 (res); 1130 BID_RETURN (res) 1131 } else { 1132 ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f 1133 } 1134 1135 e1 = (x_exp >> 49) - 6176; // unbiased exponent of x 1136 e2 = (y_exp >> 49) - 6176; // unbiased exponent of y 1137 e3 = (z_exp >> 49) - 6176; // unbiased exponent of z 1138 e4 = e1 + e2; // unbiased exponent of the exact x * y 1139 1140 // calculate C1 * C2 and its number of decimal digits, q4 1141 1142 // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits 1143 // where 2 <= q1 + q2 <= 68 1144 // calculate C4 = C1 * C2 and determine q 1145 C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0; 1146 if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits 1147 C4.w[0] = C1.w[0] * C2.w[0]; 1148 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 1149 if (C4.w[0] < bid_ten2k64[q1 + q2 - 1]) 1150 q4 = q1 + q2 - 1; // q4 in [1, 18] 1151 else 1152 q4 = q1 + q2; // q4 in [2, 19] 1153 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; 1154 } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits 1155 // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits 1156 __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]); 1157 // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20 1158 if (C4.w[1] == 0 && C4.w[0] < bid_ten2k64[19]) { // 19 = q1+q2-1 1159 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; 1160 q4 = 19; // 19 = q1 + q2 - 1 1161 } else { 1162 // if (C4.w[1] == 0) 1163 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; 1164 // else 1165 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1166 q4 = 20; // 20 = q1 + q2 1167 } 1168 } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 
38 1169 // C4 = C1 * C2 fits in 64 or 128 bits 1170 // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits) 1171 // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits 1172 if (q1 <= 19) { 1173 __mul_128x64_to_128 (C4, C1.w[0], C2); 1174 } else { // q2 <= 19 1175 __mul_128x64_to_128 (C4, C2.w[0], C1); 1176 } 1177 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 1178 if (C4.w[1] < bid_ten2k128[q1 + q2 - 21].w[1] || 1179 (C4.w[1] == bid_ten2k128[q1 + q2 - 21].w[1] && 1180 C4.w[0] < bid_ten2k128[q1 + q2 - 21].w[0])) { 1181 // if (C4.w[1] == 0) // q4 = 20, necessarily 1182 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; 1183 // else 1184 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1185 q4 = q1 + q2 - 1; // q4 in [20, 37] 1186 } else { 1187 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1188 q4 = q1 + q2; // q4 in [21, 38] 1189 } 1190 } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits 1191 // both C1 and C2 fit in 128 bits (actually in 113 bits) 1192 // may replace this by 128x128_to192 1193 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0 1194 // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39 1195 if (C4.w[2] == 0 && (C4.w[1] < bid_ten2k128[18].w[1] || 1196 (C4.w[1] == bid_ten2k128[18].w[1] 1197 && C4.w[0] < bid_ten2k128[18].w[0]))) { 1198 // 18 = 38 - 20 = q1+q2-1 - 20 1199 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1200 q4 = 38; // 38 = q1 + q2 - 1 1201 } else { 1202 // if (C4.w[2] == 0) 1203 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1204 // else 1205 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1206 q4 = 39; // 39 = q1 + q2 1207 } 1208 } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57 1209 // C4 = C1 * C2 fits in 128 or 192 bits 1210 // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits) 1211 // both C1 
and C2 fit in 128 bits (actually in 113 bits); at most one 1212 // may fit in 64 bits 1213 if (C1.w[1] == 0) { // C1 fits in 64 bits 1214 // __mul_64x128_full (REShi64, RESlo128, A64, B128) 1215 __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); 1216 } else if (C2.w[1] == 0) { // C2 fits in 64 bits 1217 // __mul_64x128_full (REShi64, RESlo128, A64, B128) 1218 __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); 1219 } else { // both C1 and C2 require 128 bits 1220 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); 1221 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 1222 } 1223 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 1224 if (C4.w[2] < bid_ten2k256[q1 + q2 - 40].w[2] || 1225 (C4.w[2] == bid_ten2k256[q1 + q2 - 40].w[2] && 1226 (C4.w[1] < bid_ten2k256[q1 + q2 - 40].w[1] || 1227 (C4.w[1] == bid_ten2k256[q1 + q2 - 40].w[1] && 1228 C4.w[0] < bid_ten2k256[q1 + q2 - 40].w[0])))) { 1229 // if (C4.w[2] == 0) // q4 = 39, necessarily 1230 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 1231 // else 1232 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1233 q4 = q1 + q2 - 1; // q4 in [39, 56] 1234 } else { 1235 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1236 q4 = q1 + q2; // q4 in [40, 57] 1237 } 1238 } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits; 1239 // both C1 and C2 fit in 128 bits (actually in 113 bits); none can 1240 // fit in 64 bits, because each number must have at least 24 decimal 1241 // digits for the sum to have 58 (as the max. nr. 
of digits is 34) => 1242 // C1.w[1] != 0 and C2.w[1] != 0 1243 __mul_128x128_to_256 (C4, C1, C2); 1244 // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58 1245 if (C4.w[3] == 0 && (C4.w[2] < bid_ten2k256[18].w[2] || 1246 (C4.w[2] == bid_ten2k256[18].w[2] 1247 && (C4.w[1] < bid_ten2k256[18].w[1] 1248 || (C4.w[1] == bid_ten2k256[18].w[1] 1249 && C4.w[0] < bid_ten2k256[18].w[0]))))) { 1250 // 18 = 57 - 39 = q1+q2-1 - 39 1251 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1252 q4 = 57; // 57 = q1 + q2 - 1 1253 } else { 1254 // if (C4.w[3] == 0) 1255 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1256 // else 1257 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; 1258 q4 = 58; // 58 = q1 + q2 1259 } 1260 } else { // if 59 <= q1 + q2 <= 68 1261 // C4 = C1 * C2 fits in 192 or 256 bits 1262 // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits) 1263 // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in 1264 // 64 bits 1265 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); 1266 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 1267 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 1268 if (C4.w[3] < bid_ten2k256[q1 + q2 - 40].w[3] || 1269 (C4.w[3] == bid_ten2k256[q1 + q2 - 40].w[3] && 1270 (C4.w[2] < bid_ten2k256[q1 + q2 - 40].w[2] || 1271 (C4.w[2] == bid_ten2k256[q1 + q2 - 40].w[2] && 1272 (C4.w[1] < bid_ten2k256[q1 + q2 - 40].w[1] || 1273 (C4.w[1] == bid_ten2k256[q1 + q2 - 40].w[1] && 1274 C4.w[0] < bid_ten2k256[q1 + q2 - 40].w[0])))))) { 1275 // if (C4.w[3] == 0) // q4 = 58, necessarily 1276 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; 1277 // else 1278 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; 1279 q4 = q1 + q2 - 1; // q4 in [58, 67] 1280 } else { 1281 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; 1282 q4 = q1 + q2; // q4 in [59, 68] 1283 
} 1284 } 1285 1286 if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0 1287 save_fpsf = *pfpsf; // sticky bits - caller value must be preserved 1288 *pfpsf = 0; 1289 1290 if (q4 > p34) { 1291 1292 // truncate C4 to p34 digits into res 1293 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68 1294 x0 = q4 - p34; 1295 if (q4 <= 38) { 1296 P128.w[1] = C4.w[1]; 1297 P128.w[0] = C4.w[0]; 1298 bid_round128_19_38 (q4, x0, P128, &res, &incr_exp, 1299 &is_midpoint_lt_even, &is_midpoint_gt_even, 1300 &is_inexact_lt_midpoint, 1301 &is_inexact_gt_midpoint); 1302 } else if (q4 <= 57) { // 35 <= q4 <= 57 1303 P192.w[2] = C4.w[2]; 1304 P192.w[1] = C4.w[1]; 1305 P192.w[0] = C4.w[0]; 1306 bid_round192_39_57 (q4, x0, P192, &R192, &incr_exp, 1307 &is_midpoint_lt_even, &is_midpoint_gt_even, 1308 &is_inexact_lt_midpoint, 1309 &is_inexact_gt_midpoint); 1310 res.w[0] = R192.w[0]; 1311 res.w[1] = R192.w[1]; 1312 } else { // if (q4 <= 68) 1313 bid_round256_58_76 (q4, x0, C4, &R256, &incr_exp, 1314 &is_midpoint_lt_even, &is_midpoint_gt_even, 1315 &is_inexact_lt_midpoint, 1316 &is_inexact_gt_midpoint); 1317 res.w[0] = R256.w[0]; 1318 res.w[1] = R256.w[1]; 1319 } 1320 e4 = e4 + x0; 1321 q4 = p34; 1322 if (incr_exp) { 1323 e4 = e4 + 1; 1324#if !DECIMAL_TINY_DETECTION_AFTER_ROUNDING 1325 if (q4 + e4 == expmin + p34) *pfpsf |= (BID_INEXACT_EXCEPTION | BID_UNDERFLOW_EXCEPTION); 1326#endif 1327 } 1328 // res is now the coefficient of the result rounded to the destination 1329 // precision, with unbounded exponent; the exponent is e4; q4=digits(res) 1330 } else { // if (q4 <= p34) 1331 // C4 * 10^e4 is the result rounded to the destination precision, with 1332 // unbounded exponent (which is exact) 1333 1334 if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) { 1335 // e4 is too large, but can be brought within range by scaling up C4 1336 scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2 1337 // res = (C4 * 10^scale) * 10^expmax 1338 if (q4 <= 19) { // C4 fits in 64 bits 
1339 if (scale <= 19) { // 10^scale fits in 64 bits 1340 // 64 x 64 C4.w[0] * bid_ten2k64[scale] 1341 __mul_64x64_to_128MACH (res, C4.w[0], bid_ten2k64[scale]); 1342 } else { // 10^scale fits in 128 bits 1343 // 64 x 128 C4.w[0] * bid_ten2k128[scale - 20] 1344 __mul_128x64_to_128 (res, C4.w[0], bid_ten2k128[scale - 20]); 1345 } 1346 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits 1347 // 64 x 128 bid_ten2k64[scale] * CC43 1348 __mul_128x64_to_128 (res, bid_ten2k64[scale], C4); 1349 } 1350 e4 = e4 - scale; // expmax 1351 q4 = q4 + scale; 1352 } else { 1353 res.w[1] = C4.w[1]; 1354 res.w[0] = C4.w[0]; 1355 } 1356 // res is the coefficient of the result rounded to the destination 1357 // precision, with unbounded exponent (it has q4 digits); the exponent 1358 // is e4 (exact result) 1359 } 1360 1361 // check for overflow 1362 if (q4 + e4 > p34 + expmax) { 1363 if (rnd_mode == BID_ROUNDING_TO_NEAREST) { 1364 res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf 1365 res.w[0] = 0x0000000000000000ull; 1366 *pfpsf |= (BID_INEXACT_EXCEPTION | BID_OVERFLOW_EXCEPTION); 1367 } else { 1368 res.w[1] = p_sign | res.w[1]; 1369 bid_rounding_correction (rnd_mode, 1370 is_inexact_lt_midpoint, 1371 is_inexact_gt_midpoint, 1372 is_midpoint_lt_even, is_midpoint_gt_even, 1373 e4, &res, pfpsf); 1374 } 1375 *pfpsf |= save_fpsf; 1376 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1377 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1378 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1379 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1380 BID_SWAP128 (res); 1381 BID_RETURN (res) 1382 } 1383 // check for underflow 1384 if (q4 + e4 < expmin + p34) { 1385 is_tiny = 1; // the result is tiny 1386 // (good also for most cases if 'before rounding') 1387 if (e4 < expmin) { 1388 // if e4 < expmin, we must truncate more of res 1389 x0 = expmin - e4; // x0 >= 1 1390 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 1391 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 
1392 is_midpoint_lt_even0 = is_midpoint_lt_even; 1393 is_midpoint_gt_even0 = is_midpoint_gt_even; 1394 is_inexact_lt_midpoint = 0; 1395 is_inexact_gt_midpoint = 0; 1396 is_midpoint_lt_even = 0; 1397 is_midpoint_gt_even = 0; 1398 // the number of decimal digits in res is q4 1399 if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits 1400 if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17 1401 bid_round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp, 1402 &is_midpoint_lt_even, &is_midpoint_gt_even, 1403 &is_inexact_lt_midpoint, 1404 &is_inexact_gt_midpoint); 1405 if (incr_exp) { 1406 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 1407 R64 = bid_ten2k64[q4 - x0]; 1408 } 1409 // res.w[1] = 0; (from above) 1410 res.w[0] = R64; 1411 } else { // if (q4 <= 34) 1412 // 19 <= q4 <= 38 1413 P128.w[1] = res.w[1]; 1414 P128.w[0] = res.w[0]; 1415 bid_round128_19_38 (q4, x0, P128, &res, &incr_exp, 1416 &is_midpoint_lt_even, &is_midpoint_gt_even, 1417 &is_inexact_lt_midpoint, 1418 &is_inexact_gt_midpoint); 1419 if (incr_exp) { 1420 // increase coefficient by a factor of 10; this will be <= 10^33 1421 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 1422 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 1423 // res.w[1] = 0; 1424 res.w[0] = bid_ten2k64[q4 - x0]; 1425 } else { // 20 <= q4 - x0 <= 37 1426 res.w[0] = bid_ten2k128[q4 - x0 - 20].w[0]; 1427 res.w[1] = bid_ten2k128[q4 - x0 - 20].w[1]; 1428 } 1429 } 1430 } 1431 e4 = e4 + x0; // expmin 1432 } else if (x0 == q4) { 1433 // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin 1434 // determine relationship with 1/2 ulp 1435 if (q4 <= 19) { 1436 if (res.w[0] < bid_midpoint64[q4 - 1]) { // < 1/2 ulp 1437 lt_half_ulp = 1; 1438 is_inexact_lt_midpoint = 1; 1439 } else if (res.w[0] == bid_midpoint64[q4 - 1]) { // = 1/2 ulp 1440 eq_half_ulp = 1; 1441 is_midpoint_gt_even = 1; 1442 } else { // > 1/2 ulp 1443 // gt_half_ulp = 1; 1444 is_inexact_gt_midpoint = 1; 1445 } 1446 } else { // if (q4 <= 34) 1447 
if (res.w[1] < bid_midpoint128[q4 - 20].w[1] || 1448 (res.w[1] == bid_midpoint128[q4 - 20].w[1] && 1449 res.w[0] < bid_midpoint128[q4 - 20].w[0])) { // < 1/2 ulp 1450 lt_half_ulp = 1; 1451 is_inexact_lt_midpoint = 1; 1452 } else if (res.w[1] == bid_midpoint128[q4 - 20].w[1] && 1453 res.w[0] == bid_midpoint128[q4 - 20].w[0]) { // = 1/2 ulp 1454 eq_half_ulp = 1; 1455 is_midpoint_gt_even = 1; 1456 } else { // > 1/2 ulp 1457 // gt_half_ulp = 1; 1458 is_inexact_gt_midpoint = 1; 1459 } 1460 } 1461 if (lt_half_ulp || eq_half_ulp) { 1462 // res = +0.0 * 10^expmin 1463 res.w[1] = 0x0000000000000000ull; 1464 res.w[0] = 0x0000000000000000ull; 1465 } else { // if (gt_half_ulp) 1466 // res = +1 * 10^expmin 1467 res.w[1] = 0x0000000000000000ull; 1468 res.w[0] = 0x0000000000000001ull; 1469 } 1470 e4 = expmin; 1471 } else { // if (x0 > q4) 1472 // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin 1473 res.w[1] = 0; 1474 res.w[0] = 0; 1475 e4 = expmin; 1476 is_inexact_lt_midpoint = 1; 1477 } 1478 // avoid a double rounding error 1479 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 1480 is_midpoint_lt_even) { // double rounding error upward 1481 // res = res - 1 1482 res.w[0]--; 1483 if (res.w[0] == 0xffffffffffffffffull) 1484 res.w[1]--; 1485 // Note: a double rounding error upward is not possible; for this 1486 // the result after the first rounding would have to be 99...95 1487 // (35 digits in all), possibly followed by a number of zeros; this 1488 // not possible for f * f + 0 1489 is_midpoint_lt_even = 0; 1490 is_inexact_lt_midpoint = 1; 1491 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 1492 is_midpoint_gt_even) { // double rounding error downward 1493 // res = res + 1 1494 res.w[0]++; 1495 if (res.w[0] == 0) 1496 res.w[1]++; 1497 is_midpoint_gt_even = 0; 1498 is_inexact_gt_midpoint = 1; 1499 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 1500 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 1501 // if this second 
rounding was exact the result may still be 1502 // inexact because of the first rounding 1503 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 1504 is_inexact_gt_midpoint = 1; 1505 } 1506 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 1507 is_inexact_lt_midpoint = 1; 1508 } 1509 } else if (is_midpoint_gt_even && 1510 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { 1511 // pulled up to a midpoint 1512 is_inexact_lt_midpoint = 1; 1513 is_inexact_gt_midpoint = 0; 1514 is_midpoint_lt_even = 0; 1515 is_midpoint_gt_even = 0; 1516 } else if (is_midpoint_lt_even && 1517 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { 1518 // pulled down to a midpoint 1519 is_inexact_lt_midpoint = 0; 1520 is_inexact_gt_midpoint = 1; 1521 is_midpoint_lt_even = 0; 1522 is_midpoint_gt_even = 0; 1523 } else { 1524 ; 1525 } 1526 } else { // if e4 >= emin then q4 < P and the result is tiny and exact 1527 if (e3 < e4) { 1528 // if (e3 < e4) the preferred exponent is e3 1529 // return (C4 * 10^scale) * 10^(e4 - scale) 1530 // where scale = min (p34-q4, (e4 - e3)) 1531 scale = p34 - q4; 1532 ind = e4 - e3; 1533 if (ind < scale) 1534 scale = ind; 1535 if (scale == 0) { 1536 ; // res and e4 are unchanged 1537 } else if (q4 <= 19) { // C4 fits in 64 bits 1538 if (scale <= 19) { // 10^scale fits in 64 bits 1539 // 64 x 64 res.w[0] * bid_ten2k64[scale] 1540 __mul_64x64_to_128MACH (res, res.w[0], bid_ten2k64[scale]); 1541 } else { // 10^scale fits in 128 bits 1542 // 64 x 128 res.w[0] * bid_ten2k128[scale - 20] 1543 __mul_128x64_to_128 (res, res.w[0], bid_ten2k128[scale - 20]); 1544 } 1545 } else { // res fits in 128 bits, but 10^scale must fit in 64 bits 1546 // 64 x 128 bid_ten2k64[scale] * C3 1547 __mul_128x64_to_128 (res, bid_ten2k64[scale], res); 1548 } 1549 // subtract scale from the exponent 1550 e4 = e4 - scale; 1551 } 1552 } 1553 1554 // check for inexact result 1555 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 1556 is_midpoint_lt_even || is_midpoint_gt_even) { 
1557 // set the inexact flag and the underflow flag 1558 *pfpsf |= BID_INEXACT_EXCEPTION; 1559 *pfpsf |= BID_UNDERFLOW_EXCEPTION; 1560 } 1561 res.w[1] = p_sign | ((BID_UINT64) (e4 + 6176) << 49) | res.w[1]; 1562 if (rnd_mode != BID_ROUNDING_TO_NEAREST) { 1563 bid_rounding_correction (rnd_mode, 1564 is_inexact_lt_midpoint, 1565 is_inexact_gt_midpoint, 1566 is_midpoint_lt_even, is_midpoint_gt_even, 1567 e4, &res, pfpsf); 1568 } 1569 *pfpsf |= save_fpsf; 1570 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1571 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1572 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1573 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1574 BID_SWAP128 (res); 1575 BID_RETURN (res) 1576 } 1577 // no overflow, and no underflow for rounding to nearest 1578 // (although if tininess is detected 'before rounding', we may 1579 // get here if incr_exp = 1 and then q4 + e4 == expmin + p34) 1580 res.w[1] = p_sign | ((BID_UINT64) (e4 + 6176) << 49) | res.w[1]; 1581 1582 if (rnd_mode != BID_ROUNDING_TO_NEAREST) { 1583 bid_rounding_correction (rnd_mode, 1584 is_inexact_lt_midpoint, 1585 is_inexact_gt_midpoint, 1586 is_midpoint_lt_even, is_midpoint_gt_even, 1587 e4, &res, pfpsf); 1588 // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ) 1589 if (e4 == expmin) { 1590 if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull || 1591 ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && 1592 res.w[0] < 0x38c15b0a00000000ull)) { 1593 is_tiny = 1; 1594 } 1595 } 1596 } 1597 1598 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 1599 is_midpoint_lt_even || is_midpoint_gt_even) { 1600 // set the inexact flag 1601 *pfpsf |= BID_INEXACT_EXCEPTION; 1602 if (is_tiny) 1603 *pfpsf |= BID_UNDERFLOW_EXCEPTION; 1604 } 1605 1606 if ((*pfpsf & BID_INEXACT_EXCEPTION) == 0) { // x * y is exact 1607 // need to ensure that the result has the preferred exponent 1608 p_exp = res.w[1] & MASK_EXP; 1609 if (z_exp < p_exp) { // the preferred exponent is 
z_exp 1610 // signficand of res in C3 1611 C3.w[1] = res.w[1] & MASK_COEFF; 1612 C3.w[0] = res.w[0]; 1613 // the number of decimal digits of x * y is q4 <= 34 1614 // Note: the coefficient fits in 128 bits 1615 1616 // return (C3 * 10^scale) * 10^(p_exp - scale) 1617 // where scale = min (p34-q4, (p_exp-z_exp) >> 49) 1618 scale = p34 - q4; 1619 ind = (p_exp - z_exp) >> 49; 1620 if (ind < scale) 1621 scale = ind; 1622 // subtract scale from the exponent 1623 p_exp = p_exp - ((BID_UINT64) scale << 49); 1624 if (scale == 0) { 1625 ; // leave res unchanged 1626 } else if (q4 <= 19) { // x * y fits in 64 bits 1627 if (scale <= 19) { // 10^scale fits in 64 bits 1628 // 64 x 64 C3.w[0] * bid_ten2k64[scale] 1629 __mul_64x64_to_128MACH (res, C3.w[0], bid_ten2k64[scale]); 1630 } else { // 10^scale fits in 128 bits 1631 // 64 x 128 C3.w[0] * bid_ten2k128[scale - 20] 1632 __mul_128x64_to_128 (res, C3.w[0], bid_ten2k128[scale - 20]); 1633 } 1634 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; 1635 } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits 1636 // 64 x 128 bid_ten2k64[scale] * C3 1637 __mul_128x64_to_128 (res, bid_ten2k64[scale], C3); 1638 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; 1639 } 1640 } // else leave the result as it is, because p_exp <= z_exp 1641 } 1642 *pfpsf |= save_fpsf; 1643 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1644 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1645 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1646 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1647 BID_SWAP128 (res); 1648 BID_RETURN (res) 1649 } // else we have f * f + f 1650 1651 // continue with x = f, y = f, z = f 1652 1653 delta = q3 + e3 - q4 - e4; 1654delta_ge_zero: 1655 if (delta >= 0) { 1656 1657 if (p34 <= delta - 1 || // Case (1') 1658 (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A) 1659 // check for overflow, which can occur only in Case (1') 1660 if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) { 1661 // e3 
> expmax implies p34 <= delta-1 and e3 > expmax is a necessary 1662 // condition for (q3 + e3) > (p34 + expmax) 1663 if (rnd_mode == BID_ROUNDING_TO_NEAREST) { 1664 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 1665 res.w[0] = 0x0000000000000000ull; 1666 *pfpsf |= (BID_INEXACT_EXCEPTION | BID_OVERFLOW_EXCEPTION); 1667 } else { 1668 if (p_sign == z_sign) { 1669 is_inexact_lt_midpoint = 1; 1670 } else { 1671 is_inexact_gt_midpoint = 1; 1672 } 1673 // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3) 1674 scale = p34 - q3; 1675 if (scale == 0) { 1676 res.w[1] = z_sign | C3.w[1]; 1677 res.w[0] = C3.w[0]; 1678 } else { 1679 if (q3 <= 19) { // C3 fits in 64 bits 1680 if (scale <= 19) { // 10^scale fits in 64 bits 1681 // 64 x 64 C3.w[0] * bid_ten2k64[scale] 1682 __mul_64x64_to_128MACH (res, C3.w[0], bid_ten2k64[scale]); 1683 } else { // 10^scale fits in 128 bits 1684 // 64 x 128 C3.w[0] * bid_ten2k128[scale - 20] 1685 __mul_128x64_to_128 (res, C3.w[0], 1686 bid_ten2k128[scale - 20]); 1687 } 1688 } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits 1689 // 64 x 128 bid_ten2k64[scale] * C3 1690 __mul_128x64_to_128 (res, bid_ten2k64[scale], C3); 1691 } 1692 // the coefficient in res has q3 + scale = p34 digits 1693 } 1694 e3 = e3 - scale; 1695 res.w[1] = z_sign | res.w[1]; 1696 bid_rounding_correction (rnd_mode, 1697 is_inexact_lt_midpoint, 1698 is_inexact_gt_midpoint, 1699 is_midpoint_lt_even, is_midpoint_gt_even, 1700 e3, &res, pfpsf); 1701 } 1702 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1703 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1704 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1705 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1706 BID_SWAP128 (res); 1707 BID_RETURN (res) 1708 } 1709 // res = z 1710 if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3) 1711 // return (C3 * 10^scale) * 10^(z_exp - scale) 1712 // where scale = min (p34-q3, z_exp-EMIN) 1713 scale = p34 - q3; 1714 ind = e3 + 6176; 1715 if 
(ind < scale) 1716 scale = ind; 1717 if (scale == 0) { 1718 res.w[1] = C3.w[1]; 1719 res.w[0] = C3.w[0]; 1720 } else if (q3 <= 19) { // z fits in 64 bits 1721 if (scale <= 19) { // 10^scale fits in 64 bits 1722 // 64 x 64 C3.w[0] * bid_ten2k64[scale] 1723 __mul_64x64_to_128MACH (res, C3.w[0], bid_ten2k64[scale]); 1724 } else { // 10^scale fits in 128 bits 1725 // 64 x 128 C3.w[0] * bid_ten2k128[scale - 20] 1726 __mul_128x64_to_128 (res, C3.w[0], bid_ten2k128[scale - 20]); 1727 } 1728 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 1729 // 64 x 128 bid_ten2k64[scale] * C3 1730 __mul_128x64_to_128 (res, bid_ten2k64[scale], C3); 1731 } 1732 // the coefficient in res has q3 + scale digits 1733 // subtract scale from the exponent 1734 z_exp = z_exp - ((BID_UINT64) scale << 49); 1735 e3 = e3 - scale; 1736 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 1737 if (scale + q3 < p34) 1738 *pfpsf |= BID_UNDERFLOW_EXCEPTION; // OK for tininess detection 1739 // before or after rounding, because the exponent of the 1740 // rounded result with unbounded exponent does not change 1741 // due to rounding overflow 1742 } else { // if q3 = p34 1743 scale = 0; 1744 res.w[1] = z_sign | ((BID_UINT64) (e3 + 6176) << 49) | C3.w[1]; 1745 res.w[0] = C3.w[0]; 1746 } 1747 1748 // use the following to avoid double rounding errors when operating on 1749 // mixed formats in rounding to nearest, and for correcting the result 1750 // if not rounding to nearest 1751 if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) { 1752 // there is a gap of exactly one digit between the scaled C3 and C4 1753 // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is a special case 1754 if ((q3 <= 19 && C3.w[0] != bid_ten2k64[q3 - 1]) || 1755 (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != bid_ten2k64[19])) || 1756 (q3 >= 21 && (C3.w[1] != bid_ten2k128[q3 - 21].w[1] || 1757 C3.w[0] != bid_ten2k128[q3 - 21].w[0]))) { 1758 // C3 * 10^ scale != 10^(q3-1) 1759 // if ((res.w[1] & MASK_COEFF) != 
0x0000314dc6448d93ull || 1760 // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 1761 is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value 1762 } else { // if C3 * 10^scale = 10^(q3+scale-1) 1763 // ok from above e3 = (z_exp >> 49) - 6176; 1764 // the result is always inexact 1765 if (q4 == 1) { 1766 R64 = C4.w[0]; 1767 } else { 1768 // if q4 > 1 then truncate C4 from q4 digits to 1 digit; 1769 // x = q4-1, 1 <= x <= 67 and check if this operation is exact 1770 if (q4 <= 18) { // 2 <= q4 <= 18 1771 bid_round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, 1772 &is_midpoint_lt_even, &is_midpoint_gt_even, 1773 &is_inexact_lt_midpoint, 1774 &is_inexact_gt_midpoint); 1775 } else if (q4 <= 38) { 1776 P128.w[1] = C4.w[1]; 1777 P128.w[0] = C4.w[0]; 1778 bid_round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, 1779 &is_midpoint_lt_even, 1780 &is_midpoint_gt_even, 1781 &is_inexact_lt_midpoint, 1782 &is_inexact_gt_midpoint); 1783 R64 = R128.w[0]; // one decimal digit 1784 } else if (q4 <= 57) { 1785 P192.w[2] = C4.w[2]; 1786 P192.w[1] = C4.w[1]; 1787 P192.w[0] = C4.w[0]; 1788 bid_round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, 1789 &is_midpoint_lt_even, 1790 &is_midpoint_gt_even, 1791 &is_inexact_lt_midpoint, 1792 &is_inexact_gt_midpoint); 1793 R64 = R192.w[0]; // one decimal digit 1794 } else { // if (q4 <= 68) 1795 bid_round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, 1796 &is_midpoint_lt_even, 1797 &is_midpoint_gt_even, 1798 &is_inexact_lt_midpoint, 1799 &is_inexact_gt_midpoint); 1800 R64 = R256.w[0]; // one decimal digit 1801 } 1802 if (incr_exp) { 1803 R64 = 10; 1804 } 1805 } 1806 if (R64 == 5 && !is_inexact_lt_midpoint && !is_inexact_gt_midpoint && 1807 !is_midpoint_lt_even && !is_midpoint_gt_even) { 1808 is_inexact_lt_midpoint = 0; 1809 is_inexact_gt_midpoint = 0; 1810 is_midpoint_lt_even = 1; 1811 is_midpoint_gt_even = 0; 1812 } else if ((e3 == expmin) || 1813 R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) { 1814 // result does not 
change 1815 is_inexact_lt_midpoint = 0; 1816 is_inexact_gt_midpoint = 1; 1817 is_midpoint_lt_even = 0; 1818 is_midpoint_gt_even = 0; 1819 } else { 1820 is_inexact_lt_midpoint = 1; 1821 is_inexact_gt_midpoint = 0; 1822 is_midpoint_lt_even = 0; 1823 is_midpoint_gt_even = 0; 1824 // result decremented is 10^(q3+scale) - 1 1825 if ((q3 + scale) <= 19) { 1826 res.w[1] = 0; 1827 res.w[0] = bid_ten2k64[q3 + scale]; 1828 } else { // if ((q3 + scale + 1) <= 35) 1829 res.w[1] = bid_ten2k128[q3 + scale - 20].w[1]; 1830 res.w[0] = bid_ten2k128[q3 + scale - 20].w[0]; 1831 } 1832 res.w[0] = res.w[0] - 1; // borrow never occurs 1833 z_exp = z_exp - EXP_P1; 1834 e3 = e3 - 1; 1835 res.w[1] = z_sign | ((BID_UINT64) (e3 + 6176) << 49) | res.w[1]; 1836 } 1837 if (e3 == expmin) { 1838#if DECIMAL_TINY_DETECTION_AFTER_ROUNDING 1839 if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) { 1840 ; // result not tiny (in round-to-nearest mode) 1841 // rounds to 10^33 * 10^emin 1842 } else { 1843 *pfpsf |= BID_UNDERFLOW_EXCEPTION; 1844 } 1845#else 1846 *pfpsf |= BID_UNDERFLOW_EXCEPTION; // tiny if detected before rounding 1847#endif 1848 } 1849 } // end 10^(q3+scale-1) 1850 // set the inexact flag 1851 *pfpsf |= BID_INEXACT_EXCEPTION; 1852 } else { 1853 if (p_sign == z_sign) { 1854 // if (z_sign), set as if for absolute value 1855 is_inexact_lt_midpoint = 1; 1856 } else { // if (p_sign != z_sign) 1857 // if (z_sign), set as if for absolute value 1858 is_inexact_gt_midpoint = 1; 1859 } 1860 *pfpsf |= BID_INEXACT_EXCEPTION; 1861 } 1862 // the result is always inexact => set the inexact flag 1863 // Determine tininess: 1864 // if (exp > expmin) 1865 // the result is not tiny 1866 // else // if exp = emin 1867 // if (q3 + scale < p34) 1868 // the result is tiny 1869 // else // if (q3 + scale = p34) 1870 // if (C3 * 10^scale > 10^33) 1871 // the result is not tiny 1872 // else // if C3 * 10^scale = 10^33 1873 // if (xy * z > 0) 1874 // the result is not tiny 1875 // else // if (xy * z < 0) 1876 // 
if (rnd_mode = RN || rnd_mode = RA) and (delta = P+1) and 1877 // C4 > 5 * 10^(q4-1) 1878 // the result is tiny 1879 // else 1880 // the result is not tiny 1881 // endif 1882 // endif 1883 // endif 1884 // endif 1885 1886#if DECIMAL_TINY_DETECTION_AFTER_ROUNDING 1887 // determine if C4 > 5 * 10^(q4-1) 1888 if (q4 <= 19) { 1889 C4gt5toq4m1 = 1890 C4.w[0] > bid_midpoint64[q4 - 1]; 1891 } else if (q4 <= 38) { 1892 C4gt5toq4m1 = 1893 C4.w[1] > bid_midpoint128[q4 - 1].w[1] || 1894 (C4.w[1] == bid_midpoint128[q4 - 1].w[1] && 1895 C4.w[0] > bid_midpoint128[q4 - 1].w[0]); 1896 } else if (q4 <= 58) { 1897 C4gt5toq4m1 = 1898 C4.w[2] > bid_midpoint192[q4 - 1].w[2] || 1899 (C4.w[2] == bid_midpoint192[q4 - 1].w[2] && 1900 C4.w[1] > bid_midpoint192[q4 - 1].w[1]) || 1901 (C4.w[2] == bid_midpoint192[q4 - 1].w[2] && 1902 C4.w[1] == bid_midpoint192[q4 - 1].w[1] && 1903 C4.w[0] > bid_midpoint192[q4 - 1].w[0]); 1904 } else { // if (q4 <= 68) 1905 C4gt5toq4m1 = 1906 C4.w[3] > bid_midpoint256[q4 - 1].w[3] || 1907 (C4.w[3] == bid_midpoint256[q4 - 1].w[3] && 1908 C4.w[2] > bid_midpoint256[q4 - 1].w[2]) || 1909 (C4.w[3] == bid_midpoint256[q4 - 1].w[3] && 1910 C4.w[2] == bid_midpoint256[q4 - 1].w[2] && 1911 C4.w[1] > bid_midpoint256[q4 - 1].w[1]) || 1912 (C4.w[3] == bid_midpoint256[q4 - 1].w[3] && 1913 C4.w[2] == bid_midpoint256[q4 - 1].w[2] && 1914 C4.w[1] == bid_midpoint256[q4 - 1].w[1] && 1915 C4.w[0] > bid_midpoint256[q4 - 1].w[0]); 1916 } 1917 1918 if ((e3 == expmin && (q3 + scale) < p34) || 1919 (e3 == expmin && (q3 + scale) == p34 && 1920 (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && // 10^33_high 1921 res.w[0] == 0x38c15b0a00000000ull && // 10^33_low 1922 z_sign != p_sign && 1923 (rnd_mode == BID_ROUNDING_TO_NEAREST || rnd_mode == BID_ROUNDING_TIES_AWAY) && 1924 (delta == (p34 + 1)) && C4gt5toq4m1)) { 1925 *pfpsf |= BID_UNDERFLOW_EXCEPTION; 1926 } 1927#else 1928 if ((e3 == expmin && (q3 + scale) < p34) || 1929 (e3 == expmin && (q3 + scale) == p34 && 1930 (res.w[1] & 
MASK_COEFF) == 0x0000314dc6448d93ull && // 10^33_high 1931 res.w[0] == 0x38c15b0a00000000ull && // 10^33_low 1932 z_sign != p_sign)) { 1933 *pfpsf |= BID_UNDERFLOW_EXCEPTION; // for all rounding modes 1934 } 1935#endif 1936 if (rnd_mode != BID_ROUNDING_TO_NEAREST) { 1937 bid_rounding_correction (rnd_mode, 1938 is_inexact_lt_midpoint, 1939 is_inexact_gt_midpoint, 1940 is_midpoint_lt_even, is_midpoint_gt_even, 1941 e3, &res, pfpsf); 1942 } 1943 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 1944 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 1945 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 1946 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 1947 BID_SWAP128 (res); 1948 BID_RETURN (res) 1949 1950 } else if (p34 == delta) { // Case (1''B) 1951 1952 // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3 1953 // and C3 can be scaled up to p34 digits if needed 1954 1955 // scale C3 to p34 digits if needed 1956 scale = p34 - q3; // 0 <= scale <= p34 - 1 1957 if (scale == 0) { 1958 res.w[1] = C3.w[1]; 1959 res.w[0] = C3.w[0]; 1960 } else if (q3 <= 19) { // z fits in 64 bits 1961 if (scale <= 19) { // 10^scale fits in 64 bits 1962 // 64 x 64 C3.w[0] * bid_ten2k64[scale] 1963 __mul_64x64_to_128MACH (res, C3.w[0], bid_ten2k64[scale]); 1964 } else { // 10^scale fits in 128 bits 1965 // 64 x 128 C3.w[0] * bid_ten2k128[scale - 20] 1966 __mul_128x64_to_128 (res, C3.w[0], bid_ten2k128[scale - 20]); 1967 } 1968 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 1969 // 64 x 128 bid_ten2k64[scale] * C3 1970 __mul_128x64_to_128 (res, bid_ten2k64[scale], C3); 1971 } 1972 // subtract scale from the exponent 1973 z_exp = z_exp - ((BID_UINT64) scale << 49); 1974 e3 = e3 - scale; 1975 // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits 1976 1977 // determine whether x * y is less than, equal to, or greater than 1978 // 1/2 ulp (z) 1979 if (q4 <= 19) { 1980 if (C4.w[0] < bid_midpoint64[q4 - 1]) { // < 1/2 ulp 1981 lt_half_ulp = 
1; 1982 } else if (C4.w[0] == bid_midpoint64[q4 - 1]) { // = 1/2 ulp 1983 eq_half_ulp = 1; 1984 } else { // > 1/2 ulp 1985 gt_half_ulp = 1; 1986 } 1987 } else if (q4 <= 38) { 1988 if (C4.w[2] == 0 && (C4.w[1] < bid_midpoint128[q4 - 20].w[1] || 1989 (C4.w[1] == bid_midpoint128[q4 - 20].w[1] && 1990 C4.w[0] < bid_midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp 1991 lt_half_ulp = 1; 1992 } else if (C4.w[2] == 0 && C4.w[1] == bid_midpoint128[q4 - 20].w[1] && 1993 C4.w[0] == bid_midpoint128[q4 - 20].w[0]) { // = 1/2 ulp 1994 eq_half_ulp = 1; 1995 } else { // > 1/2 ulp 1996 gt_half_ulp = 1; 1997 } 1998 } else if (q4 <= 58) { 1999 if (C4.w[3] == 0 && (C4.w[2] < bid_midpoint192[q4 - 39].w[2] || 2000 (C4.w[2] == bid_midpoint192[q4 - 39].w[2] && 2001 C4.w[1] < bid_midpoint192[q4 - 39].w[1]) || 2002 (C4.w[2] == bid_midpoint192[q4 - 39].w[2] && 2003 C4.w[1] == bid_midpoint192[q4 - 39].w[1] && 2004 C4.w[0] < bid_midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp 2005 lt_half_ulp = 1; 2006 } else if (C4.w[3] == 0 && C4.w[2] == bid_midpoint192[q4 - 39].w[2] && 2007 C4.w[1] == bid_midpoint192[q4 - 39].w[1] && 2008 C4.w[0] == bid_midpoint192[q4 - 39].w[0]) { // = 1/2 ulp 2009 eq_half_ulp = 1; 2010 } else { // > 1/2 ulp 2011 gt_half_ulp = 1; 2012 } 2013 } else { 2014 if (C4.w[3] < bid_midpoint256[q4 - 59].w[3] || 2015 (C4.w[3] == bid_midpoint256[q4 - 59].w[3] && 2016 C4.w[2] < bid_midpoint256[q4 - 59].w[2]) || 2017 (C4.w[3] == bid_midpoint256[q4 - 59].w[3] && 2018 C4.w[2] == bid_midpoint256[q4 - 59].w[2] && 2019 C4.w[1] < bid_midpoint256[q4 - 59].w[1]) || 2020 (C4.w[3] == bid_midpoint256[q4 - 59].w[3] && 2021 C4.w[2] == bid_midpoint256[q4 - 59].w[2] && 2022 C4.w[1] == bid_midpoint256[q4 - 59].w[1] && 2023 C4.w[0] < bid_midpoint256[q4 - 59].w[0])) { // < 1/2 ulp 2024 lt_half_ulp = 1; 2025 } else if (C4.w[3] == bid_midpoint256[q4 - 59].w[3] && 2026 C4.w[2] == bid_midpoint256[q4 - 59].w[2] && 2027 C4.w[1] == bid_midpoint256[q4 - 59].w[1] && 2028 C4.w[0] == bid_midpoint256[q4 - 59].w[0]) { // = 
1/2 ulp 2029 eq_half_ulp = 1; 2030 } else { // > 1/2 ulp 2031 gt_half_ulp = 1; 2032 } 2033 } 2034 2035 if (p_sign == z_sign) { 2036 if (lt_half_ulp) { 2037 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2038 // use the following to avoid double rounding errors when operating on 2039 // mixed formats in rounding to nearest 2040 is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value 2041 } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) { 2042 // add 1 ulp to the significand 2043 res.w[0]++; 2044 if (res.w[0] == 0x0ull) 2045 res.w[1]++; 2046 // check for rounding overflow, when coeff == 10^34 2047 if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull && 2048 res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34 2049 e3 = e3 + 1; 2050 // coeff = 10^33 2051 z_exp = ((BID_UINT64) (e3 + 6176) << 49) & MASK_EXP; 2052 res.w[1] = 0x0000314dc6448d93ull; 2053 res.w[0] = 0x38c15b0a00000000ull; 2054 } 2055 // end add 1 ulp 2056 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2057 if (eq_half_ulp) { 2058 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value 2059 } else { 2060 is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value 2061 } 2062 } else { // if (eq_half_ulp && !(res.w[0] & 0x01)) 2063 // leave unchanged 2064 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2065 is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value 2066 } 2067 // the result is always inexact, and never tiny 2068 // set the inexact flag 2069 *pfpsf |= BID_INEXACT_EXCEPTION; 2070 // check for overflow 2071 if (e3 > expmax && rnd_mode == BID_ROUNDING_TO_NEAREST) { 2072 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 2073 res.w[0] = 0x0000000000000000ull; 2074 *pfpsf |= (BID_INEXACT_EXCEPTION | BID_OVERFLOW_EXCEPTION); 2075 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2076 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2077 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2078 *ptr_is_inexact_gt_midpoint = 
is_inexact_gt_midpoint; 2079 BID_SWAP128 (res); 2080 BID_RETURN (res) 2081 } 2082 if (rnd_mode != BID_ROUNDING_TO_NEAREST) { 2083 bid_rounding_correction (rnd_mode, 2084 is_inexact_lt_midpoint, 2085 is_inexact_gt_midpoint, 2086 is_midpoint_lt_even, is_midpoint_gt_even, 2087 e3, &res, pfpsf); 2088 z_exp = res.w[1] & MASK_EXP; 2089 } 2090 } else { // if (p_sign != z_sign) 2091 // consider two cases, because C3 * 10^scale = 10^33 is a special case 2092 if (res.w[1] != 0x0000314dc6448d93ull || 2093 res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 2094 if (lt_half_ulp) { 2095 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2096 // use the following to avoid double rounding errors when operating 2097 // on mixed formats in rounding to nearest 2098 is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value 2099 } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) { 2100 // subtract 1 ulp from the significand 2101 res.w[0]--; 2102 if (res.w[0] == 0xffffffffffffffffull) 2103 res.w[1]--; 2104 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2105 if (eq_half_ulp) { 2106 is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value 2107 } else { 2108 is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value 2109 } 2110 } else { // if (eq_half_ulp && !(res.w[0] & 0x01)) 2111 // leave unchanged 2112 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2113 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value 2114 } 2115 // the result is always inexact, and never tiny 2116 // check for overflow for RN 2117 if (e3 > expmax) { 2118 if (rnd_mode == BID_ROUNDING_TO_NEAREST) { 2119 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 2120 res.w[0] = 0x0000000000000000ull; 2121 *pfpsf |= (BID_INEXACT_EXCEPTION | BID_OVERFLOW_EXCEPTION); 2122 } else { 2123 bid_rounding_correction (rnd_mode, 2124 is_inexact_lt_midpoint, 2125 is_inexact_gt_midpoint, 2126 is_midpoint_lt_even, 2127 is_midpoint_gt_even, e3, &res, 2128 pfpsf); 2129 
} 2130 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2131 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2132 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2133 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2134 BID_SWAP128 (res); 2135 BID_RETURN (res) 2136 } 2137 // set the inexact flag 2138 *pfpsf |= BID_INEXACT_EXCEPTION; 2139 if (rnd_mode != BID_ROUNDING_TO_NEAREST) { 2140 bid_rounding_correction (rnd_mode, 2141 is_inexact_lt_midpoint, 2142 is_inexact_gt_midpoint, 2143 is_midpoint_lt_even, 2144 is_midpoint_gt_even, e3, &res, pfpsf); 2145 } 2146 z_exp = res.w[1] & MASK_EXP; 2147 } else { // if C3 * 10^scale = 10^33 2148 e3 = (z_exp >> 49) - 6176; 2149 if (e3 > expmin) { 2150 // the result is exact if exp > expmin and C4 = d*10^(q4-1), 2151 // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact 2152 if (q4 == 1) { 2153 // if q4 = 1 the result is exact 2154 // result coefficient = 10^34 - C4 2155 res.w[1] = 0x0001ed09bead87c0ull; 2156 res.w[0] = 0x378d8e6400000000ull - C4.w[0]; 2157 z_exp = z_exp - EXP_P1; 2158 e3 = e3 - 1; 2159 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2160 } else { 2161 // if q4 > 1 then truncate C4 from q4 digits to 1 digit; 2162 // x = q4-1, 1 <= x <= 67 and check if this operation is exact 2163 if (q4 <= 18) { // 2 <= q4 <= 18 2164 bid_round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, 2165 &is_midpoint_lt_even, 2166 &is_midpoint_gt_even, 2167 &is_inexact_lt_midpoint, 2168 &is_inexact_gt_midpoint); 2169 } else if (q4 <= 38) { 2170 P128.w[1] = C4.w[1]; 2171 P128.w[0] = C4.w[0]; 2172 bid_round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, 2173 &is_midpoint_lt_even, 2174 &is_midpoint_gt_even, 2175 &is_inexact_lt_midpoint, 2176 &is_inexact_gt_midpoint); 2177 R64 = R128.w[0]; // one decimal digit 2178 } else if (q4 <= 57) { 2179 P192.w[2] = C4.w[2]; 2180 P192.w[1] = C4.w[1]; 2181 P192.w[0] = C4.w[0]; 2182 bid_round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, 2183 &is_midpoint_lt_even, 2184 
&is_midpoint_gt_even, 2185 &is_inexact_lt_midpoint, 2186 &is_inexact_gt_midpoint); 2187 R64 = R192.w[0]; // one decimal digit 2188 } else { // if (q4 <= 68) 2189 bid_round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, 2190 &is_midpoint_lt_even, 2191 &is_midpoint_gt_even, 2192 &is_inexact_lt_midpoint, 2193 &is_inexact_gt_midpoint); 2194 R64 = R256.w[0]; // one decimal digit 2195 } 2196 if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2197 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2198 // the result is exact: 10^34 - R64 2199 // incr_exp = 0 with certainty 2200 z_exp = z_exp - EXP_P1; 2201 e3 = e3 - 1; 2202 res.w[1] = 2203 z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull; 2204 res.w[0] = 0x378d8e6400000000ull - R64; 2205 } else { 2206 // We want R64 to be the top digit of C4, but we actually 2207 // obtained (C4 * 10^(-q4+1))RN; a correction may be needed, 2208 // because the top digit is (C4 * 10^(-q4+1))RZ 2209 // however, if incr_exp = 1 then R64 = 10 with certainty 2210 if (incr_exp) { 2211 R64 = 10; 2212 } 2213 // the result is inexact as C4 has more than 1 significant digit 2214 // and C3 * 10^scale = 10^33 2215 // example of case that is treated here: 2216 // 100...0 * 10^e3 - 0.41 * 10^e3 = 2217 // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1) 2218 // note that (e3 > expmin} 2219 // in order to round, subtract R64 from 10^34 and then compare 2220 // C4 - R64 * 10^(q4-1) with 1/2 ulp 2221 // calculate 10^34 - R64 2222 res.w[1] = 0x0001ed09bead87c0ull; 2223 res.w[0] = 0x378d8e6400000000ull - R64; 2224 z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand 2225 // calculate C4 - R64 * 10^(q4-1); this is a rare case and 2226 // R64 is small, 1 <= R64 <= 9 2227 e3 = e3 - 1; 2228 if (is_inexact_lt_midpoint) { 2229 is_inexact_lt_midpoint = 0; 2230 is_inexact_gt_midpoint = 1; 2231 } else if (is_inexact_gt_midpoint) { 2232 is_inexact_gt_midpoint = 0; 2233 is_inexact_lt_midpoint = 1; 2234 } else if (is_midpoint_lt_even) { 2235 
is_midpoint_lt_even = 0; 2236 is_midpoint_gt_even = 1; 2237 } else if (is_midpoint_gt_even) { 2238 is_midpoint_gt_even = 0; 2239 is_midpoint_lt_even = 1; 2240 } else { 2241 ; 2242 } 2243 // the result is always inexact, and never tiny 2244 // check for overflow for RN 2245 if (e3 > expmax) { 2246 if (rnd_mode == BID_ROUNDING_TO_NEAREST) { 2247 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 2248 res.w[0] = 0x0000000000000000ull; 2249 *pfpsf |= (BID_INEXACT_EXCEPTION | BID_OVERFLOW_EXCEPTION); 2250 } else { 2251 bid_rounding_correction (rnd_mode, 2252 is_inexact_lt_midpoint, 2253 is_inexact_gt_midpoint, 2254 is_midpoint_lt_even, 2255 is_midpoint_gt_even, e3, &res, 2256 pfpsf); 2257 } 2258 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2259 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2260 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2261 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2262 BID_SWAP128 (res); 2263 BID_RETURN (res) 2264 } 2265 // set the inexact flag 2266 *pfpsf |= BID_INEXACT_EXCEPTION; 2267 res.w[1] = 2268 z_sign | ((BID_UINT64) (e3 + 6176) << 49) | res.w[1]; 2269 if (rnd_mode != BID_ROUNDING_TO_NEAREST) { 2270 bid_rounding_correction (rnd_mode, 2271 is_inexact_lt_midpoint, 2272 is_inexact_gt_midpoint, 2273 is_midpoint_lt_even, 2274 is_midpoint_gt_even, e3, &res, 2275 pfpsf); 2276 } 2277 z_exp = res.w[1] & MASK_EXP; 2278 } // end result is inexact 2279 } // end q4 > 1 2280 } else { // if (e3 = emin) 2281 // if e3 = expmin the result is also tiny (the condition for 2282 // tininess is C4 > 050...0 [q4 digits] which is met because 2283 // the msd of C4 is not zero) 2284 // the result is tiny and inexact in all rounding modes; 2285 // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp, 2286 // gt_half_ulp to calculate) 2287 // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged 2288 2289 // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp 2290 if (gt_half_ulp) { // res = 10^33 - 1 2291 res.w[1] = 
0x0000314dc6448d93ull; 2292 res.w[0] = 0x38c15b09ffffffffull; 2293 } else { 2294 res.w[1] = 0x0000314dc6448d93ull; 2295 res.w[0] = 0x38c15b0a00000000ull; 2296 } 2297 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2298 *pfpsf |= BID_UNDERFLOW_EXCEPTION; // inexact is set later 2299 2300 if (eq_half_ulp) { 2301 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value 2302 } else if (lt_half_ulp) { 2303 is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value 2304 } else { // if (gt_half_ulp) 2305 is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value 2306 } 2307 2308 if (rnd_mode != BID_ROUNDING_TO_NEAREST) { 2309 bid_rounding_correction (rnd_mode, 2310 is_inexact_lt_midpoint, 2311 is_inexact_gt_midpoint, 2312 is_midpoint_lt_even, 2313 is_midpoint_gt_even, e3, &res, 2314 pfpsf); 2315 z_exp = res.w[1] & MASK_EXP; 2316 } 2317 } // end e3 = emin 2318 // set the inexact flag (if the result was not exact) 2319 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 2320 is_midpoint_lt_even || is_midpoint_gt_even) 2321 *pfpsf |= BID_INEXACT_EXCEPTION; 2322 } // end 10^33 2323 } // end if (p_sign != z_sign) 2324 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; 2325 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2326 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2327 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2328 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2329 BID_SWAP128 (res); 2330 BID_RETURN (res) 2331 2332 } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) 2333 (q3 <= delta && delta + q4 <= p34) || // Case (3) 2334 (delta < q3 && p34 < delta + q4) || // Case (4) 2335 (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5) 2336 (delta + q4 < q3)) && // Case (6) 2337 !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6) 2338 2339 // the result has the sign of z 2340 2341 if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) 2342 (delta < q3 && 
p34 < delta + q4)) { // Case (4) 2343 // round first the sum x * y + z with unbounded exponent 2344 // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1, 2345 // 1 <= scale <= 33 2346 // calculate res = C3 * 10^scale 2347 scale = p34 - q3; 2348 x0 = delta + q4 - p34; 2349 } else if (delta + q4 < q3) { // Case (6) 2350 // make Case (6) look like Case (3) or Case (5) with scale = 0 2351 // by scaling up C4 by 10^(q3 - delta - q4) 2352 scale = q3 - delta - q4; // 1 <= scale <= 33 2353 if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits 2354 if (scale <= 19) { // 10^scale fits in 64 bits 2355 // 64 x 64 C4.w[0] * bid_ten2k64[scale] 2356 __mul_64x64_to_128MACH (P128, C4.w[0], bid_ten2k64[scale]); 2357 } else { // 10^scale fits in 128 bits 2358 // 64 x 128 C4.w[0] * bid_ten2k128[scale - 20] 2359 __mul_128x64_to_128 (P128, C4.w[0], bid_ten2k128[scale - 20]); 2360 } 2361 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits 2362 // 64 x 128 bid_ten2k64[scale] * C4 2363 __mul_128x64_to_128 (P128, bid_ten2k64[scale], C4); 2364 } 2365 C4.w[0] = P128.w[0]; 2366 C4.w[1] = P128.w[1]; 2367 // e4 does not need adjustment, as it is not used from this point on 2368 scale = 0; 2369 x0 = 0; 2370 // now Case (6) looks like Case (3) or Case (5) with scale = 0 2371 } else { // if Case (3) or Case (5) 2372 // Note: Case (3) is similar to Case (2), but scale differs and the 2373 // result is exact, unless it is tiny (so x0 = 0 when calculating the 2374 // result with unbounded exponent) 2375 2376 // calculate first the sum x * y + z with unbounded exponent (exact) 2377 // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1, 2378 // 1 <= scale <= 33 2379 // calculate res = C3 * 10^scale 2380 scale = delta + q4 - q3; 2381 x0 = 0; 2382 // Note: the comments which follow refer [mainly] to Case (2)] 2383 } 2384 2385 case2_repeat: 2386 if (scale == 0) { // this could happen e.g. 
if we return to case2_repeat 2387 // or in Case (4) 2388 res.w[1] = C3.w[1]; 2389 res.w[0] = C3.w[0]; 2390 } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits 2391 if (scale <= 19) { // 10^scale fits in 64 bits 2392 // 64 x 64 C3.w[0] * bid_ten2k64[scale] 2393 __mul_64x64_to_128MACH (res, C3.w[0], bid_ten2k64[scale]); 2394 } else { // 10^scale fits in 128 bits 2395 // 64 x 128 C3.w[0] * bid_ten2k128[scale - 20] 2396 __mul_128x64_to_128 (res, C3.w[0], bid_ten2k128[scale - 20]); 2397 } 2398 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits 2399 // 64 x 128 bid_ten2k64[scale] * C3 2400 __mul_128x64_to_128 (res, bid_ten2k64[scale], C3); 2401 } 2402 // e3 is already calculated 2403 e3 = e3 - scale; 2404 // now res = C3 * 10^scale and e3 = e3 - scale 2405 // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat 2406 // because the result was too small 2407 2408 // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34, 2409 // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67) 2410 // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of 2411 // the rounding fits in 128 bits!) 
2412 // x0 = delta + q4 - p34 (calculated before reaching case2_repeat) 2413 // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34 2414 if (x0 == 0) { // this could happen only if we return to case2_repeat, or 2415 // for Case (3) or Case (6) 2416 R128.w[1] = C4.w[1]; 2417 R128.w[0] = C4.w[0]; 2418 } else if (q4 <= 18) { 2419 // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17 2420 bid_round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp, 2421 &is_midpoint_lt_even, &is_midpoint_gt_even, 2422 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 2423 if (incr_exp) { 2424 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 2425 R64 = bid_ten2k64[q4 - x0]; 2426 } 2427 R128.w[1] = 0; 2428 R128.w[0] = R64; 2429 } else if (q4 <= 38) { 2430 // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37 2431 P128.w[1] = C4.w[1]; 2432 P128.w[0] = C4.w[0]; 2433 bid_round128_19_38 (q4, x0, P128, &R128, &incr_exp, 2434 &is_midpoint_lt_even, &is_midpoint_gt_even, 2435 &is_inexact_lt_midpoint, 2436 &is_inexact_gt_midpoint); 2437 if (incr_exp) { 2438 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 2439 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 2440 R128.w[0] = bid_ten2k64[q4 - x0]; 2441 // R128.w[1] stays 0 2442 } else { // 20 <= q4 - x0 <= 37 2443 R128.w[0] = bid_ten2k128[q4 - x0 - 20].w[0]; 2444 R128.w[1] = bid_ten2k128[q4 - x0 - 20].w[1]; 2445 } 2446 } 2447 } else if (q4 <= 57) { 2448 // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56 2449 P192.w[2] = C4.w[2]; 2450 P192.w[1] = C4.w[1]; 2451 P192.w[0] = C4.w[0]; 2452 bid_round192_39_57 (q4, x0, P192, &R192, &incr_exp, 2453 &is_midpoint_lt_even, &is_midpoint_gt_even, 2454 &is_inexact_lt_midpoint, 2455 &is_inexact_gt_midpoint); 2456 // R192.w[2] is always 0 2457 if (incr_exp) { 2458 // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52 2459 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 2460 R192.w[0] = bid_ten2k64[q4 - x0]; 2461 // R192.w[1] stays 0 2462 // R192.w[2] 
stays 0 2463 } else { // 20 <= q4 - x0 <= 33 2464 R192.w[0] = bid_ten2k128[q4 - x0 - 20].w[0]; 2465 R192.w[1] = bid_ten2k128[q4 - x0 - 20].w[1]; 2466 // R192.w[2] stays 0 2467 } 2468 } 2469 R128.w[1] = R192.w[1]; 2470 R128.w[0] = R192.w[0]; 2471 } else { 2472 // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67 2473 bid_round256_58_76 (q4, x0, C4, &R256, &incr_exp, 2474 &is_midpoint_lt_even, &is_midpoint_gt_even, 2475 &is_inexact_lt_midpoint, 2476 &is_inexact_gt_midpoint); 2477 // R256.w[3] and R256.w[2] are always 0 2478 if (incr_exp) { 2479 // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43 2480 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 2481 R256.w[0] = bid_ten2k64[q4 - x0]; 2482 // R256.w[1] stays 0 2483 // R256.w[2] stays 0 2484 // R256.w[3] stays 0 2485 } else { // 20 <= q4 - x0 <= 33 2486 R256.w[0] = bid_ten2k128[q4 - x0 - 20].w[0]; 2487 R256.w[1] = bid_ten2k128[q4 - x0 - 20].w[1]; 2488 // R256.w[2] stays 0 2489 // R256.w[3] stays 0 2490 } 2491 } 2492 R128.w[1] = R256.w[1]; 2493 R128.w[0] = R256.w[0]; 2494 } 2495 // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4, 2496 // rounded to nearest, which were copied into R128 2497 if (z_sign == p_sign) { 2498 lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale 2499 // the sum can result in [up to] p34 or p34 + 1 digits 2500 res.w[0] = res.w[0] + R128.w[0]; 2501 res.w[1] = res.w[1] + R128.w[1]; 2502 if (res.w[0] < R128.w[0]) 2503 res.w[1]++; // carry 2504 // if res > 10^34 - 1 need to increase x0 and decrease scale by 1 2505 if (res.w[1] > 0x0001ed09bead87c0ull || 2506 (res.w[1] == 0x0001ed09bead87c0ull && 2507 res.w[0] > 0x378d8e63ffffffffull)) { 2508 // avoid double rounding error 2509 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 2510 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 2511 is_midpoint_lt_even0 = is_midpoint_lt_even; 2512 is_midpoint_gt_even0 = is_midpoint_gt_even; 2513 is_inexact_lt_midpoint = 0; 2514 is_inexact_gt_midpoint = 0; 2515 
is_midpoint_lt_even = 0; 2516 is_midpoint_gt_even = 0; 2517 P128.w[1] = res.w[1]; 2518 P128.w[0] = res.w[0]; 2519 bid_round128_19_38 (35, 1, P128, &res, &incr_exp, 2520 &is_midpoint_lt_even, &is_midpoint_gt_even, 2521 &is_inexact_lt_midpoint, 2522 &is_inexact_gt_midpoint); 2523 // incr_exp is 0 with certainty in this case 2524 // avoid a double rounding error 2525 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 2526 is_midpoint_lt_even) { // double rounding error upward 2527 // res = res - 1 2528 res.w[0]--; 2529 if (res.w[0] == 0xffffffffffffffffull) 2530 res.w[1]--; 2531 // Note: a double rounding error upward is not possible; for this 2532 // the result after the first rounding would have to be 99...95 2533 // (35 digits in all), possibly followed by a number of zeros; this 2534 // not possible in Cases (2)-(6) or (15)-(17) which may get here 2535 is_midpoint_lt_even = 0; 2536 is_inexact_lt_midpoint = 1; 2537 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 2538 is_midpoint_gt_even) { // double rounding error downward 2539 // res = res + 1 2540 res.w[0]++; 2541 if (res.w[0] == 0) 2542 res.w[1]++; 2543 is_midpoint_gt_even = 0; 2544 is_inexact_gt_midpoint = 1; 2545 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2546 !is_inexact_lt_midpoint 2547 && !is_inexact_gt_midpoint) { 2548 // if this second rounding was exact the result may still be 2549 // inexact because of the first rounding 2550 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 2551 is_inexact_gt_midpoint = 1; 2552 } 2553 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 2554 is_inexact_lt_midpoint = 1; 2555 } 2556 } else if (is_midpoint_gt_even && 2557 (is_inexact_gt_midpoint0 2558 || is_midpoint_lt_even0)) { 2559 // pulled up to a midpoint 2560 is_inexact_lt_midpoint = 1; 2561 is_inexact_gt_midpoint = 0; 2562 is_midpoint_lt_even = 0; 2563 is_midpoint_gt_even = 0; 2564 } else if (is_midpoint_lt_even && 2565 (is_inexact_lt_midpoint0 2566 || 
is_midpoint_gt_even0)) { 2567 // pulled down to a midpoint 2568 is_inexact_lt_midpoint = 0; 2569 is_inexact_gt_midpoint = 1; 2570 is_midpoint_lt_even = 0; 2571 is_midpoint_gt_even = 0; 2572 } else { 2573 ; 2574 } 2575 // adjust exponent 2576 e3 = e3 + 1; 2577 if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2578 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2579 if (is_midpoint_lt_even0 || is_midpoint_gt_even0 || 2580 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) { 2581 is_inexact_lt_midpoint = 1; 2582 } 2583 } 2584 } else { 2585 // this is the result rounded with unbounded exponent, unless a 2586 // correction is needed 2587 res.w[1] = res.w[1] & MASK_COEFF; 2588 if (lsb == 1) { 2589 if (is_midpoint_gt_even) { 2590 // res = res + 1 2591 is_midpoint_gt_even = 0; 2592 is_midpoint_lt_even = 1; 2593 res.w[0]++; 2594 if (res.w[0] == 0x0) 2595 res.w[1]++; 2596 // check for rounding overflow 2597 if (res.w[1] == 0x0001ed09bead87c0ull && 2598 res.w[0] == 0x378d8e6400000000ull) { 2599 // res = 10^34 => rounding overflow 2600 res.w[1] = 0x0000314dc6448d93ull; 2601 res.w[0] = 0x38c15b0a00000000ull; // 10^33 2602 e3++; 2603 } 2604 } else if (is_midpoint_lt_even) { 2605 // res = res - 1 2606 is_midpoint_lt_even = 0; 2607 is_midpoint_gt_even = 1; 2608 res.w[0]--; 2609 if (res.w[0] == 0xffffffffffffffffull) 2610 res.w[1]--; 2611 // if the result is pure zero, the sign depends on the rounding 2612 // mode (x*y and z had opposite signs) 2613 if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) { 2614 if (rnd_mode != BID_ROUNDING_DOWN) 2615 z_sign = 0x0000000000000000ull; 2616 else 2617 z_sign = 0x8000000000000000ull; 2618 // the exponent is max (e3, expmin) 2619 res.w[1] = 0x0; 2620 res.w[0] = 0x0; 2621 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2622 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2623 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2624 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2625 BID_SWAP128 (res); 2626 BID_RETURN (res) 2627 } 
2628 } else { 2629 ; 2630 } 2631 } 2632 } 2633 } else { // if (z_sign != p_sign) 2634 lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4 2635 // used to swap rounding indicators if p_sign != z_sign 2636 // the sum can result in [up to] p34 or p34 - 1 digits 2637 tmp64 = res.w[0]; 2638 res.w[0] = res.w[0] - R128.w[0]; 2639 res.w[1] = res.w[1] - R128.w[1]; 2640 if (res.w[0] > tmp64) 2641 res.w[1]--; // borrow 2642 // if res < 10^33 and exp > expmin need to decrease x0 and 2643 // increase scale by 1 2644 if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull || 2645 (res.w[1] == 0x0000314dc6448d93ull && 2646 res.w[0] < 0x38c15b0a00000000ull)) || 2647 (is_inexact_lt_midpoint 2648 && res.w[1] == 0x0000314dc6448d93ull 2649 && res.w[0] == 0x38c15b0a00000000ull)) 2650 && x0 >= 1) { 2651 x0 = x0 - 1; 2652 // first restore e3, otherwise it will be too small 2653 e3 = e3 + scale; 2654 scale = scale + 1; 2655 is_inexact_lt_midpoint = 0; 2656 is_inexact_gt_midpoint = 0; 2657 is_midpoint_lt_even = 0; 2658 is_midpoint_gt_even = 0; 2659 incr_exp = 0; 2660 goto case2_repeat; 2661 } 2662 // else this is the result rounded with unbounded exponent; 2663 // because the result has opposite sign to that of C4 which was 2664 // rounded, need to change the rounding indicators 2665 if (is_inexact_lt_midpoint) { 2666 is_inexact_lt_midpoint = 0; 2667 is_inexact_gt_midpoint = 1; 2668 } else if (is_inexact_gt_midpoint) { 2669 is_inexact_gt_midpoint = 0; 2670 is_inexact_lt_midpoint = 1; 2671 } else if (lsb == 0) { 2672 if (is_midpoint_lt_even) { 2673 is_midpoint_lt_even = 0; 2674 is_midpoint_gt_even = 1; 2675 } else if (is_midpoint_gt_even) { 2676 is_midpoint_gt_even = 0; 2677 is_midpoint_lt_even = 1; 2678 } else { 2679 ; 2680 } 2681 } else if (lsb == 1) { 2682 if (is_midpoint_lt_even) { 2683 // res = res + 1 2684 res.w[0]++; 2685 if (res.w[0] == 0x0) 2686 res.w[1]++; 2687 // check for rounding overflow 2688 if (res.w[1] == 0x0001ed09bead87c0ull && 2689 res.w[0] == 
0x378d8e6400000000ull) { 2690 // res = 10^34 => rounding overflow 2691 res.w[1] = 0x0000314dc6448d93ull; 2692 res.w[0] = 0x38c15b0a00000000ull; // 10^33 2693 e3++; 2694 } 2695 } else if (is_midpoint_gt_even) { 2696 // res = res - 1 2697 res.w[0]--; 2698 if (res.w[0] == 0xffffffffffffffffull) 2699 res.w[1]--; 2700 // if the result is pure zero, the sign depends on the rounding 2701 // mode (x*y and z had opposite signs) 2702 if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) { 2703 if (rnd_mode != BID_ROUNDING_DOWN) 2704 z_sign = 0x0000000000000000ull; 2705 else 2706 z_sign = 0x8000000000000000ull; 2707 // the exponent is max (e3, expmin) 2708 res.w[1] = 0x0; 2709 res.w[0] = 0x0; 2710 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2711 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2712 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2713 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2714 BID_SWAP128 (res); 2715 BID_RETURN (res) 2716 } 2717 } else { 2718 ; 2719 } 2720 } else { 2721 ; 2722 } 2723 } 2724 // check for underflow 2725 if (e3 == expmin) { // and if significand < 10^33 => result is tiny 2726 if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull || 2727 ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && 2728 res.w[0] < 0x38c15b0a00000000ull)) { 2729 is_tiny = 1; 2730 } 2731#if !DECIMAL_TINY_DETECTION_AFTER_ROUNDING 2732 if (((res.w[1] & 0x7fffffffffffffffull) == 0x0000314dc6448d93ull) && 2733 (res.w[0] == 0x38c15b0a00000000ull) && // 10^33*10^-6176 2734 (z_sign != p_sign)) is_tiny = 1; 2735#endif 2736 } else if (e3 < expmin) { 2737 // the result is tiny, so we must truncate more of res 2738 is_tiny = 1; 2739 x0 = expmin - e3; 2740 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 2741 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 2742 is_midpoint_lt_even0 = is_midpoint_lt_even; 2743 is_midpoint_gt_even0 = is_midpoint_gt_even; 2744 is_inexact_lt_midpoint = 0; 2745 is_inexact_gt_midpoint = 0; 2746 is_midpoint_lt_even = 0; 2747 
is_midpoint_gt_even = 0; 2748 // determine the number of decimal digits in res 2749 if (res.w[1] == 0x0) { 2750 // between 1 and 19 digits 2751 for (ind = 1; ind <= 19; ind++) { 2752 if (res.w[0] < bid_ten2k64[ind]) { 2753 break; 2754 } 2755 } 2756 // ind digits 2757 } else if (res.w[1] < bid_ten2k128[0].w[1] || 2758 (res.w[1] == bid_ten2k128[0].w[1] 2759 && res.w[0] < bid_ten2k128[0].w[0])) { 2760 // 20 digits 2761 ind = 20; 2762 } else { // between 21 and 38 digits 2763 for (ind = 1; ind <= 18; ind++) { 2764 if (res.w[1] < bid_ten2k128[ind].w[1] || 2765 (res.w[1] == bid_ten2k128[ind].w[1] && 2766 res.w[0] < bid_ten2k128[ind].w[0])) { 2767 break; 2768 } 2769 } 2770 // ind + 20 digits 2771 ind = ind + 20; 2772 } 2773 2774 // at this point ind >= x0; because delta >= 2 on this path, the case 2775 // ind = x0 can occur only in Case (2) or case (3), when C3 has one 2776 // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin), 2777 // the signs of x * y and z are opposite, and through cancellation 2778 // the most significant decimal digit in res has the weight 2779 // 10^(emin-1); however, it is clear that in this case the most 2780 // significant digit is 9, so the result before rounding is 2781 // 0.9... * 10^emin 2782 // Otherwise, ind > x0 because there are non-zero decimal digits in the 2783 // result with weight of at least 10^emin, and correction for underflow 2784 // can be carried out using the round*_*_2_* () routines 2785 if (x0 == ind) { // the result before rounding is 0.9... 
* 10^emin 2786 res.w[1] = 0x0; 2787 res.w[0] = 0x1; 2788 is_inexact_gt_midpoint = 1; 2789 } else if (ind <= 18) { // check that 2 <= ind 2790 // 2 <= ind <= 18, 1 <= x0 <= 17 2791 bid_round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, 2792 &is_midpoint_lt_even, &is_midpoint_gt_even, 2793 &is_inexact_lt_midpoint, 2794 &is_inexact_gt_midpoint); 2795 if (incr_exp) { 2796 // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17 2797 R64 = bid_ten2k64[ind - x0]; 2798 } 2799 res.w[1] = 0; 2800 res.w[0] = R64; 2801 } else if (ind <= 38) { 2802 // 19 <= ind <= 38 2803 P128.w[1] = res.w[1]; 2804 P128.w[0] = res.w[0]; 2805 bid_round128_19_38 (ind, x0, P128, &res, &incr_exp, 2806 &is_midpoint_lt_even, &is_midpoint_gt_even, 2807 &is_inexact_lt_midpoint, 2808 &is_inexact_gt_midpoint); 2809 if (incr_exp) { 2810 // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37 2811 if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19 2812 res.w[0] = bid_ten2k64[ind - x0]; 2813 // res.w[1] stays 0 2814 } else { // 20 <= ind - x0 <= 37 2815 res.w[0] = bid_ten2k128[ind - x0 - 20].w[0]; 2816 res.w[1] = bid_ten2k128[ind - x0 - 20].w[1]; 2817 } 2818 } 2819 } 2820 // avoid a double rounding error 2821 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 2822 is_midpoint_lt_even) { // double rounding error upward 2823 // res = res - 1 2824 res.w[0]--; 2825 if (res.w[0] == 0xffffffffffffffffull) 2826 res.w[1]--; 2827 // Note: a double rounding error upward is not possible; for this 2828 // the result after the first rounding would have to be 99...95 2829 // (35 digits in all), possibly followed by a number of zeros; this 2830 // not possible in Cases (2)-(6) which may get here 2831 is_midpoint_lt_even = 0; 2832 is_inexact_lt_midpoint = 1; 2833 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 2834 is_midpoint_gt_even) { // double rounding error downward 2835 // res = res + 1 2836 res.w[0]++; 2837 if (res.w[0] == 0) 2838 res.w[1]++; 2839 is_midpoint_gt_even = 0; 2840 
is_inexact_gt_midpoint = 1; 2841 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2842 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2843 // if this second rounding was exact the result may still be 2844 // inexact because of the first rounding 2845 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 2846 is_inexact_gt_midpoint = 1; 2847 } 2848 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 2849 is_inexact_lt_midpoint = 1; 2850 } 2851 } else if (is_midpoint_gt_even && 2852 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { 2853 // pulled up to a midpoint 2854 is_inexact_lt_midpoint = 1; 2855 is_inexact_gt_midpoint = 0; 2856 is_midpoint_lt_even = 0; 2857 is_midpoint_gt_even = 0; 2858 } else if (is_midpoint_lt_even && 2859 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { 2860 // pulled down to a midpoint 2861 is_inexact_lt_midpoint = 0; 2862 is_inexact_gt_midpoint = 1; 2863 is_midpoint_lt_even = 0; 2864 is_midpoint_gt_even = 0; 2865 } else { 2866 ; 2867 } 2868 // adjust exponent 2869 e3 = e3 + x0; 2870 if (!is_midpoint_lt_even && !is_midpoint_gt_even && 2871 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 2872 if (is_midpoint_lt_even0 || is_midpoint_gt_even0 || 2873 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) { 2874 is_inexact_lt_midpoint = 1; 2875 } 2876 } 2877 } else { 2878 ; // not underflow 2879 } 2880 // check for inexact result 2881 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 2882 is_midpoint_lt_even || is_midpoint_gt_even) { 2883 // set the inexact flag 2884 *pfpsf |= BID_INEXACT_EXCEPTION; 2885 if (is_tiny) 2886 *pfpsf |= BID_UNDERFLOW_EXCEPTION; 2887 } 2888 // now check for significand = 10^34 (may have resulted from going 2889 // back to case2_repeat) 2890 if (res.w[1] == 0x0001ed09bead87c0ull && 2891 res.w[0] == 0x378d8e6400000000ull) { // if res = 10^34 2892 res.w[1] = 0x0000314dc6448d93ull; // res = 10^33 2893 res.w[0] = 0x38c15b0a00000000ull; 2894 e3 = e3 + 1; 2895 } 2896 res.w[1] = z_sign | 
((BID_UINT64) (e3 + 6176) << 49) | res.w[1]; 2897 // check for overflow 2898 if (rnd_mode == BID_ROUNDING_TO_NEAREST && e3 > expmax) { 2899 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf 2900 res.w[0] = 0x0000000000000000ull; 2901 *pfpsf |= (BID_INEXACT_EXCEPTION | BID_OVERFLOW_EXCEPTION); 2902 } 2903 if (rnd_mode != BID_ROUNDING_TO_NEAREST) { 2904 bid_rounding_correction (rnd_mode, 2905 is_inexact_lt_midpoint, 2906 is_inexact_gt_midpoint, 2907 is_midpoint_lt_even, is_midpoint_gt_even, 2908 e3, &res, pfpsf); 2909 } 2910 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2911 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2912 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2913 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2914 BID_SWAP128 (res); 2915 BID_RETURN (res) 2916 2917 } else { 2918 2919 // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and 2920 // the signs of x*y and z are opposite; in these cases massive 2921 // cancellation can occur, so it is better to scale either C3 or C4 and 2922 // to perform the subtraction before rounding; rounding is performed 2923 // next, depending on the number of decimal digits in the result and on 2924 // the exponent value 2925 // Note: overlow is not possible in this case 2926 // this is similar to Cases (15), (16), and (17) 2927 2928 if (delta + q4 < q3) { // from Case (6) 2929 // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and 2930 // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign) 2931 // and call bid_add_and_round; delta stays positive 2932 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3 2933 P128.w[1] = C3.w[1]; 2934 P128.w[0] = C3.w[0]; 2935 C3.w[1] = C4.w[1]; 2936 C3.w[0] = C4.w[0]; 2937 C4.w[1] = P128.w[1]; 2938 C4.w[0] = P128.w[0]; 2939 ind = q3; 2940 q3 = q4; 2941 q4 = ind; 2942 ind = e3; 2943 e3 = e4; 2944 e4 = ind; 2945 tmp_sign = z_sign; 2946 z_sign = p_sign; 2947 p_sign = tmp_sign; 2948 } else { // from Cases (2), 
(3), (4), (5) 2949 // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be 2950 // scaled up by q4 + delta - q3; this is the same as in Cases (15), 2951 // (16), and (17) if we just change the sign of delta 2952 delta = -delta; 2953 } 2954 bid_add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4, 2955 rnd_mode, &is_midpoint_lt_even, 2956 &is_midpoint_gt_even, &is_inexact_lt_midpoint, 2957 &is_inexact_gt_midpoint, pfpsf, &res); 2958 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 2959 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 2960 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 2961 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 2962 BID_SWAP128 (res); 2963 BID_RETURN (res) 2964 2965 } 2966 2967 } else { // if delta < 0 2968 2969 delta = -delta; 2970 2971 if (p34 < q4 && q4 <= delta) { // Case (7) 2972 2973 // truncate C4 to p34 digits into res 2974 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68 2975 x0 = q4 - p34; 2976 if (q4 <= 38) { 2977 P128.w[1] = C4.w[1]; 2978 P128.w[0] = C4.w[0]; 2979 bid_round128_19_38 (q4, x0, P128, &res, &incr_exp, 2980 &is_midpoint_lt_even, &is_midpoint_gt_even, 2981 &is_inexact_lt_midpoint, 2982 &is_inexact_gt_midpoint); 2983 } else if (q4 <= 57) { // 35 <= q4 <= 57 2984 P192.w[2] = C4.w[2]; 2985 P192.w[1] = C4.w[1]; 2986 P192.w[0] = C4.w[0]; 2987 bid_round192_39_57 (q4, x0, P192, &R192, &incr_exp, 2988 &is_midpoint_lt_even, &is_midpoint_gt_even, 2989 &is_inexact_lt_midpoint, 2990 &is_inexact_gt_midpoint); 2991 res.w[0] = R192.w[0]; 2992 res.w[1] = R192.w[1]; 2993 } else { // if (q4 <= 68) 2994 bid_round256_58_76 (q4, x0, C4, &R256, &incr_exp, 2995 &is_midpoint_lt_even, &is_midpoint_gt_even, 2996 &is_inexact_lt_midpoint, 2997 &is_inexact_gt_midpoint); 2998 res.w[0] = R256.w[0]; 2999 res.w[1] = R256.w[1]; 3000 } 3001 e4 = e4 + x0; 3002 if (incr_exp) { 3003 e4 = e4 + 1; 3004 } 3005 if (!is_midpoint_lt_even && !is_midpoint_gt_even && 3006 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 3007 // 
if C4 rounded to p34 digits is exact then the result is inexact, 3008 // in a way that depends on the signs of x * y and z 3009 if (p_sign == z_sign) { 3010 is_inexact_lt_midpoint = 1; 3011 } else { // if (p_sign != z_sign) 3012 if (res.w[1] != 0x0000314dc6448d93ull || 3013 res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33 3014 is_inexact_gt_midpoint = 1; 3015 } else { // res = 10^33 and exact is a special case 3016 // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1 3017 // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1 3018 // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1 3019 // Note: ulp is really ulp/10 (after borrow which propagates to msd) 3020 if (delta > p34 + 1) { // C3 < 1/2 3021 // res = 10^33, unchanged 3022 is_inexact_gt_midpoint = 1; 3023 } else { // if (delta == p34 + 1) 3024 if (q3 <= 19) { 3025 if (C3.w[0] < bid_midpoint64[q3 - 1]) { // C3 < 1/2 ulp 3026 // res = 10^33, unchanged 3027 is_inexact_gt_midpoint = 1; 3028 } else if (C3.w[0] == bid_midpoint64[q3 - 1]) { // C3 = 1/2 ulp 3029 // res = 10^33, unchanged 3030 is_midpoint_lt_even = 1; 3031 } else { // if (C3.w[0] > bid_midpoint64[q3-1]), C3 > 1/2 ulp 3032 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 3033 res.w[0] = 0x378d8e63ffffffffull; 3034 e4 = e4 - 1; 3035 is_inexact_lt_midpoint = 1; 3036 } 3037 } else { // if (20 <= q3 <=34) 3038 if (C3.w[1] < bid_midpoint128[q3 - 20].w[1] || 3039 (C3.w[1] == bid_midpoint128[q3 - 20].w[1] && 3040 C3.w[0] < bid_midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp 3041 // res = 10^33, unchanged 3042 is_inexact_gt_midpoint = 1; 3043 } else if (C3.w[1] == bid_midpoint128[q3 - 20].w[1] && 3044 C3.w[0] == bid_midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp 3045 // res = 10^33, unchanged 3046 is_midpoint_lt_even = 1; 3047 } else { // if (C3 > bid_midpoint128[q3-20]), C3 > 1/2 ulp 3048 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 3049 res.w[0] = 0x378d8e63ffffffffull; 3050 e4 = e4 - 1; 3051 is_inexact_lt_midpoint = 
1; 3052 } 3053 } 3054 } 3055 } 3056 } 3057 } else if (is_midpoint_lt_even) { 3058 if (z_sign != p_sign) { 3059 // needs correction: res = res - 1 3060 res.w[0] = res.w[0] - 1; 3061 if (res.w[0] == 0xffffffffffffffffull) 3062 res.w[1]--; 3063 // if it is (10^33-1)*10^e4 then the corect result is 3064 // (10^34-1)*10(e4-1) 3065 if (res.w[1] == 0x0000314dc6448d93ull && 3066 res.w[0] == 0x38c15b09ffffffffull) { 3067 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 3068 res.w[0] = 0x378d8e63ffffffffull; 3069 e4 = e4 - 1; 3070 } 3071 is_midpoint_lt_even = 0; 3072 is_inexact_lt_midpoint = 1; 3073 } else { // if (z_sign == p_sign) 3074 is_midpoint_lt_even = 0; 3075 is_inexact_gt_midpoint = 1; 3076 } 3077 } else if (is_midpoint_gt_even) { 3078 if (z_sign == p_sign) { 3079 // needs correction: res = res + 1 (cannot cross in the next binade) 3080 res.w[0] = res.w[0] + 1; 3081 if (res.w[0] == 0x0000000000000000ull) 3082 res.w[1]++; 3083 is_midpoint_gt_even = 0; 3084 is_inexact_gt_midpoint = 1; 3085 } else { // if (z_sign != p_sign) 3086 is_midpoint_gt_even = 0; 3087 is_inexact_lt_midpoint = 1; 3088 } 3089 } else { 3090 ; // the rounded result is already correct 3091 } 3092 // check for overflow 3093 if (rnd_mode == BID_ROUNDING_TO_NEAREST && e4 > expmax) { 3094 res.w[1] = p_sign | 0x7800000000000000ull; 3095 res.w[0] = 0x0000000000000000ull; 3096 *pfpsf |= (BID_OVERFLOW_EXCEPTION | BID_INEXACT_EXCEPTION); 3097 } else { // no overflow or not RN 3098 p_exp = ((BID_UINT64) (e4 + 6176) << 49); 3099 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; 3100 } 3101 if (rnd_mode != BID_ROUNDING_TO_NEAREST) { 3102 bid_rounding_correction (rnd_mode, 3103 is_inexact_lt_midpoint, 3104 is_inexact_gt_midpoint, 3105 is_midpoint_lt_even, is_midpoint_gt_even, 3106 e4, &res, pfpsf); 3107 } 3108 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 3109 is_midpoint_lt_even || is_midpoint_gt_even) { 3110 // set the inexact flag 3111 *pfpsf |= BID_INEXACT_EXCEPTION; 3112 } 3113 
*ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3114 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3115 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 3116 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3117 BID_SWAP128 (res); 3118 BID_RETURN (res) 3119 3120 } else if ((q4 <= p34 && p34 <= delta) || // Case (8) 3121 (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9) 3122 (q4 <= delta && delta + q3 <= p34) || // Case (10) 3123 (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13) 3124 (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14) 3125 (delta + q3 < q4 && q4 <= p34)) { // Case (18) 3126 3127 // Case (8) is similar to Case (1), with C3 and C4 swapped 3128 // Case (9) is similar to Case (2), with C3 and C4 swapped 3129 // Case (10) is similar to Case (3), with C3 and C4 swapped 3130 // Case (13) is similar to Case (4), with C3 and C4 swapped 3131 // Case (14) is similar to Case (5), with C3 and C4 swapped 3132 // Case (18) is similar to Case (6), with C3 and C4 swapped 3133 3134 // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp) 3135 // and go back to delta_ge_zero 3136 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3 3137 P128.w[1] = C3.w[1]; 3138 P128.w[0] = C3.w[0]; 3139 C3.w[1] = C4.w[1]; 3140 C3.w[0] = C4.w[0]; 3141 C4.w[1] = P128.w[1]; 3142 C4.w[0] = P128.w[0]; 3143 ind = q3; 3144 q3 = q4; 3145 q4 = ind; 3146 ind = e3; 3147 e3 = e4; 3148 e4 = ind; 3149 tmp_sign = z_sign; 3150 z_sign = p_sign; 3151 p_sign = tmp_sign; 3152 tmp.ui64 = z_exp; 3153 z_exp = p_exp; 3154 p_exp = tmp.ui64; 3155 goto delta_ge_zero; 3156 3157 } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11) 3158 (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12) 3159 3160 // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3, 3161 // 1 <= x0 <= q3 - 1 <= p34 - 1 3162 x0 = e4 - e3; // or x0 = delta + q3 - q4 3163 if (q3 <= 18) { // 2 <= q3 <= 18 3164 
bid_round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp, 3165 &is_midpoint_lt_even, &is_midpoint_gt_even, 3166 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 3167 // C3.w[1] = 0; 3168 C3.w[0] = R64; 3169 } else if (q3 <= 38) { 3170 bid_round128_19_38 (q3, x0, C3, &R128, &incr_exp, 3171 &is_midpoint_lt_even, &is_midpoint_gt_even, 3172 &is_inexact_lt_midpoint, 3173 &is_inexact_gt_midpoint); 3174 C3.w[1] = R128.w[1]; 3175 C3.w[0] = R128.w[0]; 3176 } 3177 // the rounded result has q3 - x0 digits 3178 // we want the exponent to be e4, so if incr_exp = 1 then 3179 // multiply the rounded result by 10 - it will still fit in 113 bits 3180 if (incr_exp) { 3181 // 64 x 128 -> 128 3182 P128.w[1] = C3.w[1]; 3183 P128.w[0] = C3.w[0]; 3184 __mul_64x128_to_128 (C3, bid_ten2k64[1], P128); 3185 } 3186 e3 = e3 + x0; // this is e4 3187 // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3; 3188 // the result will have the sign of x * y; the exponent is e4 3189 R256.w[3] = 0; 3190 R256.w[2] = 0; 3191 R256.w[1] = C3.w[1]; 3192 R256.w[0] = C3.w[0]; 3193 if (p_sign == z_sign) { // R256 = C4 + R256 3194 bid_add256 (C4, R256, &R256); 3195 } else { // if (p_sign != z_sign) { // R256 = C4 - R256 3196 bid_sub256 (C4, R256, &R256); // the result cannot be pure zero 3197 // because the result has opposite sign to that of R256 which was 3198 // rounded, need to change the rounding indicators 3199 lsb = C4.w[0] & 0x01; 3200 if (is_inexact_lt_midpoint) { 3201 is_inexact_lt_midpoint = 0; 3202 is_inexact_gt_midpoint = 1; 3203 } else if (is_inexact_gt_midpoint) { 3204 is_inexact_gt_midpoint = 0; 3205 is_inexact_lt_midpoint = 1; 3206 } else if (lsb == 0) { 3207 if (is_midpoint_lt_even) { 3208 is_midpoint_lt_even = 0; 3209 is_midpoint_gt_even = 1; 3210 } else if (is_midpoint_gt_even) { 3211 is_midpoint_gt_even = 0; 3212 is_midpoint_lt_even = 1; 3213 } else { 3214 ; 3215 } 3216 } else if (lsb == 1) { 3217 if (is_midpoint_lt_even) { 3218 // res = res + 1 3219 R256.w[0]++; 3220 if 
(R256.w[0] == 0x0) { 3221 R256.w[1]++; 3222 if (R256.w[1] == 0x0) { 3223 R256.w[2]++; 3224 if (R256.w[2] == 0x0) { 3225 R256.w[3]++; 3226 } 3227 } 3228 } 3229 // no check for rounding overflow - R256 was a difference 3230 } else if (is_midpoint_gt_even) { 3231 // res = res - 1 3232 R256.w[0]--; 3233 if (R256.w[0] == 0xffffffffffffffffull) { 3234 R256.w[1]--; 3235 if (R256.w[1] == 0xffffffffffffffffull) { 3236 R256.w[2]--; 3237 if (R256.w[2] == 0xffffffffffffffffull) { 3238 R256.w[3]--; 3239 } 3240 } 3241 } 3242 } else { 3243 ; 3244 } 3245 } else { 3246 ; 3247 } 3248 } 3249 // determine the number of decimal digits in R256 3250 ind = bid_bid_nr_digits256 (R256); // ind >= p34 3251 // if R256 is sum, then ind > p34; if R256 is a difference, then 3252 // ind >= p34; this means that we can calculate the result rounded to 3253 // the destination precision, with unbounded exponent, starting from R256 3254 // and using the indicators from the rounding of C3 to avoid a double 3255 // rounding error 3256 3257 if (ind < p34) { 3258 ; 3259 } else if (ind == p34) { 3260 // the result rounded to the destination precision with 3261 // unbounded exponent 3262 // is (-1)^p_sign * R256 * 10^e4 3263 res.w[1] = R256.w[1]; 3264 res.w[0] = R256.w[0]; 3265 } else { // if (ind > p34) 3266 // if more than P digits, round to nearest to P digits 3267 // round R256 to p34 digits 3268 x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68 3269 // save C3 rounding indicators to help avoid double rounding error 3270 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 3271 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 3272 is_midpoint_lt_even0 = is_midpoint_lt_even; 3273 is_midpoint_gt_even0 = is_midpoint_gt_even; 3274 // initialize rounding indicators 3275 is_inexact_lt_midpoint = 0; 3276 is_inexact_gt_midpoint = 0; 3277 is_midpoint_lt_even = 0; 3278 is_midpoint_gt_even = 0; 3279 // round to p34 digits; the result fits in 113 bits 3280 if (ind <= 38) { 3281 P128.w[1] = R256.w[1]; 3282 P128.w[0] 
= R256.w[0]; 3283 bid_round128_19_38 (ind, x0, P128, &R128, &incr_exp, 3284 &is_midpoint_lt_even, &is_midpoint_gt_even, 3285 &is_inexact_lt_midpoint, 3286 &is_inexact_gt_midpoint); 3287 } else if (ind <= 57) { 3288 P192.w[2] = R256.w[2]; 3289 P192.w[1] = R256.w[1]; 3290 P192.w[0] = R256.w[0]; 3291 bid_round192_39_57 (ind, x0, P192, &R192, &incr_exp, 3292 &is_midpoint_lt_even, &is_midpoint_gt_even, 3293 &is_inexact_lt_midpoint, 3294 &is_inexact_gt_midpoint); 3295 R128.w[1] = R192.w[1]; 3296 R128.w[0] = R192.w[0]; 3297 } else { // if (ind <= 68) 3298 bid_round256_58_76 (ind, x0, R256, &R256, &incr_exp, 3299 &is_midpoint_lt_even, &is_midpoint_gt_even, 3300 &is_inexact_lt_midpoint, 3301 &is_inexact_gt_midpoint); 3302 R128.w[1] = R256.w[1]; 3303 R128.w[0] = R256.w[0]; 3304 } 3305 // the rounded result has p34 = 34 digits 3306 e4 = e4 + x0 + incr_exp; 3307 3308 res.w[1] = R128.w[1]; 3309 res.w[0] = R128.w[0]; 3310 3311 // avoid a double rounding error 3312 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 3313 is_midpoint_lt_even) { // double rounding error upward 3314 // res = res - 1 3315 res.w[0]--; 3316 if (res.w[0] == 0xffffffffffffffffull) 3317 res.w[1]--; 3318 is_midpoint_lt_even = 0; 3319 is_inexact_lt_midpoint = 1; 3320 // Note: a double rounding error upward is not possible; for this 3321 // the result after the first rounding would have to be 99...95 3322 // (35 digits in all), possibly followed by a number of zeros; this 3323 // not possible in Cases (2)-(6) or (15)-(17) which may get here 3324 // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent 3325 if (res.w[1] == 0x0000314dc6448d93ull && 3326 res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1 3327 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 3328 res.w[0] = 0x378d8e63ffffffffull; 3329 e4--; 3330 } 3331 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 3332 is_midpoint_gt_even) { // double rounding error downward 3333 // res = res + 1 3334 res.w[0]++; 3335 if (res.w[0] == 
0) 3336 res.w[1]++; 3337 is_midpoint_gt_even = 0; 3338 is_inexact_gt_midpoint = 1; 3339 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 3340 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 3341 // if this second rounding was exact the result may still be 3342 // inexact because of the first rounding 3343 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 3344 is_inexact_gt_midpoint = 1; 3345 } 3346 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 3347 is_inexact_lt_midpoint = 1; 3348 } 3349 } else if (is_midpoint_gt_even && 3350 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { 3351 // pulled up to a midpoint 3352 is_inexact_lt_midpoint = 1; 3353 is_inexact_gt_midpoint = 0; 3354 is_midpoint_lt_even = 0; 3355 is_midpoint_gt_even = 0; 3356 } else if (is_midpoint_lt_even && 3357 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { 3358 // pulled down to a midpoint 3359 is_inexact_lt_midpoint = 0; 3360 is_inexact_gt_midpoint = 1; 3361 is_midpoint_lt_even = 0; 3362 is_midpoint_gt_even = 0; 3363 } else { 3364 ; 3365 } 3366 } 3367 3368 // determine tininess 3369 if (rnd_mode == BID_ROUNDING_TO_NEAREST) { 3370 if (e4 < expmin) { 3371 is_tiny = 1; // for other rounding modes apply correction 3372 } 3373 } else { 3374 // for RM, RP, RZ, RA apply correction in order to determine tininess 3375 // but do not save the result; apply the correction to 3376 // (-1)^p_sign * res * 10^0 3377 P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1]; 3378 P128.w[0] = res.w[0]; 3379 bid_rounding_correction (rnd_mode, 3380 is_inexact_lt_midpoint, 3381 is_inexact_gt_midpoint, 3382 is_midpoint_lt_even, is_midpoint_gt_even, 3383 0, &P128, pfpsf); 3384 scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1 3385 // the number of digits in the significand is p34 = 34 3386 if (e4 + scale < expmin) { 3387 is_tiny = 1; 3388 } 3389 } 3390 3391 // the result rounded to the destination precision with unbounded exponent 3392 // is (-1)^p_sign * res * 10^e4 3393 
res.w[1] = p_sign | ((BID_UINT64) (e4 + 6176) << 49) | res.w[1]; // RN 3394 // res.w[0] unchanged; 3395 // Note: res is correct only if expmin <= e4 <= expmax 3396 ind = p34; // the number of decimal digits in the signifcand of res 3397 3398 // at this point we have the result rounded with unbounded exponent in 3399 // res and we know its tininess: 3400 // res = (-1)^p_sign * significand * 10^e4, 3401 // where q (significand) = ind = p34 3402 // Note: res is correct only if expmin <= e4 <= expmax 3403 3404 // check for overflow if RN 3405 if (rnd_mode == BID_ROUNDING_TO_NEAREST 3406 && (ind + e4) > (p34 + expmax)) { 3407 res.w[1] = p_sign | 0x7800000000000000ull; 3408 res.w[0] = 0x0000000000000000ull; 3409 *pfpsf |= (BID_INEXACT_EXCEPTION | BID_OVERFLOW_EXCEPTION); 3410 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3411 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3412 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 3413 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3414 BID_SWAP128 (res); 3415 BID_RETURN (res) 3416 } // else not overflow or not RN, so continue 3417 3418 // from this point on this is similar to the last part of the computation 3419 // for Cases (15), (16), (17) 3420 3421 // if (e4 >= expmin) we have the result rounded with bounded exponent 3422 if (e4 < expmin) { 3423 x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res 3424 // where the result rounded [at most] once is 3425 // (-1)^p_sign * significand_res * 10^e4 3426 3427 // avoid double rounding error 3428 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 3429 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 3430 is_midpoint_lt_even0 = is_midpoint_lt_even; 3431 is_midpoint_gt_even0 = is_midpoint_gt_even; 3432 is_inexact_lt_midpoint = 0; 3433 is_inexact_gt_midpoint = 0; 3434 is_midpoint_lt_even = 0; 3435 is_midpoint_gt_even = 0; 3436 3437 if (x0 > ind) { 3438 // nothing is left of res when moving the decimal point left x0 digits 3439 is_inexact_lt_midpoint = 
1; 3440 res.w[1] = p_sign | 0x0000000000000000ull; 3441 res.w[0] = 0x0000000000000000ull; 3442 e4 = expmin; 3443 } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34 3444 // this is <, =, or > 1/2 ulp 3445 // compare the ind-digit value in the significand of res with 3446 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is 3447 // less than, equal to, or greater than 1/2 ulp (significand of res) 3448 R128.w[1] = res.w[1] & MASK_COEFF; 3449 R128.w[0] = res.w[0]; 3450 if (ind <= 19) { 3451 if (R128.w[0] < bid_midpoint64[ind - 1]) { // < 1/2 ulp 3452 lt_half_ulp = 1; 3453 is_inexact_lt_midpoint = 1; 3454 } else if (R128.w[0] == bid_midpoint64[ind - 1]) { // = 1/2 ulp 3455 eq_half_ulp = 1; 3456 is_midpoint_gt_even = 1; 3457 } else { // > 1/2 ulp 3458 gt_half_ulp = 1; 3459 is_inexact_gt_midpoint = 1; 3460 } 3461 } else { // if (ind <= 38) 3462 if (R128.w[1] < bid_midpoint128[ind - 20].w[1] || 3463 (R128.w[1] == bid_midpoint128[ind - 20].w[1] && 3464 R128.w[0] < bid_midpoint128[ind - 20].w[0])) { // < 1/2 ulp 3465 lt_half_ulp = 1; 3466 is_inexact_lt_midpoint = 1; 3467 } else if (R128.w[1] == bid_midpoint128[ind - 20].w[1] && 3468 R128.w[0] == bid_midpoint128[ind - 20].w[0]) { // = 1/2 ulp 3469 eq_half_ulp = 1; 3470 is_midpoint_gt_even = 1; 3471 } else { // > 1/2 ulp 3472 gt_half_ulp = 1; 3473 is_inexact_gt_midpoint = 1; 3474 } 3475 } 3476 if (lt_half_ulp || eq_half_ulp) { 3477 // res = +0.0 * 10^expmin 3478 res.w[1] = 0x0000000000000000ull; 3479 res.w[0] = 0x0000000000000000ull; 3480 } else { // if (gt_half_ulp) 3481 // res = +1 * 10^expmin 3482 res.w[1] = 0x0000000000000000ull; 3483 res.w[0] = 0x0000000000000001ull; 3484 } 3485 res.w[1] = p_sign | res.w[1]; 3486 e4 = expmin; 3487 } else { // if (1 <= x0 <= ind - 1 <= 33) 3488 // round the ind-digit result to ind - x0 digits 3489 3490 if (ind <= 18) { // 2 <= ind <= 18 3491 bid_round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, 3492 &is_midpoint_lt_even, &is_midpoint_gt_even, 3493 &is_inexact_lt_midpoint, 3494 
&is_inexact_gt_midpoint); 3495 res.w[1] = 0x0; 3496 res.w[0] = R64; 3497 } else if (ind <= 38) { 3498 P128.w[1] = res.w[1] & MASK_COEFF; 3499 P128.w[0] = res.w[0]; 3500 bid_round128_19_38 (ind, x0, P128, &res, &incr_exp, 3501 &is_midpoint_lt_even, &is_midpoint_gt_even, 3502 &is_inexact_lt_midpoint, 3503 &is_inexact_gt_midpoint); 3504 } 3505 e4 = e4 + x0; // expmin 3506 // we want the exponent to be expmin, so if incr_exp = 1 then 3507 // multiply the rounded result by 10 - it will still fit in 113 bits 3508 if (incr_exp) { 3509 // 64 x 128 -> 128 3510 P128.w[1] = res.w[1] & MASK_COEFF; 3511 P128.w[0] = res.w[0]; 3512 __mul_64x128_to_128 (res, bid_ten2k64[1], P128); 3513 } 3514 res.w[1] = 3515 p_sign | ((BID_UINT64) (e4 + 6176) << 49) | (res. 3516 w[1] & MASK_COEFF); 3517 // avoid a double rounding error 3518 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 3519 is_midpoint_lt_even) { // double rounding error upward 3520 // res = res - 1 3521 res.w[0]--; 3522 if (res.w[0] == 0xffffffffffffffffull) 3523 res.w[1]--; 3524 // Note: a double rounding error upward is not possible; for this 3525 // the result after the first rounding would have to be 99...95 3526 // (35 digits in all), possibly followed by a number of zeros; this 3527 // not possible in this underflow case 3528 is_midpoint_lt_even = 0; 3529 is_inexact_lt_midpoint = 1; 3530 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 3531 is_midpoint_gt_even) { // double rounding error downward 3532 // res = res + 1 3533 res.w[0]++; 3534 if (res.w[0] == 0) 3535 res.w[1]++; 3536 is_midpoint_gt_even = 0; 3537 is_inexact_gt_midpoint = 1; 3538 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 3539 !is_inexact_lt_midpoint 3540 && !is_inexact_gt_midpoint) { 3541 // if this second rounding was exact the result may still be 3542 // inexact because of the first rounding 3543 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 3544 is_inexact_gt_midpoint = 1; 3545 } 3546 if 
(is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 3547 is_inexact_lt_midpoint = 1; 3548 } 3549 } else if (is_midpoint_gt_even && 3550 (is_inexact_gt_midpoint0 3551 || is_midpoint_lt_even0)) { 3552 // pulled up to a midpoint 3553 is_inexact_lt_midpoint = 1; 3554 is_inexact_gt_midpoint = 0; 3555 is_midpoint_lt_even = 0; 3556 is_midpoint_gt_even = 0; 3557 } else if (is_midpoint_lt_even && 3558 (is_inexact_lt_midpoint0 3559 || is_midpoint_gt_even0)) { 3560 // pulled down to a midpoint 3561 is_inexact_lt_midpoint = 0; 3562 is_inexact_gt_midpoint = 1; 3563 is_midpoint_lt_even = 0; 3564 is_midpoint_gt_even = 0; 3565 } else { 3566 ; 3567 } 3568 } 3569 } 3570 // res contains the correct result 3571 // apply correction if not rounding to nearest 3572 if (rnd_mode != BID_ROUNDING_TO_NEAREST) { 3573 bid_rounding_correction (rnd_mode, 3574 is_inexact_lt_midpoint, 3575 is_inexact_gt_midpoint, 3576 is_midpoint_lt_even, is_midpoint_gt_even, 3577 e4, &res, pfpsf); 3578 } 3579#if !DECIMAL_TINY_DETECTION_AFTER_ROUNDING 3580 // correction needed for tininess detection before rounding 3581 if ((((res.w[1] & 0x7fffffffffffffffull) == 0x0000314dc6448d93ull) && 3582 // 10^33*10^-6176_high 3583 (res.w[0] == 0x38c15b0a00000000ull)) && // 10^33*10^-6176_low 3584 (((rnd_mode == BID_ROUNDING_TO_NEAREST || 3585 rnd_mode == BID_ROUNDING_TIES_AWAY) && 3586 (is_midpoint_lt_even || is_inexact_gt_midpoint)) || 3587 ((((rnd_mode == BID_ROUNDING_UP) && !(res.w[1] & MASK_SIGN)) || 3588 ((rnd_mode == BID_ROUNDING_DOWN) && (res.w[1] & MASK_SIGN))) 3589 && (is_midpoint_lt_even || is_midpoint_gt_even || 3590 is_inexact_lt_midpoint || is_inexact_gt_midpoint)))) { 3591 is_tiny = 1; 3592 } 3593#endif 3594 if (is_midpoint_lt_even || is_midpoint_gt_even || 3595 is_inexact_lt_midpoint || is_inexact_gt_midpoint) { 3596 // set the inexact flag 3597 *pfpsf |= BID_INEXACT_EXCEPTION; 3598 if (is_tiny) 3599 *pfpsf |= BID_UNDERFLOW_EXCEPTION; 3600 } 3601 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3602 
*ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3603 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 3604 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3605 BID_SWAP128 (res); 3606 BID_RETURN (res) 3607 3608 } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15) 3609 (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16) 3610 (delta + q3 <= p34 && p34 < q4)) { // Case (17) 3611 3612 // calculate first the result rounded to the destination precision, with 3613 // unbounded exponent 3614 3615 bid_add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4, 3616 rnd_mode, &is_midpoint_lt_even, 3617 &is_midpoint_gt_even, &is_inexact_lt_midpoint, 3618 &is_inexact_gt_midpoint, pfpsf, &res); 3619 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3620 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3621 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 3622 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3623 BID_SWAP128 (res); 3624 BID_RETURN (res) 3625 3626 } else { 3627 ; 3628 } 3629 3630 } // end if delta < 0 3631 3632 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; 3633 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; 3634 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; 3635 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; 3636 BID_SWAP128 (res); 3637 BID_RETURN (res) 3638 3639} 3640 3641 3642#if DECIMAL_CALL_BY_REFERENCE 3643void 3644bid128_fma (BID_UINT128 * pres, BID_UINT128 * px, BID_UINT128 * py, BID_UINT128 * pz 3645 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3646 _EXC_INFO_PARAM) { 3647 BID_UINT128 x = *px, y = *py, z = *pz; 3648#if !DECIMAL_GLOBAL_ROUNDING 3649 unsigned int rnd_mode = *prnd_mode; 3650#endif 3651#else 3652DFP_WRAPFN_DFP_DFP_DFP(128, bid128_fma, 128, 128, 128) 3653BID_UINT128 3654bid128_fma (BID_UINT128 x, BID_UINT128 y, BID_UINT128 z 3655 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3656 _EXC_INFO_PARAM) { 3657#endif 3658 int is_midpoint_lt_even, is_midpoint_gt_even, 3659 
is_inexact_lt_midpoint, is_inexact_gt_midpoint; 3660 BID_UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 3661 3662#if DECIMAL_CALL_BY_REFERENCE 3663 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3664 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, 3665 &res, &x, &y, &z 3666 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3667 _EXC_INFO_ARG); 3668#else 3669 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3670 &is_inexact_lt_midpoint, 3671 &is_inexact_gt_midpoint, x, y, 3672 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3673 _EXC_INFO_ARG); 3674#endif 3675 BID_RETURN (res); 3676} 3677 3678 3679#if DECIMAL_CALL_BY_REFERENCE 3680void 3681bid128ddd_fma (BID_UINT128 * pres, BID_UINT64 * px, BID_UINT64 * py, BID_UINT64 * pz 3682 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3683 _EXC_INFO_PARAM) { 3684 BID_UINT64 x = *px, y = *py, z = *pz; 3685#if !DECIMAL_GLOBAL_ROUNDING 3686 unsigned int rnd_mode = *prnd_mode; 3687#endif 3688#else 3689BID_UINT128 3690bid128ddd_fma (BID_UINT64 x, BID_UINT64 y, BID_UINT64 z 3691 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3692 _EXC_INFO_PARAM) { 3693#endif 3694 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, 3695 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 3696 BID_UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 3697 BID_UINT128 x1, y1, z1; 3698 3699#if DECIMAL_CALL_BY_REFERENCE 3700 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3701 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3702 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3703 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3704 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, 3705 &res, &x1, &y1, &z1 3706 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3707 _EXC_INFO_ARG); 3708#else 3709 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3710 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG 
_EXC_MASKS_ARG _EXC_INFO_ARG); 3711 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3712 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3713 &is_inexact_lt_midpoint, 3714 &is_inexact_gt_midpoint, x1, y1, 3715 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3716 _EXC_INFO_ARG); 3717#endif 3718 BID_RETURN (res); 3719} 3720 3721 3722#if DECIMAL_CALL_BY_REFERENCE 3723void 3724bid128ddq_fma (BID_UINT128 * pres, BID_UINT64 * px, BID_UINT64 * py, BID_UINT128 * pz 3725 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3726 _EXC_INFO_PARAM) { 3727 BID_UINT64 x = *px, y = *py; 3728 BID_UINT128 z = *pz; 3729#if !DECIMAL_GLOBAL_ROUNDING 3730 unsigned int rnd_mode = *prnd_mode; 3731#endif 3732#else 3733BID_UINT128 3734bid128ddq_fma (BID_UINT64 x, BID_UINT64 y, BID_UINT128 z 3735 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3736 _EXC_INFO_PARAM) { 3737#endif 3738 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, 3739 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 3740 BID_UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 3741 BID_UINT128 x1, y1; 3742 3743#if DECIMAL_CALL_BY_REFERENCE 3744 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3745 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3746 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3747 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, 3748 &res, &x1, &y1, &z 3749 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3750 _EXC_INFO_ARG); 3751#else 3752 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3753 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3754 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3755 &is_inexact_lt_midpoint, 3756 &is_inexact_gt_midpoint, x1, y1, 3757 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3758 _EXC_INFO_ARG); 3759#endif 3760 BID_RETURN (res); 3761} 3762 3763 3764#if DECIMAL_CALL_BY_REFERENCE 3765void 
3766bid128dqd_fma (BID_UINT128 * pres, BID_UINT64 * px, BID_UINT128 * py, BID_UINT64 * pz 3767 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3768 _EXC_INFO_PARAM) { 3769 BID_UINT64 x = *px, z = *pz; 3770#if !DECIMAL_GLOBAL_ROUNDING 3771 unsigned int rnd_mode = *prnd_mode; 3772#endif 3773#else 3774BID_UINT128 3775bid128dqd_fma (BID_UINT64 x, BID_UINT128 y, BID_UINT64 z 3776 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3777 _EXC_INFO_PARAM) { 3778#endif 3779 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, 3780 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 3781 BID_UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 3782 BID_UINT128 x1, z1; 3783 3784#if DECIMAL_CALL_BY_REFERENCE 3785 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3786 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3787 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3788 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, 3789 &res, &x1, py, &z1 3790 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3791 _EXC_INFO_ARG); 3792#else 3793 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3794 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3795 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3796 &is_inexact_lt_midpoint, 3797 &is_inexact_gt_midpoint, x1, y, 3798 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3799 _EXC_INFO_ARG); 3800#endif 3801 BID_RETURN (res); 3802} 3803 3804 3805#if DECIMAL_CALL_BY_REFERENCE 3806void 3807bid128dqq_fma (BID_UINT128 * pres, BID_UINT64 * px, BID_UINT128 * py, BID_UINT128 * pz 3808 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3809 _EXC_INFO_PARAM) { 3810 BID_UINT64 x = *px; 3811#if !DECIMAL_GLOBAL_ROUNDING 3812 unsigned int rnd_mode = *prnd_mode; 3813#endif 3814#else 3815BID_UINT128 3816bid128dqq_fma (BID_UINT64 x, BID_UINT128 y, BID_UINT128 z 3817 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3818 _EXC_INFO_PARAM) 
{ 3819#endif 3820 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, 3821 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 3822 BID_UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 3823 BID_UINT128 x1; 3824 3825#if DECIMAL_CALL_BY_REFERENCE 3826 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3827 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3828 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, 3829 &res, &x1, py, pz 3830 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3831 _EXC_INFO_ARG); 3832#else 3833 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3834 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3835 &is_inexact_lt_midpoint, 3836 &is_inexact_gt_midpoint, x1, y, 3837 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3838 _EXC_INFO_ARG); 3839#endif 3840 BID_RETURN (res); 3841} 3842 3843 3844#if DECIMAL_CALL_BY_REFERENCE 3845void 3846bid128qdd_fma (BID_UINT128 * pres, BID_UINT128 * px, BID_UINT64 * py, BID_UINT64 * pz 3847 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3848 _EXC_INFO_PARAM) { 3849 BID_UINT64 y = *py, z = *pz; 3850#if !DECIMAL_GLOBAL_ROUNDING 3851 unsigned int rnd_mode = *prnd_mode; 3852#endif 3853#else 3854BID_UINT128 3855bid128qdd_fma (BID_UINT128 x, BID_UINT64 y, BID_UINT64 z 3856 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3857 _EXC_INFO_PARAM) { 3858#endif 3859 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, 3860 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 3861 BID_UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 3862 BID_UINT128 y1, z1; 3863 3864#if DECIMAL_CALL_BY_REFERENCE 3865 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3866 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3867 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3868 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, 3869 &res, px, &y1, &z1 3870 _RND_MODE_ARG _EXC_FLAGS_ARG 
_EXC_MASKS_ARG 3871 _EXC_INFO_ARG); 3872#else 3873 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3874 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3875 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3876 &is_inexact_lt_midpoint, 3877 &is_inexact_gt_midpoint, x, y1, 3878 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3879 _EXC_INFO_ARG); 3880#endif 3881 BID_RETURN (res); 3882} 3883 3884 3885#if DECIMAL_CALL_BY_REFERENCE 3886void 3887bid128qdq_fma (BID_UINT128 * pres, BID_UINT128 * px, BID_UINT64 * py, BID_UINT128 * pz 3888 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3889 _EXC_INFO_PARAM) { 3890 BID_UINT64 y = *py; 3891#if !DECIMAL_GLOBAL_ROUNDING 3892 unsigned int rnd_mode = *prnd_mode; 3893#endif 3894#else 3895BID_UINT128 3896bid128qdq_fma (BID_UINT128 x, BID_UINT64 y, BID_UINT128 z 3897 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3898 _EXC_INFO_PARAM) { 3899#endif 3900 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, 3901 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 3902 BID_UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 3903 BID_UINT128 y1; 3904 3905#if DECIMAL_CALL_BY_REFERENCE 3906 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3907 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3908 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, 3909 &res, px, &y1, pz 3910 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3911 _EXC_INFO_ARG); 3912#else 3913 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3914 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3915 &is_inexact_lt_midpoint, 3916 &is_inexact_gt_midpoint, x, y1, 3917 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3918 _EXC_INFO_ARG); 3919#endif 3920 BID_RETURN (res); 3921} 3922 3923 3924#if DECIMAL_CALL_BY_REFERENCE 3925void 3926bid128qqd_fma (BID_UINT128 * pres, BID_UINT128 * px, BID_UINT128 * py, BID_UINT64 * pz 3927 
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3928 _EXC_INFO_PARAM) { 3929 BID_UINT64 z = *pz; 3930#if !DECIMAL_GLOBAL_ROUNDING 3931 unsigned int rnd_mode = *prnd_mode; 3932#endif 3933#else 3934BID_UINT128 3935bid128qqd_fma (BID_UINT128 x, BID_UINT128 y, BID_UINT64 z 3936 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3937 _EXC_INFO_PARAM) { 3938#endif 3939 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, 3940 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 3941 BID_UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 3942 BID_UINT128 z1; 3943 3944#if DECIMAL_CALL_BY_REFERENCE 3945 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3946 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3947 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, 3948 &res, px, py, &z1 3949 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3950 _EXC_INFO_ARG); 3951#else 3952 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3953 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, 3954 &is_inexact_lt_midpoint, 3955 &is_inexact_gt_midpoint, x, y, 3956 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3957 _EXC_INFO_ARG); 3958#endif 3959 BID_RETURN (res); 3960} 3961 3962// Note: bid128qqq_fma is represented by bid128_fma 3963 3964// Note: bid64ddd_fma is represented by bid64_fma 3965 3966#if DECIMAL_CALL_BY_REFERENCE 3967void 3968bid64ddq_fma (BID_UINT64 * pres, BID_UINT64 * px, BID_UINT64 * py, BID_UINT128 * pz 3969 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3970 _EXC_INFO_PARAM) { 3971 BID_UINT64 x = *px, y = *py; 3972#if !DECIMAL_GLOBAL_ROUNDING 3973 unsigned int rnd_mode = *prnd_mode; 3974#endif 3975#else 3976BID_UINT64 3977bid64ddq_fma (BID_UINT64 x, BID_UINT64 y, BID_UINT128 z 3978 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 3979 _EXC_INFO_PARAM) { 3980#endif 3981 BID_UINT64 res1 = 0xbaddbaddbaddbaddull; 3982 BID_UINT128 x1, y1; 3983 3984#if DECIMAL_CALL_BY_REFERENCE 3985 
bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3986 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3987 bid64qqq_fma (&res1, &x1, &y1, pz 3988 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3989 _EXC_INFO_ARG); 3990#else 3991 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3992 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 3993 res1 = bid64qqq_fma (x1, y1, z 3994 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 3995 _EXC_INFO_ARG); 3996#endif 3997 BID_RETURN (res1); 3998} 3999 4000 4001#if DECIMAL_CALL_BY_REFERENCE 4002void 4003bid64dqd_fma (BID_UINT64 * pres, BID_UINT64 * px, BID_UINT128 * py, BID_UINT64 * pz 4004 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4005 _EXC_INFO_PARAM) { 4006 BID_UINT64 x = *px, z = *pz; 4007#if !DECIMAL_GLOBAL_ROUNDING 4008 unsigned int rnd_mode = *prnd_mode; 4009#endif 4010#else 4011BID_UINT64 4012bid64dqd_fma (BID_UINT64 x, BID_UINT128 y, BID_UINT64 z 4013 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4014 _EXC_INFO_PARAM) { 4015#endif 4016 BID_UINT64 res1 = 0xbaddbaddbaddbaddull; 4017 BID_UINT128 x1, z1; 4018 4019#if DECIMAL_CALL_BY_REFERENCE 4020 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4021 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4022 bid64qqq_fma (&res1, &x1, py, &z1 4023 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4024 _EXC_INFO_ARG); 4025#else 4026 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4027 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4028 res1 = bid64qqq_fma (x1, y, z1 4029 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4030 _EXC_INFO_ARG); 4031#endif 4032 BID_RETURN (res1); 4033} 4034 4035 4036#if DECIMAL_CALL_BY_REFERENCE 4037void 4038bid64dqq_fma (BID_UINT64 * pres, BID_UINT64 * px, BID_UINT128 * py, BID_UINT128 * pz 4039 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4040 _EXC_INFO_PARAM) { 4041 BID_UINT64 x = 
*px; 4042#if !DECIMAL_GLOBAL_ROUNDING 4043 unsigned int rnd_mode = *prnd_mode; 4044#endif 4045#else 4046BID_UINT64 4047bid64dqq_fma (BID_UINT64 x, BID_UINT128 y, BID_UINT128 z 4048 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4049 _EXC_INFO_PARAM) { 4050#endif 4051 BID_UINT64 res1 = 0xbaddbaddbaddbaddull; 4052 BID_UINT128 x1; 4053 4054#if DECIMAL_CALL_BY_REFERENCE 4055 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4056 bid64qqq_fma (&res1, &x1, py, pz 4057 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4058 _EXC_INFO_ARG); 4059#else 4060 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4061 res1 = bid64qqq_fma (x1, y, z 4062 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4063 _EXC_INFO_ARG); 4064#endif 4065 BID_RETURN (res1); 4066} 4067 4068 4069#if DECIMAL_CALL_BY_REFERENCE 4070void 4071bid64qdd_fma (BID_UINT64 * pres, BID_UINT128 * px, BID_UINT64 * py, BID_UINT64 * pz 4072 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4073 _EXC_INFO_PARAM) { 4074 BID_UINT64 y = *py, z = *pz; 4075#if !DECIMAL_GLOBAL_ROUNDING 4076 unsigned int rnd_mode = *prnd_mode; 4077#endif 4078#else 4079BID_UINT64 4080bid64qdd_fma (BID_UINT128 x, BID_UINT64 y, BID_UINT64 z 4081 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4082 _EXC_INFO_PARAM) { 4083#endif 4084 BID_UINT64 res1 = 0xbaddbaddbaddbaddull; 4085 BID_UINT128 y1, z1; 4086 4087#if DECIMAL_CALL_BY_REFERENCE 4088 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4089 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4090 bid64qqq_fma (&res1, px, &y1, &z1 4091 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4092 _EXC_INFO_ARG); 4093#else 4094 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4095 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4096 res1 = bid64qqq_fma (x, y1, z1 4097 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4098 _EXC_INFO_ARG); 4099#endif 4100 BID_RETURN (res1); 4101} 4102 4103 4104#if 
DECIMAL_CALL_BY_REFERENCE 4105void 4106bid64qdq_fma (BID_UINT64 * pres, BID_UINT128 * px, BID_UINT64 * py, BID_UINT128 * pz 4107 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4108 _EXC_INFO_PARAM) { 4109 BID_UINT64 y = *py; 4110#if !DECIMAL_GLOBAL_ROUNDING 4111 unsigned int rnd_mode = *prnd_mode; 4112#endif 4113#else 4114BID_UINT64 4115bid64qdq_fma (BID_UINT128 x, BID_UINT64 y, BID_UINT128 z 4116 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4117 _EXC_INFO_PARAM) { 4118#endif 4119 BID_UINT64 res1 = 0xbaddbaddbaddbaddull; 4120 BID_UINT128 y1; 4121 4122#if DECIMAL_CALL_BY_REFERENCE 4123 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4124 bid64qqq_fma (&res1, px, &y1, pz 4125 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4126 _EXC_INFO_ARG); 4127#else 4128 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4129 res1 = bid64qqq_fma (x, y1, z 4130 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4131 _EXC_INFO_ARG); 4132#endif 4133 BID_RETURN (res1); 4134} 4135 4136 4137#if DECIMAL_CALL_BY_REFERENCE 4138void 4139bid64qqd_fma (BID_UINT64 * pres, BID_UINT128 * px, BID_UINT128 * py, BID_UINT64 * pz 4140 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4141 _EXC_INFO_PARAM) { 4142 BID_UINT64 z = *pz; 4143#if !DECIMAL_GLOBAL_ROUNDING 4144 unsigned int rnd_mode = *prnd_mode; 4145#endif 4146#else 4147BID_UINT64 4148bid64qqd_fma (BID_UINT128 x, BID_UINT128 y, BID_UINT64 z 4149 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4150 _EXC_INFO_PARAM) { 4151#endif 4152 BID_UINT64 res1 = 0xbaddbaddbaddbaddull; 4153 BID_UINT128 z1; 4154 4155#if DECIMAL_CALL_BY_REFERENCE 4156 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4157 bid64qqq_fma (&res1, px, py, &z1 4158 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4159 _EXC_INFO_ARG); 4160#else 4161 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); 4162 res1 = bid64qqq_fma (x, y, z1 4163 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4164 _EXC_INFO_ARG); 
4165#endif 4166 BID_RETURN (res1); 4167} 4168 4169 4170#if DECIMAL_CALL_BY_REFERENCE 4171void 4172bid64qqq_fma (BID_UINT64 * pres, BID_UINT128 * px, BID_UINT128 * py, BID_UINT128 * pz 4173 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4174 _EXC_INFO_PARAM) { 4175 BID_UINT128 x = *px, y = *py, z = *pz; 4176#if !DECIMAL_GLOBAL_ROUNDING 4177 unsigned int rnd_mode = *prnd_mode; 4178#endif 4179#else 4180BID_UINT64 4181bid64qqq_fma (BID_UINT128 x, BID_UINT128 y, BID_UINT128 z 4182 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 4183 _EXC_INFO_PARAM) { 4184#endif 4185 int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0, 4186 is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0; 4187 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, 4188 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; 4189 int incr_exp; 4190 BID_UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 4191 BID_UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; 4192 BID_UINT64 res1 = 0xbaddbaddbaddbaddull; 4193 unsigned int save_fpsf; // needed because of the call to bid128_ext_fma 4194 BID_UINT64 sign; 4195 BID_UINT64 exp; 4196 int unbexp; 4197 BID_UINT128 C; 4198 BID_UI64DOUBLE tmp; 4199 int nr_bits; 4200 int q, x0; 4201 int scale; 4202 int lt_half_ulp = 0, eq_half_ulp = 0; 4203 4204 // Note: for rounding modes other than RN or RA, the result can be obtained 4205 // by rounding first to BID128 and then to BID64 4206 4207 save_fpsf = *pfpsf; // sticky bits - caller value must be preserved 4208 *pfpsf = 0; 4209 4210#if DECIMAL_CALL_BY_REFERENCE 4211 bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0, 4212 &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0, 4213 &res, &x, &y, &z 4214 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 4215 _EXC_INFO_ARG); 4216#else 4217 res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0, 4218 &is_inexact_lt_midpoint0, 4219 &is_inexact_gt_midpoint0, x, y, 4220 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 
4221 _EXC_INFO_ARG); 4222#endif 4223 4224 if ((rnd_mode == BID_ROUNDING_DOWN) || (rnd_mode == BID_ROUNDING_UP) || 4225 (rnd_mode == BID_ROUNDING_TO_ZERO) || // no double rounding error is possible 4226 ((res.w[BID_HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN) 4227 ((res.w[BID_HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity 4228#if DECIMAL_CALL_BY_REFERENCE 4229 bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG); 4230#else 4231 res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG); 4232#endif 4233 // determine the unbiased exponent of the result 4234 unbexp = ((res1 >> 53) & 0x3ff) - 398; 4235 4236 if (!((res1 & MASK_NAN) == MASK_NAN)) { // res1 not NaN 4237 // if subnormal, res1 must have exp = -398 4238 // if tiny and inexact set underflow and inexact status flags 4239 if ((unbexp == -398) 4240 && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull) 4241 && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 4242 || is_midpoint_lt_even0 || is_midpoint_gt_even0)) { 4243 // set the inexact flag and the underflow flag 4244 *pfpsf |= (BID_INEXACT_EXCEPTION | BID_UNDERFLOW_EXCEPTION); 4245 } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 || 4246 is_midpoint_lt_even0 || is_midpoint_gt_even0) { 4247 // set the inexact flag and the underflow flag 4248 *pfpsf |= BID_INEXACT_EXCEPTION; 4249 } 4250#if !DECIMAL_TINY_DETECTION_AFTER_ROUNDING 4251 // correction needed for tininess detection before rounding 4252 if (((res1 & 0x7fffffffffffffffull) == 1000000000000000ull) && 4253 // 10^15*10^-398 4254 (((rnd_mode == BID_ROUNDING_TO_NEAREST || 4255 rnd_mode == BID_ROUNDING_TIES_AWAY) && 4256 (is_midpoint_lt_even || is_inexact_gt_midpoint)) || 4257 ((((rnd_mode == BID_ROUNDING_UP) && !(res1 & MASK_SIGN)) || 4258 ((rnd_mode == BID_ROUNDING_DOWN) && (res1 & MASK_SIGN))) 4259 && (is_midpoint_lt_even || is_midpoint_gt_even || 4260 is_inexact_lt_midpoint || is_inexact_gt_midpoint)))) { 4261 *pfpsf |= BID_UNDERFLOW_EXCEPTION; 
4262 } 4263#endif 4264 } // else the result is NaN 4265 4266 *pfpsf |= save_fpsf; 4267 BID_RETURN (res1); 4268 } // else continue, and use rounding to nearest to round to 16 digits 4269 4270 // at this point the result is rounded to nearest (even or away) to 34 digits 4271 // (or less if exact), and it is zero or finite non-zero canonical [sub]normal 4272 sign = res.w[BID_HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative 4273 exp = res.w[BID_HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits 4274 unbexp = (exp >> 49) - 6176; 4275 C.w[1] = res.w[BID_HIGH_128W] & MASK_COEFF; 4276 C.w[0] = res.w[BID_LOW_128W]; 4277 4278 if ((C.w[1] == 0x0 && C.w[0] == 0x0) || // result is zero 4279 (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) { 4280 // clear under/overflow 4281#if DECIMAL_CALL_BY_REFERENCE 4282 bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG); 4283#else 4284 res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG); 4285#endif 4286 *pfpsf |= save_fpsf; 4287 BID_RETURN (res1); 4288 } // else continue 4289 4290 // -398 - 34 <= unbexp <= 369 + 15 4291 if (rnd_mode == BID_ROUNDING_TIES_AWAY) { 4292 // apply correction, if needed, to make the result rounded to nearest-even 4293 if (is_midpoint_gt_even) { 4294 // res = res - 1 4295 res1--; // res1 is now even 4296 } // else the result is already correctly rounded to nearest-even 4297 } 4298 // at this point the result is finite, non-zero canonical normal or subnormal, 4299 // and in most cases overflow or underflow will not occur 4300 4301 // determine the number of digits q in the result 4302 // q = nr. of decimal digits in x 4303 // determine first the nr. 
of bits in x 4304 if (C.w[1] == 0) { 4305 if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53 4306 // split the 64-bit value in two 32-bit halves to avoid rounding errors 4307 tmp.d = (double) (C.w[0] >> 32); // exact conversion 4308 nr_bits = 4309 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 4310 } else { // if x < 2^53 4311 tmp.d = (double) C.w[0]; // exact conversion 4312 nr_bits = 4313 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 4314 } 4315 } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1]) 4316 tmp.d = (double) C.w[1]; // exact conversion 4317 nr_bits = 4318 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); 4319 } 4320 q = bid_nr_digits[nr_bits - 1].digits; 4321 if (q == 0) { 4322 q = bid_nr_digits[nr_bits - 1].digits1; 4323 if (C.w[1] > bid_nr_digits[nr_bits - 1].threshold_hi || 4324 (C.w[1] == bid_nr_digits[nr_bits - 1].threshold_hi && 4325 C.w[0] >= bid_nr_digits[nr_bits - 1].threshold_lo)) 4326 q++; 4327 } 4328 // if q > 16, round to nearest even to 16 digits (but for underflow it may 4329 // have to be truncated even more) 4330 if (q > 16) { 4331 x0 = q - 16; 4332 if (q <= 18) { 4333 bid_round64_2_18 (q, x0, C.w[0], &res1, &incr_exp, 4334 &is_midpoint_lt_even, &is_midpoint_gt_even, 4335 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 4336 } else { // 19 <= q <= 34 4337 bid_round128_19_38 (q, x0, C, &res128, &incr_exp, 4338 &is_midpoint_lt_even, &is_midpoint_gt_even, 4339 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 4340 res1 = res128.w[0]; // the result fits in 64 bits 4341 } 4342 unbexp = unbexp + x0; 4343 if (incr_exp) 4344 unbexp++; 4345 q = 16; // need to set in case denormalization is necessary 4346 } else { 4347 // the result does not require a second rounding (and it must have 4348 // been exact in the first rounding, since q <= 16) 4349 res1 = C.w[0]; 4350 } 4351 4352 // avoid a double rounding error 4353 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 4354 is_midpoint_lt_even) { 
// double rounding error upward 4355 // res = res - 1 4356 res1--; // res1 becomes odd 4357 is_midpoint_lt_even = 0; 4358 is_inexact_lt_midpoint = 1; 4359 if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1 4360 res1 = 0x002386f26fc0ffffull; // 10^16 - 1 4361 unbexp--; 4362 } 4363 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 4364 is_midpoint_gt_even) { // double rounding error downward 4365 // res = res + 1 4366 res1++; // res1 becomes odd (so it cannot be 10^16) 4367 is_midpoint_gt_even = 0; 4368 is_inexact_gt_midpoint = 1; 4369 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 4370 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 4371 // if this second rounding was exact the result may still be 4372 // inexact because of the first rounding 4373 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 4374 is_inexact_gt_midpoint = 1; 4375 } 4376 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 4377 is_inexact_lt_midpoint = 1; 4378 } 4379 } else if (is_midpoint_gt_even && 4380 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { 4381 // pulled up to a midpoint 4382 is_inexact_lt_midpoint = 1; 4383 is_inexact_gt_midpoint = 0; 4384 is_midpoint_lt_even = 0; 4385 is_midpoint_gt_even = 0; 4386 } else if (is_midpoint_lt_even && 4387 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { 4388 // pulled down to a midpoint 4389 is_inexact_lt_midpoint = 0; 4390 is_inexact_gt_midpoint = 1; 4391 is_midpoint_lt_even = 0; 4392 is_midpoint_gt_even = 0; 4393 } else { 4394 ; 4395 } 4396 // this is the result rounded correctly to nearest even, with unbounded exp. 
4397 4398 // check for overflow 4399 if (q + unbexp > P16 + expmax16) { 4400 res1 = sign | 0x7800000000000000ull; 4401 *pfpsf |= (BID_INEXACT_EXCEPTION | BID_OVERFLOW_EXCEPTION); 4402 *pfpsf |= save_fpsf; 4403 BID_RETURN (res1) 4404 } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16 4405 // not overflow; the result must be exact, and we can multiply res1 by 4406 // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits 4407 scale = unbexp - expmax16; 4408 res1 = res1 * bid_ten2k64[scale]; // res1 * 10^scale 4409 unbexp = expmax16; // unbexp - scale 4410 } else { 4411 ; // continue 4412 } 4413 4414 // check for underflow 4415 if (q + unbexp < P16 + expmin16) { 4416 if (unbexp < expmin16) { 4417 // we must truncate more of res 4418 x0 = expmin16 - unbexp; // x0 >= 1 4419 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; 4420 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; 4421 is_midpoint_lt_even0 = is_midpoint_lt_even; 4422 is_midpoint_gt_even0 = is_midpoint_gt_even; 4423 is_inexact_lt_midpoint = 0; 4424 is_inexact_gt_midpoint = 0; 4425 is_midpoint_lt_even = 0; 4426 is_midpoint_gt_even = 0; 4427 // the number of decimal digits in res1 is q 4428 if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits 4429 // 2 <= q <= 16, 1 <= x0 <= 15 4430 bid_round64_2_18 (q, x0, res1, &res1, &incr_exp, 4431 &is_midpoint_lt_even, &is_midpoint_gt_even, 4432 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); 4433 if (incr_exp) { 4434 // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15 4435 res1 = bid_ten2k64[q - x0]; 4436 } 4437 unbexp = unbexp + x0; // expmin16 4438 } else if (x0 == q) { 4439 // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin 4440 // determine relationship with 1/2 ulp 4441 // q <= 16 4442 if (res1 < bid_midpoint64[q - 1]) { // < 1/2 ulp 4443 lt_half_ulp = 1; 4444 is_inexact_lt_midpoint = 1; 4445 } else if (res1 == bid_midpoint64[q - 1]) { // = 1/2 ulp 4446 eq_half_ulp = 1; 4447 is_midpoint_gt_even = 1; 4448 } 
else { // > 1/2 ulp 4449 // gt_half_ulp = 1; 4450 is_inexact_gt_midpoint = 1; 4451 } 4452 if (lt_half_ulp || eq_half_ulp) { 4453 // res = +0.0 * 10^expmin16 4454 res1 = 0x0000000000000000ull; 4455 } else { // if (gt_half_ulp) 4456 // res = +1 * 10^expmin16 4457 res1 = 0x0000000000000001ull; 4458 } 4459 unbexp = expmin16; 4460 } else { // if (x0 > q) 4461 // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin 4462 res1 = 0x0000000000000000ull; 4463 unbexp = expmin16; 4464 is_inexact_lt_midpoint = 1; 4465 } 4466 // avoid a double rounding error 4467 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && 4468 is_midpoint_lt_even) { // double rounding error upward 4469 // res = res - 1 4470 res1--; // res1 becomes odd 4471 is_midpoint_lt_even = 0; 4472 is_inexact_lt_midpoint = 1; 4473 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && 4474 is_midpoint_gt_even) { // double rounding error downward 4475 // res = res + 1 4476 res1++; // res1 becomes odd 4477 is_midpoint_gt_even = 0; 4478 is_inexact_gt_midpoint = 1; 4479 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && 4480 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { 4481 // if this rounding was exact the result may still be 4482 // inexact because of the previous roundings 4483 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { 4484 is_inexact_gt_midpoint = 1; 4485 } 4486 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { 4487 is_inexact_lt_midpoint = 1; 4488 } 4489 } else if (is_midpoint_gt_even && 4490 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { 4491 // pulled up to a midpoint 4492 is_inexact_lt_midpoint = 1; 4493 is_inexact_gt_midpoint = 0; 4494 is_midpoint_lt_even = 0; 4495 is_midpoint_gt_even = 0; 4496 } else if (is_midpoint_lt_even && 4497 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { 4498 // pulled down to a midpoint 4499 is_inexact_lt_midpoint = 0; 4500 is_inexact_gt_midpoint = 1; 4501 is_midpoint_lt_even = 0; 4502 is_midpoint_gt_even = 0; 4503 } 
else { 4504 ; 4505 } 4506 } 4507 // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16) 4508 // and the result is tiny and exact 4509 4510 // check for inexact result 4511 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 4512 is_midpoint_lt_even || is_midpoint_gt_even || 4513 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 || 4514 is_midpoint_lt_even0 || is_midpoint_gt_even0) { 4515 // set the inexact flag and the underflow flag 4516 *pfpsf |= (BID_INEXACT_EXCEPTION | BID_UNDERFLOW_EXCEPTION); 4517 } 4518 } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || 4519 is_midpoint_lt_even || is_midpoint_gt_even) { 4520 *pfpsf |= BID_INEXACT_EXCEPTION; 4521 } 4522 // this is the result rounded correctly to nearest, with bounded exponent 4523 4524 if (rnd_mode == BID_ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction 4525 // res = res + 1 4526 res1++; // res1 is now odd 4527 } // else the result is already correct 4528 4529 // assemble the result 4530 if (res1 < 0x0020000000000000ull) { // res < 2^53 4531 res1 = sign | ((BID_UINT64) (unbexp + 398) << 53) | res1; 4532 } else { // res1 >= 2^53 4533 res1 = sign | MASK_STEERING_BITS | 4534 ((BID_UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2); 4535 } 4536#if !DECIMAL_TINY_DETECTION_AFTER_ROUNDING 4537 // correction needed for tininess detection before rounding 4538 if (((res1 & 0x7fffffffffffffffull) == 1000000000000000ull) && 4539 // 10^15*10^-398 4540 (((rnd_mode == BID_ROUNDING_TO_NEAREST || 4541 rnd_mode == BID_ROUNDING_TIES_AWAY) && 4542 (is_midpoint_lt_even || is_inexact_gt_midpoint)) || 4543 ((((rnd_mode == BID_ROUNDING_UP) && !(res1 & MASK_SIGN)) || 4544 ((rnd_mode == BID_ROUNDING_DOWN) && (res1 & MASK_SIGN))) 4545 && (is_midpoint_lt_even || is_midpoint_gt_even || 4546 is_inexact_lt_midpoint || is_inexact_gt_midpoint)))) { 4547 *pfpsf |= BID_UNDERFLOW_EXCEPTION; 4548 } 4549#endif 4550 *pfpsf |= save_fpsf; 4551 BID_RETURN (res1); 4552} 4553