1 /****************************************************************************** 2 Copyright (c) 2007-2011, Intel Corp. 3 All rights reserved. 4 5 Redistribution and use in source and binary forms, with or without 6 modification, are permitted provided that the following conditions are met: 7 8 * Redistributions of source code must retain the above copyright notice, 9 this list of conditions and the following disclaimer. 10 * Redistributions in binary form must reproduce the above copyright 11 notice, this list of conditions and the following disclaimer in the 12 documentation and/or other materials provided with the distribution. 13 * Neither the name of Intel Corporation nor the names of its contributors 14 may be used to endorse or promote products derived from this software 15 without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 THE POSSIBILITY OF SUCH DAMAGE. 28 ******************************************************************************/ 29 30 /***************************************************************************** 31 * BID64 fma 32 ***************************************************************************** 33 * 34 * Algorithm description: 35 * 36 * if multiplication is guranteed exact (short coefficients) 37 * call the unpacked arg. equivalent of bid64_add(x*y, z) 38 * else 39 * get full coefficient_x*coefficient_y product 40 * call subroutine to perform addition of 64-bit argument 41 * to 128-bit product 42 * 43 ****************************************************************************/ 44 45 #define BID_FUNCTION_SETS_BINARY_FLAGS 46 47 #include "bid_inline_add.h" 48 49 #if DECIMAL_CALL_BY_REFERENCE 50 BID_EXTERN_C void bid64_mul (BID_UINT64 * pres, BID_UINT64 * px, 51 BID_UINT64 * 52 py _RND_MODE_PARAM _EXC_FLAGS_PARAM 53 _EXC_MASKS_PARAM _EXC_INFO_PARAM); 54 #else 55 56 BID_EXTERN_C BID_UINT64 bid64_mul (BID_UINT64 x, 57 BID_UINT64 y _RND_MODE_PARAM 58 _EXC_FLAGS_PARAM _EXC_MASKS_PARAM 59 _EXC_INFO_PARAM); 60 #endif 61 62 63 BID_TYPE0_FUNCTION_ARGTYPE1_ARGTYPE2_ARGTYPE3(BID_UINT64, bid64_fma, BID_UINT64, x, BID_UINT64, y, BID_UINT64, z) 64 BID_UINT128 P, CT, CZ; 65 #if DECIMAL_TINY_DETECTION_AFTER_ROUNDING 66 BID_UINT128 PU; 67 #endif 68 BID_UINT64 sign_x, sign_y, coefficient_x, coefficient_y, sign_z, 69 coefficient_z; 70 BID_UINT64 C64, remainder_y, res; 71 BID_UINT64 CYh, CY0L, T, valid_x, valid_y, valid_z; 72 int_double tempx, tempy; 73 int extra_digits, exponent_x, exponent_y, bin_expon_cx, bin_expon_cy, 74 bin_expon_product; 75 #if DECIMAL_TINY_DETECTION_AFTER_ROUNDING 76 int rmode; 77 #endif 78 int digits_p, bp, final_exponent, exponent_z, digits_z, ez, ey, 79 scale_z, uf_status; 80 81 BID_OPT_SAVE_BINARY_FLAGS() 82 83 valid_x = unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x); 84 valid_y = unpack_BID64 (&sign_y, &exponent_y, &coefficient_y, y); 85 valid_z = unpack_BID64 (&sign_z, &exponent_z, &coefficient_z, z); 86 87 // unpack arguments, check for NaN, Infinity, or 0 88 if (!valid_x || !valid_y || !valid_z) { 89 90 if ((y & MASK_NAN) == MASK_NAN) { // y is NAN 91 // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y) 92 // check first for non-canonical NaN payload 93 y = y & 0xfe03ffffffffffffull; // clear G6-G12 94 if ((y & 0x0003ffffffffffffull) > 999999999999999ull) { 95 y = y & 0xfe00000000000000ull; // clear G6-G12 and the payload bits 96 } 97 if ((y & MASK_SNAN) == MASK_SNAN) { // y is SNAN 98 // set invalid flag 99 *pfpsf |= BID_INVALID_EXCEPTION; 100 // return quiet (y) 101 res = y & 0xfdffffffffffffffull; 102 } else { // y is QNaN 103 // return y 104 res = y; 105 // if z = SNaN or x = SNaN signal invalid exception 106 if ((z & MASK_SNAN) == MASK_SNAN 107 || (x & MASK_SNAN) == MASK_SNAN) { 108 // set invalid flag 109 *pfpsf |= BID_INVALID_EXCEPTION; 110 } 111 } 112 BID_RETURN (res) 113 } else if ((z & MASK_NAN) == MASK_NAN) { // z is NAN 114 // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z) 115 // check first for non-canonical NaN payload 116 z = z & 0xfe03ffffffffffffull; // clear G6-G12 117 if ((z & 0x0003ffffffffffffull) > 999999999999999ull) { 118 z = z & 0xfe00000000000000ull; // clear G6-G12 and the payload bits 119 } 120 if ((z & MASK_SNAN) == MASK_SNAN) { // z is SNAN 121 // set invalid flag 122 *pfpsf |= BID_INVALID_EXCEPTION; 123 // return quiet (z) 124 res = z & 0xfdffffffffffffffull; 125 } else { // z is QNaN 126 // return z 127 res = z; 128 // if x = SNaN signal invalid exception 129 if ((x & MASK_SNAN) == MASK_SNAN) { 130 // set invalid flag 131 *pfpsf |= BID_INVALID_EXCEPTION; 132 } 133 } 134 BID_RETURN (res) 135 } else if ((x & MASK_NAN) == MASK_NAN) { // x is NAN 136 // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x) 137 // check first for non-canonical NaN payload 138 x = x & 0xfe03ffffffffffffull; // clear G6-G12 139 if ((x & 0x0003ffffffffffffull) > 999999999999999ull) { 140 x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits 141 } 142 if ((x & MASK_SNAN) == MASK_SNAN) { // x is SNAN 143 // set invalid flag 144 *pfpsf |= BID_INVALID_EXCEPTION; 145 // return quiet (x) 146 res = x & 0xfdffffffffffffffull; 147 } else { // x is QNaN 148 // return x 149 res = x; // clear out G[6]-G[16] 150 } 151 BID_RETURN (res) 152 } 153 154 if (!valid_x) { 155 // x is Inf. or 0 156 157 // x is Infinity? 158 if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) { 159 // check if y is 0 160 if (!coefficient_y) { 161 // y==0, return NaN 162 #ifdef BID_SET_STATUS_FLAGS 163 if ((z & 0x7e00000000000000ull) != 0x7c00000000000000ull) 164 __set_status_flags (pfpsf, BID_INVALID_EXCEPTION); 165 #endif 166 BID_RETURN (0x7c00000000000000ull); 167 } 168 // test if z is Inf of oposite sign 169 if (((z & 0x7c00000000000000ull) == 0x7800000000000000ull) 170 && (((x ^ y) ^ z) & 0x8000000000000000ull)) { 171 // return NaN 172 #ifdef BID_SET_STATUS_FLAGS 173 __set_status_flags (pfpsf, BID_INVALID_EXCEPTION); 174 #endif 175 BID_RETURN (0x7c00000000000000ull); 176 } 177 // otherwise return +/-Inf 178 BID_RETURN (((x ^ y) & 0x8000000000000000ull) | 179 0x7800000000000000ull); 180 } 181 // x is 0 182 if (((y & 0x7800000000000000ull) != 0x7800000000000000ull) 183 && ((z & 0x7800000000000000ull) != 0x7800000000000000ull)) { 184 185 if (coefficient_z) { 186 exponent_y = exponent_x - DECIMAL_EXPONENT_BIAS + exponent_y; 187 188 sign_z = z & 0x8000000000000000ull; 189 190 if (exponent_y >= exponent_z) 191 BID_RETURN (z); 192 res = 193 add_zero64 (exponent_y, sign_z, exponent_z, coefficient_z, 194 &rnd_mode, pfpsf); 195 BID_RETURN (res); 196 } 197 } 198 } 199 if (!valid_y) { 200 // y is Inf. or 0 201 202 // y is Infinity? 203 if ((y & 0x7800000000000000ull) == 0x7800000000000000ull) { 204 // check if x is 0 205 if (!coefficient_x) { 206 // y==0, return NaN 207 #ifdef BID_SET_STATUS_FLAGS 208 __set_status_flags (pfpsf, BID_INVALID_EXCEPTION); 209 #endif 210 BID_RETURN (0x7c00000000000000ull); 211 } 212 // test if z is Inf of oposite sign 213 if (((z & 0x7c00000000000000ull) == 0x7800000000000000ull) 214 && (((x ^ y) ^ z) & 0x8000000000000000ull)) { 215 #ifdef BID_SET_STATUS_FLAGS 216 __set_status_flags (pfpsf, BID_INVALID_EXCEPTION); 217 #endif 218 // return NaN 219 BID_RETURN (0x7c00000000000000ull); 220 } 221 // otherwise return +/-Inf 222 BID_RETURN (((x ^ y) & 0x8000000000000000ull) | 223 0x7800000000000000ull); 224 } 225 // y is 0 226 if (((z & 0x7800000000000000ull) != 0x7800000000000000ull)) { 227 228 if (coefficient_z) { 229 exponent_y += exponent_x - DECIMAL_EXPONENT_BIAS; 230 231 sign_z = z & 0x8000000000000000ull; 232 233 if (exponent_y >= exponent_z) 234 BID_RETURN (z); 235 res = 236 add_zero64 (exponent_y, sign_z, exponent_z, coefficient_z, 237 &rnd_mode, pfpsf); 238 BID_RETURN (res); 239 } 240 } 241 } 242 243 if (!valid_z) { 244 // y is Inf. or 0 245 246 // test if y is NaN/Inf 247 if ((z & 0x7800000000000000ull) == 0x7800000000000000ull) { 248 BID_RETURN (coefficient_z & QUIET_MASK64); 249 } 250 // z is 0, return x*y 251 if ((!coefficient_x) || (!coefficient_y)) { 252 //0+/-0 253 exponent_x += exponent_y - DECIMAL_EXPONENT_BIAS; 254 if (exponent_x > DECIMAL_MAX_EXPON_64) 255 exponent_x = DECIMAL_MAX_EXPON_64; 256 else if (exponent_x < 0) 257 exponent_x = 0; 258 if (exponent_x <= exponent_z) 259 res = ((BID_UINT64) exponent_x) << 53; 260 else 261 res = ((BID_UINT64) exponent_z) << 53; 262 if ((sign_x ^ sign_y) == sign_z) 263 res |= sign_z; 264 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 265 #ifndef IEEE_ROUND_NEAREST 266 else if (rnd_mode == BID_ROUNDING_DOWN) 267 res |= 0x8000000000000000ull; 268 #endif 269 #endif 270 BID_RETURN (res); 271 } 272 } 273 } 274 275 /* get binary coefficients of x and y */ 276 277 //--- get number of bits in the coefficients of x and y --- 278 // version 2 (original) 279 tempx.d = (double) coefficient_x; 280 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52); 281 282 tempy.d = (double) coefficient_y; 283 bin_expon_cy = ((tempy.i & MASK_BINARY_EXPONENT) >> 52); 284 285 // magnitude estimate for coefficient_x*coefficient_y is 286 // 2^(unbiased_bin_expon_cx + unbiased_bin_expon_cx) 287 bin_expon_product = bin_expon_cx + bin_expon_cy; 288 289 // check if coefficient_x*coefficient_y<2^(10*k+3) 290 // equivalent to unbiased_bin_expon_cx + unbiased_bin_expon_cx < 10*k+1 291 if (bin_expon_product < UPPER_EXPON_LIMIT + 2 * BINARY_EXPONENT_BIAS) { 292 // easy multiply 293 C64 = coefficient_x * coefficient_y; 294 final_exponent = exponent_x + exponent_y - DECIMAL_EXPONENT_BIAS; 295 if ((final_exponent > 0) || (!coefficient_z)) { 296 res = 297 bid_get_add64 (sign_x ^ sign_y, 298 final_exponent, C64, sign_z, exponent_z, coefficient_z, rnd_mode, pfpsf); 299 BID_RETURN (res); 300 } else { 301 P.w[0] = C64; 302 P.w[1] = 0; 303 extra_digits = 0; 304 } 305 } else { 306 if (!coefficient_z) { 307 #if DECIMAL_CALL_BY_REFERENCE 308 bid64_mul (&res, &x, 309 &y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 310 _EXC_INFO_ARG); 311 #else 312 res = 313 bid64_mul (x, 314 y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG 315 _EXC_INFO_ARG); 316 #endif 317 BID_RETURN (res); 318 } 319 // get 128-bit product: coefficient_x*coefficient_y 320 __mul_64x64_to_128 (P, coefficient_x, coefficient_y); 321 322 // tighten binary range of P: leading bit is 2^bp 323 // unbiased_bin_expon_product <= bp <= unbiased_bin_expon_product+1 324 bin_expon_product -= 2 * BINARY_EXPONENT_BIAS; 325 __tight_bin_range_128 (bp, P, bin_expon_product); 326 327 // get number of decimal digits in the product 328 digits_p = bid_estimate_decimal_digits[bp]; 329 if (!(__unsigned_compare_gt_128 (bid_power10_table_128[digits_p], P))) 330 digits_p++; // if bid_power10_table_128[digits_p] <= P 331 332 // determine number of decimal digits to be rounded out 333 extra_digits = digits_p - MAX_FORMAT_DIGITS; 334 final_exponent = 335 exponent_x + exponent_y + extra_digits - DECIMAL_EXPONENT_BIAS; 336 } 337 338 if (((unsigned) final_exponent) >= 3 * 256) { 339 if (final_exponent < 0) { 340 //--- get number of bits in the coefficients of z --- 341 tempx.d = (double) coefficient_z; 342 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; 343 // get number of decimal digits in the coeff_x 344 digits_z = bid_estimate_decimal_digits[bin_expon_cx]; 345 if (coefficient_z >= bid_power10_table_128[digits_z].w[0]) 346 digits_z++; 347 // underflow 348 if ((final_exponent + 16 < 0) 349 || (exponent_z + digits_z > 33 + final_exponent)) { 350 res = 351 BID_normalize (sign_z, exponent_z, coefficient_z, 352 sign_x ^ sign_y, 1, rnd_mode, pfpsf); 353 BID_RETURN (res); 354 } 355 356 ez = exponent_z + digits_z - 16; 357 if (ez < 0) 358 ez = 0; 359 scale_z = exponent_z - ez; 360 coefficient_z *= bid_power10_table_128[scale_z].w[0]; 361 ey = final_exponent - extra_digits; 362 extra_digits = ez - ey; 363 364 if (extra_digits > 17) { 365 CYh = __truncate (P, 16); 366 // get remainder 367 T = bid_power10_table_128[16].w[0]; 368 __mul_64x64_to_64 (CY0L, CYh, T); 369 remainder_y = P.w[0] - CY0L; 370 371 extra_digits -= 16; 372 P.w[0] = CYh; 373 P.w[1] = 0; 374 } else 375 remainder_y = 0; 376 377 // align coeff_x, CYh 378 __mul_64x64_to_128 (CZ, coefficient_z, 379 bid_power10_table_128[extra_digits].w[0]); 380 381 if (sign_z == (sign_y ^ sign_x)) { 382 __add_128_128 (CT, CZ, P); 383 if (__unsigned_compare_ge_128 384 (CT, bid_power10_table_128[16 + extra_digits])) { 385 extra_digits++; 386 ez++; 387 } 388 } else { 389 if (remainder_y && (__unsigned_compare_ge_128 (CZ, P))) { 390 P.w[0]++; 391 if (!P.w[0]) 392 P.w[1]++; 393 } 394 __sub_128_128 (CT, CZ, P); 395 if (((BID_SINT64) CT.w[1]) < 0) { 396 sign_z = sign_y ^ sign_x; 397 CT.w[0] = 0 - CT.w[0]; 398 CT.w[1] = 0 - CT.w[1]; 399 if (CT.w[0]) 400 CT.w[1]--; 401 } else if(!(CT.w[1]|CT.w[0])) 402 sign_z = (rnd_mode!=BID_ROUNDING_DOWN)? 0: 0x8000000000000000ull; 403 if (ez 404 && 405 (__unsigned_compare_gt_128 406 (bid_power10_table_128[15 + extra_digits], CT))) { 407 extra_digits--; 408 ez--; 409 } 410 } 411 412 #ifdef BID_SET_STATUS_FLAGS 413 uf_status = 0; 414 if ((!ez) 415 && 416 __unsigned_compare_gt_128 (bid_power10_table_128 417 [extra_digits + 15], CT)) { 418 #if DECIMAL_TINY_DETECTION_AFTER_ROUNDING 419 rmode = rnd_mode; 420 if (sign_z && (unsigned) (rmode - 1) < 2) 421 rmode = 3 - rmode; 422 PU = bid_power10_table_128[extra_digits + 15]; 423 PU.w[0]--; 424 if (__unsigned_compare_gt_128 (PU, CT) 425 || (rmode == BID_ROUNDING_DOWN) 426 || (rmode == BID_ROUNDING_TO_ZERO)) 427 uf_status = BID_UNDERFLOW_EXCEPTION; 428 else if (extra_digits < 2) { 429 if ((rmode == BID_ROUNDING_UP)) { 430 if (!extra_digits) 431 uf_status = BID_UNDERFLOW_EXCEPTION; 432 else { 433 if (remainder_y && (sign_z != (sign_y ^ sign_x))) 434 remainder_y = bid_power10_table_128[16].w[0] - remainder_y; 435 436 if (bid_power10_table_128[15].w[0] > remainder_y) 437 uf_status = BID_UNDERFLOW_EXCEPTION; 438 } 439 } else // RN or RN_away 440 { 441 if (remainder_y && (sign_z != (sign_y ^ sign_x))) 442 remainder_y = bid_power10_table_128[16].w[0] - remainder_y; 443 444 if (!extra_digits) { 445 remainder_y += bid_round_const_table[rmode][15]; 446 if (remainder_y < bid_power10_table_128[16].w[0]) 447 uf_status = BID_UNDERFLOW_EXCEPTION; 448 } else { 449 if (remainder_y < bid_round_const_table[rmode][16]) 450 uf_status = BID_UNDERFLOW_EXCEPTION; 451 } 452 } 453 //__set_status_flags (pfpsf, uf_status); 454 } 455 #else // DECIMAL_TINY_DETECTION_AFTER_ROUNDING 456 uf_status = BID_UNDERFLOW_EXCEPTION; 457 #endif 458 } 459 #endif 460 res = 461 __bid_full_round64_remainder (sign_z, ez - extra_digits, CT, 462 extra_digits, remainder_y, 463 rnd_mode, pfpsf, uf_status); 464 BID_RETURN (res); 465 466 } else { 467 if ((sign_z == (sign_x ^ sign_y)) 468 || (final_exponent > 3 * 256 + 15)) { 469 res = 470 fast_get_BID64_check_OF (sign_x ^ sign_y, final_exponent, 471 1000000000000000ull, rnd_mode, 472 pfpsf); 473 BID_RETURN (res); 474 } 475 } 476 } 477 478 479 if (extra_digits > 0) { 480 res = 481 bid_get_add128 (sign_z, exponent_z, coefficient_z, sign_x ^ sign_y, 482 final_exponent, P, extra_digits, rnd_mode, pfpsf); 483 BID_RETURN (res); 484 } 485 // go to convert_format and exit 486 else { 487 C64 = __low_64 (P); 488 489 res = 490 bid_get_add64 (sign_x ^ sign_y, 491 exponent_x + exponent_y - DECIMAL_EXPONENT_BIAS, C64, 492 sign_z, exponent_z, coefficient_z, 493 rnd_mode, pfpsf); 494 BID_RETURN (res); 495 } 496 } 497 498 499