1 /* Copyright (C) 2007-2018 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify it under 6 the terms of the GNU General Public License as published by the Free 7 Software Foundation; either version 3, or (at your option) any later 8 version. 9 10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 11 WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24 /***************************************************************************** 25 * 26 * Helper add functions (for fma) 27 * 28 * __BID_INLINE__ UINT64 get_add64( 29 * UINT64 sign_x, int exponent_x, UINT64 coefficient_x, 30 * UINT64 sign_y, int exponent_y, UINT64 coefficient_y, 31 * int rounding_mode) 32 * 33 * __BID_INLINE__ UINT64 get_add128( 34 * UINT64 sign_x, int exponent_x, UINT64 coefficient_x, 35 * UINT64 sign_y, int final_exponent_y, UINT128 CY, 36 * int extra_digits, int rounding_mode) 37 * 38 ***************************************************************************** 39 * 40 * Algorithm description: 41 * 42 * get_add64: same as BID64 add, but arguments are unpacked and there 43 * are no special case checks 44 * 45 * get_add128: add 64-bit coefficient to 128-bit product (which contains 46 * 16+extra_digits decimal digits), 47 * return BID64 result 48 * - the exponents are compared and the two coefficients are 49 * properly aligned for addition/subtraction 50 * - multiple paths are needed 51 * - final result exponent is calculated and the lower term is 52 * rounded first if necessary, to avoid manipulating 53 * coefficients longer than 128 bits 54 * 55 ****************************************************************************/ 56 57 #ifndef _INLINE_BID_ADD_H_ 58 #define _INLINE_BID_ADD_H_ 59 60 #include "bid_internal.h" 61 62 #define MAX_FORMAT_DIGITS 16 63 #define DECIMAL_EXPONENT_BIAS 398 64 #define MASK_BINARY_EXPONENT 0x7ff0000000000000ull 65 #define BINARY_EXPONENT_BIAS 0x3ff 66 #define UPPER_EXPON_LIMIT 51 67 68 /////////////////////////////////////////////////////////////////////// 69 // 70 // get_add64() is essentially the same as bid_add(), except that 71 // the arguments are unpacked 72 // 73 ////////////////////////////////////////////////////////////////////// 74 __BID_INLINE__ UINT64 75 get_add64 (UINT64 sign_x, int exponent_x, UINT64 coefficient_x, 76 UINT64 sign_y, int exponent_y, UINT64 coefficient_y, 77 int rounding_mode, unsigned *fpsc) { 78 UINT128 CA, CT, CT_new; 79 UINT64 sign_a, sign_b, coefficient_a, coefficient_b, sign_s, sign_ab, 80 rem_a; 81 UINT64 saved_ca, saved_cb, C0_64, C64, remainder_h, T1, carry, tmp, 82 C64_new; 83 int_double tempx; 84 int exponent_a, exponent_b, diff_dec_expon; 85 int bin_expon_ca, extra_digits, amount, scale_k, scale_ca; 86 unsigned rmode, status; 87 88 // sort arguments by exponent 89 if (exponent_x <= exponent_y) { 90 sign_a = sign_y; 91 exponent_a = exponent_y; 92 coefficient_a = coefficient_y; 93 sign_b = sign_x; 94 exponent_b = exponent_x; 95 coefficient_b = coefficient_x; 96 } else { 97 sign_a = sign_x; 98 exponent_a = exponent_x; 99 coefficient_a = coefficient_x; 100 sign_b = sign_y; 101 exponent_b = exponent_y; 102 coefficient_b = coefficient_y; 103 } 104 105 // exponent difference 106 diff_dec_expon = exponent_a - exponent_b; 107 108 /* get binary coefficients of x and y */ 109 110 //--- get number of bits in the coefficients of x and y --- 111 112 tempx.d = (double) coefficient_a; 113 bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; 114 115 if (!coefficient_a) { 116 return get_BID64 (sign_b, exponent_b, coefficient_b, rounding_mode, 117 fpsc); 118 } 119 if (diff_dec_expon > MAX_FORMAT_DIGITS) { 120 // normalize a to a 16-digit coefficient 121 122 scale_ca = estimate_decimal_digits[bin_expon_ca]; 123 if (coefficient_a >= power10_table_128[scale_ca].w[0]) 124 scale_ca++; 125 126 scale_k = 16 - scale_ca; 127 128 coefficient_a *= power10_table_128[scale_k].w[0]; 129 130 diff_dec_expon -= scale_k; 131 exponent_a -= scale_k; 132 133 /* get binary coefficients of x and y */ 134 135 //--- get number of bits in the coefficients of x and y --- 136 tempx.d = (double) coefficient_a; 137 bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; 138 139 if (diff_dec_expon > MAX_FORMAT_DIGITS) { 140 #ifdef SET_STATUS_FLAGS 141 if (coefficient_b) { 142 __set_status_flags (fpsc, INEXACT_EXCEPTION); 143 } 144 #endif 145 146 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 147 #ifndef IEEE_ROUND_NEAREST 148 if (((rounding_mode) & 3) && coefficient_b) // not ROUNDING_TO_NEAREST 149 { 150 switch (rounding_mode) { 151 case ROUNDING_DOWN: 152 if (sign_b) { 153 coefficient_a -= ((((SINT64) sign_a) >> 63) | 1); 154 if (coefficient_a < 1000000000000000ull) { 155 exponent_a--; 156 coefficient_a = 9999999999999999ull; 157 } else if (coefficient_a >= 10000000000000000ull) { 158 exponent_a++; 159 coefficient_a = 1000000000000000ull; 160 } 161 } 162 break; 163 case ROUNDING_UP: 164 if (!sign_b) { 165 coefficient_a += ((((SINT64) sign_a) >> 63) | 1); 166 if (coefficient_a < 1000000000000000ull) { 167 exponent_a--; 168 coefficient_a = 9999999999999999ull; 169 } else if (coefficient_a >= 10000000000000000ull) { 170 exponent_a++; 171 coefficient_a = 1000000000000000ull; 172 } 173 } 174 break; 175 default: // RZ 176 if (sign_a != sign_b) { 177 coefficient_a--; 178 if (coefficient_a < 1000000000000000ull) { 179 exponent_a--; 180 coefficient_a = 9999999999999999ull; 181 } 182 } 183 break; 184 } 185 } else 186 #endif 187 #endif 188 // check special case here 189 if ((coefficient_a == 1000000000000000ull) 190 && (diff_dec_expon == MAX_FORMAT_DIGITS + 1) 191 && (sign_a ^ sign_b) 192 && (coefficient_b > 5000000000000000ull)) { 193 coefficient_a = 9999999999999999ull; 194 exponent_a--; 195 } 196 197 return get_BID64 (sign_a, exponent_a, coefficient_a, 198 rounding_mode, fpsc); 199 } 200 } 201 // test whether coefficient_a*10^(exponent_a-exponent_b) may exceed 2^62 202 if (bin_expon_ca + estimate_bin_expon[diff_dec_expon] < 60) { 203 // coefficient_a*10^(exponent_a-exponent_b)<2^63 204 205 // multiply by 10^(exponent_a-exponent_b) 206 coefficient_a *= power10_table_128[diff_dec_expon].w[0]; 207 208 // sign mask 209 sign_b = ((SINT64) sign_b) >> 63; 210 // apply sign to coeff. of b 211 coefficient_b = (coefficient_b + sign_b) ^ sign_b; 212 213 // apply sign to coefficient a 214 sign_a = ((SINT64) sign_a) >> 63; 215 coefficient_a = (coefficient_a + sign_a) ^ sign_a; 216 217 coefficient_a += coefficient_b; 218 // get sign 219 sign_s = ((SINT64) coefficient_a) >> 63; 220 coefficient_a = (coefficient_a + sign_s) ^ sign_s; 221 sign_s &= 0x8000000000000000ull; 222 223 // coefficient_a < 10^16 ? 224 if (coefficient_a < power10_table_128[MAX_FORMAT_DIGITS].w[0]) { 225 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 226 #ifndef IEEE_ROUND_NEAREST 227 if (rounding_mode == ROUNDING_DOWN && (!coefficient_a) 228 && sign_a != sign_b) 229 sign_s = 0x8000000000000000ull; 230 #endif 231 #endif 232 return get_BID64 (sign_s, exponent_b, coefficient_a, 233 rounding_mode, fpsc); 234 } 235 // otherwise rounding is necessary 236 237 // already know coefficient_a<10^19 238 // coefficient_a < 10^17 ? 239 if (coefficient_a < power10_table_128[17].w[0]) 240 extra_digits = 1; 241 else if (coefficient_a < power10_table_128[18].w[0]) 242 extra_digits = 2; 243 else 244 extra_digits = 3; 245 246 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 247 #ifndef IEEE_ROUND_NEAREST 248 rmode = rounding_mode; 249 if (sign_s && (unsigned) (rmode - 1) < 2) 250 rmode = 3 - rmode; 251 #else 252 rmode = 0; 253 #endif 254 #else 255 rmode = 0; 256 #endif 257 coefficient_a += round_const_table[rmode][extra_digits]; 258 259 // get P*(2^M[extra_digits])/10^extra_digits 260 __mul_64x64_to_128 (CT, coefficient_a, 261 reciprocals10_64[extra_digits]); 262 263 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 264 amount = short_recip_scale[extra_digits]; 265 C64 = CT.w[1] >> amount; 266 267 } else { 268 // coefficient_a*10^(exponent_a-exponent_b) is large 269 sign_s = sign_a; 270 271 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 272 #ifndef IEEE_ROUND_NEAREST 273 rmode = rounding_mode; 274 if (sign_s && (unsigned) (rmode - 1) < 2) 275 rmode = 3 - rmode; 276 #else 277 rmode = 0; 278 #endif 279 #else 280 rmode = 0; 281 #endif 282 283 // check whether we can take faster path 284 scale_ca = estimate_decimal_digits[bin_expon_ca]; 285 286 sign_ab = sign_a ^ sign_b; 287 sign_ab = ((SINT64) sign_ab) >> 63; 288 289 // T1 = 10^(16-diff_dec_expon) 290 T1 = power10_table_128[16 - diff_dec_expon].w[0]; 291 292 // get number of digits in coefficient_a 293 //P_ca = power10_table_128[scale_ca].w[0]; 294 //P_ca_m1 = power10_table_128[scale_ca-1].w[0]; 295 if (coefficient_a >= power10_table_128[scale_ca].w[0]) { 296 scale_ca++; 297 //P_ca_m1 = P_ca; 298 //P_ca = power10_table_128[scale_ca].w[0]; 299 } 300 301 scale_k = 16 - scale_ca; 302 303 // apply sign 304 //Ts = (T1 + sign_ab) ^ sign_ab; 305 306 // test range of ca 307 //X = coefficient_a + Ts - P_ca_m1; 308 309 // addition 310 saved_ca = coefficient_a - T1; 311 coefficient_a = 312 (SINT64) saved_ca *(SINT64) power10_table_128[scale_k].w[0]; 313 extra_digits = diff_dec_expon - scale_k; 314 315 // apply sign 316 saved_cb = (coefficient_b + sign_ab) ^ sign_ab; 317 // add 10^16 and rounding constant 318 coefficient_b = 319 saved_cb + 10000000000000000ull + 320 round_const_table[rmode][extra_digits]; 321 322 // get P*(2^M[extra_digits])/10^extra_digits 323 __mul_64x64_to_128 (CT, coefficient_b, 324 reciprocals10_64[extra_digits]); 325 326 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 327 amount = short_recip_scale[extra_digits]; 328 C0_64 = CT.w[1] >> amount; 329 330 // result coefficient 331 C64 = C0_64 + coefficient_a; 332 // filter out difficult (corner) cases 333 // the following test is equivalent to 334 // ( (initial_coefficient_a + Ts) < P_ca && 335 // (initial_coefficient_a + Ts) > P_ca_m1 ), 336 // which ensures the number of digits in coefficient_a does not change 337 // after adding (the appropriately scaled and rounded) coefficient_b 338 if ((UINT64) (C64 - 1000000000000000ull - 1) > 339 9000000000000000ull - 2) { 340 if (C64 >= 10000000000000000ull) { 341 // result has more than 16 digits 342 if (!scale_k) { 343 // must divide coeff_a by 10 344 saved_ca = saved_ca + T1; 345 __mul_64x64_to_128 (CA, saved_ca, 0x3333333333333334ull); 346 //reciprocals10_64[1]); 347 coefficient_a = CA.w[1] >> 1; 348 rem_a = 349 saved_ca - (coefficient_a << 3) - (coefficient_a << 1); 350 coefficient_a = coefficient_a - T1; 351 352 saved_cb += 353 /*90000000000000000 */ +rem_a * 354 power10_table_128[diff_dec_expon].w[0]; 355 } else 356 coefficient_a = 357 (SINT64) (saved_ca - T1 - 358 (T1 << 3)) * (SINT64) power10_table_128[scale_k - 359 1].w[0]; 360 361 extra_digits++; 362 coefficient_b = 363 saved_cb + 100000000000000000ull + 364 round_const_table[rmode][extra_digits]; 365 366 // get P*(2^M[extra_digits])/10^extra_digits 367 __mul_64x64_to_128 (CT, coefficient_b, 368 reciprocals10_64[extra_digits]); 369 370 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 371 amount = short_recip_scale[extra_digits]; 372 C0_64 = CT.w[1] >> amount; 373 374 // result coefficient 375 C64 = C0_64 + coefficient_a; 376 } else if (C64 <= 1000000000000000ull) { 377 // less than 16 digits in result 378 coefficient_a = 379 (SINT64) saved_ca *(SINT64) power10_table_128[scale_k + 380 1].w[0]; 381 //extra_digits --; 382 exponent_b--; 383 coefficient_b = 384 (saved_cb << 3) + (saved_cb << 1) + 100000000000000000ull + 385 round_const_table[rmode][extra_digits]; 386 387 // get P*(2^M[extra_digits])/10^extra_digits 388 __mul_64x64_to_128 (CT_new, coefficient_b, 389 reciprocals10_64[extra_digits]); 390 391 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 392 amount = short_recip_scale[extra_digits]; 393 C0_64 = CT_new.w[1] >> amount; 394 395 // result coefficient 396 C64_new = C0_64 + coefficient_a; 397 if (C64_new < 10000000000000000ull) { 398 C64 = C64_new; 399 #ifdef SET_STATUS_FLAGS 400 CT = CT_new; 401 #endif 402 } else 403 exponent_b++; 404 } 405 406 } 407 408 } 409 410 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 411 #ifndef IEEE_ROUND_NEAREST 412 if (rmode == 0) //ROUNDING_TO_NEAREST 413 #endif 414 if (C64 & 1) { 415 // check whether fractional part of initial_P/10^extra_digits 416 // is exactly .5 417 // this is the same as fractional part of 418 // (initial_P + 0.5*10^extra_digits)/10^extra_digits is exactly zero 419 420 // get remainder 421 remainder_h = CT.w[1] << (64 - amount); 422 423 // test whether fractional part is 0 424 if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits])) { 425 C64--; 426 } 427 } 428 #endif 429 430 #ifdef SET_STATUS_FLAGS 431 status = INEXACT_EXCEPTION; 432 433 // get remainder 434 remainder_h = CT.w[1] << (64 - amount); 435 436 switch (rmode) { 437 case ROUNDING_TO_NEAREST: 438 case ROUNDING_TIES_AWAY: 439 // test whether fractional part is 0 440 if ((remainder_h == 0x8000000000000000ull) 441 && (CT.w[0] < reciprocals10_64[extra_digits])) 442 status = EXACT_STATUS; 443 break; 444 case ROUNDING_DOWN: 445 case ROUNDING_TO_ZERO: 446 if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits])) 447 status = EXACT_STATUS; 448 break; 449 default: 450 // round up 451 __add_carry_out (tmp, carry, CT.w[0], 452 reciprocals10_64[extra_digits]); 453 if ((remainder_h >> (64 - amount)) + carry >= 454 (((UINT64) 1) << amount)) 455 status = EXACT_STATUS; 456 break; 457 } 458 __set_status_flags (fpsc, status); 459 460 #endif 461 462 return get_BID64 (sign_s, exponent_b + extra_digits, C64, 463 rounding_mode, fpsc); 464 } 465 466 467 /////////////////////////////////////////////////////////////////// 468 // round 128-bit coefficient and return result in BID64 format 469 // do not worry about midpoint cases 470 ////////////////////////////////////////////////////////////////// 471 static UINT64 472 __bid_simple_round64_sticky (UINT64 sign, int exponent, UINT128 P, 473 int extra_digits, int rounding_mode, 474 unsigned *fpsc) { 475 UINT128 Q_high, Q_low, C128; 476 UINT64 C64; 477 int amount, rmode; 478 479 rmode = rounding_mode; 480 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 481 #ifndef IEEE_ROUND_NEAREST 482 if (sign && (unsigned) (rmode - 1) < 2) 483 rmode = 3 - rmode; 484 #endif 485 #endif 486 __add_128_64 (P, P, round_const_table[rmode][extra_digits]); 487 488 // get P*(2^M[extra_digits])/10^extra_digits 489 __mul_128x128_full (Q_high, Q_low, P, 490 reciprocals10_128[extra_digits]); 491 492 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 493 amount = recip_scale[extra_digits]; 494 __shr_128 (C128, Q_high, amount); 495 496 C64 = __low_64 (C128); 497 498 #ifdef SET_STATUS_FLAGS 499 500 __set_status_flags (fpsc, INEXACT_EXCEPTION); 501 502 #endif 503 504 return get_BID64 (sign, exponent, C64, rounding_mode, fpsc); 505 } 506 507 /////////////////////////////////////////////////////////////////// 508 // round 128-bit coefficient and return result in BID64 format 509 /////////////////////////////////////////////////////////////////// 510 static UINT64 511 __bid_full_round64 (UINT64 sign, int exponent, UINT128 P, 512 int extra_digits, int rounding_mode, 513 unsigned *fpsc) { 514 UINT128 Q_high, Q_low, C128, Stemp, PU; 515 UINT64 remainder_h, C64, carry, CY; 516 int amount, amount2, rmode, status = 0; 517 518 if (exponent < 0) { 519 if (exponent >= -16 && (extra_digits + exponent < 0)) { 520 extra_digits = -exponent; 521 #ifdef SET_STATUS_FLAGS 522 if (extra_digits > 0) { 523 rmode = rounding_mode; 524 if (sign && (unsigned) (rmode - 1) < 2) 525 rmode = 3 - rmode; 526 __add_128_128 (PU, P, 527 round_const_table_128[rmode][extra_digits]); 528 if (__unsigned_compare_gt_128 529 (power10_table_128[extra_digits + 15], PU)) 530 status = UNDERFLOW_EXCEPTION; 531 } 532 #endif 533 } 534 } 535 536 if (extra_digits > 0) { 537 exponent += extra_digits; 538 rmode = rounding_mode; 539 if (sign && (unsigned) (rmode - 1) < 2) 540 rmode = 3 - rmode; 541 __add_128_128 (P, P, round_const_table_128[rmode][extra_digits]); 542 543 // get P*(2^M[extra_digits])/10^extra_digits 544 __mul_128x128_full (Q_high, Q_low, P, 545 reciprocals10_128[extra_digits]); 546 547 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 548 amount = recip_scale[extra_digits]; 549 __shr_128_long (C128, Q_high, amount); 550 551 C64 = __low_64 (C128); 552 553 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 554 #ifndef IEEE_ROUND_NEAREST 555 if (rmode == 0) //ROUNDING_TO_NEAREST 556 #endif 557 if (C64 & 1) { 558 // check whether fractional part of initial_P/10^extra_digits 559 // is exactly .5 560 561 // get remainder 562 amount2 = 64 - amount; 563 remainder_h = 0; 564 remainder_h--; 565 remainder_h >>= amount2; 566 remainder_h = remainder_h & Q_high.w[0]; 567 568 if (!remainder_h 569 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1] 570 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1] 571 && Q_low.w[0] < 572 reciprocals10_128[extra_digits].w[0]))) { 573 C64--; 574 } 575 } 576 #endif 577 578 #ifdef SET_STATUS_FLAGS 579 status |= INEXACT_EXCEPTION; 580 581 // get remainder 582 remainder_h = Q_high.w[0] << (64 - amount); 583 584 switch (rmode) { 585 case ROUNDING_TO_NEAREST: 586 case ROUNDING_TIES_AWAY: 587 // test whether fractional part is 0 588 if (remainder_h == 0x8000000000000000ull 589 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1] 590 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1] 591 && Q_low.w[0] < 592 reciprocals10_128[extra_digits].w[0]))) 593 status = EXACT_STATUS; 594 break; 595 case ROUNDING_DOWN: 596 case ROUNDING_TO_ZERO: 597 if (!remainder_h 598 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1] 599 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1] 600 && Q_low.w[0] < 601 reciprocals10_128[extra_digits].w[0]))) 602 status = EXACT_STATUS; 603 break; 604 default: 605 // round up 606 __add_carry_out (Stemp.w[0], CY, Q_low.w[0], 607 reciprocals10_128[extra_digits].w[0]); 608 __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1], 609 reciprocals10_128[extra_digits].w[1], CY); 610 if ((remainder_h >> (64 - amount)) + carry >= 611 (((UINT64) 1) << amount)) 612 status = EXACT_STATUS; 613 } 614 615 __set_status_flags (fpsc, status); 616 617 #endif 618 } else { 619 C64 = P.w[0]; 620 if (!C64) { 621 sign = 0; 622 if (rounding_mode == ROUNDING_DOWN) 623 sign = 0x8000000000000000ull; 624 } 625 } 626 return get_BID64 (sign, exponent, C64, rounding_mode, fpsc); 627 } 628 629 ///////////////////////////////////////////////////////////////////////////////// 630 // round 192-bit coefficient (P, remainder_P) and return result in BID64 format 631 // the lowest 64 bits (remainder_P) are used for midpoint checking only 632 //////////////////////////////////////////////////////////////////////////////// 633 static UINT64 634 __bid_full_round64_remainder (UINT64 sign, int exponent, UINT128 P, 635 int extra_digits, UINT64 remainder_P, 636 int rounding_mode, unsigned *fpsc, 637 unsigned uf_status) { 638 UINT128 Q_high, Q_low, C128, Stemp; 639 UINT64 remainder_h, C64, carry, CY; 640 int amount, amount2, rmode, status = uf_status; 641 642 rmode = rounding_mode; 643 if (sign && (unsigned) (rmode - 1) < 2) 644 rmode = 3 - rmode; 645 if (rmode == ROUNDING_UP && remainder_P) { 646 P.w[0]++; 647 if (!P.w[0]) 648 P.w[1]++; 649 } 650 651 if (extra_digits) { 652 __add_128_64 (P, P, round_const_table[rmode][extra_digits]); 653 654 // get P*(2^M[extra_digits])/10^extra_digits 655 __mul_128x128_full (Q_high, Q_low, P, 656 reciprocals10_128[extra_digits]); 657 658 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 659 amount = recip_scale[extra_digits]; 660 __shr_128 (C128, Q_high, amount); 661 662 C64 = __low_64 (C128); 663 664 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 665 #ifndef IEEE_ROUND_NEAREST 666 if (rmode == 0) //ROUNDING_TO_NEAREST 667 #endif 668 if (!remainder_P && (C64 & 1)) { 669 // check whether fractional part of initial_P/10^extra_digits 670 // is exactly .5 671 672 // get remainder 673 amount2 = 64 - amount; 674 remainder_h = 0; 675 remainder_h--; 676 remainder_h >>= amount2; 677 remainder_h = remainder_h & Q_high.w[0]; 678 679 if (!remainder_h 680 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1] 681 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1] 682 && Q_low.w[0] < 683 reciprocals10_128[extra_digits].w[0]))) { 684 C64--; 685 } 686 } 687 #endif 688 689 #ifdef SET_STATUS_FLAGS 690 status |= INEXACT_EXCEPTION; 691 692 if (!remainder_P) { 693 // get remainder 694 remainder_h = Q_high.w[0] << (64 - amount); 695 696 switch (rmode) { 697 case ROUNDING_TO_NEAREST: 698 case ROUNDING_TIES_AWAY: 699 // test whether fractional part is 0 700 if (remainder_h == 0x8000000000000000ull 701 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1] 702 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1] 703 && Q_low.w[0] < 704 reciprocals10_128[extra_digits].w[0]))) 705 status = EXACT_STATUS; 706 break; 707 case ROUNDING_DOWN: 708 case ROUNDING_TO_ZERO: 709 if (!remainder_h 710 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1] 711 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1] 712 && Q_low.w[0] < 713 reciprocals10_128[extra_digits].w[0]))) 714 status = EXACT_STATUS; 715 break; 716 default: 717 // round up 718 __add_carry_out (Stemp.w[0], CY, Q_low.w[0], 719 reciprocals10_128[extra_digits].w[0]); 720 __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1], 721 reciprocals10_128[extra_digits].w[1], CY); 722 if ((remainder_h >> (64 - amount)) + carry >= 723 (((UINT64) 1) << amount)) 724 status = EXACT_STATUS; 725 } 726 } 727 __set_status_flags (fpsc, status); 728 729 #endif 730 } else { 731 C64 = P.w[0]; 732 #ifdef SET_STATUS_FLAGS 733 if (remainder_P) { 734 __set_status_flags (fpsc, uf_status | INEXACT_EXCEPTION); 735 } 736 #endif 737 } 738 739 return get_BID64 (sign, exponent + extra_digits, C64, rounding_mode, 740 fpsc); 741 } 742 743 744 /////////////////////////////////////////////////////////////////// 745 // get P/10^extra_digits 746 // result fits in 64 bits 747 /////////////////////////////////////////////////////////////////// 748 __BID_INLINE__ UINT64 749 __truncate (UINT128 P, int extra_digits) 750 // extra_digits <= 16 751 { 752 UINT128 Q_high, Q_low, C128; 753 UINT64 C64; 754 int amount; 755 756 // get P*(2^M[extra_digits])/10^extra_digits 757 __mul_128x128_full (Q_high, Q_low, P, 758 reciprocals10_128[extra_digits]); 759 760 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 761 amount = recip_scale[extra_digits]; 762 __shr_128 (C128, Q_high, amount); 763 764 C64 = __low_64 (C128); 765 766 return C64; 767 } 768 769 770 /////////////////////////////////////////////////////////////////// 771 // return number of decimal digits in 128-bit value X 772 /////////////////////////////////////////////////////////////////// 773 __BID_INLINE__ int 774 __get_dec_digits64 (UINT128 X) { 775 int_double tempx; 776 int digits_x, bin_expon_cx; 777 778 if (!X.w[1]) { 779 //--- get number of bits in the coefficients of x and y --- 780 tempx.d = (double) X.w[0]; 781 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; 782 // get number of decimal digits in the coeff_x 783 digits_x = estimate_decimal_digits[bin_expon_cx]; 784 if (X.w[0] >= power10_table_128[digits_x].w[0]) 785 digits_x++; 786 return digits_x; 787 } 788 tempx.d = (double) X.w[1]; 789 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; 790 // get number of decimal digits in the coeff_x 791 digits_x = estimate_decimal_digits[bin_expon_cx + 64]; 792 if (__unsigned_compare_ge_128 (X, power10_table_128[digits_x])) 793 digits_x++; 794 795 return digits_x; 796 } 797 798 799 //////////////////////////////////////////////////////////////////////////////// 800 // 801 // add 64-bit coefficient to 128-bit coefficient, return result in BID64 format 802 // 803 //////////////////////////////////////////////////////////////////////////////// 804 __BID_INLINE__ UINT64 805 get_add128 (UINT64 sign_x, int exponent_x, UINT64 coefficient_x, 806 UINT64 sign_y, int final_exponent_y, UINT128 CY, 807 int extra_digits, int rounding_mode, unsigned *fpsc) { 808 UINT128 CY_L, CX, FS, F, CT, ST, T2; 809 UINT64 CYh, CY0L, T, S, coefficient_y, remainder_y; 810 SINT64 D = 0; 811 int_double tempx; 812 int diff_dec_expon, extra_digits2, exponent_y, status; 813 int extra_dx, diff_dec2, bin_expon_cx, digits_x, rmode; 814 815 // CY has more than 16 decimal digits 816 817 exponent_y = final_exponent_y - extra_digits; 818 819 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY 820 rounding_mode = 0; 821 #endif 822 #ifdef IEEE_ROUND_NEAREST 823 rounding_mode = 0; 824 #endif 825 826 if (exponent_x > exponent_y) { 827 // normalize x 828 //--- get number of bits in the coefficients of x and y --- 829 tempx.d = (double) coefficient_x; 830 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; 831 // get number of decimal digits in the coeff_x 832 digits_x = estimate_decimal_digits[bin_expon_cx]; 833 if (coefficient_x >= power10_table_128[digits_x].w[0]) 834 digits_x++; 835 836 extra_dx = 16 - digits_x; 837 coefficient_x *= power10_table_128[extra_dx].w[0]; 838 if ((sign_x ^ sign_y) && (coefficient_x == 1000000000000000ull)) { 839 extra_dx++; 840 coefficient_x = 10000000000000000ull; 841 } 842 exponent_x -= extra_dx; 843 844 if (exponent_x > exponent_y) { 845 846 // exponent_x > exponent_y 847 diff_dec_expon = exponent_x - exponent_y; 848 849 if (exponent_x <= final_exponent_y + 1) { 850 __mul_64x64_to_128 (CX, coefficient_x, 851 power10_table_128[diff_dec_expon].w[0]); 852 853 if (sign_x == sign_y) { 854 __add_128_128 (CT, CY, CX); 855 if ((exponent_x > 856 final_exponent_y) /*&& (final_exponent_y>0) */ ) 857 extra_digits++; 858 if (__unsigned_compare_ge_128 859 (CT, power10_table_128[16 + extra_digits])) 860 extra_digits++; 861 } else { 862 __sub_128_128 (CT, CY, CX); 863 if (((SINT64) CT.w[1]) < 0) { 864 CT.w[0] = 0 - CT.w[0]; 865 CT.w[1] = 0 - CT.w[1]; 866 if (CT.w[0]) 867 CT.w[1]--; 868 sign_y = sign_x; 869 } else if (!(CT.w[1] | CT.w[0])) { 870 sign_y = 871 (rounding_mode != 872 ROUNDING_DOWN) ? 0 : 0x8000000000000000ull; 873 } 874 if ((exponent_x + 1 >= 875 final_exponent_y) /*&& (final_exponent_y>=0) */ ) { 876 extra_digits = __get_dec_digits64 (CT) - 16; 877 if (extra_digits <= 0) { 878 if (!CT.w[0] && rounding_mode == ROUNDING_DOWN) 879 sign_y = 0x8000000000000000ull; 880 return get_BID64 (sign_y, exponent_y, CT.w[0], 881 rounding_mode, fpsc); 882 } 883 } else 884 if (__unsigned_compare_gt_128 885 (power10_table_128[15 + extra_digits], CT)) 886 extra_digits--; 887 } 888 889 return __bid_full_round64 (sign_y, exponent_y, CT, extra_digits, 890 rounding_mode, fpsc); 891 } 892 // diff_dec2+extra_digits is the number of digits to eliminate from 893 // argument CY 894 diff_dec2 = exponent_x - final_exponent_y; 895 896 if (diff_dec2 >= 17) { 897 #ifndef IEEE_ROUND_NEAREST 898 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 899 if ((rounding_mode) & 3) { 900 switch (rounding_mode) { 901 case ROUNDING_UP: 902 if (!sign_y) { 903 D = ((SINT64) (sign_x ^ sign_y)) >> 63; 904 D = D + D + 1; 905 coefficient_x += D; 906 } 907 break; 908 case ROUNDING_DOWN: 909 if (sign_y) { 910 D = ((SINT64) (sign_x ^ sign_y)) >> 63; 911 D = D + D + 1; 912 coefficient_x += D; 913 } 914 break; 915 case ROUNDING_TO_ZERO: 916 if (sign_y != sign_x) { 917 D = 0 - 1; 918 coefficient_x += D; 919 } 920 break; 921 } 922 if (coefficient_x < 1000000000000000ull) { 923 coefficient_x -= D; 924 coefficient_x = 925 D + (coefficient_x << 1) + (coefficient_x << 3); 926 exponent_x--; 927 } 928 } 929 #endif 930 #endif 931 #ifdef SET_STATUS_FLAGS 932 if (CY.w[1] | CY.w[0]) 933 __set_status_flags (fpsc, INEXACT_EXCEPTION); 934 #endif 935 return get_BID64 (sign_x, exponent_x, coefficient_x, 936 rounding_mode, fpsc); 937 } 938 // here exponent_x <= 16+final_exponent_y 939 940 // truncate CY to 16 dec. digits 941 CYh = __truncate (CY, extra_digits); 942 943 // get remainder 944 T = power10_table_128[extra_digits].w[0]; 945 __mul_64x64_to_64 (CY0L, CYh, T); 946 947 remainder_y = CY.w[0] - CY0L; 948 949 // align coeff_x, CYh 950 __mul_64x64_to_128 (CX, coefficient_x, 951 power10_table_128[diff_dec2].w[0]); 952 953 if (sign_x == sign_y) { 954 __add_128_64 (CT, CX, CYh); 955 if (__unsigned_compare_ge_128 956 (CT, power10_table_128[16 + diff_dec2])) 957 diff_dec2++; 958 } else { 959 if (remainder_y) 960 CYh++; 961 __sub_128_64 (CT, CX, CYh); 962 if (__unsigned_compare_gt_128 963 (power10_table_128[15 + diff_dec2], CT)) 964 diff_dec2--; 965 } 966 967 return __bid_full_round64_remainder (sign_x, final_exponent_y, CT, 968 diff_dec2, remainder_y, 969 rounding_mode, fpsc, 0); 970 } 971 } 972 // Here (exponent_x <= exponent_y) 973 { 974 diff_dec_expon = exponent_y - exponent_x; 975 976 if (diff_dec_expon > MAX_FORMAT_DIGITS) { 977 rmode = rounding_mode; 978 979 if ((sign_x ^ sign_y)) { 980 if (!CY.w[0]) 981 CY.w[1]--; 982 CY.w[0]--; 983 if (__unsigned_compare_gt_128 984 (power10_table_128[15 + extra_digits], CY)) { 985 if (rmode & 3) { 986 extra_digits--; 987 final_exponent_y--; 988 } else { 989 CY.w[0] = 1000000000000000ull; 990 CY.w[1] = 0; 991 extra_digits = 0; 992 } 993 } 994 } 995 __scale128_10 (CY, CY); 996 extra_digits++; 997 CY.w[0] |= 1; 998 999 return __bid_simple_round64_sticky (sign_y, final_exponent_y, CY, 1000 extra_digits, rmode, fpsc); 1001 } 1002 // apply sign to coeff_x 1003 sign_x ^= sign_y; 1004 sign_x = ((SINT64) sign_x) >> 63; 1005 CX.w[0] = (coefficient_x + sign_x) ^ sign_x; 1006 CX.w[1] = sign_x; 1007 1008 // check whether CY (rounded to 16 digits) and CX have 1009 // any digits in the same position 1010 diff_dec2 = final_exponent_y - exponent_x; 1011 1012 if (diff_dec2 <= 17) { 1013 // align CY to 10^ex 1014 S = power10_table_128[diff_dec_expon].w[0]; 1015 __mul_64x128_short (CY_L, S, CY); 1016 1017 __add_128_128 (ST, CY_L, CX); 1018 extra_digits2 = __get_dec_digits64 (ST) - 16; 1019 return __bid_full_round64 (sign_y, exponent_x, ST, extra_digits2, 1020 rounding_mode, fpsc); 1021 } 1022 // truncate CY to 16 dec. digits 1023 CYh = __truncate (CY, extra_digits); 1024 1025 // get remainder 1026 T = power10_table_128[extra_digits].w[0]; 1027 __mul_64x64_to_64 (CY0L, CYh, T); 1028 1029 coefficient_y = CY.w[0] - CY0L; 1030 // add rounding constant 1031 rmode = rounding_mode; 1032 if (sign_y && (unsigned) (rmode - 1) < 2) 1033 rmode = 3 - rmode; 1034 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 1035 #ifndef IEEE_ROUND_NEAREST 1036 if (!(rmode & 3)) //ROUNDING_TO_NEAREST 1037 #endif 1038 #endif 1039 { 1040 coefficient_y += round_const_table[rmode][extra_digits]; 1041 } 1042 // align coefficient_y, coefficient_x 1043 S = power10_table_128[diff_dec_expon].w[0]; 1044 __mul_64x64_to_128 (F, coefficient_y, S); 1045 1046 // fraction 1047 __add_128_128 (FS, F, CX); 1048 1049 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 1050 #ifndef IEEE_ROUND_NEAREST 1051 if (rmode == 0) //ROUNDING_TO_NEAREST 1052 #endif 1053 { 1054 // rounding code, here RN_EVEN 1055 // 10^(extra_digits+diff_dec_expon) 1056 T2 = power10_table_128[diff_dec_expon + extra_digits]; 1057 if (__unsigned_compare_gt_128 (FS, T2) 1058 || ((CYh & 1) && __test_equal_128 (FS, T2))) { 1059 CYh++; 1060 __sub_128_128 (FS, FS, T2); 1061 } 1062 } 1063 #endif 1064 #ifndef IEEE_ROUND_NEAREST 1065 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 1066 if (rmode == 4) //ROUNDING_TO_NEAREST 1067 #endif 1068 { 1069 // rounding code, here RN_AWAY 1070 // 10^(extra_digits+diff_dec_expon) 1071 T2 = power10_table_128[diff_dec_expon + extra_digits]; 1072 if (__unsigned_compare_ge_128 (FS, T2)) { 1073 CYh++; 1074 __sub_128_128 (FS, FS, T2); 1075 } 1076 } 1077 #endif 1078 #ifndef IEEE_ROUND_NEAREST 1079 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 1080 switch (rmode) { 1081 case ROUNDING_DOWN: 1082 case ROUNDING_TO_ZERO: 1083 if ((SINT64) FS.w[1] < 0) { 1084 CYh--; 1085 if (CYh < 1000000000000000ull) { 1086 CYh = 9999999999999999ull; 1087 final_exponent_y--; 1088 } 1089 } else { 1090 T2 = power10_table_128[diff_dec_expon + extra_digits]; 1091 if (__unsigned_compare_ge_128 (FS, T2)) { 1092 CYh++; 1093 __sub_128_128 (FS, FS, T2); 1094 } 1095 } 1096 break; 1097 case ROUNDING_UP: 1098 if ((SINT64) FS.w[1] < 0) 1099 break; 1100 T2 = power10_table_128[diff_dec_expon + extra_digits]; 1101 if (__unsigned_compare_gt_128 (FS, T2)) { 1102 CYh += 2; 1103 __sub_128_128 (FS, FS, T2); 1104 } else if ((FS.w[1] == T2.w[1]) && (FS.w[0] == T2.w[0])) { 1105 CYh++; 1106 FS.w[1] = FS.w[0] = 0; 1107 } else if (FS.w[1] | FS.w[0]) 1108 CYh++; 1109 break; 1110 } 1111 #endif 1112 #endif 1113 1114 #ifdef SET_STATUS_FLAGS 1115 status = INEXACT_EXCEPTION; 1116 #ifndef IEEE_ROUND_NEAREST 1117 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 1118 if (!(rmode & 3)) 1119 #endif 1120 #endif 1121 { 1122 // RN modes 1123 if ((FS.w[1] == 1124 round_const_table_128[0][diff_dec_expon + extra_digits].w[1]) 1125 && (FS.w[0] == 1126 round_const_table_128[0][diff_dec_expon + 1127 extra_digits].w[0])) 1128 status = EXACT_STATUS; 1129 } 1130 #ifndef IEEE_ROUND_NEAREST 1131 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 1132 else if (!FS.w[1] && !FS.w[0]) 1133 status = EXACT_STATUS; 1134 #endif 1135 #endif 1136 1137 __set_status_flags (fpsc, status); 1138 #endif 1139 1140 return get_BID64 (sign_y, final_exponent_y, CYh, rounding_mode, 1141 fpsc); 1142 } 1143 1144 } 1145 1146 ////////////////////////////////////////////////////////////////////////// 1147 // 1148 // If coefficient_z is less than 16 digits long, normalize to 16 digits 1149 // 1150 ///////////////////////////////////////////////////////////////////////// 1151 static UINT64 1152 BID_normalize (UINT64 sign_z, int exponent_z, 1153 UINT64 coefficient_z, UINT64 round_dir, int round_flag, 1154 int rounding_mode, unsigned *fpsc) { 1155 SINT64 D; 1156 int_double tempx; 1157 int digits_z, bin_expon, scale, rmode; 1158 1159 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 1160 #ifndef IEEE_ROUND_NEAREST 1161 rmode = rounding_mode; 1162 if (sign_z && (unsigned) (rmode - 1) < 2) 1163 rmode = 3 - rmode; 1164 #else 1165 if (coefficient_z >= power10_table_128[15].w[0]) 1166 return z; 1167 #endif 1168 #endif 1169 1170 //--- get number of bits in the coefficients of x and y --- 1171 tempx.d = (double) coefficient_z; 1172 bin_expon = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; 1173 // get number of decimal digits in the coeff_x 1174 digits_z = estimate_decimal_digits[bin_expon]; 1175 if (coefficient_z >= power10_table_128[digits_z].w[0]) 1176 digits_z++; 1177 1178 scale = 16 - digits_z; 1179 exponent_z -= scale; 1180 if (exponent_z < 0) { 1181 scale += exponent_z; 1182 exponent_z = 0; 1183 } 1184 coefficient_z *= power10_table_128[scale].w[0]; 1185 1186 #ifdef SET_STATUS_FLAGS 1187 if (round_flag) { 1188 __set_status_flags (fpsc, INEXACT_EXCEPTION); 1189 if (coefficient_z < 1000000000000000ull) 1190 __set_status_flags (fpsc, UNDERFLOW_EXCEPTION); 1191 else if ((coefficient_z == 1000000000000000ull) && !exponent_z 1192 && ((SINT64) (round_dir ^ sign_z) < 0) && round_flag 1193 && (rmode == ROUNDING_DOWN || rmode == ROUNDING_TO_ZERO)) 1194 __set_status_flags (fpsc, UNDERFLOW_EXCEPTION); 1195 } 1196 #endif 1197 1198 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 1199 #ifndef IEEE_ROUND_NEAREST 1200 if (round_flag && (rmode & 3)) { 1201 D = round_dir ^ sign_z; 1202 1203 if (rmode == ROUNDING_UP) { 1204 if (D >= 0) 1205 coefficient_z++; 1206 } else { 1207 if (D < 0) 1208 coefficient_z--; 1209 if (coefficient_z < 1000000000000000ull && exponent_z) { 1210 coefficient_z = 9999999999999999ull; 1211 exponent_z--; 1212 } 1213 } 1214 } 1215 #endif 1216 #endif 1217 1218 return get_BID64 (sign_z, exponent_z, coefficient_z, rounding_mode, 1219 fpsc); 1220 } 1221 1222 1223 ////////////////////////////////////////////////////////////////////////// 1224 // 1225 // 0*10^ey + cz*10^ez, ey<ez 1226 // 1227 ////////////////////////////////////////////////////////////////////////// 1228 1229 __BID_INLINE__ UINT64 1230 add_zero64 (int exponent_y, UINT64 sign_z, int exponent_z, 1231 UINT64 coefficient_z, unsigned *prounding_mode, 1232 unsigned *fpsc) { 1233 int_double tempx; 1234 int bin_expon, scale_k, scale_cz; 1235 int diff_expon; 1236 1237 diff_expon = exponent_z - exponent_y; 1238 1239 tempx.d = (double) coefficient_z; 1240 bin_expon = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; 1241 scale_cz = estimate_decimal_digits[bin_expon]; 1242 if (coefficient_z >= power10_table_128[scale_cz].w[0]) 1243 scale_cz++; 1244 1245 scale_k = 16 - scale_cz; 1246 if (diff_expon < scale_k) 1247 scale_k = diff_expon; 1248 coefficient_z *= power10_table_128[scale_k].w[0]; 1249 1250 return get_BID64 (sign_z, exponent_z - scale_k, coefficient_z, 1251 *prounding_mode, fpsc); 1252 } 1253 #endif 1254