/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */

/*
 * Turn a NaN operand A into the NaN to be returned: raise Invalid for
 * a signaling NaN, then either substitute the default NaN (when
 * default_nan_mode is set) or silence the sNaN in place.
 * A must already be classified as qNaN or sNaN.
 */
static void partsN(return_nan)(FloatPartsN *a, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
        float_raise(float_flag_invalid, s);
        if (s->default_nan_mode) {
            parts_default_nan(a, s);
        } else {
            parts_silence_nan(a, s);
        }
        break;
    case float_class_qnan:
        if (s->default_nan_mode) {
            parts_default_nan(a, s);
        }
        break;
    default:
        /* Caller guarantees a NaN input. */
        g_assert_not_reached();
    }
}

/*
 * Select which NaN to propagate for a two-operand operation.
 * Raises Invalid if either operand is a signaling NaN.  The choice
 * between A and B is delegated to the target-specific pickNaN(),
 * which is also given the result of a payload-magnitude comparison.
 * The returned operand has any sNaN silenced (or is the default NaN).
 */
static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s)
{
    if (is_snan(a->cls) || is_snan(b->cls)) {
        float_raise(float_flag_invalid, s);
    }

    if (s->default_nan_mode) {
        parts_default_nan(a, s);
    } else {
        /*
         * Compare payload magnitudes; on a tie, a positive sign
         * loses to a negative sign (cmp becomes 1 when a is positive
         * only if a->sign < b->sign, i.e. never -- see pickNaN use).
         */
        int cmp = frac_cmp(a, b);
        if (cmp == 0) {
            cmp = a->sign < b->sign;
        }

        if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
            a = b;
        }
        if (is_snan(a->cls)) {
            parts_silence_nan(a, s);
        }
    }
    return a;
}

/*
 * Select which NaN to propagate for fused multiply-add.
 * AB_MASK/ABC_MASK are the combined float_cmask bits of (a,b) and
 * (a,b,c) respectively; the infzero case (Inf * 0 + c) is handed to
 * the target-specific pickNaNMulAdd, which may raise Invalid itself.
 * Returns the chosen operand, silenced if it was a signaling NaN.
 */
static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
                                            FloatPartsN *c, float_status *s,
                                            int ab_mask, int abc_mask)
{
    int which;

    if (unlikely(abc_mask & float_cmask_snan)) {
        float_raise(float_flag_invalid, s);
    }

    which = pickNaNMulAdd(a->cls, b->cls, c->cls,
                          ab_mask == float_cmask_infzero, s);

    if (s->default_nan_mode || which == 3) {
        /*
         * Note that this check is after pickNaNMulAdd so that function
         * has an opportunity to set the Invalid flag for infzero.
         */
        parts_default_nan(a, s);
        return a;
    }

    switch (which) {
    case 0:
        break;
    case 1:
        a = b;
        break;
    case 2:
        a = c;
        break;
    default:
        g_assert_not_reached();
    }
    if (is_snan(a->cls)) {
        parts_silence_nan(a, s);
    }
    return a;
}

/*
 * Canonicalize the FloatParts structure.  Determine the class,
 * unbias the exponent, and normalize the fraction.
 */
static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
                                 const FloatFmt *fmt)
{
    if (unlikely(p->exp == 0)) {
        /* Zero or subnormal encoding. */
        if (likely(frac_eqz(p))) {
            p->cls = float_class_zero;
        } else if (status->flush_inputs_to_zero) {
            float_raise(float_flag_input_denormal, status);
            p->cls = float_class_zero;
            frac_clear(p);
        } else {
            /* Normalize the subnormal: shift the msb up to the implicit bit. */
            int shift = frac_normalize(p);
            p->cls = float_class_normal;
            p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
        }
    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
        /* Normal number (ARM AltHP uses the top exponent as normal too). */
        p->cls = float_class_normal;
        p->exp -= fmt->exp_bias;
        frac_shl(p, fmt->frac_shift);
        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
    } else if (likely(frac_eqz(p))) {
        p->cls = float_class_inf;
    } else {
        /* Top exponent with non-zero fraction: NaN. */
        frac_shl(p, fmt->frac_shift);
        p->cls = (parts_is_snan_frac(p->frac_hi, status)
                  ? float_class_snan : float_class_qnan);
    }
}

/*
 * Round and uncanonicalize a floating-point number by parts.  There
 * are FRAC_SHIFT bits that may require rounding at the bottom of the
 * fraction; these bits will be removed.  The exponent will be biased
 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
 */
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
                                   const FloatFmt *fmt)
{
    const int exp_max = fmt->exp_max;
    const int frac_shift = fmt->frac_shift;
    const uint64_t round_mask = fmt->round_mask;
    const uint64_t frac_lsb = round_mask + 1;
    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
    const uint64_t roundeven_mask = round_mask | frac_lsb;
    uint64_t inc;
    bool overflow_norm = false;
    int exp, flags = 0;

    /*
     * Pre-compute the rounding increment and whether overflow should
     * produce the maximum normal (overflow_norm) rather than infinity.
     */
    switch (s->float_rounding_mode) {
    case float_round_nearest_even:
        inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        overflow_norm = true;
        inc = 0;
        break;
    case float_round_up:
        inc = p->sign ? 0 : round_mask;
        overflow_norm = p->sign;
        break;
    case float_round_down:
        inc = p->sign ? round_mask : 0;
        overflow_norm = !p->sign;
        break;
    case float_round_to_odd:
        overflow_norm = true;
        /* fall through */
    case float_round_to_odd_inf:
        inc = p->frac_lo & frac_lsb ? 0 : round_mask;
        break;
    default:
        g_assert_not_reached();
    }

    exp = p->exp + fmt->exp_bias;
    if (likely(exp > 0)) {
        /* Result is (at least before rounding) a normal number. */
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                /* Rounding carried out of the fraction: renormalize. */
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
        }
        frac_shr(p, frac_shift);

        if (fmt->arm_althp) {
            /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
            if (unlikely(exp > exp_max)) {
                /*
                 * Overflow.  Return the maximum normal.  Note the flags
                 * assignment deliberately replaces inexact with invalid,
                 * per the AltHP behaviour.
                 */
                flags = float_flag_invalid;
                exp = exp_max;
                frac_allones(p);
            }
        } else if (unlikely(exp >= exp_max)) {
            flags |= float_flag_overflow | float_flag_inexact;
            if (overflow_norm) {
                /* Round toward the representable side: max normal. */
                exp = exp_max - 1;
                frac_allones(p);
            } else {
                p->cls = float_class_inf;
                exp = exp_max;
                frac_clear(p);
            }
        }
    } else if (s->flush_to_zero) {
        flags |= float_flag_output_denormal;
        p->cls = float_class_zero;
        exp = 0;
        frac_clear(p);
    } else {
        /* Subnormal result path. */
        bool is_tiny = s->tininess_before_rounding || exp < 0;

        if (!is_tiny) {
            /* Tininess after rounding: tiny iff rounding does not carry. */
            FloatPartsN discard;
            is_tiny = !frac_addi(&discard, p, inc);
        }

        /* Denormalize, jamming shifted-out bits into the sticky bit. */
        frac_shrjam(p, 1 - exp);

        if (p->frac_lo & round_mask) {
            /* Need to recompute round-to-even/round-to-odd. */
            switch (s->float_rounding_mode) {
            case float_round_nearest_even:
                inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                       ? frac_lsbm1 : 0);
                break;
            case float_round_to_odd:
            case float_round_to_odd_inf:
                inc = p->frac_lo & frac_lsb ? 0 : round_mask;
                break;
            default:
                break;
            }
            flags |= float_flag_inexact;
            frac_addi(p, p, inc);
        }

        /* Rounding may have produced the smallest normal: exp becomes 1. */
        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
        frac_shr(p, frac_shift);

        if (is_tiny && (flags & float_flag_inexact)) {
            flags |= float_flag_underflow;
        }
        if (exp == 0 && frac_eqz(p)) {
            p->cls = float_class_zero;
        }
    }
    p->exp = exp;
    float_raise(flags, s);
}

/*
 * Round and re-encode P for format FMT.  Normal numbers go through
 * parts_uncanon_normal; zero, infinity and NaN get their canonical
 * encodings directly.
 */
static void partsN(uncanon)(FloatPartsN *p, float_status *s,
                            const FloatFmt *fmt)
{
    if (likely(p->cls == float_class_normal)) {
        parts_uncanon_normal(p, s, fmt);
    } else {
        switch (p->cls) {
        case float_class_zero:
            p->exp = 0;
            frac_clear(p);
            return;
        case float_class_inf:
            g_assert(!fmt->arm_althp);
            p->exp = fmt->exp_max;
            frac_clear(p);
            return;
        case float_class_qnan:
        case float_class_snan:
            g_assert(!fmt->arm_althp);
            p->exp = fmt->exp_max;
            /* Shift the payload down into encoded position. */
            frac_shr(p, fmt->frac_shift);
            return;
        default:
            break;
        }
        g_assert_not_reached();
    }
}

/*
 * Returns the result of adding or subtracting the values of the
 * floating-point values `a' and `b'. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 */
static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, bool subtract)
{
    /* Effective sign of B after folding in the subtract flag. */
    bool b_sign = b->sign ^ subtract;
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (a->sign != b_sign) {
        /* Subtraction */
        if (likely(ab_mask == float_cmask_normal)) {
            if (parts_sub_normal(a, b)) {
                return a;
            }
            /* Subtract was exact, fall through to set sign. */
            ab_mask = float_cmask_zero;
        }

        if (ab_mask == float_cmask_zero) {
            /* Exact zero difference: sign depends on rounding mode. */
            a->sign = s->float_rounding_mode == float_round_down;
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (a->cls != float_class_inf) {
                /* N - Inf */
                goto return_b;
            }
            if (b->cls != float_class_inf) {
                /* Inf - N */
                return a;
            }
            /* Inf - Inf */
            float_raise(float_flag_invalid, s);
            parts_default_nan(a, s);
            return a;
        }
    } else {
        /* Addition */
        if (likely(ab_mask == float_cmask_normal)) {
            parts_add_normal(a, b);
            return a;
        }

        if (ab_mask == float_cmask_zero) {
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            a->cls = float_class_inf;
            return a;
        }
    }

    /* Remaining cases: exactly one of A/B is zero, the other normal. */
    if (b->cls == float_class_zero) {
        g_assert(a->cls == float_class_normal);
        return a;
    }

    g_assert(a->cls == float_class_zero);
    g_assert(b->cls == float_class_normal);
 return_b:
    b->sign = b_sign;
    return b;

 p_nan:
    return parts_pick_nan(a, b, s);
}

/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b'. The operation is performed according to the IEC/IEEE Standard
 * for Binary Floating-Point Arithmetic.
 */
static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
                                float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    bool sign = a->sign ^ b->sign;

    if (likely(ab_mask == float_cmask_normal)) {
        FloatPartsW tmp;

        /* Widening multiply, then narrow back with sticky bit. */
        frac_mulw(&tmp, a, b);
        frac_truncjam(a, &tmp);

        /* Product of two numbers in [1,2) is in [1,4): at most one shift. */
        a->exp += b->exp + 1;
        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
            frac_add(a, a, a);
            a->exp -= 1;
        }

        a->sign = sign;
        return a;
    }

    /* Inf * Zero == NaN */
    if (unlikely(ab_mask == float_cmask_infzero)) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(a, s);
        return a;
    }

    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    /* Multiply by 0 or Inf */
    if (ab_mask & float_cmask_inf) {
        a->cls = float_class_inf;
        a->sign = sign;
        return a;
    }

    g_assert(ab_mask & float_cmask_zero);
    a->cls = float_class_zero;
    a->sign = sign;
    return a;
}

/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b' then adding 'c', with no intermediate rounding step after the
 * multiplication. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
 * The flags argument allows the caller to select negation of the
 * addend, the intermediate product, or the final result. (The
 * difference between this and having the caller do a separate
 * negation is that negating externally will flip the sign bit on NaNs.)
 *
 * Requires A and C extracted into a double-sized structure to provide the
 * extra space for the widening multiply.
 */
static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
                                   FloatPartsN *c, int flags, float_status *s)
{
    int ab_mask, abc_mask;
    FloatPartsW p_widen, c_widen;

    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    abc_mask = float_cmask(c->cls) | ab_mask;

    /*
     * It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
    }

    if (flags & float_muladd_negate_c) {
        c->sign ^= 1;
    }

    /* Compute the sign of the product into A. */
    a->sign ^= b->sign;
    if (flags & float_muladd_negate_product) {
        a->sign ^= 1;
    }

    if (unlikely(ab_mask != float_cmask_normal)) {
        /* At least one of A, B is zero or infinity. */
        if (unlikely(ab_mask == float_cmask_infzero)) {
            goto d_nan;
        }

        if (ab_mask & float_cmask_inf) {
            /* Inf * N + C: invalid if C is an opposing infinity. */
            if (c->cls == float_class_inf && a->sign != c->sign) {
                goto d_nan;
            }
            goto return_inf;
        }

        g_assert(ab_mask & float_cmask_zero);
        /* 0 * N + C reduces to C (or a signed zero). */
        if (c->cls == float_class_normal) {
            *a = *c;
            goto return_normal;
        }
        if (c->cls == float_class_zero) {
            if (a->sign != c->sign) {
                goto return_sub_zero;
            }
            goto return_zero;
        }
        g_assert(c->cls == float_class_inf);
    }

    if (unlikely(c->cls == float_class_inf)) {
        /* N * N + Inf: the infinity dominates. */
        a->sign = c->sign;
        goto return_inf;
    }

    /* Perform the multiplication step. */
    p_widen.sign = a->sign;
    p_widen.exp = a->exp + b->exp + 1;
    frac_mulw(&p_widen, a, b);
    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        frac_add(&p_widen, &p_widen, &p_widen);
        p_widen.exp -= 1;
    }

    /* Perform the addition step. */
    if (c->cls != float_class_zero) {
        /* Zero-extend C to less significant bits. */
        frac_widen(&c_widen, c);
        c_widen.exp = c->exp;

        if (a->sign == c->sign) {
            parts_add_normal(&p_widen, &c_widen);
        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
            goto return_sub_zero;
        }
    }

    /* Narrow with sticky bit, for proper rounding later. */
    frac_truncjam(a, &p_widen);
    a->sign = p_widen.sign;
    a->exp = p_widen.exp;

 return_normal:
    if (flags & float_muladd_halve_result) {
        a->exp -= 1;
    }
 finish_sign:
    if (flags & float_muladd_negate_result) {
        a->sign ^= 1;
    }
    return a;

 return_sub_zero:
    /* Exact zero from cancellation: sign depends on rounding mode. */
    a->sign = s->float_rounding_mode == float_round_down;
 return_zero:
    a->cls = float_class_zero;
    goto finish_sign;

 return_inf:
    a->cls = float_class_inf;
    goto finish_sign;

 d_nan:
    float_raise(float_flag_invalid, s);
    parts_default_nan(a, s);
    return a;
}

/*
 * Returns the result of dividing the floating-point value `a' by the
 * corresponding value `b'. The operation is performed according to
 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
                                float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    bool sign = a->sign ^ b->sign;

    if (likely(ab_mask == float_cmask_normal)) {
        a->sign = sign;
        /* frac_div returns the exponent adjustment for the quotient. */
        a->exp -= b->exp + frac_div(a, b);
        return a;
    }

    /* 0/0 or Inf/Inf => NaN */
    if (unlikely(ab_mask == float_cmask_zero) ||
        unlikely(ab_mask == float_cmask_inf)) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(a, s);
        return a;
    }

    /* All the NaN cases */
    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    a->sign = sign;

    /* Inf / X */
    if (a->cls == float_class_inf) {
        return a;
    }

    /* 0 / X */
    if (a->cls == float_class_zero) {
        return a;
    }

    /* X / Inf */
    if (b->cls == float_class_inf) {
        a->cls = float_class_zero;
        return a;
    }

    /* X / 0 => Inf */
    g_assert(b->cls == float_class_zero);
    float_raise(float_flag_divbyzero, s);
    a->cls = float_class_inf;
    return a;
}

/*
 * Square Root
 *
 * The base algorithm is lifted from
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
 * and is thus MIT licenced.
 */
static void partsN(sqrt)(FloatPartsN *a, float_status *status,
                         const FloatFmt *fmt)
{
    const uint32_t three32 = 3u << 30;
    const uint64_t three64 = 3ull << 62;
    uint32_t d32, m32, r32, s32, u32; /* 32-bit computation */
    uint64_t d64, m64, r64, s64, u64; /* 64-bit computation */
    uint64_t dh, dl, rh, rl, sh, sl, uh, ul; /* 128-bit computation */
    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
    uint64_t discard;
    bool exp_odd;
    size_t index;

    /* Dispatch the special classes before any arithmetic. */
    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, status);
            return;
        case float_class_zero:
            /* sqrt(+/-0) == +/-0 */
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            /* sqrt(+inf) == +inf */
            return;
        default:
            g_assert_not_reached();
        }
    }

    /* sqrt of a negative (non-zero) number is invalid. */
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /*
     * Argument reduction.
     * x = 4^e frac; with integer e, and frac in [1, 4)
     * m = frac fixed point at bit 62, since we're in base 4.
     * If base-2 exponent is odd, exchange that for multiply by 2,
     * which results in no shift.
     */
    exp_odd = a->exp & 1;
    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
    if (!exp_odd) {
        frac_shr(a, 1);
    }

    /*
     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
     *
     * Initial estimate:
     *   7-bit lookup table (1-bit exponent and 6-bit significand).
     *
     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
     * a table lookup is faster and needs one less iteration.
     * The 7-bit table gives |e| < 0x1.fdp-9.
     *
     * A Newton-Raphson iteration for r is
     *   s = m*r
     *   d = s*r
     *   u = 3 - d
     *   r = r*u/2
     *
     * Fixed point representations:
     *   m, s, d, u, three are all 2.30; r is 0.32
     */
    m64 = a->frac_hi;
    m32 = m64 >> 32;

    r32 = rsqrt_tab[index] << 16;
    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */

    s32 = ((uint64_t)m32 * r32) >> 32;
    d32 = ((uint64_t)s32 * r32) >> 32;
    u32 = three32 - d32;

    if (N == 64) {
        /* float64 or smaller */

        r32 = ((uint64_t)r32 * u32) >> 31;
        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */

        s32 = ((uint64_t)m32 * r32) >> 32;
        d32 = ((uint64_t)s32 * r32) >> 32;
        u32 = three32 - d32;

        if (fmt->frac_size <= 23) {
            /* float32 or smaller */

            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
            s32 = (s32 - 1) >> 6;               /* 9.23 */
            /* s < sqrt(m) < s + 0x1.08p-23 */

            /* compute nearest rounded result to 2.23 bits */
            uint32_t d0 = (m32 << 16) - s32 * s32;
            uint32_t d1 = s32 - d0;
            uint32_t d2 = d1 + s32 + 1;
            s32 += d1 >> 31;
            a->frac_hi = (uint64_t)s32 << (64 - 25);

            /* increment or decrement for inexact */
            if (d2 != 0) {
                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
            }
            goto done;
        }

        /* float64 */

        r64 = (uint64_t)r32 * u32 * 2;
        /* |r*sqrt(m) - 1| < 0x1.37p-29; convert to 64-bit arithmetic */
        mul64To128(m64, r64, &s64, &discard);
        mul64To128(s64, r64, &d64, &discard);
        u64 = three64 - d64;

        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
        s64 = (s64 - 2) >> 9;                  /* 12.52 */

        /* Compute nearest rounded result */
        uint64_t d0 = (m64 << 42) - s64 * s64;
        uint64_t d1 = s64 - d0;
        uint64_t d2 = d1 + s64 + 1;
        s64 += d1 >> 63;
        a->frac_hi = s64 << (64 - 54);

        /* increment or decrement for inexact */
        if (d2 != 0) {
            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
        }
        goto done;
    }

    /* float128 (and wider): two more NR iterations in 64/128-bit. */
    r64 = (uint64_t)r32 * u32 * 2;
    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &r64, &discard);
    r64 <<= 1;
    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &rh, &rl);
    add128(rh, rl, rh, rl, &rh, &rl);
    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */

    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
    sub128(three64, 0, dh, dl, &uh, &ul);
    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard); /* 3.125 */
    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */

    sub128(sh, sl, 0, 4, &sh, &sl);
    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
    /* s < sqrt(m) < s + 1ulp */

    /* Compute nearest rounded result */
    mul64To128(sl, sl, &d0h, &d0l);
    d0h += 2 * sh * sl;
    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
    add128(sh, sl, 0, 1, &d2h, &d2l);
    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
    shift128Left(sh, sl, 128 - 114, &sh, &sl);

    /* increment or decrement for inexact */
    if (d2h | d2l) {
        if ((int64_t)(d1h ^ d2h) < 0) {
            sub128(sh, sl, 0, 1, &sh, &sl);
        } else {
            add128(sh, sl, 0, 1, &sh, &sl);
        }
    }
    a->frac_lo = sl;
    a->frac_hi = sh;

 done:
    /* Convert back from base 4 to base 2. */
    a->exp >>= 1;
    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        frac_add(a, a, a);
    } else {
        a->exp += 1;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid, status);
    parts_default_nan(a, status);
}

/*
 * Rounds the floating-point value `a' to an integer, and returns the
 * result as a floating-point value. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 *
 * parts_round_to_int_normal is an internal helper function for
 * normal numbers only, returning true for inexact but not directly
 * raising float_flag_inexact.
 */
static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
                                        int scale, int frac_size)
{
    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
    int shift_adj;

    /* Clamp the scale so the exponent arithmetic cannot overflow. */
    scale = MIN(MAX(scale, -0x10000), 0x10000);
    a->exp += scale;

    if (a->exp < 0) {
        bool one;

        /* All fractional */
        switch (rmode) {
        case float_round_nearest_even:
            one = false;
            if (a->exp == -1) {
                FloatPartsN tmp;
                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
                frac_add(&tmp, a, a);
                /* Anything remaining means frac > 0.5. */
                one = !frac_eqz(&tmp);
            }
            break;
        case float_round_ties_away:
            one = a->exp == -1;
            break;
        case float_round_to_zero:
            one = false;
            break;
        case float_round_up:
            one = !a->sign;
            break;
        case float_round_down:
            one = a->sign;
            break;
        case float_round_to_odd:
            one = true;
            break;
        default:
            g_assert_not_reached();
        }

        frac_clear(a);
        a->exp = 0;
        if (one) {
            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
        } else {
            a->cls = float_class_zero;
        }
        return true;
    }

    if (a->exp >= frac_size) {
        /* All integral */
        return false;
    }

    if (N > 64 && a->exp < N - 64) {
        /*
         * Rounding is not in the low word -- shift lsb to bit 2,
         * which leaves room for sticky and rounding bit.
         */
        shift_adj = (N - 1) - (a->exp + 2);
        frac_shrjam(a, shift_adj);
        frac_lsb = 1 << 2;
    } else {
        shift_adj = 0;
        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
    }

    frac_lsbm1 = frac_lsb >> 1;
    rnd_mask = frac_lsb - 1;
    rnd_even_mask = rnd_mask | frac_lsb;

    if (!(a->frac_lo & rnd_mask)) {
        /* Fractional bits already clear, undo the shift above. */
        frac_shl(a, shift_adj);
        return false;
    }

    switch (rmode) {
    case float_round_nearest_even:
        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        inc = 0;
        break;
    case float_round_up:
        inc = a->sign ? 0 : rnd_mask;
        break;
    case float_round_down:
        inc = a->sign ? rnd_mask : 0;
        break;
    case float_round_to_odd:
        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
        break;
    default:
        g_assert_not_reached();
    }

    if (shift_adj == 0) {
        if (frac_addi(a, a, inc)) {
            /* Rounding carried out of the fraction: renormalize. */
            frac_shr(a, 1);
            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
            a->exp++;
        }
        a->frac_lo &= ~rnd_mask;
    } else {
        frac_addi(a, a, inc);
        a->frac_lo &= ~rnd_mask;
        /* Be careful shifting back, not to overflow */
        frac_shl(a, shift_adj - 1);
        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
            a->exp++;
        } else {
            frac_add(a, a, a);
        }
    }
    return true;
}

/*
 * Round A to an integral floating-point value, dispatching on class.
 * NaNs are propagated via parts_return_nan; zero and infinity are
 * already integral; normals go through parts_round_to_int_normal,
 * raising Inexact when any fraction bits were discarded.
 */
static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt)
{
    switch (a->cls) {
    case float_class_qnan:
    case float_class_snan:
        parts_return_nan(a, s);
        break;
    case float_class_zero:
    case float_class_inf:
        break;
    case float_class_normal:
        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
            float_raise(float_flag_inexact, s);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Returns the result of converting the floating-point value `a' to
 * the two's complement integer format. The conversion is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest positive integer is returned. Otherwise, if the
 * conversion overflows, the largest integer with the same sign as `a'
 * is returned.
 */
static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
    case float_class_qnan:
        flags = float_flag_invalid;
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid;
        r = p->sign ? min : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
        }

        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
        } else {
            /* Magnitude exceeds 64 bits: force saturation below. */
            r = UINT64_MAX;
        }
        if (p->sign) {
            /* Unsigned compare handles min == INT64_MIN correctly. */
            if (r <= -(uint64_t)min) {
                r = -r;
            } else {
                flags = float_flag_invalid;
                r = min;
            }
        } else if (r > max) {
            flags = float_flag_invalid;
            r = max;
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}

/*
 * Returns the result of converting the floating-point value `a' to
 * the unsigned integer format. The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest unsigned integer is returned. Otherwise, if the
 * conversion overflows, the largest unsigned integer is returned. If
 * the 'a' is negative, the result is rounded and zero is returned;
 * values that do not round to zero will raise the inexact exception
 * flag.
 */
static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
                                      int scale, uint64_t max, float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
    case float_class_qnan:
        flags = float_flag_invalid;
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid;
        r = p->sign ? 0 : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
            if (p->cls == float_class_zero) {
                /* Negative value rounded to zero: inexact but valid. */
                r = 0;
                break;
            }
        }

        if (p->sign) {
            flags = float_flag_invalid;
            r = 0;
        } else if (p->exp > DECOMPOSED_BINARY_POINT) {
            flags = float_flag_invalid;
            r = max;
        } else {
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
            if (r > max) {
                flags = float_flag_invalid;
                r = max;
            }
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}

/*
 * Integer to float conversions
 *
 * Returns the result of converting the two's complement integer `a'
 * to the floating-point format. The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
                                  int scale, float_status *s)
{
    uint64_t f = a;
    int shift;

    memset(p, 0, sizeof(*p));

    if (a == 0) {
        p->cls = float_class_zero;
        return;
    }

    p->cls = float_class_normal;
    if (a < 0) {
        /* Negate in unsigned arithmetic; handles INT64_MIN. */
        f = -f;
        p->sign = true;
    }
    shift = clz64(f);
    scale = MIN(MAX(scale, -0x10000), 0x10000);

    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
    p->frac_hi = f << shift;
}

/*
 * Unsigned Integer to float conversions
 *
 * Returns the result of converting the unsigned integer `a' to the
 * floating-point format. The conversion is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
                                  int scale, float_status *status)
{
    memset(p, 0, sizeof(*p));

    if (a == 0) {
        p->cls = float_class_zero;
    } else {
        /* Normalize: shift the msb of A up to the implicit-bit position. */
        int shift = clz64(a);
        scale = MIN(MAX(scale, -0x10000), 0x10000);
        p->cls = float_class_normal;
        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
        p->frac_hi = a << shift;
    }
}

/*
 * Float min/max.
 */
static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, int flags)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    int a_exp, b_exp, cmp;

    if (unlikely(ab_mask & float_cmask_anynan)) {
        /*
         * For minnum/maxnum, if one operand is a QNaN, and the other
         * operand is numerical, then return numerical argument.
         */
        if ((flags & minmax_isnum)
            && !(ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_qnan)) {
            return is_nan(a->cls) ? b : a;
        }
        return parts_pick_nan(a, b, s);
    }

    a_exp = a->exp;
    b_exp = b->exp;

    if (unlikely(ab_mask != float_cmask_normal)) {
        /* Give zero and infinity extreme effective exponents. */
        switch (a->cls) {
        case float_class_normal:
            break;
        case float_class_inf:
            a_exp = INT16_MAX;
            break;
        case float_class_zero:
            a_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }
        switch (b->cls) {
        case float_class_normal:
            break;
        case float_class_inf:
            b_exp = INT16_MAX;
            break;
        case float_class_zero:
            b_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }
    }

    /* Compare magnitudes. */
    cmp = a_exp - b_exp;
    if (cmp == 0) {
        cmp = frac_cmp(a, b);
    }

    /*
     * Take the sign into account.
     * For ismag, only do this if the magnitudes are equal.
     */
    if (!(flags & minmax_ismag) || cmp == 0) {
        if (a->sign != b->sign) {
            /* For differing signs, the negative operand is less. */
            cmp = a->sign ? -1 : 1;
        } else if (a->sign) {
            /* For two negative operands, invert the magnitude comparison. */
            cmp = -cmp;
        }
    }

    if (flags & minmax_ismin) {
        cmp = -cmp;
    }
    return cmp < 0 ? b : a;
}

/*
 * Floating point compare
 */
static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s, bool is_quiet)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    int cmp;

    if (likely(ab_mask == float_cmask_normal)) {
        if (a->sign != b->sign) {
            goto a_sign;
        }
        if (a->exp != b->exp) {
            cmp = a->exp < b->exp ? -1 : 1;
        } else {
            cmp = frac_cmp(a, b);
        }
        if (a->sign) {
            /* Negative numbers: larger magnitude is smaller value. */
            cmp = -cmp;
        }
        return cmp;
    }

    if (unlikely(ab_mask & float_cmask_anynan)) {
        /* Quiet compares only raise Invalid for signaling NaNs. */
        if (!is_quiet || (ab_mask & float_cmask_snan)) {
            float_raise(float_flag_invalid, s);
        }
        return float_relation_unordered;
    }

    if (ab_mask & float_cmask_zero) {
        if (ab_mask == float_cmask_zero) {
            /* +0 and -0 compare equal. */
            return float_relation_equal;
        } else if (a->cls == float_class_zero) {
            goto b_sign;
        } else {
            goto a_sign;
        }
    }

    if (ab_mask == float_cmask_inf) {
        if (a->sign == b->sign) {
            return float_relation_equal;
        }
    } else if (b->cls == float_class_inf) {
        goto b_sign;
    } else {
        g_assert(a->cls == float_class_inf);
    }

 a_sign:
    /* Result determined by the sign of A. */
    return a->sign ? float_relation_less : float_relation_greater;
 b_sign:
    /* Result determined by the sign of B (inverted: B is the "other" side). */
    return b->sign ? float_relation_greater : float_relation_less;
}

/*
 * Multiply A by 2 raised to the power N.
 */
static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
    case float_class_qnan:
        parts_return_nan(a, s);
        break;
    case float_class_zero:
    case float_class_inf:
        /* Scaling zero or infinity is a no-op. */
        break;
    case float_class_normal:
        /* Clamp N so the exponent cannot overflow int. */
        a->exp += MIN(MAX(n, -0x10000), 0x10000);
        break;
    default:
        g_assert_not_reached();
    }
}