1 /* $NetBSD: softfloat.c,v 1.1 2002/05/21 23:51:07 bjh21 Exp $ */ 2 3 /* 4 * This version hacked for use with gcc -msoft-float by bjh21. 5 * (Mostly a case of #ifdefing out things GCC doesn't need or provides 6 * itself). 7 */ 8 9 /* 10 * Things you may want to define: 11 * 12 * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with 13 * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them 14 * properly renamed. 15 */ 16 17 /* 18 * This differs from the standard bits32/softfloat.c in that float64 19 * is defined to be a 64-bit integer rather than a structure. The 20 * structure is float64s, with translation between the two going via 21 * float64u. 22 */ 23 24 /* 25 =============================================================================== 26 27 This C source file is part of the SoftFloat IEC/IEEE Floating-Point 28 Arithmetic Package, Release 2a. 29 30 Written by John R. Hauser. This work was made possible in part by the 31 International Computer Science Institute, located at Suite 600, 1947 Center 32 Street, Berkeley, California 94704. Funding was partially provided by the 33 National Science Foundation under grant MIP-9311980. The original version 34 of this code was written as part of a project to build a fixed-point vector 35 processor in collaboration with the University of California at Berkeley, 36 overseen by Profs. Nelson Morgan and John Wawrzynek. More information 37 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 38 arithmetic/SoftFloat.html'. 39 40 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 41 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 42 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 43 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 44 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 45 46 Derivative works are acceptable, even for commercial purposes, so long as 47 (1) they include prominent notice that the work is derivative, and (2) they 48 include prominent notice akin to these four paragraphs for those parts of 49 this code that are retained. 50 51 =============================================================================== 52 */ 53 54 #include <sys/cdefs.h> 55 #if defined(LIBC_SCCS) && !defined(lint) 56 __RCSID("$NetBSD: softfloat.c,v 1.1 2002/05/21 23:51:07 bjh21 Exp $"); 57 #endif /* LIBC_SCCS and not lint */ 58 59 #ifdef SOFTFLOAT_FOR_GCC 60 #include "softfloat-for-gcc.h" 61 #endif 62 63 #include "milieu.h" 64 #include "softfloat.h" 65 66 /* 67 * Conversions between floats as stored in memory and floats as 68 * SoftFloat uses them 69 */ 70 #ifndef FLOAT64_DEMANGLE 71 #define FLOAT64_DEMANGLE(a) (a) 72 #endif 73 #ifndef FLOAT64_MANGLE 74 #define FLOAT64_MANGLE(a) (a) 75 #endif 76 77 /* 78 ------------------------------------------------------------------------------- 79 Floating-point rounding mode and exception flags. 80 ------------------------------------------------------------------------------- 81 */ 82 fp_rnd float_rounding_mode = float_round_nearest_even; 83 fp_except float_exception_flags = 0; 84 85 /* 86 ------------------------------------------------------------------------------- 87 Primitive arithmetic functions, including multi-word arithmetic, and 88 division and square root approximations. (Can be specialized to target if 89 desired.) 90 ------------------------------------------------------------------------------- 91 */ 92 #include "softfloat-macros" 93 94 /* 95 ------------------------------------------------------------------------------- 96 Functions and definitions to determine: (1) whether tininess for underflow 97 is detected before or after rounding by default, (2) what (if anything) 98 happens when exceptions are raised, (3) how signaling NaNs are distinguished 99 from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs 100 are propagated from function inputs to output. These details are target- 101 specific. 102 ------------------------------------------------------------------------------- 103 */ 104 #include "softfloat-specialize" 105 106 /* 107 ------------------------------------------------------------------------------- 108 Returns the fraction bits of the single-precision floating-point value `a'. 109 ------------------------------------------------------------------------------- 110 */ 111 INLINE bits32 extractFloat32Frac( float32 a ) 112 { 113 114 return a & 0x007FFFFF; 115 116 } 117 118 /* 119 ------------------------------------------------------------------------------- 120 Returns the exponent bits of the single-precision floating-point value `a'. 121 ------------------------------------------------------------------------------- 122 */ 123 INLINE int16 extractFloat32Exp( float32 a ) 124 { 125 126 return ( a>>23 ) & 0xFF; 127 128 } 129 130 /* 131 ------------------------------------------------------------------------------- 132 Returns the sign bit of the single-precision floating-point value `a'. 133 ------------------------------------------------------------------------------- 134 */ 135 INLINE flag extractFloat32Sign( float32 a ) 136 { 137 138 return a>>31; 139 140 } 141 142 /* 143 ------------------------------------------------------------------------------- 144 Normalizes the subnormal single-precision floating-point value represented 145 by the denormalized significand `aSig'. The normalized exponent and 146 significand are stored at the locations pointed to by `zExpPtr' and 147 `zSigPtr', respectively. 148 ------------------------------------------------------------------------------- 149 */ 150 static void 151 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) 152 { 153 int8 shiftCount; 154 155 shiftCount = countLeadingZeros32( aSig ) - 8; 156 *zSigPtr = aSig<<shiftCount; 157 *zExpPtr = 1 - shiftCount; 158 159 } 160 161 /* 162 ------------------------------------------------------------------------------- 163 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a 164 single-precision floating-point value, returning the result. After being 165 shifted into the proper positions, the three fields are simply added 166 together to form the result. This means that any integer portion of `zSig' 167 will be added into the exponent. Since a properly normalized significand 168 will have an integer portion equal to 1, the `zExp' input should be 1 less 169 than the desired result exponent whenever `zSig' is a complete, normalized 170 significand. 171 ------------------------------------------------------------------------------- 172 */ 173 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig ) 174 { 175 176 return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig; 177 178 } 179 180 /* 181 ------------------------------------------------------------------------------- 182 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 183 and significand `zSig', and returns the proper single-precision floating- 184 point value corresponding to the abstract input. Ordinarily, the abstract 185 value is simply rounded and packed into the single-precision format, with 186 the inexact exception raised if the abstract input cannot be represented 187 exactly. However, if the abstract value is too large, the overflow and 188 inexact exceptions are raised and an infinity or maximal finite value is 189 returned. If the abstract value is too small, the input value is rounded to 190 a subnormal number, and the underflow and inexact exceptions are raised if 191 the abstract input cannot be represented exactly as a subnormal single- 192 precision floating-point number. 193 The input significand `zSig' has its binary point between bits 30 194 and 29, which is 7 bits to the left of the usual location. This shifted 195 significand must be normalized or smaller. If `zSig' is not normalized, 196 `zExp' must be 0; in that case, the result returned is a subnormal number, 197 and it must not require rounding. In the usual case that `zSig' is 198 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 199 The handling of underflow and overflow follows the IEC/IEEE Standard for 200 Binary Floating-Point Arithmetic. 201 ------------------------------------------------------------------------------- 202 */ 203 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 204 { 205 int8 roundingMode; 206 flag roundNearestEven; 207 int8 roundIncrement, roundBits; 208 flag isTiny; 209 210 roundingMode = float_rounding_mode; 211 roundNearestEven = roundingMode == float_round_nearest_even; 212 roundIncrement = 0x40; 213 if ( ! roundNearestEven ) { 214 if ( roundingMode == float_round_to_zero ) { 215 roundIncrement = 0; 216 } 217 else { 218 roundIncrement = 0x7F; 219 if ( zSign ) { 220 if ( roundingMode == float_round_up ) roundIncrement = 0; 221 } 222 else { 223 if ( roundingMode == float_round_down ) roundIncrement = 0; 224 } 225 } 226 } 227 roundBits = zSig & 0x7F; 228 if ( 0xFD <= (bits16) zExp ) { 229 if ( ( 0xFD < zExp ) 230 || ( ( zExp == 0xFD ) 231 && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) 232 ) { 233 float_raise( float_flag_overflow | float_flag_inexact ); 234 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); 235 } 236 if ( zExp < 0 ) { 237 isTiny = 238 ( float_detect_tininess == float_tininess_before_rounding ) 239 || ( zExp < -1 ) 240 || ( zSig + roundIncrement < 0x80000000 ); 241 shift32RightJamming( zSig, - zExp, &zSig ); 242 zExp = 0; 243 roundBits = zSig & 0x7F; 244 if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 245 } 246 } 247 if ( roundBits ) float_exception_flags |= float_flag_inexact; 248 zSig = ( zSig + roundIncrement )>>7; 249 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 250 if ( zSig == 0 ) zExp = 0; 251 return packFloat32( zSign, zExp, zSig ); 252 253 } 254 255 /* 256 ------------------------------------------------------------------------------- 257 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 258 and significand `zSig', and returns the proper single-precision floating- 259 point value corresponding to the abstract input. This routine is just like 260 `roundAndPackFloat32' except that `zSig' does not have to be normalized. 261 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' 262 floating-point exponent. 263 ------------------------------------------------------------------------------- 264 */ 265 static float32 266 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 267 { 268 int8 shiftCount; 269 270 shiftCount = countLeadingZeros32( zSig ) - 1; 271 return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount ); 272 273 } 274 275 /* 276 ------------------------------------------------------------------------------- 277 Returns the least-significant 32 fraction bits of the double-precision 278 floating-point value `a'. 279 ------------------------------------------------------------------------------- 280 */ 281 INLINE bits32 extractFloat64Frac1( float64 a ) 282 { 283 284 return FLOAT64_DEMANGLE(a) & LIT64( 0x00000000FFFFFFFF ); 285 286 } 287 288 /* 289 ------------------------------------------------------------------------------- 290 Returns the most-significant 20 fraction bits of the double-precision 291 floating-point value `a'. 292 ------------------------------------------------------------------------------- 293 */ 294 INLINE bits32 extractFloat64Frac0( float64 a ) 295 { 296 297 return ( FLOAT64_DEMANGLE(a)>>32 ) & 0x000FFFFF; 298 299 } 300 301 /* 302 ------------------------------------------------------------------------------- 303 Returns the exponent bits of the double-precision floating-point value `a'. 304 ------------------------------------------------------------------------------- 305 */ 306 INLINE int16 extractFloat64Exp( float64 a ) 307 { 308 309 return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF; 310 311 } 312 313 /* 314 ------------------------------------------------------------------------------- 315 Returns the sign bit of the double-precision floating-point value `a'. 316 ------------------------------------------------------------------------------- 317 */ 318 INLINE flag extractFloat64Sign( float64 a ) 319 { 320 321 return FLOAT64_DEMANGLE(a)>>63; 322 323 } 324 325 /* 326 ------------------------------------------------------------------------------- 327 Normalizes the subnormal double-precision floating-point value represented 328 by the denormalized significand formed by the concatenation of `aSig0' and 329 `aSig1'. The normalized exponent is stored at the location pointed to by 330 `zExpPtr'. The most significant 21 bits of the normalized significand are 331 stored at the location pointed to by `zSig0Ptr', and the least significant 332 32 bits of the normalized significand are stored at the location pointed to 333 by `zSig1Ptr'. 334 ------------------------------------------------------------------------------- 335 */ 336 static void 337 normalizeFloat64Subnormal( 338 bits32 aSig0, 339 bits32 aSig1, 340 int16 *zExpPtr, 341 bits32 *zSig0Ptr, 342 bits32 *zSig1Ptr 343 ) 344 { 345 int8 shiftCount; 346 347 if ( aSig0 == 0 ) { 348 shiftCount = countLeadingZeros32( aSig1 ) - 11; 349 if ( shiftCount < 0 ) { 350 *zSig0Ptr = aSig1>>( - shiftCount ); 351 *zSig1Ptr = aSig1<<( shiftCount & 31 ); 352 } 353 else { 354 *zSig0Ptr = aSig1<<shiftCount; 355 *zSig1Ptr = 0; 356 } 357 *zExpPtr = - shiftCount - 31; 358 } 359 else { 360 shiftCount = countLeadingZeros32( aSig0 ) - 11; 361 shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr ); 362 *zExpPtr = 1 - shiftCount; 363 } 364 365 } 366 367 /* 368 ------------------------------------------------------------------------------- 369 Packs the sign `zSign', the exponent `zExp', and the significand formed by 370 the concatenation of `zSig0' and `zSig1' into a double-precision floating- 371 point value, returning the result. After being shifted into the proper 372 positions, the three fields `zSign', `zExp', and `zSig0' are simply added 373 together to form the most significant 32 bits of the result. This means 374 that any integer portion of `zSig0' will be added into the exponent. Since 375 a properly normalized significand will have an integer portion equal to 1, 376 the `zExp' input should be 1 less than the desired result exponent whenever 377 `zSig0' and `zSig1' concatenated form a complete, normalized significand. 378 ------------------------------------------------------------------------------- 379 */ 380 INLINE float64 381 packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 ) 382 { 383 384 return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) + 385 ( ( (bits64) zExp )<<52 ) + 386 ( ( (bits64) zSig0 )<<32 ) + zSig1 ); 387 388 389 } 390 391 /* 392 ------------------------------------------------------------------------------- 393 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 394 and extended significand formed by the concatenation of `zSig0', `zSig1', 395 and `zSig2', and returns the proper double-precision floating-point value 396 corresponding to the abstract input. Ordinarily, the abstract value is 397 simply rounded and packed into the double-precision format, with the inexact 398 exception raised if the abstract input cannot be represented exactly. 399 However, if the abstract value is too large, the overflow and inexact 400 exceptions are raised and an infinity or maximal finite value is returned. 401 If the abstract value is too small, the input value is rounded to a 402 subnormal number, and the underflow and inexact exceptions are raised if the 403 abstract input cannot be represented exactly as a subnormal double-precision 404 floating-point number. 405 The input significand must be normalized or smaller. If the input 406 significand is not normalized, `zExp' must be 0; in that case, the result 407 returned is a subnormal number, and it must not require rounding. In the 408 usual case that the input significand is normalized, `zExp' must be 1 less 409 than the ``true'' floating-point exponent. The handling of underflow and 410 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 411 ------------------------------------------------------------------------------- 412 */ 413 static float64 414 roundAndPackFloat64( 415 flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 ) 416 { 417 int8 roundingMode; 418 flag roundNearestEven, increment, isTiny; 419 420 roundingMode = float_rounding_mode; 421 roundNearestEven = ( roundingMode == float_round_nearest_even ); 422 increment = ( (sbits32) zSig2 < 0 ); 423 if ( ! roundNearestEven ) { 424 if ( roundingMode == float_round_to_zero ) { 425 increment = 0; 426 } 427 else { 428 if ( zSign ) { 429 increment = ( roundingMode == float_round_down ) && zSig2; 430 } 431 else { 432 increment = ( roundingMode == float_round_up ) && zSig2; 433 } 434 } 435 } 436 if ( 0x7FD <= (bits16) zExp ) { 437 if ( ( 0x7FD < zExp ) 438 || ( ( zExp == 0x7FD ) 439 && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 ) 440 && increment 441 ) 442 ) { 443 float_raise( float_flag_overflow | float_flag_inexact ); 444 if ( ( roundingMode == float_round_to_zero ) 445 || ( zSign && ( roundingMode == float_round_up ) ) 446 || ( ! zSign && ( roundingMode == float_round_down ) ) 447 ) { 448 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF ); 449 } 450 return packFloat64( zSign, 0x7FF, 0, 0 ); 451 } 452 if ( zExp < 0 ) { 453 isTiny = 454 ( float_detect_tininess == float_tininess_before_rounding ) 455 || ( zExp < -1 ) 456 || ! increment 457 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF ); 458 shift64ExtraRightJamming( 459 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); 460 zExp = 0; 461 if ( isTiny && zSig2 ) float_raise( float_flag_underflow ); 462 if ( roundNearestEven ) { 463 increment = ( (sbits32) zSig2 < 0 ); 464 } 465 else { 466 if ( zSign ) { 467 increment = ( roundingMode == float_round_down ) && zSig2; 468 } 469 else { 470 increment = ( roundingMode == float_round_up ) && zSig2; 471 } 472 } 473 } 474 } 475 if ( zSig2 ) float_exception_flags |= float_flag_inexact; 476 if ( increment ) { 477 add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); 478 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); 479 } 480 else { 481 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; 482 } 483 return packFloat64( zSign, zExp, zSig0, zSig1 ); 484 485 } 486 487 /* 488 ------------------------------------------------------------------------------- 489 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 490 and significand formed by the concatenation of `zSig0' and `zSig1', and 491 returns the proper double-precision floating-point value corresponding 492 to the abstract input. This routine is just like `roundAndPackFloat64' 493 except that the input significand has fewer bits and does not have to be 494 normalized. In all cases, `zExp' must be 1 less than the ``true'' floating- 495 point exponent. 496 ------------------------------------------------------------------------------- 497 */ 498 static float64 499 normalizeRoundAndPackFloat64( 500 flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 ) 501 { 502 int8 shiftCount; 503 bits32 zSig2; 504 505 if ( zSig0 == 0 ) { 506 zSig0 = zSig1; 507 zSig1 = 0; 508 zExp -= 32; 509 } 510 shiftCount = countLeadingZeros32( zSig0 ) - 11; 511 if ( 0 <= shiftCount ) { 512 zSig2 = 0; 513 shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 514 } 515 else { 516 shift64ExtraRightJamming( 517 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); 518 } 519 zExp -= shiftCount; 520 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 521 522 } 523 524 /* 525 ------------------------------------------------------------------------------- 526 Returns the result of converting the 32-bit two's complement integer `a' to 527 the single-precision floating-point format. The conversion is performed 528 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 529 ------------------------------------------------------------------------------- 530 */ 531 float32 int32_to_float32( int32 a ) 532 { 533 flag zSign; 534 535 if ( a == 0 ) return 0; 536 if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); 537 zSign = ( a < 0 ); 538 return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a ); 539 540 } 541 542 /* 543 ------------------------------------------------------------------------------- 544 Returns the result of converting the 32-bit two's complement integer `a' to 545 the double-precision floating-point format. The conversion is performed 546 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 547 ------------------------------------------------------------------------------- 548 */ 549 float64 int32_to_float64( int32 a ) 550 { 551 flag zSign; 552 bits32 absA; 553 int8 shiftCount; 554 bits32 zSig0, zSig1; 555 556 if ( a == 0 ) return packFloat64( 0, 0, 0, 0 ); 557 zSign = ( a < 0 ); 558 absA = zSign ? - a : a; 559 shiftCount = countLeadingZeros32( absA ) - 11; 560 if ( 0 <= shiftCount ) { 561 zSig0 = absA<<shiftCount; 562 zSig1 = 0; 563 } 564 else { 565 shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 ); 566 } 567 return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 ); 568 569 } 570 571 #ifndef SOFTFLOAT_FOR_GCC 572 /* 573 ------------------------------------------------------------------------------- 574 Returns the result of converting the single-precision floating-point value 575 `a' to the 32-bit two's complement integer format. The conversion is 576 performed according to the IEC/IEEE Standard for Binary Floating-Point 577 Arithmetic---which means in particular that the conversion is rounded 578 according to the current rounding mode. If `a' is a NaN, the largest 579 positive integer is returned. Otherwise, if the conversion overflows, the 580 largest integer with the same sign as `a' is returned. 581 ------------------------------------------------------------------------------- 582 */ 583 int32 float32_to_int32( float32 a ) 584 { 585 flag aSign; 586 int16 aExp, shiftCount; 587 bits32 aSig, aSigExtra; 588 int32 z; 589 int8 roundingMode; 590 591 aSig = extractFloat32Frac( a ); 592 aExp = extractFloat32Exp( a ); 593 aSign = extractFloat32Sign( a ); 594 shiftCount = aExp - 0x96; 595 if ( 0 <= shiftCount ) { 596 if ( 0x9E <= aExp ) { 597 if ( a != 0xCF000000 ) { 598 float_raise( float_flag_invalid ); 599 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { 600 return 0x7FFFFFFF; 601 } 602 } 603 return (sbits32) 0x80000000; 604 } 605 z = ( aSig | 0x00800000 )<<shiftCount; 606 if ( aSign ) z = - z; 607 } 608 else { 609 if ( aExp < 0x7E ) { 610 aSigExtra = aExp | aSig; 611 z = 0; 612 } 613 else { 614 aSig |= 0x00800000; 615 aSigExtra = aSig<<( shiftCount & 31 ); 616 z = aSig>>( - shiftCount ); 617 } 618 if ( aSigExtra ) float_exception_flags |= float_flag_inexact; 619 roundingMode = float_rounding_mode; 620 if ( roundingMode == float_round_nearest_even ) { 621 if ( (sbits32) aSigExtra < 0 ) { 622 ++z; 623 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1; 624 } 625 if ( aSign ) z = - z; 626 } 627 else { 628 aSigExtra = ( aSigExtra != 0 ); 629 if ( aSign ) { 630 z += ( roundingMode == float_round_down ) & aSigExtra; 631 z = - z; 632 } 633 else { 634 z += ( roundingMode == float_round_up ) & aSigExtra; 635 } 636 } 637 } 638 return z; 639 640 } 641 #endif 642 643 /* 644 ------------------------------------------------------------------------------- 645 Returns the result of converting the single-precision floating-point value 646 `a' to the 32-bit two's complement integer format. The conversion is 647 performed according to the IEC/IEEE Standard for Binary Floating-Point 648 Arithmetic, except that the conversion is always rounded toward zero. 649 If `a' is a NaN, the largest positive integer is returned. Otherwise, if 650 the conversion overflows, the largest integer with the same sign as `a' is 651 returned. 652 ------------------------------------------------------------------------------- 653 */ 654 int32 float32_to_int32_round_to_zero( float32 a ) 655 { 656 flag aSign; 657 int16 aExp, shiftCount; 658 bits32 aSig; 659 int32 z; 660 661 aSig = extractFloat32Frac( a ); 662 aExp = extractFloat32Exp( a ); 663 aSign = extractFloat32Sign( a ); 664 shiftCount = aExp - 0x9E; 665 if ( 0 <= shiftCount ) { 666 if ( a != 0xCF000000 ) { 667 float_raise( float_flag_invalid ); 668 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; 669 } 670 return (sbits32) 0x80000000; 671 } 672 else if ( aExp <= 0x7E ) { 673 if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; 674 return 0; 675 } 676 aSig = ( aSig | 0x00800000 )<<8; 677 z = aSig>>( - shiftCount ); 678 if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { 679 float_exception_flags |= float_flag_inexact; 680 } 681 if ( aSign ) z = - z; 682 return z; 683 684 } 685 686 /* 687 ------------------------------------------------------------------------------- 688 Returns the result of converting the single-precision floating-point value 689 `a' to the double-precision floating-point format. The conversion is 690 performed according to the IEC/IEEE Standard for Binary Floating-Point 691 Arithmetic. 692 ------------------------------------------------------------------------------- 693 */ 694 float64 float32_to_float64( float32 a ) 695 { 696 flag aSign; 697 int16 aExp; 698 bits32 aSig, zSig0, zSig1; 699 700 aSig = extractFloat32Frac( a ); 701 aExp = extractFloat32Exp( a ); 702 aSign = extractFloat32Sign( a ); 703 if ( aExp == 0xFF ) { 704 if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); 705 return packFloat64( aSign, 0x7FF, 0, 0 ); 706 } 707 if ( aExp == 0 ) { 708 if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 ); 709 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 710 --aExp; 711 } 712 shift64Right( aSig, 0, 3, &zSig0, &zSig1 ); 713 return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 ); 714 715 } 716 717 #ifndef SOFTFLOAT_FOR_GCC 718 /* 719 ------------------------------------------------------------------------------- 720 Rounds the single-precision floating-point value `a' to an integer, 721 and returns the result as a single-precision floating-point value. The 722 operation is performed according to the IEC/IEEE Standard for Binary 723 Floating-Point Arithmetic. 724 ------------------------------------------------------------------------------- 725 */ 726 float32 float32_round_to_int( float32 a ) 727 { 728 flag aSign; 729 int16 aExp; 730 bits32 lastBitMask, roundBitsMask; 731 int8 roundingMode; 732 float32 z; 733 734 aExp = extractFloat32Exp( a ); 735 if ( 0x96 <= aExp ) { 736 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { 737 return propagateFloat32NaN( a, a ); 738 } 739 return a; 740 } 741 if ( aExp <= 0x7E ) { 742 if ( (bits32) ( a<<1 ) == 0 ) return a; 743 float_exception_flags |= float_flag_inexact; 744 aSign = extractFloat32Sign( a ); 745 switch ( float_rounding_mode ) { 746 case float_round_nearest_even: 747 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { 748 return packFloat32( aSign, 0x7F, 0 ); 749 } 750 break; 751 case float_round_to_zero: 752 break; 753 case float_round_down: 754 return aSign ? 0xBF800000 : 0; 755 case float_round_up: 756 return aSign ? 0x80000000 : 0x3F800000; 757 } 758 return packFloat32( aSign, 0, 0 ); 759 } 760 lastBitMask = 1; 761 lastBitMask <<= 0x96 - aExp; 762 roundBitsMask = lastBitMask - 1; 763 z = a; 764 roundingMode = float_rounding_mode; 765 if ( roundingMode == float_round_nearest_even ) { 766 z += lastBitMask>>1; 767 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; 768 } 769 else if ( roundingMode != float_round_to_zero ) { 770 if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) { 771 z += roundBitsMask; 772 } 773 } 774 z &= ~ roundBitsMask; 775 if ( z != a ) float_exception_flags |= float_flag_inexact; 776 return z; 777 778 } 779 #endif 780 781 /* 782 ------------------------------------------------------------------------------- 783 Returns the result of adding the absolute values of the single-precision 784 floating-point values `a' and `b'. If `zSign' is 1, the sum is negated 785 before being returned. `zSign' is ignored if the result is a NaN. 786 The addition is performed according to the IEC/IEEE Standard for Binary 787 Floating-Point Arithmetic. 788 ------------------------------------------------------------------------------- 789 */ 790 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) 791 { 792 int16 aExp, bExp, zExp; 793 bits32 aSig, bSig, zSig; 794 int16 expDiff; 795 796 aSig = extractFloat32Frac( a ); 797 aExp = extractFloat32Exp( a ); 798 bSig = extractFloat32Frac( b ); 799 bExp = extractFloat32Exp( b ); 800 expDiff = aExp - bExp; 801 aSig <<= 6; 802 bSig <<= 6; 803 if ( 0 < expDiff ) { 804 if ( aExp == 0xFF ) { 805 if ( aSig ) return propagateFloat32NaN( a, b ); 806 return a; 807 } 808 if ( bExp == 0 ) { 809 --expDiff; 810 } 811 else { 812 bSig |= 0x20000000; 813 } 814 shift32RightJamming( bSig, expDiff, &bSig ); 815 zExp = aExp; 816 } 817 else if ( expDiff < 0 ) { 818 if ( bExp == 0xFF ) { 819 if ( bSig ) return propagateFloat32NaN( a, b ); 820 return packFloat32( zSign, 0xFF, 0 ); 821 } 822 if ( aExp == 0 ) { 823 ++expDiff; 824 } 825 else { 826 aSig |= 0x20000000; 827 } 828 shift32RightJamming( aSig, - expDiff, &aSig ); 829 zExp = bExp; 830 } 831 else { 832 if ( aExp == 0xFF ) { 833 if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 834 return a; 835 } 836 if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); 837 zSig = 0x40000000 + aSig + bSig; 838 zExp = aExp; 839 goto roundAndPack; 840 } 841 aSig |= 0x20000000; 842 zSig = ( aSig + bSig )<<1; 843 --zExp; 844 if ( (sbits32) zSig < 0 ) { 845 zSig = aSig + bSig; 846 ++zExp; 847 } 848 roundAndPack: 849 return roundAndPackFloat32( zSign, zExp, zSig ); 850 851 } 852 853 /* 854 ------------------------------------------------------------------------------- 855 Returns the result of subtracting the absolute values of the single- 856 precision floating-point values `a' and `b'. If `zSign' is 1, the 857 difference is negated before being returned. `zSign' is ignored if the 858 result is a NaN. The subtraction is performed according to the IEC/IEEE 859 Standard for Binary Floating-Point Arithmetic. 860 ------------------------------------------------------------------------------- 861 */ 862 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) 863 { 864 int16 aExp, bExp, zExp; 865 bits32 aSig, bSig, zSig; 866 int16 expDiff; 867 868 aSig = extractFloat32Frac( a ); 869 aExp = extractFloat32Exp( a ); 870 bSig = extractFloat32Frac( b ); 871 bExp = extractFloat32Exp( b ); 872 expDiff = aExp - bExp; 873 aSig <<= 7; 874 bSig <<= 7; 875 if ( 0 < expDiff ) goto aExpBigger; 876 if ( expDiff < 0 ) goto bExpBigger; 877 if ( aExp == 0xFF ) { 878 if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 879 float_raise( float_flag_invalid ); 880 return float32_default_nan; 881 } 882 if ( aExp == 0 ) { 883 aExp = 1; 884 bExp = 1; 885 } 886 if ( bSig < aSig ) goto aBigger; 887 if ( aSig < bSig ) goto bBigger; 888 return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); 889 bExpBigger: 890 if ( bExp == 0xFF ) { 891 if ( bSig ) return propagateFloat32NaN( a, b ); 892 return packFloat32( zSign ^ 1, 0xFF, 0 ); 893 } 894 if ( aExp == 0 ) { 895 ++expDiff; 896 } 897 else { 898 aSig |= 0x40000000; 899 } 900 shift32RightJamming( aSig, - expDiff, &aSig ); 901 bSig |= 0x40000000; 902 bBigger: 903 zSig = bSig - aSig; 904 zExp = bExp; 905 zSign ^= 1; 906 goto normalizeRoundAndPack; 907 aExpBigger: 908 if ( aExp == 0xFF ) { 909 if ( aSig ) return propagateFloat32NaN( a, b ); 910 return a; 911 } 912 if ( bExp == 0 ) { 913 --expDiff; 914 } 915 else { 916 bSig |= 0x40000000; 917 } 918 shift32RightJamming( bSig, expDiff, &bSig ); 919 aSig |= 0x40000000; 920 aBigger: 921 zSig = aSig - bSig; 922 zExp = aExp; 923 normalizeRoundAndPack: 924 --zExp; 925 return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); 926 927 } 928 929 /* 930 ------------------------------------------------------------------------------- 931 Returns the result of adding the single-precision floating-point values `a' 932 and `b'. The operation is performed according to the IEC/IEEE Standard for 933 Binary Floating-Point Arithmetic. 934 ------------------------------------------------------------------------------- 935 */ 936 float32 float32_add( float32 a, float32 b ) 937 { 938 flag aSign, bSign; 939 940 aSign = extractFloat32Sign( a ); 941 bSign = extractFloat32Sign( b ); 942 if ( aSign == bSign ) { 943 return addFloat32Sigs( a, b, aSign ); 944 } 945 else { 946 return subFloat32Sigs( a, b, aSign ); 947 } 948 949 } 950 951 /* 952 ------------------------------------------------------------------------------- 953 Returns the result of subtracting the single-precision floating-point values 954 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 955 for Binary Floating-Point Arithmetic. 956 ------------------------------------------------------------------------------- 957 */ 958 float32 float32_sub( float32 a, float32 b ) 959 { 960 flag aSign, bSign; 961 962 aSign = extractFloat32Sign( a ); 963 bSign = extractFloat32Sign( b ); 964 if ( aSign == bSign ) { 965 return subFloat32Sigs( a, b, aSign ); 966 } 967 else { 968 return addFloat32Sigs( a, b, aSign ); 969 } 970 971 } 972 973 /* 974 ------------------------------------------------------------------------------- 975 Returns the result of multiplying the single-precision floating-point values 976 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 977 for Binary Floating-Point Arithmetic. 978 ------------------------------------------------------------------------------- 979 */ 980 float32 float32_mul( float32 a, float32 b ) 981 { 982 flag aSign, bSign, zSign; 983 int16 aExp, bExp, zExp; 984 bits32 aSig, bSig, zSig0, zSig1; 985 986 aSig = extractFloat32Frac( a ); 987 aExp = extractFloat32Exp( a ); 988 aSign = extractFloat32Sign( a ); 989 bSig = extractFloat32Frac( b ); 990 bExp = extractFloat32Exp( b ); 991 bSign = extractFloat32Sign( b ); 992 zSign = aSign ^ bSign; 993 if ( aExp == 0xFF ) { 994 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 995 return propagateFloat32NaN( a, b ); 996 } 997 if ( ( bExp | bSig ) == 0 ) { 998 float_raise( float_flag_invalid ); 999 return float32_default_nan; 1000 } 1001 return packFloat32( zSign, 0xFF, 0 ); 1002 } 1003 if ( bExp == 0xFF ) { 1004 if ( bSig ) return propagateFloat32NaN( a, b ); 1005 if ( ( aExp | aSig ) == 0 ) { 1006 float_raise( float_flag_invalid ); 1007 return float32_default_nan; 1008 } 1009 return packFloat32( zSign, 0xFF, 0 ); 1010 } 1011 if ( aExp == 0 ) { 1012 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); 1013 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1014 } 1015 if ( bExp == 0 ) { 1016 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); 1017 normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1018 } 1019 zExp = aExp + bExp - 0x7F; 1020 aSig = ( aSig | 0x00800000 )<<7; 1021 bSig = ( bSig | 0x00800000 )<<8; 1022 mul32To64( aSig, bSig, &zSig0, &zSig1 ); 1023 zSig0 |= ( zSig1 != 0 ); 1024 if ( 0 <= (sbits32) ( zSig0<<1 ) ) { 1025 zSig0 <<= 1; 1026 --zExp; 1027 } 1028 return roundAndPackFloat32( zSign, zExp, zSig0 ); 1029 1030 } 1031 1032 /* 1033 ------------------------------------------------------------------------------- 1034 Returns the result of dividing the single-precision floating-point value `a' 1035 by the corresponding value `b'. The operation is performed according to the 1036 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1037 ------------------------------------------------------------------------------- 1038 */ 1039 float32 float32_div( float32 a, float32 b ) 1040 { 1041 flag aSign, bSign, zSign; 1042 int16 aExp, bExp, zExp; 1043 bits32 aSig, bSig, zSig, rem0, rem1, term0, term1; 1044 1045 aSig = extractFloat32Frac( a ); 1046 aExp = extractFloat32Exp( a ); 1047 aSign = extractFloat32Sign( a ); 1048 bSig = extractFloat32Frac( b ); 1049 bExp = extractFloat32Exp( b ); 1050 bSign = extractFloat32Sign( b ); 1051 zSign = aSign ^ bSign; 1052 if ( aExp == 0xFF ) { 1053 if ( aSig ) return propagateFloat32NaN( a, b ); 1054 if ( bExp == 0xFF ) { 1055 if ( bSig ) return propagateFloat32NaN( a, b ); 1056 float_raise( float_flag_invalid ); 1057 return float32_default_nan; 1058 } 1059 return packFloat32( zSign, 0xFF, 0 ); 1060 } 1061 if ( bExp == 0xFF ) { 1062 if ( bSig ) return propagateFloat32NaN( a, b ); 1063 return packFloat32( zSign, 0, 0 ); 1064 } 1065 if ( bExp == 0 ) { 1066 if ( bSig == 0 ) { 1067 if ( ( aExp | aSig ) == 0 ) { 1068 float_raise( float_flag_invalid ); 1069 return float32_default_nan; 1070 } 1071 float_raise( float_flag_divbyzero ); 1072 return packFloat32( zSign, 0xFF, 0 ); 1073 } 1074 normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1075 } 1076 if ( aExp == 0 ) { 1077 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); 1078 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1079 } 1080 zExp = aExp - bExp + 0x7D; 1081 aSig = ( aSig | 0x00800000 )<<7; 1082 bSig = ( bSig | 0x00800000 )<<8; 1083 if ( bSig <= ( aSig + aSig ) ) { 1084 aSig >>= 1; 1085 ++zExp; 1086 } 1087 zSig = estimateDiv64To32( aSig, 0, bSig ); 1088 if ( ( zSig & 0x3F ) <= 2 ) { 1089 mul32To64( bSig, zSig, &term0, &term1 ); 1090 sub64( aSig, 0, term0, term1, &rem0, &rem1 ); 1091 while ( (sbits32) rem0 < 0 ) { 1092 --zSig; 1093 add64( rem0, rem1, 0, bSig, &rem0, &rem1 ); 1094 } 1095 zSig |= ( rem1 != 0 ); 1096 } 1097 return roundAndPackFloat32( zSign, zExp, zSig ); 1098 1099 } 1100 1101 #ifndef SOFTFLOAT_FOR_GCC 1102 /* 1103 ------------------------------------------------------------------------------- 1104 Returns the remainder of the single-precision floating-point value `a' 1105 with respect to the corresponding value `b'. The operation is performed 1106 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1107 ------------------------------------------------------------------------------- 1108 */ 1109 float32 float32_rem( float32 a, float32 b ) 1110 { 1111 flag aSign, bSign, zSign; 1112 int16 aExp, bExp, expDiff; 1113 bits32 aSig, bSig, q, allZero, alternateASig; 1114 sbits32 sigMean; 1115 1116 aSig = extractFloat32Frac( a ); 1117 aExp = extractFloat32Exp( a ); 1118 aSign = extractFloat32Sign( a ); 1119 bSig = extractFloat32Frac( b ); 1120 bExp = extractFloat32Exp( b ); 1121 bSign = extractFloat32Sign( b ); 1122 if ( aExp == 0xFF ) { 1123 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 1124 return propagateFloat32NaN( a, b ); 1125 } 1126 float_raise( float_flag_invalid ); 1127 return float32_default_nan; 1128 } 1129 if ( bExp == 0xFF ) { 1130 if ( bSig ) return propagateFloat32NaN( a, b ); 1131 return a; 1132 } 1133 if ( bExp == 0 ) { 1134 if ( bSig == 0 ) { 1135 float_raise( float_flag_invalid ); 1136 return float32_default_nan; 1137 } 1138 normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1139 } 1140 if ( aExp == 0 ) { 1141 if ( aSig == 0 ) return a; 1142 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1143 } 1144 expDiff = aExp - bExp; 1145 aSig = ( aSig | 0x00800000 )<<8; 1146 bSig = ( bSig | 0x00800000 )<<8; 1147 if ( expDiff < 0 ) { 1148 if ( expDiff < -1 ) return a; 1149 aSig >>= 1; 1150 } 1151 q = ( bSig <= aSig ); 1152 if ( q ) aSig -= bSig; 1153 expDiff -= 32; 1154 while ( 0 < expDiff ) { 1155 q = estimateDiv64To32( aSig, 0, bSig ); 1156 q = ( 2 < q ) ? q - 2 : 0; 1157 aSig = - ( ( bSig>>2 ) * q ); 1158 expDiff -= 30; 1159 } 1160 expDiff += 32; 1161 if ( 0 < expDiff ) { 1162 q = estimateDiv64To32( aSig, 0, bSig ); 1163 q = ( 2 < q ) ? q - 2 : 0; 1164 q >>= 32 - expDiff; 1165 bSig >>= 2; 1166 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; 1167 } 1168 else { 1169 aSig >>= 2; 1170 bSig >>= 2; 1171 } 1172 do { 1173 alternateASig = aSig; 1174 ++q; 1175 aSig -= bSig; 1176 } while ( 0 <= (sbits32) aSig ); 1177 sigMean = aSig + alternateASig; 1178 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { 1179 aSig = alternateASig; 1180 } 1181 zSign = ( (sbits32) aSig < 0 ); 1182 if ( zSign ) aSig = - aSig; 1183 return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); 1184 1185 } 1186 #endif 1187 1188 #ifndef SOFTFLOAT_FOR_GCC 1189 /* 1190 ------------------------------------------------------------------------------- 1191 Returns the square root of the single-precision floating-point value `a'. 1192 The operation is performed according to the IEC/IEEE Standard for Binary 1193 Floating-Point Arithmetic. 1194 ------------------------------------------------------------------------------- 1195 */ 1196 float32 float32_sqrt( float32 a ) 1197 { 1198 flag aSign; 1199 int16 aExp, zExp; 1200 bits32 aSig, zSig, rem0, rem1, term0, term1; 1201 1202 aSig = extractFloat32Frac( a ); 1203 aExp = extractFloat32Exp( a ); 1204 aSign = extractFloat32Sign( a ); 1205 if ( aExp == 0xFF ) { 1206 if ( aSig ) return propagateFloat32NaN( a, 0 ); 1207 if ( ! aSign ) return a; 1208 float_raise( float_flag_invalid ); 1209 return float32_default_nan; 1210 } 1211 if ( aSign ) { 1212 if ( ( aExp | aSig ) == 0 ) return a; 1213 float_raise( float_flag_invalid ); 1214 return float32_default_nan; 1215 } 1216 if ( aExp == 0 ) { 1217 if ( aSig == 0 ) return 0; 1218 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1219 } 1220 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; 1221 aSig = ( aSig | 0x00800000 )<<8; 1222 zSig = estimateSqrt32( aExp, aSig ) + 2; 1223 if ( ( zSig & 0x7F ) <= 5 ) { 1224 if ( zSig < 2 ) { 1225 zSig = 0x7FFFFFFF; 1226 goto roundAndPack; 1227 } 1228 else { 1229 aSig >>= aExp & 1; 1230 mul32To64( zSig, zSig, &term0, &term1 ); 1231 sub64( aSig, 0, term0, term1, &rem0, &rem1 ); 1232 while ( (sbits32) rem0 < 0 ) { 1233 --zSig; 1234 shortShift64Left( 0, zSig, 1, &term0, &term1 ); 1235 term1 |= 1; 1236 add64( rem0, rem1, term0, term1, &rem0, &rem1 ); 1237 } 1238 zSig |= ( ( rem0 | rem1 ) != 0 ); 1239 } 1240 } 1241 shift32RightJamming( zSig, 1, &zSig ); 1242 roundAndPack: 1243 return roundAndPackFloat32( 0, zExp, zSig ); 1244 1245 } 1246 #endif 1247 1248 /* 1249 ------------------------------------------------------------------------------- 1250 Returns 1 if the single-precision floating-point value `a' is equal to 1251 the corresponding value `b', and 0 otherwise. The comparison is performed 1252 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1253 ------------------------------------------------------------------------------- 1254 */ 1255 flag float32_eq( float32 a, float32 b ) 1256 { 1257 1258 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1259 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1260 ) { 1261 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 1262 float_raise( float_flag_invalid ); 1263 } 1264 return 0; 1265 } 1266 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1267 1268 } 1269 1270 /* 1271 ------------------------------------------------------------------------------- 1272 Returns 1 if the single-precision floating-point value `a' is less than 1273 or equal to the corresponding value `b', and 0 otherwise. The comparison 1274 is performed according to the IEC/IEEE Standard for Binary Floating-Point 1275 Arithmetic. 1276 ------------------------------------------------------------------------------- 1277 */ 1278 flag float32_le( float32 a, float32 b ) 1279 { 1280 flag aSign, bSign; 1281 1282 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1283 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1284 ) { 1285 float_raise( float_flag_invalid ); 1286 return 0; 1287 } 1288 aSign = extractFloat32Sign( a ); 1289 bSign = extractFloat32Sign( b ); 1290 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1291 return ( a == b ) || ( aSign ^ ( a < b ) ); 1292 1293 } 1294 1295 /* 1296 ------------------------------------------------------------------------------- 1297 Returns 1 if the single-precision floating-point value `a' is less than 1298 the corresponding value `b', and 0 otherwise. The comparison is performed 1299 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1300 ------------------------------------------------------------------------------- 1301 */ 1302 flag float32_lt( float32 a, float32 b ) 1303 { 1304 flag aSign, bSign; 1305 1306 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1307 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1308 ) { 1309 float_raise( float_flag_invalid ); 1310 return 0; 1311 } 1312 aSign = extractFloat32Sign( a ); 1313 bSign = extractFloat32Sign( b ); 1314 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); 1315 return ( a != b ) && ( aSign ^ ( a < b ) ); 1316 1317 } 1318 1319 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1320 /* 1321 ------------------------------------------------------------------------------- 1322 Returns 1 if the single-precision floating-point value `a' is equal to 1323 the corresponding value `b', and 0 otherwise. The invalid exception is 1324 raised if either operand is a NaN. Otherwise, the comparison is performed 1325 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1326 ------------------------------------------------------------------------------- 1327 */ 1328 flag float32_eq_signaling( float32 a, float32 b ) 1329 { 1330 1331 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1332 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1333 ) { 1334 float_raise( float_flag_invalid ); 1335 return 0; 1336 } 1337 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1338 1339 } 1340 1341 /* 1342 ------------------------------------------------------------------------------- 1343 Returns 1 if the single-precision floating-point value `a' is less than or 1344 equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 1345 cause an exception. Otherwise, the comparison is performed according to the 1346 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1347 ------------------------------------------------------------------------------- 1348 */ 1349 flag float32_le_quiet( float32 a, float32 b ) 1350 { 1351 flag aSign, bSign; 1352 int16 aExp, bExp; 1353 1354 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1355 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1356 ) { 1357 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 1358 float_raise( float_flag_invalid ); 1359 } 1360 return 0; 1361 } 1362 aSign = extractFloat32Sign( a ); 1363 bSign = extractFloat32Sign( b ); 1364 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1365 return ( a == b ) || ( aSign ^ ( a < b ) ); 1366 1367 } 1368 1369 /* 1370 ------------------------------------------------------------------------------- 1371 Returns 1 if the single-precision floating-point value `a' is less than 1372 the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 1373 exception. Otherwise, the comparison is performed according to the IEC/IEEE 1374 Standard for Binary Floating-Point Arithmetic. 1375 ------------------------------------------------------------------------------- 1376 */ 1377 flag float32_lt_quiet( float32 a, float32 b ) 1378 { 1379 flag aSign, bSign; 1380 1381 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1382 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1383 ) { 1384 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 1385 float_raise( float_flag_invalid ); 1386 } 1387 return 0; 1388 } 1389 aSign = extractFloat32Sign( a ); 1390 bSign = extractFloat32Sign( b ); 1391 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); 1392 return ( a != b ) && ( aSign ^ ( a < b ) ); 1393 1394 } 1395 #endif /* !SOFTFLOAT_FOR_GCC */ 1396 1397 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1398 /* 1399 ------------------------------------------------------------------------------- 1400 Returns the result of converting the double-precision floating-point value 1401 `a' to the 32-bit two's complement integer format. The conversion is 1402 performed according to the IEC/IEEE Standard for Binary Floating-Point 1403 Arithmetic---which means in particular that the conversion is rounded 1404 according to the current rounding mode. If `a' is a NaN, the largest 1405 positive integer is returned. Otherwise, if the conversion overflows, the 1406 largest integer with the same sign as `a' is returned. 1407 ------------------------------------------------------------------------------- 1408 */ 1409 int32 float64_to_int32( float64 a ) 1410 { 1411 flag aSign; 1412 int16 aExp, shiftCount; 1413 bits32 aSig0, aSig1, absZ, aSigExtra; 1414 int32 z; 1415 int8 roundingMode; 1416 1417 aSig1 = extractFloat64Frac1( a ); 1418 aSig0 = extractFloat64Frac0( a ); 1419 aExp = extractFloat64Exp( a ); 1420 aSign = extractFloat64Sign( a ); 1421 shiftCount = aExp - 0x413; 1422 if ( 0 <= shiftCount ) { 1423 if ( 0x41E < aExp ) { 1424 if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0; 1425 goto invalid; 1426 } 1427 shortShift64Left( 1428 aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra ); 1429 if ( 0x80000000 < absZ ) goto invalid; 1430 } 1431 else { 1432 aSig1 = ( aSig1 != 0 ); 1433 if ( aExp < 0x3FE ) { 1434 aSigExtra = aExp | aSig0 | aSig1; 1435 absZ = 0; 1436 } 1437 else { 1438 aSig0 |= 0x00100000; 1439 aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1; 1440 absZ = aSig0>>( - shiftCount ); 1441 } 1442 } 1443 roundingMode = float_rounding_mode; 1444 if ( roundingMode == float_round_nearest_even ) { 1445 if ( (sbits32) aSigExtra < 0 ) { 1446 ++absZ; 1447 if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1; 1448 } 1449 z = aSign ? - absZ : absZ; 1450 } 1451 else { 1452 aSigExtra = ( aSigExtra != 0 ); 1453 if ( aSign ) { 1454 z = - ( absZ 1455 + ( ( roundingMode == float_round_down ) & aSigExtra ) ); 1456 } 1457 else { 1458 z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra ); 1459 } 1460 } 1461 if ( ( aSign ^ ( z < 0 ) ) && z ) { 1462 invalid: 1463 float_raise( float_flag_invalid ); 1464 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 1465 } 1466 if ( aSigExtra ) float_exception_flags |= float_flag_inexact; 1467 return z; 1468 1469 } 1470 #endif /* !SOFTFLOAT_FOR_GCC */ 1471 1472 /* 1473 ------------------------------------------------------------------------------- 1474 Returns the result of converting the double-precision floating-point value 1475 `a' to the 32-bit two's complement integer format. The conversion is 1476 performed according to the IEC/IEEE Standard for Binary Floating-Point 1477 Arithmetic, except that the conversion is always rounded toward zero. 1478 If `a' is a NaN, the largest positive integer is returned. Otherwise, if 1479 the conversion overflows, the largest integer with the same sign as `a' is 1480 returned. 1481 ------------------------------------------------------------------------------- 1482 */ 1483 int32 float64_to_int32_round_to_zero( float64 a ) 1484 { 1485 flag aSign; 1486 int16 aExp, shiftCount; 1487 bits32 aSig0, aSig1, absZ, aSigExtra; 1488 int32 z; 1489 1490 aSig1 = extractFloat64Frac1( a ); 1491 aSig0 = extractFloat64Frac0( a ); 1492 aExp = extractFloat64Exp( a ); 1493 aSign = extractFloat64Sign( a ); 1494 shiftCount = aExp - 0x413; 1495 if ( 0 <= shiftCount ) { 1496 if ( 0x41E < aExp ) { 1497 if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0; 1498 goto invalid; 1499 } 1500 shortShift64Left( 1501 aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra ); 1502 } 1503 else { 1504 if ( aExp < 0x3FF ) { 1505 if ( aExp | aSig0 | aSig1 ) { 1506 float_exception_flags |= float_flag_inexact; 1507 } 1508 return 0; 1509 } 1510 aSig0 |= 0x00100000; 1511 aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1; 1512 absZ = aSig0>>( - shiftCount ); 1513 } 1514 z = aSign ? - absZ : absZ; 1515 if ( ( aSign ^ ( z < 0 ) ) && z ) { 1516 invalid: 1517 float_raise( float_flag_invalid ); 1518 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 1519 } 1520 if ( aSigExtra ) float_exception_flags |= float_flag_inexact; 1521 return z; 1522 1523 } 1524 1525 /* 1526 ------------------------------------------------------------------------------- 1527 Returns the result of converting the double-precision floating-point value 1528 `a' to the single-precision floating-point format. The conversion is 1529 performed according to the IEC/IEEE Standard for Binary Floating-Point 1530 Arithmetic. 1531 ------------------------------------------------------------------------------- 1532 */ 1533 float32 float64_to_float32( float64 a ) 1534 { 1535 flag aSign; 1536 int16 aExp; 1537 bits32 aSig0, aSig1, zSig; 1538 bits32 allZero; 1539 1540 aSig1 = extractFloat64Frac1( a ); 1541 aSig0 = extractFloat64Frac0( a ); 1542 aExp = extractFloat64Exp( a ); 1543 aSign = extractFloat64Sign( a ); 1544 if ( aExp == 0x7FF ) { 1545 if ( aSig0 | aSig1 ) { 1546 return commonNaNToFloat32( float64ToCommonNaN( a ) ); 1547 } 1548 return packFloat32( aSign, 0xFF, 0 ); 1549 } 1550 shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig ); 1551 if ( aExp ) zSig |= 0x40000000; 1552 return roundAndPackFloat32( aSign, aExp - 0x381, zSig ); 1553 1554 } 1555 1556 #ifndef SOFTFLOAT_FOR_GCC 1557 /* 1558 ------------------------------------------------------------------------------- 1559 Rounds the double-precision floating-point value `a' to an integer, 1560 and returns the result as a double-precision floating-point value. The 1561 operation is performed according to the IEC/IEEE Standard for Binary 1562 Floating-Point Arithmetic. 1563 ------------------------------------------------------------------------------- 1564 */ 1565 float64 float64_round_to_int( float64 a ) 1566 { 1567 flag aSign; 1568 int16 aExp; 1569 bits32 lastBitMask, roundBitsMask; 1570 int8 roundingMode; 1571 float64 z; 1572 1573 aExp = extractFloat64Exp( a ); 1574 if ( 0x413 <= aExp ) { 1575 if ( 0x433 <= aExp ) { 1576 if ( ( aExp == 0x7FF ) 1577 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) { 1578 return propagateFloat64NaN( a, a ); 1579 } 1580 return a; 1581 } 1582 lastBitMask = 1; 1583 lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1; 1584 roundBitsMask = lastBitMask - 1; 1585 z = a; 1586 roundingMode = float_rounding_mode; 1587 if ( roundingMode == float_round_nearest_even ) { 1588 if ( lastBitMask ) { 1589 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); 1590 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; 1591 } 1592 else { 1593 if ( (sbits32) z.low < 0 ) { 1594 ++z.high; 1595 if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1; 1596 } 1597 } 1598 } 1599 else if ( roundingMode != float_round_to_zero ) { 1600 if ( extractFloat64Sign( z ) 1601 ^ ( roundingMode == float_round_up ) ) { 1602 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); 1603 } 1604 } 1605 z.low &= ~ roundBitsMask; 1606 } 1607 else { 1608 if ( aExp <= 0x3FE ) { 1609 if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a; 1610 float_exception_flags |= float_flag_inexact; 1611 aSign = extractFloat64Sign( a ); 1612 switch ( float_rounding_mode ) { 1613 case float_round_nearest_even: 1614 if ( ( aExp == 0x3FE ) 1615 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) 1616 ) { 1617 return packFloat64( aSign, 0x3FF, 0, 0 ); 1618 } 1619 break; 1620 case float_round_down: 1621 return 1622 aSign ? packFloat64( 1, 0x3FF, 0, 0 ) 1623 : packFloat64( 0, 0, 0, 0 ); 1624 case float_round_up: 1625 return 1626 aSign ? packFloat64( 1, 0, 0, 0 ) 1627 : packFloat64( 0, 0x3FF, 0, 0 ); 1628 } 1629 return packFloat64( aSign, 0, 0, 0 ); 1630 } 1631 lastBitMask = 1; 1632 lastBitMask <<= 0x413 - aExp; 1633 roundBitsMask = lastBitMask - 1; 1634 z.low = 0; 1635 z.high = a.high; 1636 roundingMode = float_rounding_mode; 1637 if ( roundingMode == float_round_nearest_even ) { 1638 z.high += lastBitMask>>1; 1639 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { 1640 z.high &= ~ lastBitMask; 1641 } 1642 } 1643 else if ( roundingMode != float_round_to_zero ) { 1644 if ( extractFloat64Sign( z ) 1645 ^ ( roundingMode == float_round_up ) ) { 1646 z.high |= ( a.low != 0 ); 1647 z.high += roundBitsMask; 1648 } 1649 } 1650 z.high &= ~ roundBitsMask; 1651 } 1652 if ( ( z.low != a.low ) || ( z.high != a.high ) ) { 1653 float_exception_flags |= float_flag_inexact; 1654 } 1655 return z; 1656 1657 } 1658 #endif 1659 1660 /* 1661 ------------------------------------------------------------------------------- 1662 Returns the result of adding the absolute values of the double-precision 1663 floating-point values `a' and `b'. If `zSign' is 1, the sum is negated 1664 before being returned. `zSign' is ignored if the result is a NaN. 1665 The addition is performed according to the IEC/IEEE Standard for Binary 1666 Floating-Point Arithmetic. 1667 ------------------------------------------------------------------------------- 1668 */ 1669 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) 1670 { 1671 int16 aExp, bExp, zExp; 1672 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 1673 int16 expDiff; 1674 1675 aSig1 = extractFloat64Frac1( a ); 1676 aSig0 = extractFloat64Frac0( a ); 1677 aExp = extractFloat64Exp( a ); 1678 bSig1 = extractFloat64Frac1( b ); 1679 bSig0 = extractFloat64Frac0( b ); 1680 bExp = extractFloat64Exp( b ); 1681 expDiff = aExp - bExp; 1682 if ( 0 < expDiff ) { 1683 if ( aExp == 0x7FF ) { 1684 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); 1685 return a; 1686 } 1687 if ( bExp == 0 ) { 1688 --expDiff; 1689 } 1690 else { 1691 bSig0 |= 0x00100000; 1692 } 1693 shift64ExtraRightJamming( 1694 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); 1695 zExp = aExp; 1696 } 1697 else if ( expDiff < 0 ) { 1698 if ( bExp == 0x7FF ) { 1699 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1700 return packFloat64( zSign, 0x7FF, 0, 0 ); 1701 } 1702 if ( aExp == 0 ) { 1703 ++expDiff; 1704 } 1705 else { 1706 aSig0 |= 0x00100000; 1707 } 1708 shift64ExtraRightJamming( 1709 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); 1710 zExp = bExp; 1711 } 1712 else { 1713 if ( aExp == 0x7FF ) { 1714 if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 1715 return propagateFloat64NaN( a, b ); 1716 } 1717 return a; 1718 } 1719 add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 1720 if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 ); 1721 zSig2 = 0; 1722 zSig0 |= 0x00200000; 1723 zExp = aExp; 1724 goto shiftRight1; 1725 } 1726 aSig0 |= 0x00100000; 1727 add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 1728 --zExp; 1729 if ( zSig0 < 0x00200000 ) goto roundAndPack; 1730 ++zExp; 1731 shiftRight1: 1732 shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 1733 roundAndPack: 1734 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 1735 1736 } 1737 1738 /* 1739 ------------------------------------------------------------------------------- 1740 Returns the result of subtracting the absolute values of the double- 1741 precision floating-point values `a' and `b'. If `zSign' is 1, the 1742 difference is negated before being returned. `zSign' is ignored if the 1743 result is a NaN. The subtraction is performed according to the IEC/IEEE 1744 Standard for Binary Floating-Point Arithmetic. 1745 ------------------------------------------------------------------------------- 1746 */ 1747 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) 1748 { 1749 int16 aExp, bExp, zExp; 1750 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; 1751 int16 expDiff; 1752 1753 aSig1 = extractFloat64Frac1( a ); 1754 aSig0 = extractFloat64Frac0( a ); 1755 aExp = extractFloat64Exp( a ); 1756 bSig1 = extractFloat64Frac1( b ); 1757 bSig0 = extractFloat64Frac0( b ); 1758 bExp = extractFloat64Exp( b ); 1759 expDiff = aExp - bExp; 1760 shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 ); 1761 shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 ); 1762 if ( 0 < expDiff ) goto aExpBigger; 1763 if ( expDiff < 0 ) goto bExpBigger; 1764 if ( aExp == 0x7FF ) { 1765 if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 1766 return propagateFloat64NaN( a, b ); 1767 } 1768 float_raise( float_flag_invalid ); 1769 return float64_default_nan; 1770 } 1771 if ( aExp == 0 ) { 1772 aExp = 1; 1773 bExp = 1; 1774 } 1775 if ( bSig0 < aSig0 ) goto aBigger; 1776 if ( aSig0 < bSig0 ) goto bBigger; 1777 if ( bSig1 < aSig1 ) goto aBigger; 1778 if ( aSig1 < bSig1 ) goto bBigger; 1779 return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 ); 1780 bExpBigger: 1781 if ( bExp == 0x7FF ) { 1782 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1783 return packFloat64( zSign ^ 1, 0x7FF, 0, 0 ); 1784 } 1785 if ( aExp == 0 ) { 1786 ++expDiff; 1787 } 1788 else { 1789 aSig0 |= 0x40000000; 1790 } 1791 shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); 1792 bSig0 |= 0x40000000; 1793 bBigger: 1794 sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); 1795 zExp = bExp; 1796 zSign ^= 1; 1797 goto normalizeRoundAndPack; 1798 aExpBigger: 1799 if ( aExp == 0x7FF ) { 1800 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); 1801 return a; 1802 } 1803 if ( bExp == 0 ) { 1804 --expDiff; 1805 } 1806 else { 1807 bSig0 |= 0x40000000; 1808 } 1809 shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); 1810 aSig0 |= 0x40000000; 1811 aBigger: 1812 sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 1813 zExp = aExp; 1814 normalizeRoundAndPack: 1815 --zExp; 1816 return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 ); 1817 1818 } 1819 1820 /* 1821 ------------------------------------------------------------------------------- 1822 Returns the result of adding the double-precision floating-point values `a' 1823 and `b'. The operation is performed according to the IEC/IEEE Standard for 1824 Binary Floating-Point Arithmetic. 1825 ------------------------------------------------------------------------------- 1826 */ 1827 float64 float64_add( float64 a, float64 b ) 1828 { 1829 flag aSign, bSign; 1830 1831 aSign = extractFloat64Sign( a ); 1832 bSign = extractFloat64Sign( b ); 1833 if ( aSign == bSign ) { 1834 return addFloat64Sigs( a, b, aSign ); 1835 } 1836 else { 1837 return subFloat64Sigs( a, b, aSign ); 1838 } 1839 1840 } 1841 1842 /* 1843 ------------------------------------------------------------------------------- 1844 Returns the result of subtracting the double-precision floating-point values 1845 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 1846 for Binary Floating-Point Arithmetic. 1847 ------------------------------------------------------------------------------- 1848 */ 1849 float64 float64_sub( float64 a, float64 b ) 1850 { 1851 flag aSign, bSign; 1852 1853 aSign = extractFloat64Sign( a ); 1854 bSign = extractFloat64Sign( b ); 1855 if ( aSign == bSign ) { 1856 return subFloat64Sigs( a, b, aSign ); 1857 } 1858 else { 1859 return addFloat64Sigs( a, b, aSign ); 1860 } 1861 1862 } 1863 1864 /* 1865 ------------------------------------------------------------------------------- 1866 Returns the result of multiplying the double-precision floating-point values 1867 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 1868 for Binary Floating-Point Arithmetic. 1869 ------------------------------------------------------------------------------- 1870 */ 1871 float64 float64_mul( float64 a, float64 b ) 1872 { 1873 flag aSign, bSign, zSign; 1874 int16 aExp, bExp, zExp; 1875 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; 1876 1877 aSig1 = extractFloat64Frac1( a ); 1878 aSig0 = extractFloat64Frac0( a ); 1879 aExp = extractFloat64Exp( a ); 1880 aSign = extractFloat64Sign( a ); 1881 bSig1 = extractFloat64Frac1( b ); 1882 bSig0 = extractFloat64Frac0( b ); 1883 bExp = extractFloat64Exp( b ); 1884 bSign = extractFloat64Sign( b ); 1885 zSign = aSign ^ bSign; 1886 if ( aExp == 0x7FF ) { 1887 if ( ( aSig0 | aSig1 ) 1888 || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) { 1889 return propagateFloat64NaN( a, b ); 1890 } 1891 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; 1892 return packFloat64( zSign, 0x7FF, 0, 0 ); 1893 } 1894 if ( bExp == 0x7FF ) { 1895 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1896 if ( ( aExp | aSig0 | aSig1 ) == 0 ) { 1897 invalid: 1898 float_raise( float_flag_invalid ); 1899 return float64_default_nan; 1900 } 1901 return packFloat64( zSign, 0x7FF, 0, 0 ); 1902 } 1903 if ( aExp == 0 ) { 1904 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); 1905 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 1906 } 1907 if ( bExp == 0 ) { 1908 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); 1909 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 1910 } 1911 zExp = aExp + bExp - 0x400; 1912 aSig0 |= 0x00100000; 1913 shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 ); 1914 mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); 1915 add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); 1916 zSig2 |= ( zSig3 != 0 ); 1917 if ( 0x00200000 <= zSig0 ) { 1918 shift64ExtraRightJamming( 1919 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 1920 ++zExp; 1921 } 1922 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 1923 1924 } 1925 1926 /* 1927 ------------------------------------------------------------------------------- 1928 Returns the result of dividing the double-precision floating-point value `a' 1929 by the corresponding value `b'. The operation is performed according to the 1930 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1931 ------------------------------------------------------------------------------- 1932 */ 1933 float64 float64_div( float64 a, float64 b ) 1934 { 1935 flag aSign, bSign, zSign; 1936 int16 aExp, bExp, zExp; 1937 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 1938 bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 1939 1940 aSig1 = extractFloat64Frac1( a ); 1941 aSig0 = extractFloat64Frac0( a ); 1942 aExp = extractFloat64Exp( a ); 1943 aSign = extractFloat64Sign( a ); 1944 bSig1 = extractFloat64Frac1( b ); 1945 bSig0 = extractFloat64Frac0( b ); 1946 bExp = extractFloat64Exp( b ); 1947 bSign = extractFloat64Sign( b ); 1948 zSign = aSign ^ bSign; 1949 if ( aExp == 0x7FF ) { 1950 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); 1951 if ( bExp == 0x7FF ) { 1952 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1953 goto invalid; 1954 } 1955 return packFloat64( zSign, 0x7FF, 0, 0 ); 1956 } 1957 if ( bExp == 0x7FF ) { 1958 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1959 return packFloat64( zSign, 0, 0, 0 ); 1960 } 1961 if ( bExp == 0 ) { 1962 if ( ( bSig0 | bSig1 ) == 0 ) { 1963 if ( ( aExp | aSig0 | aSig1 ) == 0 ) { 1964 invalid: 1965 float_raise( float_flag_invalid ); 1966 return float64_default_nan; 1967 } 1968 float_raise( float_flag_divbyzero ); 1969 return packFloat64( zSign, 0x7FF, 0, 0 ); 1970 } 1971 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 1972 } 1973 if ( aExp == 0 ) { 1974 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); 1975 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 1976 } 1977 zExp = aExp - bExp + 0x3FD; 1978 shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 ); 1979 shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 ); 1980 if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) { 1981 shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); 1982 ++zExp; 1983 } 1984 zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 ); 1985 mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); 1986 sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); 1987 while ( (sbits32) rem0 < 0 ) { 1988 --zSig0; 1989 add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); 1990 } 1991 zSig1 = estimateDiv64To32( rem1, rem2, bSig0 ); 1992 if ( ( zSig1 & 0x3FF ) <= 4 ) { 1993 mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); 1994 sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); 1995 while ( (sbits32) rem1 < 0 ) { 1996 --zSig1; 1997 add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); 1998 } 1999 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 2000 } 2001 shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 ); 2002 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 2003 2004 } 2005 2006 #ifndef SOFTFLOAT_FOR_GCC 2007 /* 2008 ------------------------------------------------------------------------------- 2009 Returns the remainder of the double-precision floating-point value `a' 2010 with respect to the corresponding value `b'. The operation is performed 2011 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2012 ------------------------------------------------------------------------------- 2013 */ 2014 float64 float64_rem( float64 a, float64 b ) 2015 { 2016 flag aSign, bSign, zSign; 2017 int16 aExp, bExp, expDiff; 2018 bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; 2019 bits32 allZero, alternateASig0, alternateASig1, sigMean1; 2020 sbits32 sigMean0; 2021 float64 z; 2022 2023 aSig1 = extractFloat64Frac1( a ); 2024 aSig0 = extractFloat64Frac0( a ); 2025 aExp = extractFloat64Exp( a ); 2026 aSign = extractFloat64Sign( a ); 2027 bSig1 = extractFloat64Frac1( b ); 2028 bSig0 = extractFloat64Frac0( b ); 2029 bExp = extractFloat64Exp( b ); 2030 bSign = extractFloat64Sign( b ); 2031 if ( aExp == 0x7FF ) { 2032 if ( ( aSig0 | aSig1 ) 2033 || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) { 2034 return propagateFloat64NaN( a, b ); 2035 } 2036 goto invalid; 2037 } 2038 if ( bExp == 0x7FF ) { 2039 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 2040 return a; 2041 } 2042 if ( bExp == 0 ) { 2043 if ( ( bSig0 | bSig1 ) == 0 ) { 2044 invalid: 2045 float_raise( float_flag_invalid ); 2046 return float64_default_nan; 2047 } 2048 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 2049 } 2050 if ( aExp == 0 ) { 2051 if ( ( aSig0 | aSig1 ) == 0 ) return a; 2052 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 2053 } 2054 expDiff = aExp - bExp; 2055 if ( expDiff < -1 ) return a; 2056 shortShift64Left( 2057 aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 ); 2058 shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 ); 2059 q = le64( bSig0, bSig1, aSig0, aSig1 ); 2060 if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); 2061 expDiff -= 32; 2062 while ( 0 < expDiff ) { 2063 q = estimateDiv64To32( aSig0, aSig1, bSig0 ); 2064 q = ( 4 < q ) ? q - 4 : 0; 2065 mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 ); 2066 shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero ); 2067 shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero ); 2068 sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 ); 2069 expDiff -= 29; 2070 } 2071 if ( -32 < expDiff ) { 2072 q = estimateDiv64To32( aSig0, aSig1, bSig0 ); 2073 q = ( 4 < q ) ? q - 4 : 0; 2074 q >>= - expDiff; 2075 shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 ); 2076 expDiff += 24; 2077 if ( expDiff < 0 ) { 2078 shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); 2079 } 2080 else { 2081 shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); 2082 } 2083 mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 ); 2084 sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); 2085 } 2086 else { 2087 shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 ); 2088 shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 ); 2089 } 2090 do { 2091 alternateASig0 = aSig0; 2092 alternateASig1 = aSig1; 2093 ++q; 2094 sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); 2095 } while ( 0 <= (sbits32) aSig0 ); 2096 add64( 2097 aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 ); 2098 if ( ( sigMean0 < 0 ) 2099 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { 2100 aSig0 = alternateASig0; 2101 aSig1 = alternateASig1; 2102 } 2103 zSign = ( (sbits32) aSig0 < 0 ); 2104 if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); 2105 return 2106 normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 ); 2107 2108 } 2109 #endif 2110 2111 #ifndef SOFTFLOAT_FOR_GCC 2112 /* 2113 ------------------------------------------------------------------------------- 2114 Returns the square root of the double-precision floating-point value `a'. 2115 The operation is performed according to the IEC/IEEE Standard for Binary 2116 Floating-Point Arithmetic. 2117 ------------------------------------------------------------------------------- 2118 */ 2119 float64 float64_sqrt( float64 a ) 2120 { 2121 flag aSign; 2122 int16 aExp, zExp; 2123 bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; 2124 bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 2125 float64 z; 2126 2127 aSig1 = extractFloat64Frac1( a ); 2128 aSig0 = extractFloat64Frac0( a ); 2129 aExp = extractFloat64Exp( a ); 2130 aSign = extractFloat64Sign( a ); 2131 if ( aExp == 0x7FF ) { 2132 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a ); 2133 if ( ! aSign ) return a; 2134 goto invalid; 2135 } 2136 if ( aSign ) { 2137 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; 2138 invalid: 2139 float_raise( float_flag_invalid ); 2140 return float64_default_nan; 2141 } 2142 if ( aExp == 0 ) { 2143 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 ); 2144 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 2145 } 2146 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; 2147 aSig0 |= 0x00100000; 2148 shortShift64Left( aSig0, aSig1, 11, &term0, &term1 ); 2149 zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1; 2150 if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF; 2151 doubleZSig0 = zSig0 + zSig0; 2152 shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 ); 2153 mul32To64( zSig0, zSig0, &term0, &term1 ); 2154 sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 ); 2155 while ( (sbits32) rem0 < 0 ) { 2156 --zSig0; 2157 doubleZSig0 -= 2; 2158 add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 ); 2159 } 2160 zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 ); 2161 if ( ( zSig1 & 0x1FF ) <= 5 ) { 2162 if ( zSig1 == 0 ) zSig1 = 1; 2163 mul32To64( doubleZSig0, zSig1, &term1, &term2 ); 2164 sub64( rem1, 0, term1, term2, &rem1, &rem2 ); 2165 mul32To64( zSig1, zSig1, &term2, &term3 ); 2166 sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); 2167 while ( (sbits32) rem1 < 0 ) { 2168 --zSig1; 2169 shortShift64Left( 0, zSig1, 1, &term2, &term3 ); 2170 term3 |= 1; 2171 term2 |= doubleZSig0; 2172 add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); 2173 } 2174 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 2175 } 2176 shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 ); 2177 return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 ); 2178 2179 } 2180 #endif 2181 2182 /* 2183 ------------------------------------------------------------------------------- 2184 Returns 1 if the double-precision floating-point value `a' is equal to 2185 the corresponding value `b', and 0 otherwise. The comparison is performed 2186 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2187 ------------------------------------------------------------------------------- 2188 */ 2189 flag float64_eq( float64 a, float64 b ) 2190 { 2191 2192 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2193 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2194 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2195 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2196 ) { 2197 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 2198 float_raise( float_flag_invalid ); 2199 } 2200 return 0; 2201 } 2202 return ( a == b ) || 2203 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 ); 2204 2205 } 2206 2207 /* 2208 ------------------------------------------------------------------------------- 2209 Returns 1 if the double-precision floating-point value `a' is less than 2210 or equal to the corresponding value `b', and 0 otherwise. The comparison 2211 is performed according to the IEC/IEEE Standard for Binary Floating-Point 2212 Arithmetic. 2213 ------------------------------------------------------------------------------- 2214 */ 2215 flag float64_le( float64 a, float64 b ) 2216 { 2217 flag aSign, bSign; 2218 2219 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2220 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2221 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2222 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2223 ) { 2224 float_raise( float_flag_invalid ); 2225 return 0; 2226 } 2227 aSign = extractFloat64Sign( a ); 2228 bSign = extractFloat64Sign( b ); 2229 if ( aSign != bSign ) 2230 return aSign || 2231 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 2232 0 ); 2233 return ( a == b ) || 2234 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); 2235 } 2236 2237 /* 2238 ------------------------------------------------------------------------------- 2239 Returns 1 if the double-precision floating-point value `a' is less than 2240 the corresponding value `b', and 0 otherwise. The comparison is performed 2241 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2242 ------------------------------------------------------------------------------- 2243 */ 2244 flag float64_lt( float64 a, float64 b ) 2245 { 2246 flag aSign, bSign; 2247 2248 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2249 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2250 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2251 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2252 ) { 2253 float_raise( float_flag_invalid ); 2254 return 0; 2255 } 2256 aSign = extractFloat64Sign( a ); 2257 bSign = extractFloat64Sign( b ); 2258 if ( aSign != bSign ) 2259 return aSign && 2260 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) != 2261 0 ); 2262 return ( a != b ) && 2263 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); 2264 2265 } 2266 2267 #ifndef SOFTFLOAT_FOR_GCC 2268 /* 2269 ------------------------------------------------------------------------------- 2270 Returns 1 if the double-precision floating-point value `a' is equal to 2271 the corresponding value `b', and 0 otherwise. The invalid exception is 2272 raised if either operand is a NaN. Otherwise, the comparison is performed 2273 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2274 ------------------------------------------------------------------------------- 2275 */ 2276 flag float64_eq_signaling( float64 a, float64 b ) 2277 { 2278 2279 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2280 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2281 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2282 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2283 ) { 2284 float_raise( float_flag_invalid ); 2285 return 0; 2286 } 2287 return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); 2288 2289 } 2290 2291 /* 2292 ------------------------------------------------------------------------------- 2293 Returns 1 if the double-precision floating-point value `a' is less than or 2294 equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 2295 cause an exception. Otherwise, the comparison is performed according to the 2296 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2297 ------------------------------------------------------------------------------- 2298 */ 2299 flag float64_le_quiet( float64 a, float64 b ) 2300 { 2301 flag aSign, bSign; 2302 2303 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2304 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2305 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2306 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2307 ) { 2308 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 2309 float_raise( float_flag_invalid ); 2310 } 2311 return 0; 2312 } 2313 aSign = extractFloat64Sign( a ); 2314 bSign = extractFloat64Sign( b ); 2315 if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); 2316 return ( a == b ) || ( aSign ^ ( a < b ) ); 2317 2318 } 2319 2320 /* 2321 ------------------------------------------------------------------------------- 2322 Returns 1 if the double-precision floating-point value `a' is less than 2323 the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 2324 exception. Otherwise, the comparison is performed according to the IEC/IEEE 2325 Standard for Binary Floating-Point Arithmetic. 2326 ------------------------------------------------------------------------------- 2327 */ 2328 flag float64_lt_quiet( float64 a, float64 b ) 2329 { 2330 flag aSign, bSign; 2331 2332 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2333 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2334 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2335 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2336 ) { 2337 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 2338 float_raise( float_flag_invalid ); 2339 } 2340 return 0; 2341 } 2342 aSign = extractFloat64Sign( a ); 2343 bSign = extractFloat64Sign( b ); 2344 if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); 2345 return ( a != b ) && ( aSign ^ ( a < b ) ); 2346 2347 } 2348 2349 #endif 2350