1*2cec0187Schristos /* $NetBSD: softfloat.c,v 1.2 2012/03/21 14:17:54 christos Exp $ */ 2936b7f4cSbjh21 3936b7f4cSbjh21 /* 4936b7f4cSbjh21 * This version hacked for use with gcc -msoft-float by bjh21. 5936b7f4cSbjh21 * (Mostly a case of #ifdefing out things GCC doesn't need or provides 6936b7f4cSbjh21 * itself). 7936b7f4cSbjh21 */ 8936b7f4cSbjh21 9936b7f4cSbjh21 /* 10936b7f4cSbjh21 * Things you may want to define: 11936b7f4cSbjh21 * 12936b7f4cSbjh21 * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with 13936b7f4cSbjh21 * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them 14936b7f4cSbjh21 * properly renamed. 15936b7f4cSbjh21 */ 16936b7f4cSbjh21 17936b7f4cSbjh21 /* 18936b7f4cSbjh21 * This differs from the standard bits32/softfloat.c in that float64 19936b7f4cSbjh21 * is defined to be a 64-bit integer rather than a structure. The 20936b7f4cSbjh21 * structure is float64s, with translation between the two going via 21936b7f4cSbjh21 * float64u. 22936b7f4cSbjh21 */ 23936b7f4cSbjh21 24936b7f4cSbjh21 /* 25936b7f4cSbjh21 =============================================================================== 26936b7f4cSbjh21 27936b7f4cSbjh21 This C source file is part of the SoftFloat IEC/IEEE Floating-Point 28936b7f4cSbjh21 Arithmetic Package, Release 2a. 29936b7f4cSbjh21 30936b7f4cSbjh21 Written by John R. Hauser. This work was made possible in part by the 31936b7f4cSbjh21 International Computer Science Institute, located at Suite 600, 1947 Center 32936b7f4cSbjh21 Street, Berkeley, California 94704. Funding was partially provided by the 33936b7f4cSbjh21 National Science Foundation under grant MIP-9311980. The original version 34936b7f4cSbjh21 of this code was written as part of a project to build a fixed-point vector 35936b7f4cSbjh21 processor in collaboration with the University of California at Berkeley, 36936b7f4cSbjh21 overseen by Profs. Nelson Morgan and John Wawrzynek. More information 37936b7f4cSbjh21 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 38936b7f4cSbjh21 arithmetic/SoftFloat.html'. 39936b7f4cSbjh21 40936b7f4cSbjh21 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 41936b7f4cSbjh21 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 42936b7f4cSbjh21 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 43936b7f4cSbjh21 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 44936b7f4cSbjh21 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 45936b7f4cSbjh21 46936b7f4cSbjh21 Derivative works are acceptable, even for commercial purposes, so long as 47936b7f4cSbjh21 (1) they include prominent notice that the work is derivative, and (2) they 48936b7f4cSbjh21 include prominent notice akin to these four paragraphs for those parts of 49936b7f4cSbjh21 this code that are retained. 50936b7f4cSbjh21 51936b7f4cSbjh21 =============================================================================== 52936b7f4cSbjh21 */ 53936b7f4cSbjh21 54936b7f4cSbjh21 #include <sys/cdefs.h> 55936b7f4cSbjh21 #if defined(LIBC_SCCS) && !defined(lint) 56*2cec0187Schristos __RCSID("$NetBSD: softfloat.c,v 1.2 2012/03/21 14:17:54 christos Exp $"); 57936b7f4cSbjh21 #endif /* LIBC_SCCS and not lint */ 58936b7f4cSbjh21 59936b7f4cSbjh21 #ifdef SOFTFLOAT_FOR_GCC 60936b7f4cSbjh21 #include "softfloat-for-gcc.h" 61936b7f4cSbjh21 #endif 62936b7f4cSbjh21 63936b7f4cSbjh21 #include "milieu.h" 64936b7f4cSbjh21 #include "softfloat.h" 65936b7f4cSbjh21 66936b7f4cSbjh21 /* 67936b7f4cSbjh21 * Conversions between floats as stored in memory and floats as 68936b7f4cSbjh21 * SoftFloat uses them 69936b7f4cSbjh21 */ 70936b7f4cSbjh21 #ifndef FLOAT64_DEMANGLE 71936b7f4cSbjh21 #define FLOAT64_DEMANGLE(a) (a) 72936b7f4cSbjh21 #endif 73936b7f4cSbjh21 #ifndef FLOAT64_MANGLE 74936b7f4cSbjh21 #define FLOAT64_MANGLE(a) (a) 75936b7f4cSbjh21 #endif 76936b7f4cSbjh21 77936b7f4cSbjh21 /* 78936b7f4cSbjh21 ------------------------------------------------------------------------------- 79936b7f4cSbjh21 Floating-point rounding mode and exception flags. 80936b7f4cSbjh21 ------------------------------------------------------------------------------- 81936b7f4cSbjh21 */ 82936b7f4cSbjh21 fp_rnd float_rounding_mode = float_round_nearest_even; 83936b7f4cSbjh21 fp_except float_exception_flags = 0; 84936b7f4cSbjh21 85936b7f4cSbjh21 /* 86936b7f4cSbjh21 ------------------------------------------------------------------------------- 87936b7f4cSbjh21 Primitive arithmetic functions, including multi-word arithmetic, and 88936b7f4cSbjh21 division and square root approximations. (Can be specialized to target if 89936b7f4cSbjh21 desired.) 90936b7f4cSbjh21 ------------------------------------------------------------------------------- 91936b7f4cSbjh21 */ 92936b7f4cSbjh21 #include "softfloat-macros" 93936b7f4cSbjh21 94936b7f4cSbjh21 /* 95936b7f4cSbjh21 ------------------------------------------------------------------------------- 96936b7f4cSbjh21 Functions and definitions to determine: (1) whether tininess for underflow 97936b7f4cSbjh21 is detected before or after rounding by default, (2) what (if anything) 98936b7f4cSbjh21 happens when exceptions are raised, (3) how signaling NaNs are distinguished 99936b7f4cSbjh21 from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs 100936b7f4cSbjh21 are propagated from function inputs to output. These details are target- 101936b7f4cSbjh21 specific. 102936b7f4cSbjh21 ------------------------------------------------------------------------------- 103936b7f4cSbjh21 */ 104936b7f4cSbjh21 #include "softfloat-specialize" 105936b7f4cSbjh21 106936b7f4cSbjh21 /* 107936b7f4cSbjh21 ------------------------------------------------------------------------------- 108936b7f4cSbjh21 Returns the fraction bits of the single-precision floating-point value `a'. 109936b7f4cSbjh21 ------------------------------------------------------------------------------- 110936b7f4cSbjh21 */ 111936b7f4cSbjh21 INLINE bits32 extractFloat32Frac( float32 a ) 112936b7f4cSbjh21 { 113936b7f4cSbjh21 114936b7f4cSbjh21 return a & 0x007FFFFF; 115936b7f4cSbjh21 116936b7f4cSbjh21 } 117936b7f4cSbjh21 118936b7f4cSbjh21 /* 119936b7f4cSbjh21 ------------------------------------------------------------------------------- 120936b7f4cSbjh21 Returns the exponent bits of the single-precision floating-point value `a'. 121936b7f4cSbjh21 ------------------------------------------------------------------------------- 122936b7f4cSbjh21 */ 123936b7f4cSbjh21 INLINE int16 extractFloat32Exp( float32 a ) 124936b7f4cSbjh21 { 125936b7f4cSbjh21 126936b7f4cSbjh21 return ( a>>23 ) & 0xFF; 127936b7f4cSbjh21 128936b7f4cSbjh21 } 129936b7f4cSbjh21 130936b7f4cSbjh21 /* 131936b7f4cSbjh21 ------------------------------------------------------------------------------- 132936b7f4cSbjh21 Returns the sign bit of the single-precision floating-point value `a'. 133936b7f4cSbjh21 ------------------------------------------------------------------------------- 134936b7f4cSbjh21 */ 135936b7f4cSbjh21 INLINE flag extractFloat32Sign( float32 a ) 136936b7f4cSbjh21 { 137936b7f4cSbjh21 138936b7f4cSbjh21 return a>>31; 139936b7f4cSbjh21 140936b7f4cSbjh21 } 141936b7f4cSbjh21 142936b7f4cSbjh21 /* 143936b7f4cSbjh21 ------------------------------------------------------------------------------- 144936b7f4cSbjh21 Normalizes the subnormal single-precision floating-point value represented 145936b7f4cSbjh21 by the denormalized significand `aSig'. The normalized exponent and 146936b7f4cSbjh21 significand are stored at the locations pointed to by `zExpPtr' and 147936b7f4cSbjh21 `zSigPtr', respectively. 148936b7f4cSbjh21 ------------------------------------------------------------------------------- 149936b7f4cSbjh21 */ 150936b7f4cSbjh21 static void 151936b7f4cSbjh21 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) 152936b7f4cSbjh21 { 153936b7f4cSbjh21 int8 shiftCount; 154936b7f4cSbjh21 155936b7f4cSbjh21 shiftCount = countLeadingZeros32( aSig ) - 8; 156936b7f4cSbjh21 *zSigPtr = aSig<<shiftCount; 157936b7f4cSbjh21 *zExpPtr = 1 - shiftCount; 158936b7f4cSbjh21 159936b7f4cSbjh21 } 160936b7f4cSbjh21 161936b7f4cSbjh21 /* 162936b7f4cSbjh21 ------------------------------------------------------------------------------- 163936b7f4cSbjh21 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a 164936b7f4cSbjh21 single-precision floating-point value, returning the result. After being 165936b7f4cSbjh21 shifted into the proper positions, the three fields are simply added 166936b7f4cSbjh21 together to form the result. This means that any integer portion of `zSig' 167936b7f4cSbjh21 will be added into the exponent. Since a properly normalized significand 168936b7f4cSbjh21 will have an integer portion equal to 1, the `zExp' input should be 1 less 169936b7f4cSbjh21 than the desired result exponent whenever `zSig' is a complete, normalized 170936b7f4cSbjh21 significand. 171936b7f4cSbjh21 ------------------------------------------------------------------------------- 172936b7f4cSbjh21 */ 173936b7f4cSbjh21 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig ) 174936b7f4cSbjh21 { 175936b7f4cSbjh21 176936b7f4cSbjh21 return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig; 177936b7f4cSbjh21 178936b7f4cSbjh21 } 179936b7f4cSbjh21 180936b7f4cSbjh21 /* 181936b7f4cSbjh21 ------------------------------------------------------------------------------- 182936b7f4cSbjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 183936b7f4cSbjh21 and significand `zSig', and returns the proper single-precision floating- 184936b7f4cSbjh21 point value corresponding to the abstract input. Ordinarily, the abstract 185936b7f4cSbjh21 value is simply rounded and packed into the single-precision format, with 186936b7f4cSbjh21 the inexact exception raised if the abstract input cannot be represented 187936b7f4cSbjh21 exactly. However, if the abstract value is too large, the overflow and 188936b7f4cSbjh21 inexact exceptions are raised and an infinity or maximal finite value is 189936b7f4cSbjh21 returned. If the abstract value is too small, the input value is rounded to 190936b7f4cSbjh21 a subnormal number, and the underflow and inexact exceptions are raised if 191936b7f4cSbjh21 the abstract input cannot be represented exactly as a subnormal single- 192936b7f4cSbjh21 precision floating-point number. 193936b7f4cSbjh21 The input significand `zSig' has its binary point between bits 30 194936b7f4cSbjh21 and 29, which is 7 bits to the left of the usual location. This shifted 195936b7f4cSbjh21 significand must be normalized or smaller. If `zSig' is not normalized, 196936b7f4cSbjh21 `zExp' must be 0; in that case, the result returned is a subnormal number, 197936b7f4cSbjh21 and it must not require rounding. In the usual case that `zSig' is 198936b7f4cSbjh21 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 199936b7f4cSbjh21 The handling of underflow and overflow follows the IEC/IEEE Standard for 200936b7f4cSbjh21 Binary Floating-Point Arithmetic. 201936b7f4cSbjh21 ------------------------------------------------------------------------------- 202936b7f4cSbjh21 */ 203936b7f4cSbjh21 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 204936b7f4cSbjh21 { 205936b7f4cSbjh21 int8 roundingMode; 206936b7f4cSbjh21 flag roundNearestEven; 207936b7f4cSbjh21 int8 roundIncrement, roundBits; 208936b7f4cSbjh21 flag isTiny; 209936b7f4cSbjh21 210936b7f4cSbjh21 roundingMode = float_rounding_mode; 211936b7f4cSbjh21 roundNearestEven = roundingMode == float_round_nearest_even; 212936b7f4cSbjh21 roundIncrement = 0x40; 213936b7f4cSbjh21 if ( ! roundNearestEven ) { 214936b7f4cSbjh21 if ( roundingMode == float_round_to_zero ) { 215936b7f4cSbjh21 roundIncrement = 0; 216936b7f4cSbjh21 } 217936b7f4cSbjh21 else { 218936b7f4cSbjh21 roundIncrement = 0x7F; 219936b7f4cSbjh21 if ( zSign ) { 220936b7f4cSbjh21 if ( roundingMode == float_round_up ) roundIncrement = 0; 221936b7f4cSbjh21 } 222936b7f4cSbjh21 else { 223936b7f4cSbjh21 if ( roundingMode == float_round_down ) roundIncrement = 0; 224936b7f4cSbjh21 } 225936b7f4cSbjh21 } 226936b7f4cSbjh21 } 227936b7f4cSbjh21 roundBits = zSig & 0x7F; 228936b7f4cSbjh21 if ( 0xFD <= (bits16) zExp ) { 229936b7f4cSbjh21 if ( ( 0xFD < zExp ) 230936b7f4cSbjh21 || ( ( zExp == 0xFD ) 231936b7f4cSbjh21 && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) 232936b7f4cSbjh21 ) { 233936b7f4cSbjh21 float_raise( float_flag_overflow | float_flag_inexact ); 234936b7f4cSbjh21 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); 235936b7f4cSbjh21 } 236936b7f4cSbjh21 if ( zExp < 0 ) { 237936b7f4cSbjh21 isTiny = 238936b7f4cSbjh21 ( float_detect_tininess == float_tininess_before_rounding ) 239936b7f4cSbjh21 || ( zExp < -1 ) 240*2cec0187Schristos || ( zSig + roundIncrement < (uint32)0x80000000 ); 241936b7f4cSbjh21 shift32RightJamming( zSig, - zExp, &zSig ); 242936b7f4cSbjh21 zExp = 0; 243936b7f4cSbjh21 roundBits = zSig & 0x7F; 244936b7f4cSbjh21 if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 245936b7f4cSbjh21 } 246936b7f4cSbjh21 } 247936b7f4cSbjh21 if ( roundBits ) float_exception_flags |= float_flag_inexact; 248936b7f4cSbjh21 zSig = ( zSig + roundIncrement )>>7; 249936b7f4cSbjh21 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 250936b7f4cSbjh21 if ( zSig == 0 ) zExp = 0; 251936b7f4cSbjh21 return packFloat32( zSign, zExp, zSig ); 252936b7f4cSbjh21 253936b7f4cSbjh21 } 254936b7f4cSbjh21 255936b7f4cSbjh21 /* 256936b7f4cSbjh21 ------------------------------------------------------------------------------- 257936b7f4cSbjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 258936b7f4cSbjh21 and significand `zSig', and returns the proper single-precision floating- 259936b7f4cSbjh21 point value corresponding to the abstract input. This routine is just like 260936b7f4cSbjh21 `roundAndPackFloat32' except that `zSig' does not have to be normalized. 261936b7f4cSbjh21 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' 262936b7f4cSbjh21 floating-point exponent. 263936b7f4cSbjh21 ------------------------------------------------------------------------------- 264936b7f4cSbjh21 */ 265936b7f4cSbjh21 static float32 266936b7f4cSbjh21 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 267936b7f4cSbjh21 { 268936b7f4cSbjh21 int8 shiftCount; 269936b7f4cSbjh21 270936b7f4cSbjh21 shiftCount = countLeadingZeros32( zSig ) - 1; 271936b7f4cSbjh21 return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount ); 272936b7f4cSbjh21 273936b7f4cSbjh21 } 274936b7f4cSbjh21 275936b7f4cSbjh21 /* 276936b7f4cSbjh21 ------------------------------------------------------------------------------- 277936b7f4cSbjh21 Returns the least-significant 32 fraction bits of the double-precision 278936b7f4cSbjh21 floating-point value `a'. 279936b7f4cSbjh21 ------------------------------------------------------------------------------- 280936b7f4cSbjh21 */ 281936b7f4cSbjh21 INLINE bits32 extractFloat64Frac1( float64 a ) 282936b7f4cSbjh21 { 283936b7f4cSbjh21 284*2cec0187Schristos return (bits32)(FLOAT64_DEMANGLE(a) & LIT64(0x00000000FFFFFFFF)); 285936b7f4cSbjh21 286936b7f4cSbjh21 } 287936b7f4cSbjh21 288936b7f4cSbjh21 /* 289936b7f4cSbjh21 ------------------------------------------------------------------------------- 290936b7f4cSbjh21 Returns the most-significant 20 fraction bits of the double-precision 291936b7f4cSbjh21 floating-point value `a'. 292936b7f4cSbjh21 ------------------------------------------------------------------------------- 293936b7f4cSbjh21 */ 294936b7f4cSbjh21 INLINE bits32 extractFloat64Frac0( float64 a ) 295936b7f4cSbjh21 { 296936b7f4cSbjh21 297*2cec0187Schristos return (bits32)((FLOAT64_DEMANGLE(a) >> 32) & 0x000FFFFF); 298936b7f4cSbjh21 299936b7f4cSbjh21 } 300936b7f4cSbjh21 301936b7f4cSbjh21 /* 302936b7f4cSbjh21 ------------------------------------------------------------------------------- 303936b7f4cSbjh21 Returns the exponent bits of the double-precision floating-point value `a'. 304936b7f4cSbjh21 ------------------------------------------------------------------------------- 305936b7f4cSbjh21 */ 306936b7f4cSbjh21 INLINE int16 extractFloat64Exp( float64 a ) 307936b7f4cSbjh21 { 308936b7f4cSbjh21 309*2cec0187Schristos return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF); 310936b7f4cSbjh21 311936b7f4cSbjh21 } 312936b7f4cSbjh21 313936b7f4cSbjh21 /* 314936b7f4cSbjh21 ------------------------------------------------------------------------------- 315936b7f4cSbjh21 Returns the sign bit of the double-precision floating-point value `a'. 316936b7f4cSbjh21 ------------------------------------------------------------------------------- 317936b7f4cSbjh21 */ 318936b7f4cSbjh21 INLINE flag extractFloat64Sign( float64 a ) 319936b7f4cSbjh21 { 320936b7f4cSbjh21 321*2cec0187Schristos return (flag)(FLOAT64_DEMANGLE(a) >> 63); 322936b7f4cSbjh21 323936b7f4cSbjh21 } 324936b7f4cSbjh21 325936b7f4cSbjh21 /* 326936b7f4cSbjh21 ------------------------------------------------------------------------------- 327936b7f4cSbjh21 Normalizes the subnormal double-precision floating-point value represented 328936b7f4cSbjh21 by the denormalized significand formed by the concatenation of `aSig0' and 329936b7f4cSbjh21 `aSig1'. The normalized exponent is stored at the location pointed to by 330936b7f4cSbjh21 `zExpPtr'. The most significant 21 bits of the normalized significand are 331936b7f4cSbjh21 stored at the location pointed to by `zSig0Ptr', and the least significant 332936b7f4cSbjh21 32 bits of the normalized significand are stored at the location pointed to 333936b7f4cSbjh21 by `zSig1Ptr'. 334936b7f4cSbjh21 ------------------------------------------------------------------------------- 335936b7f4cSbjh21 */ 336936b7f4cSbjh21 static void 337936b7f4cSbjh21 normalizeFloat64Subnormal( 338936b7f4cSbjh21 bits32 aSig0, 339936b7f4cSbjh21 bits32 aSig1, 340936b7f4cSbjh21 int16 *zExpPtr, 341936b7f4cSbjh21 bits32 *zSig0Ptr, 342936b7f4cSbjh21 bits32 *zSig1Ptr 343936b7f4cSbjh21 ) 344936b7f4cSbjh21 { 345936b7f4cSbjh21 int8 shiftCount; 346936b7f4cSbjh21 347936b7f4cSbjh21 if ( aSig0 == 0 ) { 348936b7f4cSbjh21 shiftCount = countLeadingZeros32( aSig1 ) - 11; 349936b7f4cSbjh21 if ( shiftCount < 0 ) { 350936b7f4cSbjh21 *zSig0Ptr = aSig1>>( - shiftCount ); 351936b7f4cSbjh21 *zSig1Ptr = aSig1<<( shiftCount & 31 ); 352936b7f4cSbjh21 } 353936b7f4cSbjh21 else { 354936b7f4cSbjh21 *zSig0Ptr = aSig1<<shiftCount; 355936b7f4cSbjh21 *zSig1Ptr = 0; 356936b7f4cSbjh21 } 357936b7f4cSbjh21 *zExpPtr = - shiftCount - 31; 358936b7f4cSbjh21 } 359936b7f4cSbjh21 else { 360936b7f4cSbjh21 shiftCount = countLeadingZeros32( aSig0 ) - 11; 361936b7f4cSbjh21 shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr ); 362936b7f4cSbjh21 *zExpPtr = 1 - shiftCount; 363936b7f4cSbjh21 } 364936b7f4cSbjh21 365936b7f4cSbjh21 } 366936b7f4cSbjh21 367936b7f4cSbjh21 /* 368936b7f4cSbjh21 ------------------------------------------------------------------------------- 369936b7f4cSbjh21 Packs the sign `zSign', the exponent `zExp', and the significand formed by 370936b7f4cSbjh21 the concatenation of `zSig0' and `zSig1' into a double-precision floating- 371936b7f4cSbjh21 point value, returning the result. After being shifted into the proper 372936b7f4cSbjh21 positions, the three fields `zSign', `zExp', and `zSig0' are simply added 373936b7f4cSbjh21 together to form the most significant 32 bits of the result. This means 374936b7f4cSbjh21 that any integer portion of `zSig0' will be added into the exponent. Since 375936b7f4cSbjh21 a properly normalized significand will have an integer portion equal to 1, 376936b7f4cSbjh21 the `zExp' input should be 1 less than the desired result exponent whenever 377936b7f4cSbjh21 `zSig0' and `zSig1' concatenated form a complete, normalized significand. 378936b7f4cSbjh21 ------------------------------------------------------------------------------- 379936b7f4cSbjh21 */ 380936b7f4cSbjh21 INLINE float64 381936b7f4cSbjh21 packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 ) 382936b7f4cSbjh21 { 383936b7f4cSbjh21 384936b7f4cSbjh21 return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) + 385936b7f4cSbjh21 ( ( (bits64) zExp )<<52 ) + 386936b7f4cSbjh21 ( ( (bits64) zSig0 )<<32 ) + zSig1 ); 387936b7f4cSbjh21 388936b7f4cSbjh21 389936b7f4cSbjh21 } 390936b7f4cSbjh21 391936b7f4cSbjh21 /* 392936b7f4cSbjh21 ------------------------------------------------------------------------------- 393936b7f4cSbjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 394936b7f4cSbjh21 and extended significand formed by the concatenation of `zSig0', `zSig1', 395936b7f4cSbjh21 and `zSig2', and returns the proper double-precision floating-point value 396936b7f4cSbjh21 corresponding to the abstract input. Ordinarily, the abstract value is 397936b7f4cSbjh21 simply rounded and packed into the double-precision format, with the inexact 398936b7f4cSbjh21 exception raised if the abstract input cannot be represented exactly. 399936b7f4cSbjh21 However, if the abstract value is too large, the overflow and inexact 400936b7f4cSbjh21 exceptions are raised and an infinity or maximal finite value is returned. 401936b7f4cSbjh21 If the abstract value is too small, the input value is rounded to a 402936b7f4cSbjh21 subnormal number, and the underflow and inexact exceptions are raised if the 403936b7f4cSbjh21 abstract input cannot be represented exactly as a subnormal double-precision 404936b7f4cSbjh21 floating-point number. 405936b7f4cSbjh21 The input significand must be normalized or smaller. If the input 406936b7f4cSbjh21 significand is not normalized, `zExp' must be 0; in that case, the result 407936b7f4cSbjh21 returned is a subnormal number, and it must not require rounding. In the 408936b7f4cSbjh21 usual case that the input significand is normalized, `zExp' must be 1 less 409936b7f4cSbjh21 than the ``true'' floating-point exponent. The handling of underflow and 410936b7f4cSbjh21 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 411936b7f4cSbjh21 ------------------------------------------------------------------------------- 412936b7f4cSbjh21 */ 413936b7f4cSbjh21 static float64 414936b7f4cSbjh21 roundAndPackFloat64( 415936b7f4cSbjh21 flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 ) 416936b7f4cSbjh21 { 417936b7f4cSbjh21 int8 roundingMode; 418936b7f4cSbjh21 flag roundNearestEven, increment, isTiny; 419936b7f4cSbjh21 420936b7f4cSbjh21 roundingMode = float_rounding_mode; 421936b7f4cSbjh21 roundNearestEven = ( roundingMode == float_round_nearest_even ); 422936b7f4cSbjh21 increment = ( (sbits32) zSig2 < 0 ); 423936b7f4cSbjh21 if ( ! roundNearestEven ) { 424936b7f4cSbjh21 if ( roundingMode == float_round_to_zero ) { 425936b7f4cSbjh21 increment = 0; 426936b7f4cSbjh21 } 427936b7f4cSbjh21 else { 428936b7f4cSbjh21 if ( zSign ) { 429936b7f4cSbjh21 increment = ( roundingMode == float_round_down ) && zSig2; 430936b7f4cSbjh21 } 431936b7f4cSbjh21 else { 432936b7f4cSbjh21 increment = ( roundingMode == float_round_up ) && zSig2; 433936b7f4cSbjh21 } 434936b7f4cSbjh21 } 435936b7f4cSbjh21 } 436936b7f4cSbjh21 if ( 0x7FD <= (bits16) zExp ) { 437936b7f4cSbjh21 if ( ( 0x7FD < zExp ) 438936b7f4cSbjh21 || ( ( zExp == 0x7FD ) 439936b7f4cSbjh21 && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 ) 440936b7f4cSbjh21 && increment 441936b7f4cSbjh21 ) 442936b7f4cSbjh21 ) { 443936b7f4cSbjh21 float_raise( float_flag_overflow | float_flag_inexact ); 444936b7f4cSbjh21 if ( ( roundingMode == float_round_to_zero ) 445936b7f4cSbjh21 || ( zSign && ( roundingMode == float_round_up ) ) 446936b7f4cSbjh21 || ( ! zSign && ( roundingMode == float_round_down ) ) 447936b7f4cSbjh21 ) { 448936b7f4cSbjh21 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF ); 449936b7f4cSbjh21 } 450936b7f4cSbjh21 return packFloat64( zSign, 0x7FF, 0, 0 ); 451936b7f4cSbjh21 } 452936b7f4cSbjh21 if ( zExp < 0 ) { 453936b7f4cSbjh21 isTiny = 454936b7f4cSbjh21 ( float_detect_tininess == float_tininess_before_rounding ) 455936b7f4cSbjh21 || ( zExp < -1 ) 456936b7f4cSbjh21 || ! increment 457936b7f4cSbjh21 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF ); 458936b7f4cSbjh21 shift64ExtraRightJamming( 459936b7f4cSbjh21 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); 460936b7f4cSbjh21 zExp = 0; 461936b7f4cSbjh21 if ( isTiny && zSig2 ) float_raise( float_flag_underflow ); 462936b7f4cSbjh21 if ( roundNearestEven ) { 463936b7f4cSbjh21 increment = ( (sbits32) zSig2 < 0 ); 464936b7f4cSbjh21 } 465936b7f4cSbjh21 else { 466936b7f4cSbjh21 if ( zSign ) { 467936b7f4cSbjh21 increment = ( roundingMode == float_round_down ) && zSig2; 468936b7f4cSbjh21 } 469936b7f4cSbjh21 else { 470936b7f4cSbjh21 increment = ( roundingMode == float_round_up ) && zSig2; 471936b7f4cSbjh21 } 472936b7f4cSbjh21 } 473936b7f4cSbjh21 } 474936b7f4cSbjh21 } 475936b7f4cSbjh21 if ( zSig2 ) float_exception_flags |= float_flag_inexact; 476936b7f4cSbjh21 if ( increment ) { 477936b7f4cSbjh21 add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); 478936b7f4cSbjh21 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); 479936b7f4cSbjh21 } 480936b7f4cSbjh21 else { 481936b7f4cSbjh21 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; 482936b7f4cSbjh21 } 483936b7f4cSbjh21 return packFloat64( zSign, zExp, zSig0, zSig1 ); 484936b7f4cSbjh21 485936b7f4cSbjh21 } 486936b7f4cSbjh21 487936b7f4cSbjh21 /* 488936b7f4cSbjh21 ------------------------------------------------------------------------------- 489936b7f4cSbjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 490936b7f4cSbjh21 and significand formed by the concatenation of `zSig0' and `zSig1', and 491936b7f4cSbjh21 returns the proper double-precision floating-point value corresponding 492936b7f4cSbjh21 to the abstract input. This routine is just like `roundAndPackFloat64' 493936b7f4cSbjh21 except that the input significand has fewer bits and does not have to be 494936b7f4cSbjh21 normalized. In all cases, `zExp' must be 1 less than the ``true'' floating- 495936b7f4cSbjh21 point exponent. 496936b7f4cSbjh21 ------------------------------------------------------------------------------- 497936b7f4cSbjh21 */ 498936b7f4cSbjh21 static float64 499936b7f4cSbjh21 normalizeRoundAndPackFloat64( 500936b7f4cSbjh21 flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 ) 501936b7f4cSbjh21 { 502936b7f4cSbjh21 int8 shiftCount; 503936b7f4cSbjh21 bits32 zSig2; 504936b7f4cSbjh21 505936b7f4cSbjh21 if ( zSig0 == 0 ) { 506936b7f4cSbjh21 zSig0 = zSig1; 507936b7f4cSbjh21 zSig1 = 0; 508936b7f4cSbjh21 zExp -= 32; 509936b7f4cSbjh21 } 510936b7f4cSbjh21 shiftCount = countLeadingZeros32( zSig0 ) - 11; 511936b7f4cSbjh21 if ( 0 <= shiftCount ) { 512936b7f4cSbjh21 zSig2 = 0; 513936b7f4cSbjh21 shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 514936b7f4cSbjh21 } 515936b7f4cSbjh21 else { 516936b7f4cSbjh21 shift64ExtraRightJamming( 517936b7f4cSbjh21 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); 518936b7f4cSbjh21 } 519936b7f4cSbjh21 zExp -= shiftCount; 520936b7f4cSbjh21 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 521936b7f4cSbjh21 522936b7f4cSbjh21 } 523936b7f4cSbjh21 524936b7f4cSbjh21 /* 525936b7f4cSbjh21 ------------------------------------------------------------------------------- 526936b7f4cSbjh21 Returns the result of converting the 32-bit two's complement integer `a' to 527936b7f4cSbjh21 the single-precision floating-point format. The conversion is performed 528936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 529936b7f4cSbjh21 ------------------------------------------------------------------------------- 530936b7f4cSbjh21 */ 531936b7f4cSbjh21 float32 int32_to_float32( int32 a ) 532936b7f4cSbjh21 { 533936b7f4cSbjh21 flag zSign; 534936b7f4cSbjh21 535936b7f4cSbjh21 if ( a == 0 ) return 0; 536936b7f4cSbjh21 if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); 537936b7f4cSbjh21 zSign = ( a < 0 ); 538*2cec0187Schristos return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a)); 539936b7f4cSbjh21 540936b7f4cSbjh21 } 541936b7f4cSbjh21 542936b7f4cSbjh21 /* 543936b7f4cSbjh21 ------------------------------------------------------------------------------- 544936b7f4cSbjh21 Returns the result of converting the 32-bit two's complement integer `a' to 545936b7f4cSbjh21 the double-precision floating-point format. The conversion is performed 546936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 547936b7f4cSbjh21 ------------------------------------------------------------------------------- 548936b7f4cSbjh21 */ 549936b7f4cSbjh21 float64 int32_to_float64( int32 a ) 550936b7f4cSbjh21 { 551936b7f4cSbjh21 flag zSign; 552936b7f4cSbjh21 bits32 absA; 553936b7f4cSbjh21 int8 shiftCount; 554936b7f4cSbjh21 bits32 zSig0, zSig1; 555936b7f4cSbjh21 556936b7f4cSbjh21 if ( a == 0 ) return packFloat64( 0, 0, 0, 0 ); 557936b7f4cSbjh21 zSign = ( a < 0 ); 558936b7f4cSbjh21 absA = zSign ? - a : a; 559936b7f4cSbjh21 shiftCount = countLeadingZeros32( absA ) - 11; 560936b7f4cSbjh21 if ( 0 <= shiftCount ) { 561936b7f4cSbjh21 zSig0 = absA<<shiftCount; 562936b7f4cSbjh21 zSig1 = 0; 563936b7f4cSbjh21 } 564936b7f4cSbjh21 else { 565936b7f4cSbjh21 shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 ); 566936b7f4cSbjh21 } 567936b7f4cSbjh21 return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 ); 568936b7f4cSbjh21 569936b7f4cSbjh21 } 570936b7f4cSbjh21 571936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC 572936b7f4cSbjh21 /* 573936b7f4cSbjh21 ------------------------------------------------------------------------------- 574936b7f4cSbjh21 Returns the result of converting the single-precision floating-point value 575936b7f4cSbjh21 `a' to the 32-bit two's complement integer format. The conversion is 576936b7f4cSbjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 577936b7f4cSbjh21 Arithmetic---which means in particular that the conversion is rounded 578936b7f4cSbjh21 according to the current rounding mode. If `a' is a NaN, the largest 579936b7f4cSbjh21 positive integer is returned. Otherwise, if the conversion overflows, the 580936b7f4cSbjh21 largest integer with the same sign as `a' is returned. 581936b7f4cSbjh21 ------------------------------------------------------------------------------- 582936b7f4cSbjh21 */ 583936b7f4cSbjh21 int32 float32_to_int32( float32 a ) 584936b7f4cSbjh21 { 585936b7f4cSbjh21 flag aSign; 586936b7f4cSbjh21 int16 aExp, shiftCount; 587936b7f4cSbjh21 bits32 aSig, aSigExtra; 588936b7f4cSbjh21 int32 z; 589936b7f4cSbjh21 int8 roundingMode; 590936b7f4cSbjh21 591936b7f4cSbjh21 aSig = extractFloat32Frac( a ); 592936b7f4cSbjh21 aExp = extractFloat32Exp( a ); 593936b7f4cSbjh21 aSign = extractFloat32Sign( a ); 594936b7f4cSbjh21 shiftCount = aExp - 0x96; 595936b7f4cSbjh21 if ( 0 <= shiftCount ) { 596936b7f4cSbjh21 if ( 0x9E <= aExp ) { 597936b7f4cSbjh21 if ( a != 0xCF000000 ) { 598936b7f4cSbjh21 float_raise( float_flag_invalid ); 599936b7f4cSbjh21 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { 600936b7f4cSbjh21 return 0x7FFFFFFF; 601936b7f4cSbjh21 } 602936b7f4cSbjh21 } 603936b7f4cSbjh21 return (sbits32) 0x80000000; 604936b7f4cSbjh21 } 605936b7f4cSbjh21 z = ( aSig | 0x00800000 )<<shiftCount; 606936b7f4cSbjh21 if ( aSign ) z = - z; 607936b7f4cSbjh21 } 608936b7f4cSbjh21 else { 609936b7f4cSbjh21 if ( aExp < 0x7E ) { 610936b7f4cSbjh21 aSigExtra = aExp | aSig; 611936b7f4cSbjh21 z = 0; 612936b7f4cSbjh21 } 613936b7f4cSbjh21 else { 614936b7f4cSbjh21 aSig |= 0x00800000; 615936b7f4cSbjh21 aSigExtra = aSig<<( shiftCount & 31 ); 616936b7f4cSbjh21 z = aSig>>( - shiftCount ); 617936b7f4cSbjh21 } 618936b7f4cSbjh21 if ( aSigExtra ) float_exception_flags |= float_flag_inexact; 619936b7f4cSbjh21 roundingMode = float_rounding_mode; 620936b7f4cSbjh21 if ( roundingMode == float_round_nearest_even ) { 621936b7f4cSbjh21 if ( (sbits32) aSigExtra < 0 ) { 622936b7f4cSbjh21 ++z; 623936b7f4cSbjh21 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1; 624936b7f4cSbjh21 } 625936b7f4cSbjh21 if ( aSign ) z = - z; 626936b7f4cSbjh21 } 627936b7f4cSbjh21 else { 628936b7f4cSbjh21 aSigExtra = ( aSigExtra != 0 ); 629936b7f4cSbjh21 if ( aSign ) { 630936b7f4cSbjh21 z += ( roundingMode == float_round_down ) & aSigExtra; 631936b7f4cSbjh21 z = - z; 632936b7f4cSbjh21 } 633936b7f4cSbjh21 else { 634936b7f4cSbjh21 z += ( roundingMode == float_round_up ) & aSigExtra; 635936b7f4cSbjh21 } 636936b7f4cSbjh21 } 637936b7f4cSbjh21 } 638936b7f4cSbjh21 return z; 639936b7f4cSbjh21 640936b7f4cSbjh21 } 641936b7f4cSbjh21 #endif 642936b7f4cSbjh21 643936b7f4cSbjh21 /* 644936b7f4cSbjh21 ------------------------------------------------------------------------------- 645936b7f4cSbjh21 Returns the result of converting the single-precision floating-point value 646936b7f4cSbjh21 `a' to the 32-bit two's complement integer format. The conversion is 647936b7f4cSbjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 648936b7f4cSbjh21 Arithmetic, except that the conversion is always rounded toward zero. 649936b7f4cSbjh21 If `a' is a NaN, the largest positive integer is returned. Otherwise, if 650936b7f4cSbjh21 the conversion overflows, the largest integer with the same sign as `a' is 651936b7f4cSbjh21 returned. 652936b7f4cSbjh21 ------------------------------------------------------------------------------- 653936b7f4cSbjh21 */ 654936b7f4cSbjh21 int32 float32_to_int32_round_to_zero( float32 a ) 655936b7f4cSbjh21 { 656936b7f4cSbjh21 flag aSign; 657936b7f4cSbjh21 int16 aExp, shiftCount; 658936b7f4cSbjh21 bits32 aSig; 659936b7f4cSbjh21 int32 z; 660936b7f4cSbjh21 661936b7f4cSbjh21 aSig = extractFloat32Frac( a ); 662936b7f4cSbjh21 aExp = extractFloat32Exp( a ); 663936b7f4cSbjh21 aSign = extractFloat32Sign( a ); 664936b7f4cSbjh21 shiftCount = aExp - 0x9E; 665936b7f4cSbjh21 if ( 0 <= shiftCount ) { 666936b7f4cSbjh21 if ( a != 0xCF000000 ) { 667936b7f4cSbjh21 float_raise( float_flag_invalid ); 668936b7f4cSbjh21 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; 669936b7f4cSbjh21 } 670936b7f4cSbjh21 return (sbits32) 0x80000000; 671936b7f4cSbjh21 } 672936b7f4cSbjh21 else if ( aExp <= 0x7E ) { 673936b7f4cSbjh21 if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; 674936b7f4cSbjh21 return 0; 675936b7f4cSbjh21 } 676936b7f4cSbjh21 aSig = ( aSig | 0x00800000 )<<8; 677936b7f4cSbjh21 z = aSig>>( - shiftCount ); 678936b7f4cSbjh21 if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { 679936b7f4cSbjh21 float_exception_flags |= float_flag_inexact; 680936b7f4cSbjh21 } 681936b7f4cSbjh21 if ( aSign ) z = - z; 682936b7f4cSbjh21 return z; 683936b7f4cSbjh21 684936b7f4cSbjh21 } 685936b7f4cSbjh21 686936b7f4cSbjh21 /* 687936b7f4cSbjh21 ------------------------------------------------------------------------------- 688936b7f4cSbjh21 Returns the result of converting the single-precision floating-point value 689936b7f4cSbjh21 `a' to the double-precision floating-point format. The conversion is 690936b7f4cSbjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 691936b7f4cSbjh21 Arithmetic. 692936b7f4cSbjh21 ------------------------------------------------------------------------------- 693936b7f4cSbjh21 */ 694936b7f4cSbjh21 float64 float32_to_float64( float32 a ) 695936b7f4cSbjh21 { 696936b7f4cSbjh21 flag aSign; 697936b7f4cSbjh21 int16 aExp; 698936b7f4cSbjh21 bits32 aSig, zSig0, zSig1; 699936b7f4cSbjh21 700936b7f4cSbjh21 aSig = extractFloat32Frac( a ); 701936b7f4cSbjh21 aExp = extractFloat32Exp( a ); 702936b7f4cSbjh21 aSign = extractFloat32Sign( a ); 703936b7f4cSbjh21 if ( aExp == 0xFF ) { 704936b7f4cSbjh21 if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); 705936b7f4cSbjh21 return packFloat64( aSign, 0x7FF, 0, 0 ); 706936b7f4cSbjh21 } 707936b7f4cSbjh21 if ( aExp == 0 ) { 708936b7f4cSbjh21 if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 ); 709936b7f4cSbjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 710936b7f4cSbjh21 --aExp; 711936b7f4cSbjh21 } 712936b7f4cSbjh21 shift64Right( aSig, 0, 3, &zSig0, &zSig1 ); 713936b7f4cSbjh21 return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 ); 714936b7f4cSbjh21 715936b7f4cSbjh21 } 716936b7f4cSbjh21 717936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC 718936b7f4cSbjh21 /* 719936b7f4cSbjh21 ------------------------------------------------------------------------------- 720936b7f4cSbjh21 Rounds the single-precision floating-point value `a' to an integer, 721936b7f4cSbjh21 and returns the result as a single-precision floating-point value. The 722936b7f4cSbjh21 operation is performed according to the IEC/IEEE Standard for Binary 723936b7f4cSbjh21 Floating-Point Arithmetic. 724936b7f4cSbjh21 ------------------------------------------------------------------------------- 725936b7f4cSbjh21 */ 726936b7f4cSbjh21 float32 float32_round_to_int( float32 a ) 727936b7f4cSbjh21 { 728936b7f4cSbjh21 flag aSign; 729936b7f4cSbjh21 int16 aExp; 730936b7f4cSbjh21 bits32 lastBitMask, roundBitsMask; 731936b7f4cSbjh21 int8 roundingMode; 732936b7f4cSbjh21 float32 z; 733936b7f4cSbjh21 734936b7f4cSbjh21 aExp = extractFloat32Exp( a ); 735936b7f4cSbjh21 if ( 0x96 <= aExp ) { 736936b7f4cSbjh21 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { 737936b7f4cSbjh21 return propagateFloat32NaN( a, a ); 738936b7f4cSbjh21 } 739936b7f4cSbjh21 return a; 740936b7f4cSbjh21 } 741936b7f4cSbjh21 if ( aExp <= 0x7E ) { 742936b7f4cSbjh21 if ( (bits32) ( a<<1 ) == 0 ) return a; 743936b7f4cSbjh21 float_exception_flags |= float_flag_inexact; 744936b7f4cSbjh21 aSign = extractFloat32Sign( a ); 745936b7f4cSbjh21 switch ( float_rounding_mode ) { 746936b7f4cSbjh21 case float_round_nearest_even: 747936b7f4cSbjh21 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { 748936b7f4cSbjh21 return packFloat32( aSign, 0x7F, 0 ); 749936b7f4cSbjh21 } 750936b7f4cSbjh21 break; 751936b7f4cSbjh21 case float_round_to_zero: 752936b7f4cSbjh21 break; 753936b7f4cSbjh21 case float_round_down: 754936b7f4cSbjh21 return aSign ? 0xBF800000 : 0; 755936b7f4cSbjh21 case float_round_up: 756936b7f4cSbjh21 return aSign ? 0x80000000 : 0x3F800000; 757936b7f4cSbjh21 } 758936b7f4cSbjh21 return packFloat32( aSign, 0, 0 ); 759936b7f4cSbjh21 } 760936b7f4cSbjh21 lastBitMask = 1; 761936b7f4cSbjh21 lastBitMask <<= 0x96 - aExp; 762936b7f4cSbjh21 roundBitsMask = lastBitMask - 1; 763936b7f4cSbjh21 z = a; 764936b7f4cSbjh21 roundingMode = float_rounding_mode; 765936b7f4cSbjh21 if ( roundingMode == float_round_nearest_even ) { 766936b7f4cSbjh21 z += lastBitMask>>1; 767936b7f4cSbjh21 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; 768936b7f4cSbjh21 } 769936b7f4cSbjh21 else if ( roundingMode != float_round_to_zero ) { 770936b7f4cSbjh21 if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) { 771936b7f4cSbjh21 z += roundBitsMask; 772936b7f4cSbjh21 } 773936b7f4cSbjh21 } 774936b7f4cSbjh21 z &= ~ roundBitsMask; 775936b7f4cSbjh21 if ( z != a ) float_exception_flags |= float_flag_inexact; 776936b7f4cSbjh21 return z; 777936b7f4cSbjh21 778936b7f4cSbjh21 } 779936b7f4cSbjh21 #endif 780936b7f4cSbjh21 781936b7f4cSbjh21 /* 782936b7f4cSbjh21 ------------------------------------------------------------------------------- 783936b7f4cSbjh21 Returns the result of adding the absolute values of the single-precision 784936b7f4cSbjh21 floating-point values `a' and `b'. If `zSign' is 1, the sum is negated 785936b7f4cSbjh21 before being returned. `zSign' is ignored if the result is a NaN. 786936b7f4cSbjh21 The addition is performed according to the IEC/IEEE Standard for Binary 787936b7f4cSbjh21 Floating-Point Arithmetic. 788936b7f4cSbjh21 ------------------------------------------------------------------------------- 789936b7f4cSbjh21 */ 790936b7f4cSbjh21 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) 791936b7f4cSbjh21 { 792936b7f4cSbjh21 int16 aExp, bExp, zExp; 793936b7f4cSbjh21 bits32 aSig, bSig, zSig; 794936b7f4cSbjh21 int16 expDiff; 795936b7f4cSbjh21 796936b7f4cSbjh21 aSig = extractFloat32Frac( a ); 797936b7f4cSbjh21 aExp = extractFloat32Exp( a ); 798936b7f4cSbjh21 bSig = extractFloat32Frac( b ); 799936b7f4cSbjh21 bExp = extractFloat32Exp( b ); 800936b7f4cSbjh21 expDiff = aExp - bExp; 801936b7f4cSbjh21 aSig <<= 6; 802936b7f4cSbjh21 bSig <<= 6; 803936b7f4cSbjh21 if ( 0 < expDiff ) { 804936b7f4cSbjh21 if ( aExp == 0xFF ) { 805936b7f4cSbjh21 if ( aSig ) return propagateFloat32NaN( a, b ); 806936b7f4cSbjh21 return a; 807936b7f4cSbjh21 } 808936b7f4cSbjh21 if ( bExp == 0 ) { 809936b7f4cSbjh21 --expDiff; 810936b7f4cSbjh21 } 811936b7f4cSbjh21 else { 812936b7f4cSbjh21 bSig |= 0x20000000; 813936b7f4cSbjh21 } 814936b7f4cSbjh21 shift32RightJamming( bSig, expDiff, &bSig ); 815936b7f4cSbjh21 zExp = aExp; 816936b7f4cSbjh21 } 817936b7f4cSbjh21 else if ( expDiff < 0 ) { 818936b7f4cSbjh21 if ( bExp == 0xFF ) { 819936b7f4cSbjh21 if ( bSig ) return propagateFloat32NaN( a, b ); 820936b7f4cSbjh21 return packFloat32( zSign, 0xFF, 0 ); 821936b7f4cSbjh21 } 822936b7f4cSbjh21 if ( aExp == 0 ) { 823936b7f4cSbjh21 ++expDiff; 824936b7f4cSbjh21 } 825936b7f4cSbjh21 else { 826936b7f4cSbjh21 aSig |= 0x20000000; 827936b7f4cSbjh21 } 828936b7f4cSbjh21 shift32RightJamming( aSig, - expDiff, &aSig ); 829936b7f4cSbjh21 zExp = bExp; 830936b7f4cSbjh21 } 831936b7f4cSbjh21 else { 832936b7f4cSbjh21 if ( aExp == 0xFF ) { 833936b7f4cSbjh21 if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 834936b7f4cSbjh21 return a; 835936b7f4cSbjh21 } 836936b7f4cSbjh21 if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); 837936b7f4cSbjh21 zSig = 0x40000000 + aSig + bSig; 838936b7f4cSbjh21 zExp = aExp; 839936b7f4cSbjh21 goto roundAndPack; 840936b7f4cSbjh21 } 841936b7f4cSbjh21 aSig |= 0x20000000; 842936b7f4cSbjh21 zSig = ( aSig + bSig )<<1; 843936b7f4cSbjh21 --zExp; 844936b7f4cSbjh21 if ( (sbits32) zSig < 0 ) { 845936b7f4cSbjh21 zSig = aSig + bSig; 846936b7f4cSbjh21 ++zExp; 847936b7f4cSbjh21 } 848936b7f4cSbjh21 roundAndPack: 849936b7f4cSbjh21 return roundAndPackFloat32( zSign, zExp, zSig ); 850936b7f4cSbjh21 851936b7f4cSbjh21 } 852936b7f4cSbjh21 853936b7f4cSbjh21 /* 854936b7f4cSbjh21 ------------------------------------------------------------------------------- 855936b7f4cSbjh21 Returns the result of subtracting the absolute values of the single- 856936b7f4cSbjh21 precision floating-point values `a' and `b'. If `zSign' is 1, the 857936b7f4cSbjh21 difference is negated before being returned. `zSign' is ignored if the 858936b7f4cSbjh21 result is a NaN. The subtraction is performed according to the IEC/IEEE 859936b7f4cSbjh21 Standard for Binary Floating-Point Arithmetic. 860936b7f4cSbjh21 ------------------------------------------------------------------------------- 861936b7f4cSbjh21 */ 862936b7f4cSbjh21 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) 863936b7f4cSbjh21 { 864936b7f4cSbjh21 int16 aExp, bExp, zExp; 865936b7f4cSbjh21 bits32 aSig, bSig, zSig; 866936b7f4cSbjh21 int16 expDiff; 867936b7f4cSbjh21 868936b7f4cSbjh21 aSig = extractFloat32Frac( a ); 869936b7f4cSbjh21 aExp = extractFloat32Exp( a ); 870936b7f4cSbjh21 bSig = extractFloat32Frac( b ); 871936b7f4cSbjh21 bExp = extractFloat32Exp( b ); 872936b7f4cSbjh21 expDiff = aExp - bExp; 873936b7f4cSbjh21 aSig <<= 7; 874936b7f4cSbjh21 bSig <<= 7; 875936b7f4cSbjh21 if ( 0 < expDiff ) goto aExpBigger; 876936b7f4cSbjh21 if ( expDiff < 0 ) goto bExpBigger; 877936b7f4cSbjh21 if ( aExp == 0xFF ) { 878936b7f4cSbjh21 if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 879936b7f4cSbjh21 float_raise( float_flag_invalid ); 880936b7f4cSbjh21 return float32_default_nan; 881936b7f4cSbjh21 } 882936b7f4cSbjh21 if ( aExp == 0 ) { 883936b7f4cSbjh21 aExp = 1; 884936b7f4cSbjh21 bExp = 1; 885936b7f4cSbjh21 } 886936b7f4cSbjh21 if ( bSig < aSig ) goto aBigger; 887936b7f4cSbjh21 if ( aSig < bSig ) goto bBigger; 888936b7f4cSbjh21 return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); 889936b7f4cSbjh21 bExpBigger: 890936b7f4cSbjh21 if ( bExp == 0xFF ) { 891936b7f4cSbjh21 if ( bSig ) return propagateFloat32NaN( a, b ); 892936b7f4cSbjh21 return packFloat32( zSign ^ 1, 0xFF, 0 ); 893936b7f4cSbjh21 } 894936b7f4cSbjh21 if ( aExp == 0 ) { 895936b7f4cSbjh21 ++expDiff; 896936b7f4cSbjh21 } 897936b7f4cSbjh21 else { 898936b7f4cSbjh21 aSig |= 0x40000000; 899936b7f4cSbjh21 } 900936b7f4cSbjh21 shift32RightJamming( aSig, - expDiff, &aSig ); 901936b7f4cSbjh21 bSig |= 0x40000000; 902936b7f4cSbjh21 bBigger: 903936b7f4cSbjh21 zSig = bSig - aSig; 904936b7f4cSbjh21 zExp = bExp; 905936b7f4cSbjh21 zSign ^= 1; 906936b7f4cSbjh21 goto normalizeRoundAndPack; 907936b7f4cSbjh21 aExpBigger: 908936b7f4cSbjh21 if ( aExp == 0xFF ) { 909936b7f4cSbjh21 if ( aSig ) return propagateFloat32NaN( a, b ); 910936b7f4cSbjh21 return a; 911936b7f4cSbjh21 } 912936b7f4cSbjh21 if ( bExp == 0 ) { 913936b7f4cSbjh21 --expDiff; 914936b7f4cSbjh21 } 915936b7f4cSbjh21 else { 916936b7f4cSbjh21 bSig |= 0x40000000; 917936b7f4cSbjh21 } 918936b7f4cSbjh21 shift32RightJamming( bSig, expDiff, &bSig ); 919936b7f4cSbjh21 aSig |= 0x40000000; 920936b7f4cSbjh21 aBigger: 921936b7f4cSbjh21 zSig = aSig - bSig; 922936b7f4cSbjh21 zExp = aExp; 923936b7f4cSbjh21 normalizeRoundAndPack: 924936b7f4cSbjh21 --zExp; 925936b7f4cSbjh21 return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); 926936b7f4cSbjh21 927936b7f4cSbjh21 } 928936b7f4cSbjh21 929936b7f4cSbjh21 /* 930936b7f4cSbjh21 ------------------------------------------------------------------------------- 931936b7f4cSbjh21 Returns the result of adding the single-precision floating-point values `a' 932936b7f4cSbjh21 and `b'. The operation is performed according to the IEC/IEEE Standard for 933936b7f4cSbjh21 Binary Floating-Point Arithmetic. 934936b7f4cSbjh21 ------------------------------------------------------------------------------- 935936b7f4cSbjh21 */ 936936b7f4cSbjh21 float32 float32_add( float32 a, float32 b ) 937936b7f4cSbjh21 { 938936b7f4cSbjh21 flag aSign, bSign; 939936b7f4cSbjh21 940936b7f4cSbjh21 aSign = extractFloat32Sign( a ); 941936b7f4cSbjh21 bSign = extractFloat32Sign( b ); 942936b7f4cSbjh21 if ( aSign == bSign ) { 943936b7f4cSbjh21 return addFloat32Sigs( a, b, aSign ); 944936b7f4cSbjh21 } 945936b7f4cSbjh21 else { 946936b7f4cSbjh21 return subFloat32Sigs( a, b, aSign ); 947936b7f4cSbjh21 } 948936b7f4cSbjh21 949936b7f4cSbjh21 } 950936b7f4cSbjh21 951936b7f4cSbjh21 /* 952936b7f4cSbjh21 ------------------------------------------------------------------------------- 953936b7f4cSbjh21 Returns the result of subtracting the single-precision floating-point values 954936b7f4cSbjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 955936b7f4cSbjh21 for Binary Floating-Point Arithmetic. 956936b7f4cSbjh21 ------------------------------------------------------------------------------- 957936b7f4cSbjh21 */ 958936b7f4cSbjh21 float32 float32_sub( float32 a, float32 b ) 959936b7f4cSbjh21 { 960936b7f4cSbjh21 flag aSign, bSign; 961936b7f4cSbjh21 962936b7f4cSbjh21 aSign = extractFloat32Sign( a ); 963936b7f4cSbjh21 bSign = extractFloat32Sign( b ); 964936b7f4cSbjh21 if ( aSign == bSign ) { 965936b7f4cSbjh21 return subFloat32Sigs( a, b, aSign ); 966936b7f4cSbjh21 } 967936b7f4cSbjh21 else { 968936b7f4cSbjh21 return addFloat32Sigs( a, b, aSign ); 969936b7f4cSbjh21 } 970936b7f4cSbjh21 971936b7f4cSbjh21 } 972936b7f4cSbjh21 973936b7f4cSbjh21 /* 974936b7f4cSbjh21 ------------------------------------------------------------------------------- 975936b7f4cSbjh21 Returns the result of multiplying the single-precision floating-point values 976936b7f4cSbjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 977936b7f4cSbjh21 for Binary Floating-Point Arithmetic. 978936b7f4cSbjh21 ------------------------------------------------------------------------------- 979936b7f4cSbjh21 */ 980936b7f4cSbjh21 float32 float32_mul( float32 a, float32 b ) 981936b7f4cSbjh21 { 982936b7f4cSbjh21 flag aSign, bSign, zSign; 983936b7f4cSbjh21 int16 aExp, bExp, zExp; 984936b7f4cSbjh21 bits32 aSig, bSig, zSig0, zSig1; 985936b7f4cSbjh21 986936b7f4cSbjh21 aSig = extractFloat32Frac( a ); 987936b7f4cSbjh21 aExp = extractFloat32Exp( a ); 988936b7f4cSbjh21 aSign = extractFloat32Sign( a ); 989936b7f4cSbjh21 bSig = extractFloat32Frac( b ); 990936b7f4cSbjh21 bExp = extractFloat32Exp( b ); 991936b7f4cSbjh21 bSign = extractFloat32Sign( b ); 992936b7f4cSbjh21 zSign = aSign ^ bSign; 993936b7f4cSbjh21 if ( aExp == 0xFF ) { 994936b7f4cSbjh21 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 995936b7f4cSbjh21 return propagateFloat32NaN( a, b ); 996936b7f4cSbjh21 } 997936b7f4cSbjh21 if ( ( bExp | bSig ) == 0 ) { 998936b7f4cSbjh21 float_raise( float_flag_invalid ); 999936b7f4cSbjh21 return float32_default_nan; 1000936b7f4cSbjh21 } 1001936b7f4cSbjh21 return packFloat32( zSign, 0xFF, 0 ); 1002936b7f4cSbjh21 } 1003936b7f4cSbjh21 if ( bExp == 0xFF ) { 1004936b7f4cSbjh21 if ( bSig ) return propagateFloat32NaN( a, b ); 1005936b7f4cSbjh21 if ( ( aExp | aSig ) == 0 ) { 1006936b7f4cSbjh21 float_raise( float_flag_invalid ); 1007936b7f4cSbjh21 return float32_default_nan; 1008936b7f4cSbjh21 } 1009936b7f4cSbjh21 return packFloat32( zSign, 0xFF, 0 ); 1010936b7f4cSbjh21 } 1011936b7f4cSbjh21 if ( aExp == 0 ) { 1012936b7f4cSbjh21 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); 1013936b7f4cSbjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1014936b7f4cSbjh21 } 1015936b7f4cSbjh21 if ( bExp == 0 ) { 1016936b7f4cSbjh21 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); 1017936b7f4cSbjh21 normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1018936b7f4cSbjh21 } 1019936b7f4cSbjh21 zExp = aExp + bExp - 0x7F; 1020936b7f4cSbjh21 aSig = ( aSig | 0x00800000 )<<7; 1021936b7f4cSbjh21 bSig = ( bSig | 0x00800000 )<<8; 1022936b7f4cSbjh21 mul32To64( aSig, bSig, &zSig0, &zSig1 ); 1023936b7f4cSbjh21 zSig0 |= ( zSig1 != 0 ); 1024936b7f4cSbjh21 if ( 0 <= (sbits32) ( zSig0<<1 ) ) { 1025936b7f4cSbjh21 zSig0 <<= 1; 1026936b7f4cSbjh21 --zExp; 1027936b7f4cSbjh21 } 1028936b7f4cSbjh21 return roundAndPackFloat32( zSign, zExp, zSig0 ); 1029936b7f4cSbjh21 1030936b7f4cSbjh21 } 1031936b7f4cSbjh21 1032936b7f4cSbjh21 /* 1033936b7f4cSbjh21 ------------------------------------------------------------------------------- 1034936b7f4cSbjh21 Returns the result of dividing the single-precision floating-point value `a' 1035936b7f4cSbjh21 by the corresponding value `b'. The operation is performed according to the 1036936b7f4cSbjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1037936b7f4cSbjh21 ------------------------------------------------------------------------------- 1038936b7f4cSbjh21 */ 1039936b7f4cSbjh21 float32 float32_div( float32 a, float32 b ) 1040936b7f4cSbjh21 { 1041936b7f4cSbjh21 flag aSign, bSign, zSign; 1042936b7f4cSbjh21 int16 aExp, bExp, zExp; 1043936b7f4cSbjh21 bits32 aSig, bSig, zSig, rem0, rem1, term0, term1; 1044936b7f4cSbjh21 1045936b7f4cSbjh21 aSig = extractFloat32Frac( a ); 1046936b7f4cSbjh21 aExp = extractFloat32Exp( a ); 1047936b7f4cSbjh21 aSign = extractFloat32Sign( a ); 1048936b7f4cSbjh21 bSig = extractFloat32Frac( b ); 1049936b7f4cSbjh21 bExp = extractFloat32Exp( b ); 1050936b7f4cSbjh21 bSign = extractFloat32Sign( b ); 1051936b7f4cSbjh21 zSign = aSign ^ bSign; 1052936b7f4cSbjh21 if ( aExp == 0xFF ) { 1053936b7f4cSbjh21 if ( aSig ) return propagateFloat32NaN( a, b ); 1054936b7f4cSbjh21 if ( bExp == 0xFF ) { 1055936b7f4cSbjh21 if ( bSig ) return propagateFloat32NaN( a, b ); 1056936b7f4cSbjh21 float_raise( float_flag_invalid ); 1057936b7f4cSbjh21 return float32_default_nan; 1058936b7f4cSbjh21 } 1059936b7f4cSbjh21 return packFloat32( zSign, 0xFF, 0 ); 1060936b7f4cSbjh21 } 1061936b7f4cSbjh21 if ( bExp == 0xFF ) { 1062936b7f4cSbjh21 if ( bSig ) return propagateFloat32NaN( a, b ); 1063936b7f4cSbjh21 return packFloat32( zSign, 0, 0 ); 1064936b7f4cSbjh21 } 1065936b7f4cSbjh21 if ( bExp == 0 ) { 1066936b7f4cSbjh21 if ( bSig == 0 ) { 1067936b7f4cSbjh21 if ( ( aExp | aSig ) == 0 ) { 1068936b7f4cSbjh21 float_raise( float_flag_invalid ); 1069936b7f4cSbjh21 return float32_default_nan; 1070936b7f4cSbjh21 } 1071936b7f4cSbjh21 float_raise( float_flag_divbyzero ); 1072936b7f4cSbjh21 return packFloat32( zSign, 0xFF, 0 ); 1073936b7f4cSbjh21 } 1074936b7f4cSbjh21 normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1075936b7f4cSbjh21 } 1076936b7f4cSbjh21 if ( aExp == 0 ) { 1077936b7f4cSbjh21 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); 1078936b7f4cSbjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1079936b7f4cSbjh21 } 1080936b7f4cSbjh21 zExp = aExp - bExp + 0x7D; 1081936b7f4cSbjh21 aSig = ( aSig | 0x00800000 )<<7; 1082936b7f4cSbjh21 bSig = ( bSig | 0x00800000 )<<8; 1083936b7f4cSbjh21 if ( bSig <= ( aSig + aSig ) ) { 1084936b7f4cSbjh21 aSig >>= 1; 1085936b7f4cSbjh21 ++zExp; 1086936b7f4cSbjh21 } 1087936b7f4cSbjh21 zSig = estimateDiv64To32( aSig, 0, bSig ); 1088936b7f4cSbjh21 if ( ( zSig & 0x3F ) <= 2 ) { 1089936b7f4cSbjh21 mul32To64( bSig, zSig, &term0, &term1 ); 1090936b7f4cSbjh21 sub64( aSig, 0, term0, term1, &rem0, &rem1 ); 1091936b7f4cSbjh21 while ( (sbits32) rem0 < 0 ) { 1092936b7f4cSbjh21 --zSig; 1093936b7f4cSbjh21 add64( rem0, rem1, 0, bSig, &rem0, &rem1 ); 1094936b7f4cSbjh21 } 1095936b7f4cSbjh21 zSig |= ( rem1 != 0 ); 1096936b7f4cSbjh21 } 1097936b7f4cSbjh21 return roundAndPackFloat32( zSign, zExp, zSig ); 1098936b7f4cSbjh21 1099936b7f4cSbjh21 } 1100936b7f4cSbjh21 1101936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC 1102936b7f4cSbjh21 /* 1103936b7f4cSbjh21 ------------------------------------------------------------------------------- 1104936b7f4cSbjh21 Returns the remainder of the single-precision floating-point value `a' 1105936b7f4cSbjh21 with respect to the corresponding value `b'. The operation is performed 1106936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1107936b7f4cSbjh21 ------------------------------------------------------------------------------- 1108936b7f4cSbjh21 */ 1109936b7f4cSbjh21 float32 float32_rem( float32 a, float32 b ) 1110936b7f4cSbjh21 { 1111936b7f4cSbjh21 flag aSign, bSign, zSign; 1112936b7f4cSbjh21 int16 aExp, bExp, expDiff; 1113936b7f4cSbjh21 bits32 aSig, bSig, q, allZero, alternateASig; 1114936b7f4cSbjh21 sbits32 sigMean; 1115936b7f4cSbjh21 1116936b7f4cSbjh21 aSig = extractFloat32Frac( a ); 1117936b7f4cSbjh21 aExp = extractFloat32Exp( a ); 1118936b7f4cSbjh21 aSign = extractFloat32Sign( a ); 1119936b7f4cSbjh21 bSig = extractFloat32Frac( b ); 1120936b7f4cSbjh21 bExp = extractFloat32Exp( b ); 1121936b7f4cSbjh21 bSign = extractFloat32Sign( b ); 1122936b7f4cSbjh21 if ( aExp == 0xFF ) { 1123936b7f4cSbjh21 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 1124936b7f4cSbjh21 return propagateFloat32NaN( a, b ); 1125936b7f4cSbjh21 } 1126936b7f4cSbjh21 float_raise( float_flag_invalid ); 1127936b7f4cSbjh21 return float32_default_nan; 1128936b7f4cSbjh21 } 1129936b7f4cSbjh21 if ( bExp == 0xFF ) { 1130936b7f4cSbjh21 if ( bSig ) return propagateFloat32NaN( a, b ); 1131936b7f4cSbjh21 return a; 1132936b7f4cSbjh21 } 1133936b7f4cSbjh21 if ( bExp == 0 ) { 1134936b7f4cSbjh21 if ( bSig == 0 ) { 1135936b7f4cSbjh21 float_raise( float_flag_invalid ); 1136936b7f4cSbjh21 return float32_default_nan; 1137936b7f4cSbjh21 } 1138936b7f4cSbjh21 normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1139936b7f4cSbjh21 } 1140936b7f4cSbjh21 if ( aExp == 0 ) { 1141936b7f4cSbjh21 if ( aSig == 0 ) return a; 1142936b7f4cSbjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1143936b7f4cSbjh21 } 1144936b7f4cSbjh21 expDiff = aExp - bExp; 1145936b7f4cSbjh21 aSig = ( aSig | 0x00800000 )<<8; 1146936b7f4cSbjh21 bSig = ( bSig | 0x00800000 )<<8; 1147936b7f4cSbjh21 if ( expDiff < 0 ) { 1148936b7f4cSbjh21 if ( expDiff < -1 ) return a; 1149936b7f4cSbjh21 aSig >>= 1; 1150936b7f4cSbjh21 } 1151936b7f4cSbjh21 q = ( bSig <= aSig ); 1152936b7f4cSbjh21 if ( q ) aSig -= bSig; 1153936b7f4cSbjh21 expDiff -= 32; 1154936b7f4cSbjh21 while ( 0 < expDiff ) { 1155936b7f4cSbjh21 q = estimateDiv64To32( aSig, 0, bSig ); 1156936b7f4cSbjh21 q = ( 2 < q ) ? q - 2 : 0; 1157936b7f4cSbjh21 aSig = - ( ( bSig>>2 ) * q ); 1158936b7f4cSbjh21 expDiff -= 30; 1159936b7f4cSbjh21 } 1160936b7f4cSbjh21 expDiff += 32; 1161936b7f4cSbjh21 if ( 0 < expDiff ) { 1162936b7f4cSbjh21 q = estimateDiv64To32( aSig, 0, bSig ); 1163936b7f4cSbjh21 q = ( 2 < q ) ? q - 2 : 0; 1164936b7f4cSbjh21 q >>= 32 - expDiff; 1165936b7f4cSbjh21 bSig >>= 2; 1166936b7f4cSbjh21 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; 1167936b7f4cSbjh21 } 1168936b7f4cSbjh21 else { 1169936b7f4cSbjh21 aSig >>= 2; 1170936b7f4cSbjh21 bSig >>= 2; 1171936b7f4cSbjh21 } 1172936b7f4cSbjh21 do { 1173936b7f4cSbjh21 alternateASig = aSig; 1174936b7f4cSbjh21 ++q; 1175936b7f4cSbjh21 aSig -= bSig; 1176936b7f4cSbjh21 } while ( 0 <= (sbits32) aSig ); 1177936b7f4cSbjh21 sigMean = aSig + alternateASig; 1178936b7f4cSbjh21 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { 1179936b7f4cSbjh21 aSig = alternateASig; 1180936b7f4cSbjh21 } 1181936b7f4cSbjh21 zSign = ( (sbits32) aSig < 0 ); 1182936b7f4cSbjh21 if ( zSign ) aSig = - aSig; 1183936b7f4cSbjh21 return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); 1184936b7f4cSbjh21 1185936b7f4cSbjh21 } 1186936b7f4cSbjh21 #endif 1187936b7f4cSbjh21 1188936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC 1189936b7f4cSbjh21 /* 1190936b7f4cSbjh21 ------------------------------------------------------------------------------- 1191936b7f4cSbjh21 Returns the square root of the single-precision floating-point value `a'. 1192936b7f4cSbjh21 The operation is performed according to the IEC/IEEE Standard for Binary 1193936b7f4cSbjh21 Floating-Point Arithmetic. 1194936b7f4cSbjh21 ------------------------------------------------------------------------------- 1195936b7f4cSbjh21 */ 1196936b7f4cSbjh21 float32 float32_sqrt( float32 a ) 1197936b7f4cSbjh21 { 1198936b7f4cSbjh21 flag aSign; 1199936b7f4cSbjh21 int16 aExp, zExp; 1200936b7f4cSbjh21 bits32 aSig, zSig, rem0, rem1, term0, term1; 1201936b7f4cSbjh21 1202936b7f4cSbjh21 aSig = extractFloat32Frac( a ); 1203936b7f4cSbjh21 aExp = extractFloat32Exp( a ); 1204936b7f4cSbjh21 aSign = extractFloat32Sign( a ); 1205936b7f4cSbjh21 if ( aExp == 0xFF ) { 1206936b7f4cSbjh21 if ( aSig ) return propagateFloat32NaN( a, 0 ); 1207936b7f4cSbjh21 if ( ! aSign ) return a; 1208936b7f4cSbjh21 float_raise( float_flag_invalid ); 1209936b7f4cSbjh21 return float32_default_nan; 1210936b7f4cSbjh21 } 1211936b7f4cSbjh21 if ( aSign ) { 1212936b7f4cSbjh21 if ( ( aExp | aSig ) == 0 ) return a; 1213936b7f4cSbjh21 float_raise( float_flag_invalid ); 1214936b7f4cSbjh21 return float32_default_nan; 1215936b7f4cSbjh21 } 1216936b7f4cSbjh21 if ( aExp == 0 ) { 1217936b7f4cSbjh21 if ( aSig == 0 ) return 0; 1218936b7f4cSbjh21 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1219936b7f4cSbjh21 } 1220936b7f4cSbjh21 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; 1221936b7f4cSbjh21 aSig = ( aSig | 0x00800000 )<<8; 1222936b7f4cSbjh21 zSig = estimateSqrt32( aExp, aSig ) + 2; 1223936b7f4cSbjh21 if ( ( zSig & 0x7F ) <= 5 ) { 1224936b7f4cSbjh21 if ( zSig < 2 ) { 1225936b7f4cSbjh21 zSig = 0x7FFFFFFF; 1226936b7f4cSbjh21 goto roundAndPack; 1227936b7f4cSbjh21 } 1228936b7f4cSbjh21 else { 1229936b7f4cSbjh21 aSig >>= aExp & 1; 1230936b7f4cSbjh21 mul32To64( zSig, zSig, &term0, &term1 ); 1231936b7f4cSbjh21 sub64( aSig, 0, term0, term1, &rem0, &rem1 ); 1232936b7f4cSbjh21 while ( (sbits32) rem0 < 0 ) { 1233936b7f4cSbjh21 --zSig; 1234936b7f4cSbjh21 shortShift64Left( 0, zSig, 1, &term0, &term1 ); 1235936b7f4cSbjh21 term1 |= 1; 1236936b7f4cSbjh21 add64( rem0, rem1, term0, term1, &rem0, &rem1 ); 1237936b7f4cSbjh21 } 1238936b7f4cSbjh21 zSig |= ( ( rem0 | rem1 ) != 0 ); 1239936b7f4cSbjh21 } 1240936b7f4cSbjh21 } 1241936b7f4cSbjh21 shift32RightJamming( zSig, 1, &zSig ); 1242936b7f4cSbjh21 roundAndPack: 1243936b7f4cSbjh21 return roundAndPackFloat32( 0, zExp, zSig ); 1244936b7f4cSbjh21 1245936b7f4cSbjh21 } 1246936b7f4cSbjh21 #endif 1247936b7f4cSbjh21 1248936b7f4cSbjh21 /* 1249936b7f4cSbjh21 ------------------------------------------------------------------------------- 1250936b7f4cSbjh21 Returns 1 if the single-precision floating-point value `a' is equal to 1251936b7f4cSbjh21 the corresponding value `b', and 0 otherwise. The comparison is performed 1252936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1253936b7f4cSbjh21 ------------------------------------------------------------------------------- 1254936b7f4cSbjh21 */ 1255936b7f4cSbjh21 flag float32_eq( float32 a, float32 b ) 1256936b7f4cSbjh21 { 1257936b7f4cSbjh21 1258936b7f4cSbjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1259936b7f4cSbjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1260936b7f4cSbjh21 ) { 1261936b7f4cSbjh21 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 1262936b7f4cSbjh21 float_raise( float_flag_invalid ); 1263936b7f4cSbjh21 } 1264936b7f4cSbjh21 return 0; 1265936b7f4cSbjh21 } 1266936b7f4cSbjh21 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1267936b7f4cSbjh21 1268936b7f4cSbjh21 } 1269936b7f4cSbjh21 1270936b7f4cSbjh21 /* 1271936b7f4cSbjh21 ------------------------------------------------------------------------------- 1272936b7f4cSbjh21 Returns 1 if the single-precision floating-point value `a' is less than 1273936b7f4cSbjh21 or equal to the corresponding value `b', and 0 otherwise. The comparison 1274936b7f4cSbjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point 1275936b7f4cSbjh21 Arithmetic. 1276936b7f4cSbjh21 ------------------------------------------------------------------------------- 1277936b7f4cSbjh21 */ 1278936b7f4cSbjh21 flag float32_le( float32 a, float32 b ) 1279936b7f4cSbjh21 { 1280936b7f4cSbjh21 flag aSign, bSign; 1281936b7f4cSbjh21 1282936b7f4cSbjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1283936b7f4cSbjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1284936b7f4cSbjh21 ) { 1285936b7f4cSbjh21 float_raise( float_flag_invalid ); 1286936b7f4cSbjh21 return 0; 1287936b7f4cSbjh21 } 1288936b7f4cSbjh21 aSign = extractFloat32Sign( a ); 1289936b7f4cSbjh21 bSign = extractFloat32Sign( b ); 1290936b7f4cSbjh21 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1291936b7f4cSbjh21 return ( a == b ) || ( aSign ^ ( a < b ) ); 1292936b7f4cSbjh21 1293936b7f4cSbjh21 } 1294936b7f4cSbjh21 1295936b7f4cSbjh21 /* 1296936b7f4cSbjh21 ------------------------------------------------------------------------------- 1297936b7f4cSbjh21 Returns 1 if the single-precision floating-point value `a' is less than 1298936b7f4cSbjh21 the corresponding value `b', and 0 otherwise. The comparison is performed 1299936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1300936b7f4cSbjh21 ------------------------------------------------------------------------------- 1301936b7f4cSbjh21 */ 1302936b7f4cSbjh21 flag float32_lt( float32 a, float32 b ) 1303936b7f4cSbjh21 { 1304936b7f4cSbjh21 flag aSign, bSign; 1305936b7f4cSbjh21 1306936b7f4cSbjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1307936b7f4cSbjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1308936b7f4cSbjh21 ) { 1309936b7f4cSbjh21 float_raise( float_flag_invalid ); 1310936b7f4cSbjh21 return 0; 1311936b7f4cSbjh21 } 1312936b7f4cSbjh21 aSign = extractFloat32Sign( a ); 1313936b7f4cSbjh21 bSign = extractFloat32Sign( b ); 1314936b7f4cSbjh21 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); 1315936b7f4cSbjh21 return ( a != b ) && ( aSign ^ ( a < b ) ); 1316936b7f4cSbjh21 1317936b7f4cSbjh21 } 1318936b7f4cSbjh21 1319936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1320936b7f4cSbjh21 /* 1321936b7f4cSbjh21 ------------------------------------------------------------------------------- 1322936b7f4cSbjh21 Returns 1 if the single-precision floating-point value `a' is equal to 1323936b7f4cSbjh21 the corresponding value `b', and 0 otherwise. The invalid exception is 1324936b7f4cSbjh21 raised if either operand is a NaN. Otherwise, the comparison is performed 1325936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1326936b7f4cSbjh21 ------------------------------------------------------------------------------- 1327936b7f4cSbjh21 */ 1328936b7f4cSbjh21 flag float32_eq_signaling( float32 a, float32 b ) 1329936b7f4cSbjh21 { 1330936b7f4cSbjh21 1331936b7f4cSbjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1332936b7f4cSbjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1333936b7f4cSbjh21 ) { 1334936b7f4cSbjh21 float_raise( float_flag_invalid ); 1335936b7f4cSbjh21 return 0; 1336936b7f4cSbjh21 } 1337936b7f4cSbjh21 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1338936b7f4cSbjh21 1339936b7f4cSbjh21 } 1340936b7f4cSbjh21 1341936b7f4cSbjh21 /* 1342936b7f4cSbjh21 ------------------------------------------------------------------------------- 1343936b7f4cSbjh21 Returns 1 if the single-precision floating-point value `a' is less than or 1344936b7f4cSbjh21 equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 1345936b7f4cSbjh21 cause an exception. Otherwise, the comparison is performed according to the 1346936b7f4cSbjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1347936b7f4cSbjh21 ------------------------------------------------------------------------------- 1348936b7f4cSbjh21 */ 1349936b7f4cSbjh21 flag float32_le_quiet( float32 a, float32 b ) 1350936b7f4cSbjh21 { 1351936b7f4cSbjh21 flag aSign, bSign; 1352936b7f4cSbjh21 int16 aExp, bExp; 1353936b7f4cSbjh21 1354936b7f4cSbjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1355936b7f4cSbjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1356936b7f4cSbjh21 ) { 1357936b7f4cSbjh21 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 1358936b7f4cSbjh21 float_raise( float_flag_invalid ); 1359936b7f4cSbjh21 } 1360936b7f4cSbjh21 return 0; 1361936b7f4cSbjh21 } 1362936b7f4cSbjh21 aSign = extractFloat32Sign( a ); 1363936b7f4cSbjh21 bSign = extractFloat32Sign( b ); 1364936b7f4cSbjh21 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1365936b7f4cSbjh21 return ( a == b ) || ( aSign ^ ( a < b ) ); 1366936b7f4cSbjh21 1367936b7f4cSbjh21 } 1368936b7f4cSbjh21 1369936b7f4cSbjh21 /* 1370936b7f4cSbjh21 ------------------------------------------------------------------------------- 1371936b7f4cSbjh21 Returns 1 if the single-precision floating-point value `a' is less than 1372936b7f4cSbjh21 the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 1373936b7f4cSbjh21 exception. Otherwise, the comparison is performed according to the IEC/IEEE 1374936b7f4cSbjh21 Standard for Binary Floating-Point Arithmetic. 1375936b7f4cSbjh21 ------------------------------------------------------------------------------- 1376936b7f4cSbjh21 */ 1377936b7f4cSbjh21 flag float32_lt_quiet( float32 a, float32 b ) 1378936b7f4cSbjh21 { 1379936b7f4cSbjh21 flag aSign, bSign; 1380936b7f4cSbjh21 1381936b7f4cSbjh21 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1382936b7f4cSbjh21 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1383936b7f4cSbjh21 ) { 1384936b7f4cSbjh21 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 1385936b7f4cSbjh21 float_raise( float_flag_invalid ); 1386936b7f4cSbjh21 } 1387936b7f4cSbjh21 return 0; 1388936b7f4cSbjh21 } 1389936b7f4cSbjh21 aSign = extractFloat32Sign( a ); 1390936b7f4cSbjh21 bSign = extractFloat32Sign( b ); 1391936b7f4cSbjh21 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); 1392936b7f4cSbjh21 return ( a != b ) && ( aSign ^ ( a < b ) ); 1393936b7f4cSbjh21 1394936b7f4cSbjh21 } 1395936b7f4cSbjh21 #endif /* !SOFTFLOAT_FOR_GCC */ 1396936b7f4cSbjh21 1397936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1398936b7f4cSbjh21 /* 1399936b7f4cSbjh21 ------------------------------------------------------------------------------- 1400936b7f4cSbjh21 Returns the result of converting the double-precision floating-point value 1401936b7f4cSbjh21 `a' to the 32-bit two's complement integer format. The conversion is 1402936b7f4cSbjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 1403936b7f4cSbjh21 Arithmetic---which means in particular that the conversion is rounded 1404936b7f4cSbjh21 according to the current rounding mode. If `a' is a NaN, the largest 1405936b7f4cSbjh21 positive integer is returned. Otherwise, if the conversion overflows, the 1406936b7f4cSbjh21 largest integer with the same sign as `a' is returned. 1407936b7f4cSbjh21 ------------------------------------------------------------------------------- 1408936b7f4cSbjh21 */ 1409936b7f4cSbjh21 int32 float64_to_int32( float64 a ) 1410936b7f4cSbjh21 { 1411936b7f4cSbjh21 flag aSign; 1412936b7f4cSbjh21 int16 aExp, shiftCount; 1413936b7f4cSbjh21 bits32 aSig0, aSig1, absZ, aSigExtra; 1414936b7f4cSbjh21 int32 z; 1415936b7f4cSbjh21 int8 roundingMode; 1416936b7f4cSbjh21 1417936b7f4cSbjh21 aSig1 = extractFloat64Frac1( a ); 1418936b7f4cSbjh21 aSig0 = extractFloat64Frac0( a ); 1419936b7f4cSbjh21 aExp = extractFloat64Exp( a ); 1420936b7f4cSbjh21 aSign = extractFloat64Sign( a ); 1421936b7f4cSbjh21 shiftCount = aExp - 0x413; 1422936b7f4cSbjh21 if ( 0 <= shiftCount ) { 1423936b7f4cSbjh21 if ( 0x41E < aExp ) { 1424936b7f4cSbjh21 if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0; 1425936b7f4cSbjh21 goto invalid; 1426936b7f4cSbjh21 } 1427936b7f4cSbjh21 shortShift64Left( 1428936b7f4cSbjh21 aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra ); 1429936b7f4cSbjh21 if ( 0x80000000 < absZ ) goto invalid; 1430936b7f4cSbjh21 } 1431936b7f4cSbjh21 else { 1432936b7f4cSbjh21 aSig1 = ( aSig1 != 0 ); 1433936b7f4cSbjh21 if ( aExp < 0x3FE ) { 1434936b7f4cSbjh21 aSigExtra = aExp | aSig0 | aSig1; 1435936b7f4cSbjh21 absZ = 0; 1436936b7f4cSbjh21 } 1437936b7f4cSbjh21 else { 1438936b7f4cSbjh21 aSig0 |= 0x00100000; 1439936b7f4cSbjh21 aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1; 1440936b7f4cSbjh21 absZ = aSig0>>( - shiftCount ); 1441936b7f4cSbjh21 } 1442936b7f4cSbjh21 } 1443936b7f4cSbjh21 roundingMode = float_rounding_mode; 1444936b7f4cSbjh21 if ( roundingMode == float_round_nearest_even ) { 1445936b7f4cSbjh21 if ( (sbits32) aSigExtra < 0 ) { 1446936b7f4cSbjh21 ++absZ; 1447936b7f4cSbjh21 if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1; 1448936b7f4cSbjh21 } 1449936b7f4cSbjh21 z = aSign ? - absZ : absZ; 1450936b7f4cSbjh21 } 1451936b7f4cSbjh21 else { 1452936b7f4cSbjh21 aSigExtra = ( aSigExtra != 0 ); 1453936b7f4cSbjh21 if ( aSign ) { 1454936b7f4cSbjh21 z = - ( absZ 1455936b7f4cSbjh21 + ( ( roundingMode == float_round_down ) & aSigExtra ) ); 1456936b7f4cSbjh21 } 1457936b7f4cSbjh21 else { 1458936b7f4cSbjh21 z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra ); 1459936b7f4cSbjh21 } 1460936b7f4cSbjh21 } 1461936b7f4cSbjh21 if ( ( aSign ^ ( z < 0 ) ) && z ) { 1462936b7f4cSbjh21 invalid: 1463936b7f4cSbjh21 float_raise( float_flag_invalid ); 1464936b7f4cSbjh21 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 1465936b7f4cSbjh21 } 1466936b7f4cSbjh21 if ( aSigExtra ) float_exception_flags |= float_flag_inexact; 1467936b7f4cSbjh21 return z; 1468936b7f4cSbjh21 1469936b7f4cSbjh21 } 1470936b7f4cSbjh21 #endif /* !SOFTFLOAT_FOR_GCC */ 1471936b7f4cSbjh21 1472936b7f4cSbjh21 /* 1473936b7f4cSbjh21 ------------------------------------------------------------------------------- 1474936b7f4cSbjh21 Returns the result of converting the double-precision floating-point value 1475936b7f4cSbjh21 `a' to the 32-bit two's complement integer format. The conversion is 1476936b7f4cSbjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 1477936b7f4cSbjh21 Arithmetic, except that the conversion is always rounded toward zero. 1478936b7f4cSbjh21 If `a' is a NaN, the largest positive integer is returned. Otherwise, if 1479936b7f4cSbjh21 the conversion overflows, the largest integer with the same sign as `a' is 1480936b7f4cSbjh21 returned. 1481936b7f4cSbjh21 ------------------------------------------------------------------------------- 1482936b7f4cSbjh21 */ 1483936b7f4cSbjh21 int32 float64_to_int32_round_to_zero( float64 a ) 1484936b7f4cSbjh21 { 1485936b7f4cSbjh21 flag aSign; 1486936b7f4cSbjh21 int16 aExp, shiftCount; 1487936b7f4cSbjh21 bits32 aSig0, aSig1, absZ, aSigExtra; 1488936b7f4cSbjh21 int32 z; 1489936b7f4cSbjh21 1490936b7f4cSbjh21 aSig1 = extractFloat64Frac1( a ); 1491936b7f4cSbjh21 aSig0 = extractFloat64Frac0( a ); 1492936b7f4cSbjh21 aExp = extractFloat64Exp( a ); 1493936b7f4cSbjh21 aSign = extractFloat64Sign( a ); 1494936b7f4cSbjh21 shiftCount = aExp - 0x413; 1495936b7f4cSbjh21 if ( 0 <= shiftCount ) { 1496936b7f4cSbjh21 if ( 0x41E < aExp ) { 1497936b7f4cSbjh21 if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0; 1498936b7f4cSbjh21 goto invalid; 1499936b7f4cSbjh21 } 1500936b7f4cSbjh21 shortShift64Left( 1501936b7f4cSbjh21 aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra ); 1502936b7f4cSbjh21 } 1503936b7f4cSbjh21 else { 1504936b7f4cSbjh21 if ( aExp < 0x3FF ) { 1505936b7f4cSbjh21 if ( aExp | aSig0 | aSig1 ) { 1506936b7f4cSbjh21 float_exception_flags |= float_flag_inexact; 1507936b7f4cSbjh21 } 1508936b7f4cSbjh21 return 0; 1509936b7f4cSbjh21 } 1510936b7f4cSbjh21 aSig0 |= 0x00100000; 1511936b7f4cSbjh21 aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1; 1512936b7f4cSbjh21 absZ = aSig0>>( - shiftCount ); 1513936b7f4cSbjh21 } 1514936b7f4cSbjh21 z = aSign ? - absZ : absZ; 1515936b7f4cSbjh21 if ( ( aSign ^ ( z < 0 ) ) && z ) { 1516936b7f4cSbjh21 invalid: 1517936b7f4cSbjh21 float_raise( float_flag_invalid ); 1518936b7f4cSbjh21 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 1519936b7f4cSbjh21 } 1520936b7f4cSbjh21 if ( aSigExtra ) float_exception_flags |= float_flag_inexact; 1521936b7f4cSbjh21 return z; 1522936b7f4cSbjh21 1523936b7f4cSbjh21 } 1524936b7f4cSbjh21 1525936b7f4cSbjh21 /* 1526936b7f4cSbjh21 ------------------------------------------------------------------------------- 1527936b7f4cSbjh21 Returns the result of converting the double-precision floating-point value 1528936b7f4cSbjh21 `a' to the single-precision floating-point format. The conversion is 1529936b7f4cSbjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point 1530936b7f4cSbjh21 Arithmetic. 1531936b7f4cSbjh21 ------------------------------------------------------------------------------- 1532936b7f4cSbjh21 */ 1533936b7f4cSbjh21 float32 float64_to_float32( float64 a ) 1534936b7f4cSbjh21 { 1535936b7f4cSbjh21 flag aSign; 1536936b7f4cSbjh21 int16 aExp; 1537936b7f4cSbjh21 bits32 aSig0, aSig1, zSig; 1538936b7f4cSbjh21 bits32 allZero; 1539936b7f4cSbjh21 1540936b7f4cSbjh21 aSig1 = extractFloat64Frac1( a ); 1541936b7f4cSbjh21 aSig0 = extractFloat64Frac0( a ); 1542936b7f4cSbjh21 aExp = extractFloat64Exp( a ); 1543936b7f4cSbjh21 aSign = extractFloat64Sign( a ); 1544936b7f4cSbjh21 if ( aExp == 0x7FF ) { 1545936b7f4cSbjh21 if ( aSig0 | aSig1 ) { 1546936b7f4cSbjh21 return commonNaNToFloat32( float64ToCommonNaN( a ) ); 1547936b7f4cSbjh21 } 1548936b7f4cSbjh21 return packFloat32( aSign, 0xFF, 0 ); 1549936b7f4cSbjh21 } 1550936b7f4cSbjh21 shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig ); 1551936b7f4cSbjh21 if ( aExp ) zSig |= 0x40000000; 1552936b7f4cSbjh21 return roundAndPackFloat32( aSign, aExp - 0x381, zSig ); 1553936b7f4cSbjh21 1554936b7f4cSbjh21 } 1555936b7f4cSbjh21 1556936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC 1557936b7f4cSbjh21 /* 1558936b7f4cSbjh21 ------------------------------------------------------------------------------- 1559936b7f4cSbjh21 Rounds the double-precision floating-point value `a' to an integer, 1560936b7f4cSbjh21 and returns the result as a double-precision floating-point value. The 1561936b7f4cSbjh21 operation is performed according to the IEC/IEEE Standard for Binary 1562936b7f4cSbjh21 Floating-Point Arithmetic. 1563936b7f4cSbjh21 ------------------------------------------------------------------------------- 1564936b7f4cSbjh21 */ 1565936b7f4cSbjh21 float64 float64_round_to_int( float64 a ) 1566936b7f4cSbjh21 { 1567936b7f4cSbjh21 flag aSign; 1568936b7f4cSbjh21 int16 aExp; 1569936b7f4cSbjh21 bits32 lastBitMask, roundBitsMask; 1570936b7f4cSbjh21 int8 roundingMode; 1571936b7f4cSbjh21 float64 z; 1572936b7f4cSbjh21 1573936b7f4cSbjh21 aExp = extractFloat64Exp( a ); 1574936b7f4cSbjh21 if ( 0x413 <= aExp ) { 1575936b7f4cSbjh21 if ( 0x433 <= aExp ) { 1576936b7f4cSbjh21 if ( ( aExp == 0x7FF ) 1577936b7f4cSbjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) { 1578936b7f4cSbjh21 return propagateFloat64NaN( a, a ); 1579936b7f4cSbjh21 } 1580936b7f4cSbjh21 return a; 1581936b7f4cSbjh21 } 1582936b7f4cSbjh21 lastBitMask = 1; 1583936b7f4cSbjh21 lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1; 1584936b7f4cSbjh21 roundBitsMask = lastBitMask - 1; 1585936b7f4cSbjh21 z = a; 1586936b7f4cSbjh21 roundingMode = float_rounding_mode; 1587936b7f4cSbjh21 if ( roundingMode == float_round_nearest_even ) { 1588936b7f4cSbjh21 if ( lastBitMask ) { 1589936b7f4cSbjh21 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); 1590936b7f4cSbjh21 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; 1591936b7f4cSbjh21 } 1592936b7f4cSbjh21 else { 1593936b7f4cSbjh21 if ( (sbits32) z.low < 0 ) { 1594936b7f4cSbjh21 ++z.high; 1595936b7f4cSbjh21 if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1; 1596936b7f4cSbjh21 } 1597936b7f4cSbjh21 } 1598936b7f4cSbjh21 } 1599936b7f4cSbjh21 else if ( roundingMode != float_round_to_zero ) { 1600936b7f4cSbjh21 if ( extractFloat64Sign( z ) 1601936b7f4cSbjh21 ^ ( roundingMode == float_round_up ) ) { 1602936b7f4cSbjh21 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); 1603936b7f4cSbjh21 } 1604936b7f4cSbjh21 } 1605936b7f4cSbjh21 z.low &= ~ roundBitsMask; 1606936b7f4cSbjh21 } 1607936b7f4cSbjh21 else { 1608936b7f4cSbjh21 if ( aExp <= 0x3FE ) { 1609936b7f4cSbjh21 if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a; 1610936b7f4cSbjh21 float_exception_flags |= float_flag_inexact; 1611936b7f4cSbjh21 aSign = extractFloat64Sign( a ); 1612936b7f4cSbjh21 switch ( float_rounding_mode ) { 1613936b7f4cSbjh21 case float_round_nearest_even: 1614936b7f4cSbjh21 if ( ( aExp == 0x3FE ) 1615936b7f4cSbjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) 1616936b7f4cSbjh21 ) { 1617936b7f4cSbjh21 return packFloat64( aSign, 0x3FF, 0, 0 ); 1618936b7f4cSbjh21 } 1619936b7f4cSbjh21 break; 1620936b7f4cSbjh21 case float_round_down: 1621936b7f4cSbjh21 return 1622936b7f4cSbjh21 aSign ? packFloat64( 1, 0x3FF, 0, 0 ) 1623936b7f4cSbjh21 : packFloat64( 0, 0, 0, 0 ); 1624936b7f4cSbjh21 case float_round_up: 1625936b7f4cSbjh21 return 1626936b7f4cSbjh21 aSign ? packFloat64( 1, 0, 0, 0 ) 1627936b7f4cSbjh21 : packFloat64( 0, 0x3FF, 0, 0 ); 1628936b7f4cSbjh21 } 1629936b7f4cSbjh21 return packFloat64( aSign, 0, 0, 0 ); 1630936b7f4cSbjh21 } 1631936b7f4cSbjh21 lastBitMask = 1; 1632936b7f4cSbjh21 lastBitMask <<= 0x413 - aExp; 1633936b7f4cSbjh21 roundBitsMask = lastBitMask - 1; 1634936b7f4cSbjh21 z.low = 0; 1635936b7f4cSbjh21 z.high = a.high; 1636936b7f4cSbjh21 roundingMode = float_rounding_mode; 1637936b7f4cSbjh21 if ( roundingMode == float_round_nearest_even ) { 1638936b7f4cSbjh21 z.high += lastBitMask>>1; 1639936b7f4cSbjh21 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { 1640936b7f4cSbjh21 z.high &= ~ lastBitMask; 1641936b7f4cSbjh21 } 1642936b7f4cSbjh21 } 1643936b7f4cSbjh21 else if ( roundingMode != float_round_to_zero ) { 1644936b7f4cSbjh21 if ( extractFloat64Sign( z ) 1645936b7f4cSbjh21 ^ ( roundingMode == float_round_up ) ) { 1646936b7f4cSbjh21 z.high |= ( a.low != 0 ); 1647936b7f4cSbjh21 z.high += roundBitsMask; 1648936b7f4cSbjh21 } 1649936b7f4cSbjh21 } 1650936b7f4cSbjh21 z.high &= ~ roundBitsMask; 1651936b7f4cSbjh21 } 1652936b7f4cSbjh21 if ( ( z.low != a.low ) || ( z.high != a.high ) ) { 1653936b7f4cSbjh21 float_exception_flags |= float_flag_inexact; 1654936b7f4cSbjh21 } 1655936b7f4cSbjh21 return z; 1656936b7f4cSbjh21 1657936b7f4cSbjh21 } 1658936b7f4cSbjh21 #endif 1659936b7f4cSbjh21 1660936b7f4cSbjh21 /* 1661936b7f4cSbjh21 ------------------------------------------------------------------------------- 1662936b7f4cSbjh21 Returns the result of adding the absolute values of the double-precision 1663936b7f4cSbjh21 floating-point values `a' and `b'. If `zSign' is 1, the sum is negated 1664936b7f4cSbjh21 before being returned. `zSign' is ignored if the result is a NaN. 1665936b7f4cSbjh21 The addition is performed according to the IEC/IEEE Standard for Binary 1666936b7f4cSbjh21 Floating-Point Arithmetic. 1667936b7f4cSbjh21 ------------------------------------------------------------------------------- 1668936b7f4cSbjh21 */ 1669936b7f4cSbjh21 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) 1670936b7f4cSbjh21 { 1671936b7f4cSbjh21 int16 aExp, bExp, zExp; 1672936b7f4cSbjh21 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 1673936b7f4cSbjh21 int16 expDiff; 1674936b7f4cSbjh21 1675936b7f4cSbjh21 aSig1 = extractFloat64Frac1( a ); 1676936b7f4cSbjh21 aSig0 = extractFloat64Frac0( a ); 1677936b7f4cSbjh21 aExp = extractFloat64Exp( a ); 1678936b7f4cSbjh21 bSig1 = extractFloat64Frac1( b ); 1679936b7f4cSbjh21 bSig0 = extractFloat64Frac0( b ); 1680936b7f4cSbjh21 bExp = extractFloat64Exp( b ); 1681936b7f4cSbjh21 expDiff = aExp - bExp; 1682936b7f4cSbjh21 if ( 0 < expDiff ) { 1683936b7f4cSbjh21 if ( aExp == 0x7FF ) { 1684936b7f4cSbjh21 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); 1685936b7f4cSbjh21 return a; 1686936b7f4cSbjh21 } 1687936b7f4cSbjh21 if ( bExp == 0 ) { 1688936b7f4cSbjh21 --expDiff; 1689936b7f4cSbjh21 } 1690936b7f4cSbjh21 else { 1691936b7f4cSbjh21 bSig0 |= 0x00100000; 1692936b7f4cSbjh21 } 1693936b7f4cSbjh21 shift64ExtraRightJamming( 1694936b7f4cSbjh21 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); 1695936b7f4cSbjh21 zExp = aExp; 1696936b7f4cSbjh21 } 1697936b7f4cSbjh21 else if ( expDiff < 0 ) { 1698936b7f4cSbjh21 if ( bExp == 0x7FF ) { 1699936b7f4cSbjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1700936b7f4cSbjh21 return packFloat64( zSign, 0x7FF, 0, 0 ); 1701936b7f4cSbjh21 } 1702936b7f4cSbjh21 if ( aExp == 0 ) { 1703936b7f4cSbjh21 ++expDiff; 1704936b7f4cSbjh21 } 1705936b7f4cSbjh21 else { 1706936b7f4cSbjh21 aSig0 |= 0x00100000; 1707936b7f4cSbjh21 } 1708936b7f4cSbjh21 shift64ExtraRightJamming( 1709936b7f4cSbjh21 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); 1710936b7f4cSbjh21 zExp = bExp; 1711936b7f4cSbjh21 } 1712936b7f4cSbjh21 else { 1713936b7f4cSbjh21 if ( aExp == 0x7FF ) { 1714936b7f4cSbjh21 if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 1715936b7f4cSbjh21 return propagateFloat64NaN( a, b ); 1716936b7f4cSbjh21 } 1717936b7f4cSbjh21 return a; 1718936b7f4cSbjh21 } 1719936b7f4cSbjh21 add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 1720936b7f4cSbjh21 if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 ); 1721936b7f4cSbjh21 zSig2 = 0; 1722936b7f4cSbjh21 zSig0 |= 0x00200000; 1723936b7f4cSbjh21 zExp = aExp; 1724936b7f4cSbjh21 goto shiftRight1; 1725936b7f4cSbjh21 } 1726936b7f4cSbjh21 aSig0 |= 0x00100000; 1727936b7f4cSbjh21 add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 1728936b7f4cSbjh21 --zExp; 1729936b7f4cSbjh21 if ( zSig0 < 0x00200000 ) goto roundAndPack; 1730936b7f4cSbjh21 ++zExp; 1731936b7f4cSbjh21 shiftRight1: 1732936b7f4cSbjh21 shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 1733936b7f4cSbjh21 roundAndPack: 1734936b7f4cSbjh21 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 1735936b7f4cSbjh21 1736936b7f4cSbjh21 } 1737936b7f4cSbjh21 1738936b7f4cSbjh21 /* 1739936b7f4cSbjh21 ------------------------------------------------------------------------------- 1740936b7f4cSbjh21 Returns the result of subtracting the absolute values of the double- 1741936b7f4cSbjh21 precision floating-point values `a' and `b'. If `zSign' is 1, the 1742936b7f4cSbjh21 difference is negated before being returned. `zSign' is ignored if the 1743936b7f4cSbjh21 result is a NaN. The subtraction is performed according to the IEC/IEEE 1744936b7f4cSbjh21 Standard for Binary Floating-Point Arithmetic. 1745936b7f4cSbjh21 ------------------------------------------------------------------------------- 1746936b7f4cSbjh21 */ 1747936b7f4cSbjh21 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) 1748936b7f4cSbjh21 { 1749936b7f4cSbjh21 int16 aExp, bExp, zExp; 1750936b7f4cSbjh21 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; 1751936b7f4cSbjh21 int16 expDiff; 1752936b7f4cSbjh21 1753936b7f4cSbjh21 aSig1 = extractFloat64Frac1( a ); 1754936b7f4cSbjh21 aSig0 = extractFloat64Frac0( a ); 1755936b7f4cSbjh21 aExp = extractFloat64Exp( a ); 1756936b7f4cSbjh21 bSig1 = extractFloat64Frac1( b ); 1757936b7f4cSbjh21 bSig0 = extractFloat64Frac0( b ); 1758936b7f4cSbjh21 bExp = extractFloat64Exp( b ); 1759936b7f4cSbjh21 expDiff = aExp - bExp; 1760936b7f4cSbjh21 shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 ); 1761936b7f4cSbjh21 shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 ); 1762936b7f4cSbjh21 if ( 0 < expDiff ) goto aExpBigger; 1763936b7f4cSbjh21 if ( expDiff < 0 ) goto bExpBigger; 1764936b7f4cSbjh21 if ( aExp == 0x7FF ) { 1765936b7f4cSbjh21 if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 1766936b7f4cSbjh21 return propagateFloat64NaN( a, b ); 1767936b7f4cSbjh21 } 1768936b7f4cSbjh21 float_raise( float_flag_invalid ); 1769936b7f4cSbjh21 return float64_default_nan; 1770936b7f4cSbjh21 } 1771936b7f4cSbjh21 if ( aExp == 0 ) { 1772936b7f4cSbjh21 aExp = 1; 1773936b7f4cSbjh21 bExp = 1; 1774936b7f4cSbjh21 } 1775936b7f4cSbjh21 if ( bSig0 < aSig0 ) goto aBigger; 1776936b7f4cSbjh21 if ( aSig0 < bSig0 ) goto bBigger; 1777936b7f4cSbjh21 if ( bSig1 < aSig1 ) goto aBigger; 1778936b7f4cSbjh21 if ( aSig1 < bSig1 ) goto bBigger; 1779936b7f4cSbjh21 return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 ); 1780936b7f4cSbjh21 bExpBigger: 1781936b7f4cSbjh21 if ( bExp == 0x7FF ) { 1782936b7f4cSbjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1783936b7f4cSbjh21 return packFloat64( zSign ^ 1, 0x7FF, 0, 0 ); 1784936b7f4cSbjh21 } 1785936b7f4cSbjh21 if ( aExp == 0 ) { 1786936b7f4cSbjh21 ++expDiff; 1787936b7f4cSbjh21 } 1788936b7f4cSbjh21 else { 1789936b7f4cSbjh21 aSig0 |= 0x40000000; 1790936b7f4cSbjh21 } 1791936b7f4cSbjh21 shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); 1792936b7f4cSbjh21 bSig0 |= 0x40000000; 1793936b7f4cSbjh21 bBigger: 1794936b7f4cSbjh21 sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); 1795936b7f4cSbjh21 zExp = bExp; 1796936b7f4cSbjh21 zSign ^= 1; 1797936b7f4cSbjh21 goto normalizeRoundAndPack; 1798936b7f4cSbjh21 aExpBigger: 1799936b7f4cSbjh21 if ( aExp == 0x7FF ) { 1800936b7f4cSbjh21 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); 1801936b7f4cSbjh21 return a; 1802936b7f4cSbjh21 } 1803936b7f4cSbjh21 if ( bExp == 0 ) { 1804936b7f4cSbjh21 --expDiff; 1805936b7f4cSbjh21 } 1806936b7f4cSbjh21 else { 1807936b7f4cSbjh21 bSig0 |= 0x40000000; 1808936b7f4cSbjh21 } 1809936b7f4cSbjh21 shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); 1810936b7f4cSbjh21 aSig0 |= 0x40000000; 1811936b7f4cSbjh21 aBigger: 1812936b7f4cSbjh21 sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 1813936b7f4cSbjh21 zExp = aExp; 1814936b7f4cSbjh21 normalizeRoundAndPack: 1815936b7f4cSbjh21 --zExp; 1816936b7f4cSbjh21 return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 ); 1817936b7f4cSbjh21 1818936b7f4cSbjh21 } 1819936b7f4cSbjh21 1820936b7f4cSbjh21 /* 1821936b7f4cSbjh21 ------------------------------------------------------------------------------- 1822936b7f4cSbjh21 Returns the result of adding the double-precision floating-point values `a' 1823936b7f4cSbjh21 and `b'. The operation is performed according to the IEC/IEEE Standard for 1824936b7f4cSbjh21 Binary Floating-Point Arithmetic. 1825936b7f4cSbjh21 ------------------------------------------------------------------------------- 1826936b7f4cSbjh21 */ 1827936b7f4cSbjh21 float64 float64_add( float64 a, float64 b ) 1828936b7f4cSbjh21 { 1829936b7f4cSbjh21 flag aSign, bSign; 1830936b7f4cSbjh21 1831936b7f4cSbjh21 aSign = extractFloat64Sign( a ); 1832936b7f4cSbjh21 bSign = extractFloat64Sign( b ); 1833936b7f4cSbjh21 if ( aSign == bSign ) { 1834936b7f4cSbjh21 return addFloat64Sigs( a, b, aSign ); 1835936b7f4cSbjh21 } 1836936b7f4cSbjh21 else { 1837936b7f4cSbjh21 return subFloat64Sigs( a, b, aSign ); 1838936b7f4cSbjh21 } 1839936b7f4cSbjh21 1840936b7f4cSbjh21 } 1841936b7f4cSbjh21 1842936b7f4cSbjh21 /* 1843936b7f4cSbjh21 ------------------------------------------------------------------------------- 1844936b7f4cSbjh21 Returns the result of subtracting the double-precision floating-point values 1845936b7f4cSbjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 1846936b7f4cSbjh21 for Binary Floating-Point Arithmetic. 1847936b7f4cSbjh21 ------------------------------------------------------------------------------- 1848936b7f4cSbjh21 */ 1849936b7f4cSbjh21 float64 float64_sub( float64 a, float64 b ) 1850936b7f4cSbjh21 { 1851936b7f4cSbjh21 flag aSign, bSign; 1852936b7f4cSbjh21 1853936b7f4cSbjh21 aSign = extractFloat64Sign( a ); 1854936b7f4cSbjh21 bSign = extractFloat64Sign( b ); 1855936b7f4cSbjh21 if ( aSign == bSign ) { 1856936b7f4cSbjh21 return subFloat64Sigs( a, b, aSign ); 1857936b7f4cSbjh21 } 1858936b7f4cSbjh21 else { 1859936b7f4cSbjh21 return addFloat64Sigs( a, b, aSign ); 1860936b7f4cSbjh21 } 1861936b7f4cSbjh21 1862936b7f4cSbjh21 } 1863936b7f4cSbjh21 1864936b7f4cSbjh21 /* 1865936b7f4cSbjh21 ------------------------------------------------------------------------------- 1866936b7f4cSbjh21 Returns the result of multiplying the double-precision floating-point values 1867936b7f4cSbjh21 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 1868936b7f4cSbjh21 for Binary Floating-Point Arithmetic. 1869936b7f4cSbjh21 ------------------------------------------------------------------------------- 1870936b7f4cSbjh21 */ 1871936b7f4cSbjh21 float64 float64_mul( float64 a, float64 b ) 1872936b7f4cSbjh21 { 1873936b7f4cSbjh21 flag aSign, bSign, zSign; 1874936b7f4cSbjh21 int16 aExp, bExp, zExp; 1875936b7f4cSbjh21 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; 1876936b7f4cSbjh21 1877936b7f4cSbjh21 aSig1 = extractFloat64Frac1( a ); 1878936b7f4cSbjh21 aSig0 = extractFloat64Frac0( a ); 1879936b7f4cSbjh21 aExp = extractFloat64Exp( a ); 1880936b7f4cSbjh21 aSign = extractFloat64Sign( a ); 1881936b7f4cSbjh21 bSig1 = extractFloat64Frac1( b ); 1882936b7f4cSbjh21 bSig0 = extractFloat64Frac0( b ); 1883936b7f4cSbjh21 bExp = extractFloat64Exp( b ); 1884936b7f4cSbjh21 bSign = extractFloat64Sign( b ); 1885936b7f4cSbjh21 zSign = aSign ^ bSign; 1886936b7f4cSbjh21 if ( aExp == 0x7FF ) { 1887936b7f4cSbjh21 if ( ( aSig0 | aSig1 ) 1888936b7f4cSbjh21 || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) { 1889936b7f4cSbjh21 return propagateFloat64NaN( a, b ); 1890936b7f4cSbjh21 } 1891936b7f4cSbjh21 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; 1892936b7f4cSbjh21 return packFloat64( zSign, 0x7FF, 0, 0 ); 1893936b7f4cSbjh21 } 1894936b7f4cSbjh21 if ( bExp == 0x7FF ) { 1895936b7f4cSbjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1896936b7f4cSbjh21 if ( ( aExp | aSig0 | aSig1 ) == 0 ) { 1897936b7f4cSbjh21 invalid: 1898936b7f4cSbjh21 float_raise( float_flag_invalid ); 1899936b7f4cSbjh21 return float64_default_nan; 1900936b7f4cSbjh21 } 1901936b7f4cSbjh21 return packFloat64( zSign, 0x7FF, 0, 0 ); 1902936b7f4cSbjh21 } 1903936b7f4cSbjh21 if ( aExp == 0 ) { 1904936b7f4cSbjh21 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); 1905936b7f4cSbjh21 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 1906936b7f4cSbjh21 } 1907936b7f4cSbjh21 if ( bExp == 0 ) { 1908936b7f4cSbjh21 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); 1909936b7f4cSbjh21 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 1910936b7f4cSbjh21 } 1911936b7f4cSbjh21 zExp = aExp + bExp - 0x400; 1912936b7f4cSbjh21 aSig0 |= 0x00100000; 1913936b7f4cSbjh21 shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 ); 1914936b7f4cSbjh21 mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); 1915936b7f4cSbjh21 add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); 1916936b7f4cSbjh21 zSig2 |= ( zSig3 != 0 ); 1917936b7f4cSbjh21 if ( 0x00200000 <= zSig0 ) { 1918936b7f4cSbjh21 shift64ExtraRightJamming( 1919936b7f4cSbjh21 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 1920936b7f4cSbjh21 ++zExp; 1921936b7f4cSbjh21 } 1922936b7f4cSbjh21 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 1923936b7f4cSbjh21 1924936b7f4cSbjh21 } 1925936b7f4cSbjh21 1926936b7f4cSbjh21 /* 1927936b7f4cSbjh21 ------------------------------------------------------------------------------- 1928936b7f4cSbjh21 Returns the result of dividing the double-precision floating-point value `a' 1929936b7f4cSbjh21 by the corresponding value `b'. The operation is performed according to the 1930936b7f4cSbjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1931936b7f4cSbjh21 ------------------------------------------------------------------------------- 1932936b7f4cSbjh21 */ 1933936b7f4cSbjh21 float64 float64_div( float64 a, float64 b ) 1934936b7f4cSbjh21 { 1935936b7f4cSbjh21 flag aSign, bSign, zSign; 1936936b7f4cSbjh21 int16 aExp, bExp, zExp; 1937936b7f4cSbjh21 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 1938936b7f4cSbjh21 bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 1939936b7f4cSbjh21 1940936b7f4cSbjh21 aSig1 = extractFloat64Frac1( a ); 1941936b7f4cSbjh21 aSig0 = extractFloat64Frac0( a ); 1942936b7f4cSbjh21 aExp = extractFloat64Exp( a ); 1943936b7f4cSbjh21 aSign = extractFloat64Sign( a ); 1944936b7f4cSbjh21 bSig1 = extractFloat64Frac1( b ); 1945936b7f4cSbjh21 bSig0 = extractFloat64Frac0( b ); 1946936b7f4cSbjh21 bExp = extractFloat64Exp( b ); 1947936b7f4cSbjh21 bSign = extractFloat64Sign( b ); 1948936b7f4cSbjh21 zSign = aSign ^ bSign; 1949936b7f4cSbjh21 if ( aExp == 0x7FF ) { 1950936b7f4cSbjh21 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); 1951936b7f4cSbjh21 if ( bExp == 0x7FF ) { 1952936b7f4cSbjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1953936b7f4cSbjh21 goto invalid; 1954936b7f4cSbjh21 } 1955936b7f4cSbjh21 return packFloat64( zSign, 0x7FF, 0, 0 ); 1956936b7f4cSbjh21 } 1957936b7f4cSbjh21 if ( bExp == 0x7FF ) { 1958936b7f4cSbjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1959936b7f4cSbjh21 return packFloat64( zSign, 0, 0, 0 ); 1960936b7f4cSbjh21 } 1961936b7f4cSbjh21 if ( bExp == 0 ) { 1962936b7f4cSbjh21 if ( ( bSig0 | bSig1 ) == 0 ) { 1963936b7f4cSbjh21 if ( ( aExp | aSig0 | aSig1 ) == 0 ) { 1964936b7f4cSbjh21 invalid: 1965936b7f4cSbjh21 float_raise( float_flag_invalid ); 1966936b7f4cSbjh21 return float64_default_nan; 1967936b7f4cSbjh21 } 1968936b7f4cSbjh21 float_raise( float_flag_divbyzero ); 1969936b7f4cSbjh21 return packFloat64( zSign, 0x7FF, 0, 0 ); 1970936b7f4cSbjh21 } 1971936b7f4cSbjh21 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 1972936b7f4cSbjh21 } 1973936b7f4cSbjh21 if ( aExp == 0 ) { 1974936b7f4cSbjh21 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); 1975936b7f4cSbjh21 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 1976936b7f4cSbjh21 } 1977936b7f4cSbjh21 zExp = aExp - bExp + 0x3FD; 1978936b7f4cSbjh21 shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 ); 1979936b7f4cSbjh21 shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 ); 1980936b7f4cSbjh21 if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) { 1981936b7f4cSbjh21 shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); 1982936b7f4cSbjh21 ++zExp; 1983936b7f4cSbjh21 } 1984936b7f4cSbjh21 zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 ); 1985936b7f4cSbjh21 mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); 1986936b7f4cSbjh21 sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); 1987936b7f4cSbjh21 while ( (sbits32) rem0 < 0 ) { 1988936b7f4cSbjh21 --zSig0; 1989936b7f4cSbjh21 add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); 1990936b7f4cSbjh21 } 1991936b7f4cSbjh21 zSig1 = estimateDiv64To32( rem1, rem2, bSig0 ); 1992936b7f4cSbjh21 if ( ( zSig1 & 0x3FF ) <= 4 ) { 1993936b7f4cSbjh21 mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); 1994936b7f4cSbjh21 sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); 1995936b7f4cSbjh21 while ( (sbits32) rem1 < 0 ) { 1996936b7f4cSbjh21 --zSig1; 1997936b7f4cSbjh21 add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); 1998936b7f4cSbjh21 } 1999936b7f4cSbjh21 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 2000936b7f4cSbjh21 } 2001936b7f4cSbjh21 shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 ); 2002936b7f4cSbjh21 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 2003936b7f4cSbjh21 2004936b7f4cSbjh21 } 2005936b7f4cSbjh21 2006936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC 2007936b7f4cSbjh21 /* 2008936b7f4cSbjh21 ------------------------------------------------------------------------------- 2009936b7f4cSbjh21 Returns the remainder of the double-precision floating-point value `a' 2010936b7f4cSbjh21 with respect to the corresponding value `b'. The operation is performed 2011936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2012936b7f4cSbjh21 ------------------------------------------------------------------------------- 2013936b7f4cSbjh21 */ 2014936b7f4cSbjh21 float64 float64_rem( float64 a, float64 b ) 2015936b7f4cSbjh21 { 2016936b7f4cSbjh21 flag aSign, bSign, zSign; 2017936b7f4cSbjh21 int16 aExp, bExp, expDiff; 2018936b7f4cSbjh21 bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; 2019936b7f4cSbjh21 bits32 allZero, alternateASig0, alternateASig1, sigMean1; 2020936b7f4cSbjh21 sbits32 sigMean0; 2021936b7f4cSbjh21 float64 z; 2022936b7f4cSbjh21 2023936b7f4cSbjh21 aSig1 = extractFloat64Frac1( a ); 2024936b7f4cSbjh21 aSig0 = extractFloat64Frac0( a ); 2025936b7f4cSbjh21 aExp = extractFloat64Exp( a ); 2026936b7f4cSbjh21 aSign = extractFloat64Sign( a ); 2027936b7f4cSbjh21 bSig1 = extractFloat64Frac1( b ); 2028936b7f4cSbjh21 bSig0 = extractFloat64Frac0( b ); 2029936b7f4cSbjh21 bExp = extractFloat64Exp( b ); 2030936b7f4cSbjh21 bSign = extractFloat64Sign( b ); 2031936b7f4cSbjh21 if ( aExp == 0x7FF ) { 2032936b7f4cSbjh21 if ( ( aSig0 | aSig1 ) 2033936b7f4cSbjh21 || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) { 2034936b7f4cSbjh21 return propagateFloat64NaN( a, b ); 2035936b7f4cSbjh21 } 2036936b7f4cSbjh21 goto invalid; 2037936b7f4cSbjh21 } 2038936b7f4cSbjh21 if ( bExp == 0x7FF ) { 2039936b7f4cSbjh21 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 2040936b7f4cSbjh21 return a; 2041936b7f4cSbjh21 } 2042936b7f4cSbjh21 if ( bExp == 0 ) { 2043936b7f4cSbjh21 if ( ( bSig0 | bSig1 ) == 0 ) { 2044936b7f4cSbjh21 invalid: 2045936b7f4cSbjh21 float_raise( float_flag_invalid ); 2046936b7f4cSbjh21 return float64_default_nan; 2047936b7f4cSbjh21 } 2048936b7f4cSbjh21 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 2049936b7f4cSbjh21 } 2050936b7f4cSbjh21 if ( aExp == 0 ) { 2051936b7f4cSbjh21 if ( ( aSig0 | aSig1 ) == 0 ) return a; 2052936b7f4cSbjh21 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 2053936b7f4cSbjh21 } 2054936b7f4cSbjh21 expDiff = aExp - bExp; 2055936b7f4cSbjh21 if ( expDiff < -1 ) return a; 2056936b7f4cSbjh21 shortShift64Left( 2057936b7f4cSbjh21 aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 ); 2058936b7f4cSbjh21 shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 ); 2059936b7f4cSbjh21 q = le64( bSig0, bSig1, aSig0, aSig1 ); 2060936b7f4cSbjh21 if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); 2061936b7f4cSbjh21 expDiff -= 32; 2062936b7f4cSbjh21 while ( 0 < expDiff ) { 2063936b7f4cSbjh21 q = estimateDiv64To32( aSig0, aSig1, bSig0 ); 2064936b7f4cSbjh21 q = ( 4 < q ) ? q - 4 : 0; 2065936b7f4cSbjh21 mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 ); 2066936b7f4cSbjh21 shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero ); 2067936b7f4cSbjh21 shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero ); 2068936b7f4cSbjh21 sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 ); 2069936b7f4cSbjh21 expDiff -= 29; 2070936b7f4cSbjh21 } 2071936b7f4cSbjh21 if ( -32 < expDiff ) { 2072936b7f4cSbjh21 q = estimateDiv64To32( aSig0, aSig1, bSig0 ); 2073936b7f4cSbjh21 q = ( 4 < q ) ? q - 4 : 0; 2074936b7f4cSbjh21 q >>= - expDiff; 2075936b7f4cSbjh21 shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 ); 2076936b7f4cSbjh21 expDiff += 24; 2077936b7f4cSbjh21 if ( expDiff < 0 ) { 2078936b7f4cSbjh21 shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); 2079936b7f4cSbjh21 } 2080936b7f4cSbjh21 else { 2081936b7f4cSbjh21 shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); 2082936b7f4cSbjh21 } 2083936b7f4cSbjh21 mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 ); 2084936b7f4cSbjh21 sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); 2085936b7f4cSbjh21 } 2086936b7f4cSbjh21 else { 2087936b7f4cSbjh21 shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 ); 2088936b7f4cSbjh21 shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 ); 2089936b7f4cSbjh21 } 2090936b7f4cSbjh21 do { 2091936b7f4cSbjh21 alternateASig0 = aSig0; 2092936b7f4cSbjh21 alternateASig1 = aSig1; 2093936b7f4cSbjh21 ++q; 2094936b7f4cSbjh21 sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); 2095936b7f4cSbjh21 } while ( 0 <= (sbits32) aSig0 ); 2096936b7f4cSbjh21 add64( 2097936b7f4cSbjh21 aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 ); 2098936b7f4cSbjh21 if ( ( sigMean0 < 0 ) 2099936b7f4cSbjh21 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { 2100936b7f4cSbjh21 aSig0 = alternateASig0; 2101936b7f4cSbjh21 aSig1 = alternateASig1; 2102936b7f4cSbjh21 } 2103936b7f4cSbjh21 zSign = ( (sbits32) aSig0 < 0 ); 2104936b7f4cSbjh21 if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); 2105936b7f4cSbjh21 return 2106936b7f4cSbjh21 normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 ); 2107936b7f4cSbjh21 2108936b7f4cSbjh21 } 2109936b7f4cSbjh21 #endif 2110936b7f4cSbjh21 2111936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC 2112936b7f4cSbjh21 /* 2113936b7f4cSbjh21 ------------------------------------------------------------------------------- 2114936b7f4cSbjh21 Returns the square root of the double-precision floating-point value `a'. 2115936b7f4cSbjh21 The operation is performed according to the IEC/IEEE Standard for Binary 2116936b7f4cSbjh21 Floating-Point Arithmetic. 2117936b7f4cSbjh21 ------------------------------------------------------------------------------- 2118936b7f4cSbjh21 */ 2119936b7f4cSbjh21 float64 float64_sqrt( float64 a ) 2120936b7f4cSbjh21 { 2121936b7f4cSbjh21 flag aSign; 2122936b7f4cSbjh21 int16 aExp, zExp; 2123936b7f4cSbjh21 bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; 2124936b7f4cSbjh21 bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 2125936b7f4cSbjh21 float64 z; 2126936b7f4cSbjh21 2127936b7f4cSbjh21 aSig1 = extractFloat64Frac1( a ); 2128936b7f4cSbjh21 aSig0 = extractFloat64Frac0( a ); 2129936b7f4cSbjh21 aExp = extractFloat64Exp( a ); 2130936b7f4cSbjh21 aSign = extractFloat64Sign( a ); 2131936b7f4cSbjh21 if ( aExp == 0x7FF ) { 2132936b7f4cSbjh21 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a ); 2133936b7f4cSbjh21 if ( ! aSign ) return a; 2134936b7f4cSbjh21 goto invalid; 2135936b7f4cSbjh21 } 2136936b7f4cSbjh21 if ( aSign ) { 2137936b7f4cSbjh21 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; 2138936b7f4cSbjh21 invalid: 2139936b7f4cSbjh21 float_raise( float_flag_invalid ); 2140936b7f4cSbjh21 return float64_default_nan; 2141936b7f4cSbjh21 } 2142936b7f4cSbjh21 if ( aExp == 0 ) { 2143936b7f4cSbjh21 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 ); 2144936b7f4cSbjh21 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 2145936b7f4cSbjh21 } 2146936b7f4cSbjh21 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; 2147936b7f4cSbjh21 aSig0 |= 0x00100000; 2148936b7f4cSbjh21 shortShift64Left( aSig0, aSig1, 11, &term0, &term1 ); 2149936b7f4cSbjh21 zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1; 2150936b7f4cSbjh21 if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF; 2151936b7f4cSbjh21 doubleZSig0 = zSig0 + zSig0; 2152936b7f4cSbjh21 shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 ); 2153936b7f4cSbjh21 mul32To64( zSig0, zSig0, &term0, &term1 ); 2154936b7f4cSbjh21 sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 ); 2155936b7f4cSbjh21 while ( (sbits32) rem0 < 0 ) { 2156936b7f4cSbjh21 --zSig0; 2157936b7f4cSbjh21 doubleZSig0 -= 2; 2158936b7f4cSbjh21 add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 ); 2159936b7f4cSbjh21 } 2160936b7f4cSbjh21 zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 ); 2161936b7f4cSbjh21 if ( ( zSig1 & 0x1FF ) <= 5 ) { 2162936b7f4cSbjh21 if ( zSig1 == 0 ) zSig1 = 1; 2163936b7f4cSbjh21 mul32To64( doubleZSig0, zSig1, &term1, &term2 ); 2164936b7f4cSbjh21 sub64( rem1, 0, term1, term2, &rem1, &rem2 ); 2165936b7f4cSbjh21 mul32To64( zSig1, zSig1, &term2, &term3 ); 2166936b7f4cSbjh21 sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); 2167936b7f4cSbjh21 while ( (sbits32) rem1 < 0 ) { 2168936b7f4cSbjh21 --zSig1; 2169936b7f4cSbjh21 shortShift64Left( 0, zSig1, 1, &term2, &term3 ); 2170936b7f4cSbjh21 term3 |= 1; 2171936b7f4cSbjh21 term2 |= doubleZSig0; 2172936b7f4cSbjh21 add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); 2173936b7f4cSbjh21 } 2174936b7f4cSbjh21 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 2175936b7f4cSbjh21 } 2176936b7f4cSbjh21 shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 ); 2177936b7f4cSbjh21 return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 ); 2178936b7f4cSbjh21 2179936b7f4cSbjh21 } 2180936b7f4cSbjh21 #endif 2181936b7f4cSbjh21 2182936b7f4cSbjh21 /* 2183936b7f4cSbjh21 ------------------------------------------------------------------------------- 2184936b7f4cSbjh21 Returns 1 if the double-precision floating-point value `a' is equal to 2185936b7f4cSbjh21 the corresponding value `b', and 0 otherwise. The comparison is performed 2186936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2187936b7f4cSbjh21 ------------------------------------------------------------------------------- 2188936b7f4cSbjh21 */ 2189936b7f4cSbjh21 flag float64_eq( float64 a, float64 b ) 2190936b7f4cSbjh21 { 2191936b7f4cSbjh21 2192936b7f4cSbjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2193936b7f4cSbjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2194936b7f4cSbjh21 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2195936b7f4cSbjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2196936b7f4cSbjh21 ) { 2197936b7f4cSbjh21 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 2198936b7f4cSbjh21 float_raise( float_flag_invalid ); 2199936b7f4cSbjh21 } 2200936b7f4cSbjh21 return 0; 2201936b7f4cSbjh21 } 2202936b7f4cSbjh21 return ( a == b ) || 2203936b7f4cSbjh21 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 ); 2204936b7f4cSbjh21 2205936b7f4cSbjh21 } 2206936b7f4cSbjh21 2207936b7f4cSbjh21 /* 2208936b7f4cSbjh21 ------------------------------------------------------------------------------- 2209936b7f4cSbjh21 Returns 1 if the double-precision floating-point value `a' is less than 2210936b7f4cSbjh21 or equal to the corresponding value `b', and 0 otherwise. The comparison 2211936b7f4cSbjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point 2212936b7f4cSbjh21 Arithmetic. 2213936b7f4cSbjh21 ------------------------------------------------------------------------------- 2214936b7f4cSbjh21 */ 2215936b7f4cSbjh21 flag float64_le( float64 a, float64 b ) 2216936b7f4cSbjh21 { 2217936b7f4cSbjh21 flag aSign, bSign; 2218936b7f4cSbjh21 2219936b7f4cSbjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2220936b7f4cSbjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2221936b7f4cSbjh21 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2222936b7f4cSbjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2223936b7f4cSbjh21 ) { 2224936b7f4cSbjh21 float_raise( float_flag_invalid ); 2225936b7f4cSbjh21 return 0; 2226936b7f4cSbjh21 } 2227936b7f4cSbjh21 aSign = extractFloat64Sign( a ); 2228936b7f4cSbjh21 bSign = extractFloat64Sign( b ); 2229936b7f4cSbjh21 if ( aSign != bSign ) 2230936b7f4cSbjh21 return aSign || 2231936b7f4cSbjh21 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 2232936b7f4cSbjh21 0 ); 2233936b7f4cSbjh21 return ( a == b ) || 2234936b7f4cSbjh21 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); 2235936b7f4cSbjh21 } 2236936b7f4cSbjh21 2237936b7f4cSbjh21 /* 2238936b7f4cSbjh21 ------------------------------------------------------------------------------- 2239936b7f4cSbjh21 Returns 1 if the double-precision floating-point value `a' is less than 2240936b7f4cSbjh21 the corresponding value `b', and 0 otherwise. The comparison is performed 2241936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2242936b7f4cSbjh21 ------------------------------------------------------------------------------- 2243936b7f4cSbjh21 */ 2244936b7f4cSbjh21 flag float64_lt( float64 a, float64 b ) 2245936b7f4cSbjh21 { 2246936b7f4cSbjh21 flag aSign, bSign; 2247936b7f4cSbjh21 2248936b7f4cSbjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2249936b7f4cSbjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2250936b7f4cSbjh21 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2251936b7f4cSbjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2252936b7f4cSbjh21 ) { 2253936b7f4cSbjh21 float_raise( float_flag_invalid ); 2254936b7f4cSbjh21 return 0; 2255936b7f4cSbjh21 } 2256936b7f4cSbjh21 aSign = extractFloat64Sign( a ); 2257936b7f4cSbjh21 bSign = extractFloat64Sign( b ); 2258936b7f4cSbjh21 if ( aSign != bSign ) 2259936b7f4cSbjh21 return aSign && 2260936b7f4cSbjh21 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) != 2261936b7f4cSbjh21 0 ); 2262936b7f4cSbjh21 return ( a != b ) && 2263936b7f4cSbjh21 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); 2264936b7f4cSbjh21 2265936b7f4cSbjh21 } 2266936b7f4cSbjh21 2267936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC 2268936b7f4cSbjh21 /* 2269936b7f4cSbjh21 ------------------------------------------------------------------------------- 2270936b7f4cSbjh21 Returns 1 if the double-precision floating-point value `a' is equal to 2271936b7f4cSbjh21 the corresponding value `b', and 0 otherwise. The invalid exception is 2272936b7f4cSbjh21 raised if either operand is a NaN. Otherwise, the comparison is performed 2273936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2274936b7f4cSbjh21 ------------------------------------------------------------------------------- 2275936b7f4cSbjh21 */ 2276936b7f4cSbjh21 flag float64_eq_signaling( float64 a, float64 b ) 2277936b7f4cSbjh21 { 2278936b7f4cSbjh21 2279936b7f4cSbjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2280936b7f4cSbjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2281936b7f4cSbjh21 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2282936b7f4cSbjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2283936b7f4cSbjh21 ) { 2284936b7f4cSbjh21 float_raise( float_flag_invalid ); 2285936b7f4cSbjh21 return 0; 2286936b7f4cSbjh21 } 2287936b7f4cSbjh21 return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); 2288936b7f4cSbjh21 2289936b7f4cSbjh21 } 2290936b7f4cSbjh21 2291936b7f4cSbjh21 /* 2292936b7f4cSbjh21 ------------------------------------------------------------------------------- 2293936b7f4cSbjh21 Returns 1 if the double-precision floating-point value `a' is less than or 2294936b7f4cSbjh21 equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 2295936b7f4cSbjh21 cause an exception. Otherwise, the comparison is performed according to the 2296936b7f4cSbjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2297936b7f4cSbjh21 ------------------------------------------------------------------------------- 2298936b7f4cSbjh21 */ 2299936b7f4cSbjh21 flag float64_le_quiet( float64 a, float64 b ) 2300936b7f4cSbjh21 { 2301936b7f4cSbjh21 flag aSign, bSign; 2302936b7f4cSbjh21 2303936b7f4cSbjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2304936b7f4cSbjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2305936b7f4cSbjh21 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2306936b7f4cSbjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2307936b7f4cSbjh21 ) { 2308936b7f4cSbjh21 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 2309936b7f4cSbjh21 float_raise( float_flag_invalid ); 2310936b7f4cSbjh21 } 2311936b7f4cSbjh21 return 0; 2312936b7f4cSbjh21 } 2313936b7f4cSbjh21 aSign = extractFloat64Sign( a ); 2314936b7f4cSbjh21 bSign = extractFloat64Sign( b ); 2315936b7f4cSbjh21 if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); 2316936b7f4cSbjh21 return ( a == b ) || ( aSign ^ ( a < b ) ); 2317936b7f4cSbjh21 2318936b7f4cSbjh21 } 2319936b7f4cSbjh21 2320936b7f4cSbjh21 /* 2321936b7f4cSbjh21 ------------------------------------------------------------------------------- 2322936b7f4cSbjh21 Returns 1 if the double-precision floating-point value `a' is less than 2323936b7f4cSbjh21 the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 2324936b7f4cSbjh21 exception. Otherwise, the comparison is performed according to the IEC/IEEE 2325936b7f4cSbjh21 Standard for Binary Floating-Point Arithmetic. 2326936b7f4cSbjh21 ------------------------------------------------------------------------------- 2327936b7f4cSbjh21 */ 2328936b7f4cSbjh21 flag float64_lt_quiet( float64 a, float64 b ) 2329936b7f4cSbjh21 { 2330936b7f4cSbjh21 flag aSign, bSign; 2331936b7f4cSbjh21 2332936b7f4cSbjh21 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2333936b7f4cSbjh21 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2334936b7f4cSbjh21 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2335936b7f4cSbjh21 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2336936b7f4cSbjh21 ) { 2337936b7f4cSbjh21 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 2338936b7f4cSbjh21 float_raise( float_flag_invalid ); 2339936b7f4cSbjh21 } 2340936b7f4cSbjh21 return 0; 2341936b7f4cSbjh21 } 2342936b7f4cSbjh21 aSign = extractFloat64Sign( a ); 2343936b7f4cSbjh21 bSign = extractFloat64Sign( b ); 2344936b7f4cSbjh21 if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); 2345936b7f4cSbjh21 return ( a != b ) && ( aSign ^ ( a < b ) ); 2346936b7f4cSbjh21 2347936b7f4cSbjh21 } 2348936b7f4cSbjh21 2349936b7f4cSbjh21 #endif 2350