1*08daf0c2Sdrahn /* $OpenBSD: softfloat.c,v 1.1 2006/11/06 15:11:37 drahn Exp $ */ 2*08daf0c2Sdrahn /* $NetBSD: softfloat.c,v 1.1 2002/05/21 23:51:07 bjh21 Exp $ */ 3*08daf0c2Sdrahn 4*08daf0c2Sdrahn /* 5*08daf0c2Sdrahn * This version hacked for use with gcc -msoft-float by bjh21. 6*08daf0c2Sdrahn * (Mostly a case of #ifdefing out things GCC doesn't need or provides 7*08daf0c2Sdrahn * itself). 8*08daf0c2Sdrahn */ 9*08daf0c2Sdrahn 10*08daf0c2Sdrahn /* 11*08daf0c2Sdrahn * Things you may want to define: 12*08daf0c2Sdrahn * 13*08daf0c2Sdrahn * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with 14*08daf0c2Sdrahn * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them 15*08daf0c2Sdrahn * properly renamed. 16*08daf0c2Sdrahn */ 17*08daf0c2Sdrahn 18*08daf0c2Sdrahn /* 19*08daf0c2Sdrahn * This differs from the standard bits32/softfloat.c in that float64 20*08daf0c2Sdrahn * is defined to be a 64-bit integer rather than a structure. The 21*08daf0c2Sdrahn * structure is float64s, with translation between the two going via 22*08daf0c2Sdrahn * float64u. 23*08daf0c2Sdrahn */ 24*08daf0c2Sdrahn 25*08daf0c2Sdrahn /* 26*08daf0c2Sdrahn =============================================================================== 27*08daf0c2Sdrahn 28*08daf0c2Sdrahn This C source file is part of the SoftFloat IEC/IEEE Floating-Point 29*08daf0c2Sdrahn Arithmetic Package, Release 2a. 30*08daf0c2Sdrahn 31*08daf0c2Sdrahn Written by John R. Hauser. This work was made possible in part by the 32*08daf0c2Sdrahn International Computer Science Institute, located at Suite 600, 1947 Center 33*08daf0c2Sdrahn Street, Berkeley, California 94704. Funding was partially provided by the 34*08daf0c2Sdrahn National Science Foundation under grant MIP-9311980. The original version 35*08daf0c2Sdrahn of this code was written as part of a project to build a fixed-point vector 36*08daf0c2Sdrahn processor in collaboration with the University of California at Berkeley, 37*08daf0c2Sdrahn overseen by Profs. Nelson Morgan and John Wawrzynek. More information 38*08daf0c2Sdrahn is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 39*08daf0c2Sdrahn arithmetic/SoftFloat.html'. 40*08daf0c2Sdrahn 41*08daf0c2Sdrahn THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 42*08daf0c2Sdrahn has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 43*08daf0c2Sdrahn TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 44*08daf0c2Sdrahn PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 45*08daf0c2Sdrahn AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 46*08daf0c2Sdrahn 47*08daf0c2Sdrahn Derivative works are acceptable, even for commercial purposes, so long as 48*08daf0c2Sdrahn (1) they include prominent notice that the work is derivative, and (2) they 49*08daf0c2Sdrahn include prominent notice akin to these four paragraphs for those parts of 50*08daf0c2Sdrahn this code that are retained. 51*08daf0c2Sdrahn 52*08daf0c2Sdrahn =============================================================================== 53*08daf0c2Sdrahn */ 54*08daf0c2Sdrahn 55*08daf0c2Sdrahn #include <sys/cdefs.h> 56*08daf0c2Sdrahn 57*08daf0c2Sdrahn #ifdef SOFTFLOAT_FOR_GCC 58*08daf0c2Sdrahn #include "softfloat-for-gcc.h" 59*08daf0c2Sdrahn #endif 60*08daf0c2Sdrahn 61*08daf0c2Sdrahn #include "milieu.h" 62*08daf0c2Sdrahn #include "softfloat.h" 63*08daf0c2Sdrahn 64*08daf0c2Sdrahn /* 65*08daf0c2Sdrahn * Conversions between floats as stored in memory and floats as 66*08daf0c2Sdrahn * SoftFloat uses them 67*08daf0c2Sdrahn */ 68*08daf0c2Sdrahn #ifndef FLOAT64_DEMANGLE 69*08daf0c2Sdrahn #define FLOAT64_DEMANGLE(a) (a) 70*08daf0c2Sdrahn #endif 71*08daf0c2Sdrahn #ifndef FLOAT64_MANGLE 72*08daf0c2Sdrahn #define FLOAT64_MANGLE(a) (a) 73*08daf0c2Sdrahn #endif 74*08daf0c2Sdrahn 75*08daf0c2Sdrahn /* 76*08daf0c2Sdrahn ------------------------------------------------------------------------------- 77*08daf0c2Sdrahn Primitive arithmetic functions, including multi-word arithmetic, and 78*08daf0c2Sdrahn division and square root approximations. (Can be specialized to target if 79*08daf0c2Sdrahn desired.) 80*08daf0c2Sdrahn ------------------------------------------------------------------------------- 81*08daf0c2Sdrahn */ 82*08daf0c2Sdrahn #include "softfloat-macros.h" 83*08daf0c2Sdrahn 84*08daf0c2Sdrahn /* 85*08daf0c2Sdrahn ------------------------------------------------------------------------------- 86*08daf0c2Sdrahn Functions and definitions to determine: (1) whether tininess for underflow 87*08daf0c2Sdrahn is detected before or after rounding by default, (2) what (if anything) 88*08daf0c2Sdrahn happens when exceptions are raised, (3) how signaling NaNs are distinguished 89*08daf0c2Sdrahn from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs 90*08daf0c2Sdrahn are propagated from function inputs to output. These details are target- 91*08daf0c2Sdrahn specific. 92*08daf0c2Sdrahn ------------------------------------------------------------------------------- 93*08daf0c2Sdrahn */ 94*08daf0c2Sdrahn #include "softfloat-specialize.h" 95*08daf0c2Sdrahn 96*08daf0c2Sdrahn /* 97*08daf0c2Sdrahn ------------------------------------------------------------------------------- 98*08daf0c2Sdrahn Floating-point rounding mode and exception flags. 99*08daf0c2Sdrahn ------------------------------------------------------------------------------- 100*08daf0c2Sdrahn */ 101*08daf0c2Sdrahn fp_rnd float_rounding_mode = float_round_nearest_even; 102*08daf0c2Sdrahn fp_except float_exception_flags = 0; 103*08daf0c2Sdrahn 104*08daf0c2Sdrahn /* 105*08daf0c2Sdrahn ------------------------------------------------------------------------------- 106*08daf0c2Sdrahn Returns the fraction bits of the single-precision floating-point value `a'. 107*08daf0c2Sdrahn ------------------------------------------------------------------------------- 108*08daf0c2Sdrahn */ 109*08daf0c2Sdrahn INLINE bits32 extractFloat32Frac( float32 a ) 110*08daf0c2Sdrahn { 111*08daf0c2Sdrahn 112*08daf0c2Sdrahn return a & 0x007FFFFF; 113*08daf0c2Sdrahn 114*08daf0c2Sdrahn } 115*08daf0c2Sdrahn 116*08daf0c2Sdrahn /* 117*08daf0c2Sdrahn ------------------------------------------------------------------------------- 118*08daf0c2Sdrahn Returns the exponent bits of the single-precision floating-point value `a'. 119*08daf0c2Sdrahn ------------------------------------------------------------------------------- 120*08daf0c2Sdrahn */ 121*08daf0c2Sdrahn INLINE int16 extractFloat32Exp( float32 a ) 122*08daf0c2Sdrahn { 123*08daf0c2Sdrahn 124*08daf0c2Sdrahn return ( a>>23 ) & 0xFF; 125*08daf0c2Sdrahn 126*08daf0c2Sdrahn } 127*08daf0c2Sdrahn 128*08daf0c2Sdrahn /* 129*08daf0c2Sdrahn ------------------------------------------------------------------------------- 130*08daf0c2Sdrahn Returns the sign bit of the single-precision floating-point value `a'. 131*08daf0c2Sdrahn ------------------------------------------------------------------------------- 132*08daf0c2Sdrahn */ 133*08daf0c2Sdrahn INLINE flag extractFloat32Sign( float32 a ) 134*08daf0c2Sdrahn { 135*08daf0c2Sdrahn 136*08daf0c2Sdrahn return a>>31; 137*08daf0c2Sdrahn 138*08daf0c2Sdrahn } 139*08daf0c2Sdrahn 140*08daf0c2Sdrahn /* 141*08daf0c2Sdrahn ------------------------------------------------------------------------------- 142*08daf0c2Sdrahn Normalizes the subnormal single-precision floating-point value represented 143*08daf0c2Sdrahn by the denormalized significand `aSig'. The normalized exponent and 144*08daf0c2Sdrahn significand are stored at the locations pointed to by `zExpPtr' and 145*08daf0c2Sdrahn `zSigPtr', respectively. 146*08daf0c2Sdrahn ------------------------------------------------------------------------------- 147*08daf0c2Sdrahn */ 148*08daf0c2Sdrahn static void 149*08daf0c2Sdrahn normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) 150*08daf0c2Sdrahn { 151*08daf0c2Sdrahn int8 shiftCount; 152*08daf0c2Sdrahn 153*08daf0c2Sdrahn shiftCount = countLeadingZeros32( aSig ) - 8; 154*08daf0c2Sdrahn *zSigPtr = aSig<<shiftCount; 155*08daf0c2Sdrahn *zExpPtr = 1 - shiftCount; 156*08daf0c2Sdrahn 157*08daf0c2Sdrahn } 158*08daf0c2Sdrahn 159*08daf0c2Sdrahn /* 160*08daf0c2Sdrahn ------------------------------------------------------------------------------- 161*08daf0c2Sdrahn Packs the sign `zSign', exponent `zExp', and significand `zSig' into a 162*08daf0c2Sdrahn single-precision floating-point value, returning the result. After being 163*08daf0c2Sdrahn shifted into the proper positions, the three fields are simply added 164*08daf0c2Sdrahn together to form the result. This means that any integer portion of `zSig' 165*08daf0c2Sdrahn will be added into the exponent. Since a properly normalized significand 166*08daf0c2Sdrahn will have an integer portion equal to 1, the `zExp' input should be 1 less 167*08daf0c2Sdrahn than the desired result exponent whenever `zSig' is a complete, normalized 168*08daf0c2Sdrahn significand. 169*08daf0c2Sdrahn ------------------------------------------------------------------------------- 170*08daf0c2Sdrahn */ 171*08daf0c2Sdrahn INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig ) 172*08daf0c2Sdrahn { 173*08daf0c2Sdrahn 174*08daf0c2Sdrahn return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig; 175*08daf0c2Sdrahn 176*08daf0c2Sdrahn } 177*08daf0c2Sdrahn 178*08daf0c2Sdrahn /* 179*08daf0c2Sdrahn ------------------------------------------------------------------------------- 180*08daf0c2Sdrahn Takes an abstract floating-point value having sign `zSign', exponent `zExp', 181*08daf0c2Sdrahn and significand `zSig', and returns the proper single-precision floating- 182*08daf0c2Sdrahn point value corresponding to the abstract input. Ordinarily, the abstract 183*08daf0c2Sdrahn value is simply rounded and packed into the single-precision format, with 184*08daf0c2Sdrahn the inexact exception raised if the abstract input cannot be represented 185*08daf0c2Sdrahn exactly. However, if the abstract value is too large, the overflow and 186*08daf0c2Sdrahn inexact exceptions are raised and an infinity or maximal finite value is 187*08daf0c2Sdrahn returned. If the abstract value is too small, the input value is rounded to 188*08daf0c2Sdrahn a subnormal number, and the underflow and inexact exceptions are raised if 189*08daf0c2Sdrahn the abstract input cannot be represented exactly as a subnormal single- 190*08daf0c2Sdrahn precision floating-point number. 191*08daf0c2Sdrahn The input significand `zSig' has its binary point between bits 30 192*08daf0c2Sdrahn and 29, which is 7 bits to the left of the usual location. This shifted 193*08daf0c2Sdrahn significand must be normalized or smaller. If `zSig' is not normalized, 194*08daf0c2Sdrahn `zExp' must be 0; in that case, the result returned is a subnormal number, 195*08daf0c2Sdrahn and it must not require rounding. In the usual case that `zSig' is 196*08daf0c2Sdrahn normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 197*08daf0c2Sdrahn The handling of underflow and overflow follows the IEC/IEEE Standard for 198*08daf0c2Sdrahn Binary Floating-Point Arithmetic. 199*08daf0c2Sdrahn ------------------------------------------------------------------------------- 200*08daf0c2Sdrahn */ 201*08daf0c2Sdrahn static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 202*08daf0c2Sdrahn { 203*08daf0c2Sdrahn int8 roundingMode; 204*08daf0c2Sdrahn flag roundNearestEven; 205*08daf0c2Sdrahn int8 roundIncrement, roundBits; 206*08daf0c2Sdrahn flag isTiny; 207*08daf0c2Sdrahn 208*08daf0c2Sdrahn roundingMode = float_rounding_mode; 209*08daf0c2Sdrahn roundNearestEven = roundingMode == float_round_nearest_even; 210*08daf0c2Sdrahn roundIncrement = 0x40; 211*08daf0c2Sdrahn if ( ! roundNearestEven ) { 212*08daf0c2Sdrahn if ( roundingMode == float_round_to_zero ) { 213*08daf0c2Sdrahn roundIncrement = 0; 214*08daf0c2Sdrahn } 215*08daf0c2Sdrahn else { 216*08daf0c2Sdrahn roundIncrement = 0x7F; 217*08daf0c2Sdrahn if ( zSign ) { 218*08daf0c2Sdrahn if ( roundingMode == float_round_up ) roundIncrement = 0; 219*08daf0c2Sdrahn } 220*08daf0c2Sdrahn else { 221*08daf0c2Sdrahn if ( roundingMode == float_round_down ) roundIncrement = 0; 222*08daf0c2Sdrahn } 223*08daf0c2Sdrahn } 224*08daf0c2Sdrahn } 225*08daf0c2Sdrahn roundBits = zSig & 0x7F; 226*08daf0c2Sdrahn if ( 0xFD <= (bits16) zExp ) { 227*08daf0c2Sdrahn if ( ( 0xFD < zExp ) 228*08daf0c2Sdrahn || ( ( zExp == 0xFD ) 229*08daf0c2Sdrahn && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) 230*08daf0c2Sdrahn ) { 231*08daf0c2Sdrahn float_raise( float_flag_overflow | float_flag_inexact ); 232*08daf0c2Sdrahn return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); 233*08daf0c2Sdrahn } 234*08daf0c2Sdrahn if ( zExp < 0 ) { 235*08daf0c2Sdrahn isTiny = 236*08daf0c2Sdrahn ( float_detect_tininess == float_tininess_before_rounding ) 237*08daf0c2Sdrahn || ( zExp < -1 ) 238*08daf0c2Sdrahn || ( zSig + roundIncrement < 0x80000000 ); 239*08daf0c2Sdrahn shift32RightJamming( zSig, - zExp, &zSig ); 240*08daf0c2Sdrahn zExp = 0; 241*08daf0c2Sdrahn roundBits = zSig & 0x7F; 242*08daf0c2Sdrahn if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 243*08daf0c2Sdrahn } 244*08daf0c2Sdrahn } 245*08daf0c2Sdrahn if ( roundBits ) float_exception_flags |= float_flag_inexact; 246*08daf0c2Sdrahn zSig = ( zSig + roundIncrement )>>7; 247*08daf0c2Sdrahn zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 248*08daf0c2Sdrahn if ( zSig == 0 ) zExp = 0; 249*08daf0c2Sdrahn return packFloat32( zSign, zExp, zSig ); 250*08daf0c2Sdrahn 251*08daf0c2Sdrahn } 252*08daf0c2Sdrahn 253*08daf0c2Sdrahn /* 254*08daf0c2Sdrahn ------------------------------------------------------------------------------- 255*08daf0c2Sdrahn Takes an abstract floating-point value having sign `zSign', exponent `zExp', 256*08daf0c2Sdrahn and significand `zSig', and returns the proper single-precision floating- 257*08daf0c2Sdrahn point value corresponding to the abstract input. This routine is just like 258*08daf0c2Sdrahn `roundAndPackFloat32' except that `zSig' does not have to be normalized. 259*08daf0c2Sdrahn Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' 260*08daf0c2Sdrahn floating-point exponent. 261*08daf0c2Sdrahn ------------------------------------------------------------------------------- 262*08daf0c2Sdrahn */ 263*08daf0c2Sdrahn static float32 264*08daf0c2Sdrahn normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 265*08daf0c2Sdrahn { 266*08daf0c2Sdrahn int8 shiftCount; 267*08daf0c2Sdrahn 268*08daf0c2Sdrahn shiftCount = countLeadingZeros32( zSig ) - 1; 269*08daf0c2Sdrahn return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount ); 270*08daf0c2Sdrahn 271*08daf0c2Sdrahn } 272*08daf0c2Sdrahn 273*08daf0c2Sdrahn /* 274*08daf0c2Sdrahn ------------------------------------------------------------------------------- 275*08daf0c2Sdrahn Returns the least-significant 32 fraction bits of the double-precision 276*08daf0c2Sdrahn floating-point value `a'. 277*08daf0c2Sdrahn ------------------------------------------------------------------------------- 278*08daf0c2Sdrahn */ 279*08daf0c2Sdrahn INLINE bits32 extractFloat64Frac1( float64 a ) 280*08daf0c2Sdrahn { 281*08daf0c2Sdrahn 282*08daf0c2Sdrahn return FLOAT64_DEMANGLE(a) & LIT64( 0x00000000FFFFFFFF ); 283*08daf0c2Sdrahn 284*08daf0c2Sdrahn } 285*08daf0c2Sdrahn 286*08daf0c2Sdrahn /* 287*08daf0c2Sdrahn ------------------------------------------------------------------------------- 288*08daf0c2Sdrahn Returns the most-significant 20 fraction bits of the double-precision 289*08daf0c2Sdrahn floating-point value `a'. 290*08daf0c2Sdrahn ------------------------------------------------------------------------------- 291*08daf0c2Sdrahn */ 292*08daf0c2Sdrahn INLINE bits32 extractFloat64Frac0( float64 a ) 293*08daf0c2Sdrahn { 294*08daf0c2Sdrahn 295*08daf0c2Sdrahn return ( FLOAT64_DEMANGLE(a)>>32 ) & 0x000FFFFF; 296*08daf0c2Sdrahn 297*08daf0c2Sdrahn } 298*08daf0c2Sdrahn 299*08daf0c2Sdrahn /* 300*08daf0c2Sdrahn ------------------------------------------------------------------------------- 301*08daf0c2Sdrahn Returns the exponent bits of the double-precision floating-point value `a'. 302*08daf0c2Sdrahn ------------------------------------------------------------------------------- 303*08daf0c2Sdrahn */ 304*08daf0c2Sdrahn INLINE int16 extractFloat64Exp( float64 a ) 305*08daf0c2Sdrahn { 306*08daf0c2Sdrahn 307*08daf0c2Sdrahn return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF; 308*08daf0c2Sdrahn 309*08daf0c2Sdrahn } 310*08daf0c2Sdrahn 311*08daf0c2Sdrahn /* 312*08daf0c2Sdrahn ------------------------------------------------------------------------------- 313*08daf0c2Sdrahn Returns the sign bit of the double-precision floating-point value `a'. 314*08daf0c2Sdrahn ------------------------------------------------------------------------------- 315*08daf0c2Sdrahn */ 316*08daf0c2Sdrahn INLINE flag extractFloat64Sign( float64 a ) 317*08daf0c2Sdrahn { 318*08daf0c2Sdrahn 319*08daf0c2Sdrahn return FLOAT64_DEMANGLE(a)>>63; 320*08daf0c2Sdrahn 321*08daf0c2Sdrahn } 322*08daf0c2Sdrahn 323*08daf0c2Sdrahn /* 324*08daf0c2Sdrahn ------------------------------------------------------------------------------- 325*08daf0c2Sdrahn Normalizes the subnormal double-precision floating-point value represented 326*08daf0c2Sdrahn by the denormalized significand formed by the concatenation of `aSig0' and 327*08daf0c2Sdrahn `aSig1'. The normalized exponent is stored at the location pointed to by 328*08daf0c2Sdrahn `zExpPtr'. The most significant 21 bits of the normalized significand are 329*08daf0c2Sdrahn stored at the location pointed to by `zSig0Ptr', and the least significant 330*08daf0c2Sdrahn 32 bits of the normalized significand are stored at the location pointed to 331*08daf0c2Sdrahn by `zSig1Ptr'. 332*08daf0c2Sdrahn ------------------------------------------------------------------------------- 333*08daf0c2Sdrahn */ 334*08daf0c2Sdrahn static void 335*08daf0c2Sdrahn normalizeFloat64Subnormal( 336*08daf0c2Sdrahn bits32 aSig0, 337*08daf0c2Sdrahn bits32 aSig1, 338*08daf0c2Sdrahn int16 *zExpPtr, 339*08daf0c2Sdrahn bits32 *zSig0Ptr, 340*08daf0c2Sdrahn bits32 *zSig1Ptr 341*08daf0c2Sdrahn ) 342*08daf0c2Sdrahn { 343*08daf0c2Sdrahn int8 shiftCount; 344*08daf0c2Sdrahn 345*08daf0c2Sdrahn if ( aSig0 == 0 ) { 346*08daf0c2Sdrahn shiftCount = countLeadingZeros32( aSig1 ) - 11; 347*08daf0c2Sdrahn if ( shiftCount < 0 ) { 348*08daf0c2Sdrahn *zSig0Ptr = aSig1>>( - shiftCount ); 349*08daf0c2Sdrahn *zSig1Ptr = aSig1<<( shiftCount & 31 ); 350*08daf0c2Sdrahn } 351*08daf0c2Sdrahn else { 352*08daf0c2Sdrahn *zSig0Ptr = aSig1<<shiftCount; 353*08daf0c2Sdrahn *zSig1Ptr = 0; 354*08daf0c2Sdrahn } 355*08daf0c2Sdrahn *zExpPtr = - shiftCount - 31; 356*08daf0c2Sdrahn } 357*08daf0c2Sdrahn else { 358*08daf0c2Sdrahn shiftCount = countLeadingZeros32( aSig0 ) - 11; 359*08daf0c2Sdrahn shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr ); 360*08daf0c2Sdrahn *zExpPtr = 1 - shiftCount; 361*08daf0c2Sdrahn } 362*08daf0c2Sdrahn 363*08daf0c2Sdrahn } 364*08daf0c2Sdrahn 365*08daf0c2Sdrahn /* 366*08daf0c2Sdrahn ------------------------------------------------------------------------------- 367*08daf0c2Sdrahn Packs the sign `zSign', the exponent `zExp', and the significand formed by 368*08daf0c2Sdrahn the concatenation of `zSig0' and `zSig1' into a double-precision floating- 369*08daf0c2Sdrahn point value, returning the result. After being shifted into the proper 370*08daf0c2Sdrahn positions, the three fields `zSign', `zExp', and `zSig0' are simply added 371*08daf0c2Sdrahn together to form the most significant 32 bits of the result. This means 372*08daf0c2Sdrahn that any integer portion of `zSig0' will be added into the exponent. Since 373*08daf0c2Sdrahn a properly normalized significand will have an integer portion equal to 1, 374*08daf0c2Sdrahn the `zExp' input should be 1 less than the desired result exponent whenever 375*08daf0c2Sdrahn `zSig0' and `zSig1' concatenated form a complete, normalized significand. 376*08daf0c2Sdrahn ------------------------------------------------------------------------------- 377*08daf0c2Sdrahn */ 378*08daf0c2Sdrahn INLINE float64 379*08daf0c2Sdrahn packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 ) 380*08daf0c2Sdrahn { 381*08daf0c2Sdrahn 382*08daf0c2Sdrahn return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) + 383*08daf0c2Sdrahn ( ( (bits64) zExp )<<52 ) + 384*08daf0c2Sdrahn ( ( (bits64) zSig0 )<<32 ) + zSig1 ); 385*08daf0c2Sdrahn 386*08daf0c2Sdrahn 387*08daf0c2Sdrahn } 388*08daf0c2Sdrahn 389*08daf0c2Sdrahn /* 390*08daf0c2Sdrahn ------------------------------------------------------------------------------- 391*08daf0c2Sdrahn Takes an abstract floating-point value having sign `zSign', exponent `zExp', 392*08daf0c2Sdrahn and extended significand formed by the concatenation of `zSig0', `zSig1', 393*08daf0c2Sdrahn and `zSig2', and returns the proper double-precision floating-point value 394*08daf0c2Sdrahn corresponding to the abstract input. Ordinarily, the abstract value is 395*08daf0c2Sdrahn simply rounded and packed into the double-precision format, with the inexact 396*08daf0c2Sdrahn exception raised if the abstract input cannot be represented exactly. 397*08daf0c2Sdrahn However, if the abstract value is too large, the overflow and inexact 398*08daf0c2Sdrahn exceptions are raised and an infinity or maximal finite value is returned. 399*08daf0c2Sdrahn If the abstract value is too small, the input value is rounded to a 400*08daf0c2Sdrahn subnormal number, and the underflow and inexact exceptions are raised if the 401*08daf0c2Sdrahn abstract input cannot be represented exactly as a subnormal double-precision 402*08daf0c2Sdrahn floating-point number. 403*08daf0c2Sdrahn The input significand must be normalized or smaller. If the input 404*08daf0c2Sdrahn significand is not normalized, `zExp' must be 0; in that case, the result 405*08daf0c2Sdrahn returned is a subnormal number, and it must not require rounding. In the 406*08daf0c2Sdrahn usual case that the input significand is normalized, `zExp' must be 1 less 407*08daf0c2Sdrahn than the ``true'' floating-point exponent. The handling of underflow and 408*08daf0c2Sdrahn overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 409*08daf0c2Sdrahn ------------------------------------------------------------------------------- 410*08daf0c2Sdrahn */ 411*08daf0c2Sdrahn static float64 412*08daf0c2Sdrahn roundAndPackFloat64( 413*08daf0c2Sdrahn flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 ) 414*08daf0c2Sdrahn { 415*08daf0c2Sdrahn int8 roundingMode; 416*08daf0c2Sdrahn flag roundNearestEven, increment, isTiny; 417*08daf0c2Sdrahn 418*08daf0c2Sdrahn roundingMode = float_rounding_mode; 419*08daf0c2Sdrahn roundNearestEven = ( roundingMode == float_round_nearest_even ); 420*08daf0c2Sdrahn increment = ( (sbits32) zSig2 < 0 ); 421*08daf0c2Sdrahn if ( ! roundNearestEven ) { 422*08daf0c2Sdrahn if ( roundingMode == float_round_to_zero ) { 423*08daf0c2Sdrahn increment = 0; 424*08daf0c2Sdrahn } 425*08daf0c2Sdrahn else { 426*08daf0c2Sdrahn if ( zSign ) { 427*08daf0c2Sdrahn increment = ( roundingMode == float_round_down ) && zSig2; 428*08daf0c2Sdrahn } 429*08daf0c2Sdrahn else { 430*08daf0c2Sdrahn increment = ( roundingMode == float_round_up ) && zSig2; 431*08daf0c2Sdrahn } 432*08daf0c2Sdrahn } 433*08daf0c2Sdrahn } 434*08daf0c2Sdrahn if ( 0x7FD <= (bits16) zExp ) { 435*08daf0c2Sdrahn if ( ( 0x7FD < zExp ) 436*08daf0c2Sdrahn || ( ( zExp == 0x7FD ) 437*08daf0c2Sdrahn && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 ) 438*08daf0c2Sdrahn && increment 439*08daf0c2Sdrahn ) 440*08daf0c2Sdrahn ) { 441*08daf0c2Sdrahn float_raise( float_flag_overflow | float_flag_inexact ); 442*08daf0c2Sdrahn if ( ( roundingMode == float_round_to_zero ) 443*08daf0c2Sdrahn || ( zSign && ( roundingMode == float_round_up ) ) 444*08daf0c2Sdrahn || ( ! zSign && ( roundingMode == float_round_down ) ) 445*08daf0c2Sdrahn ) { 446*08daf0c2Sdrahn return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF ); 447*08daf0c2Sdrahn } 448*08daf0c2Sdrahn return packFloat64( zSign, 0x7FF, 0, 0 ); 449*08daf0c2Sdrahn } 450*08daf0c2Sdrahn if ( zExp < 0 ) { 451*08daf0c2Sdrahn isTiny = 452*08daf0c2Sdrahn ( float_detect_tininess == float_tininess_before_rounding ) 453*08daf0c2Sdrahn || ( zExp < -1 ) 454*08daf0c2Sdrahn || ! increment 455*08daf0c2Sdrahn || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF ); 456*08daf0c2Sdrahn shift64ExtraRightJamming( 457*08daf0c2Sdrahn zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); 458*08daf0c2Sdrahn zExp = 0; 459*08daf0c2Sdrahn if ( isTiny && zSig2 ) float_raise( float_flag_underflow ); 460*08daf0c2Sdrahn if ( roundNearestEven ) { 461*08daf0c2Sdrahn increment = ( (sbits32) zSig2 < 0 ); 462*08daf0c2Sdrahn } 463*08daf0c2Sdrahn else { 464*08daf0c2Sdrahn if ( zSign ) { 465*08daf0c2Sdrahn increment = ( roundingMode == float_round_down ) && zSig2; 466*08daf0c2Sdrahn } 467*08daf0c2Sdrahn else { 468*08daf0c2Sdrahn increment = ( roundingMode == float_round_up ) && zSig2; 469*08daf0c2Sdrahn } 470*08daf0c2Sdrahn } 471*08daf0c2Sdrahn } 472*08daf0c2Sdrahn } 473*08daf0c2Sdrahn if ( zSig2 ) float_exception_flags |= float_flag_inexact; 474*08daf0c2Sdrahn if ( increment ) { 475*08daf0c2Sdrahn add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); 476*08daf0c2Sdrahn zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); 477*08daf0c2Sdrahn } 478*08daf0c2Sdrahn else { 479*08daf0c2Sdrahn if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; 480*08daf0c2Sdrahn } 481*08daf0c2Sdrahn return packFloat64( zSign, zExp, zSig0, zSig1 ); 482*08daf0c2Sdrahn 483*08daf0c2Sdrahn } 484*08daf0c2Sdrahn 485*08daf0c2Sdrahn /* 486*08daf0c2Sdrahn ------------------------------------------------------------------------------- 487*08daf0c2Sdrahn Takes an abstract floating-point value having sign `zSign', exponent `zExp', 488*08daf0c2Sdrahn and significand formed by the concatenation of `zSig0' and `zSig1', and 489*08daf0c2Sdrahn returns the proper double-precision floating-point value corresponding 490*08daf0c2Sdrahn to the abstract input. This routine is just like `roundAndPackFloat64' 491*08daf0c2Sdrahn except that the input significand has fewer bits and does not have to be 492*08daf0c2Sdrahn normalized. In all cases, `zExp' must be 1 less than the ``true'' floating- 493*08daf0c2Sdrahn point exponent. 494*08daf0c2Sdrahn ------------------------------------------------------------------------------- 495*08daf0c2Sdrahn */ 496*08daf0c2Sdrahn static float64 497*08daf0c2Sdrahn normalizeRoundAndPackFloat64( 498*08daf0c2Sdrahn flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 ) 499*08daf0c2Sdrahn { 500*08daf0c2Sdrahn int8 shiftCount; 501*08daf0c2Sdrahn bits32 zSig2; 502*08daf0c2Sdrahn 503*08daf0c2Sdrahn if ( zSig0 == 0 ) { 504*08daf0c2Sdrahn zSig0 = zSig1; 505*08daf0c2Sdrahn zSig1 = 0; 506*08daf0c2Sdrahn zExp -= 32; 507*08daf0c2Sdrahn } 508*08daf0c2Sdrahn shiftCount = countLeadingZeros32( zSig0 ) - 11; 509*08daf0c2Sdrahn if ( 0 <= shiftCount ) { 510*08daf0c2Sdrahn zSig2 = 0; 511*08daf0c2Sdrahn shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 512*08daf0c2Sdrahn } 513*08daf0c2Sdrahn else { 514*08daf0c2Sdrahn shift64ExtraRightJamming( 515*08daf0c2Sdrahn zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); 516*08daf0c2Sdrahn } 517*08daf0c2Sdrahn zExp -= shiftCount; 518*08daf0c2Sdrahn return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 519*08daf0c2Sdrahn 520*08daf0c2Sdrahn } 521*08daf0c2Sdrahn 522*08daf0c2Sdrahn /* 523*08daf0c2Sdrahn ------------------------------------------------------------------------------- 524*08daf0c2Sdrahn Returns the result of converting the 32-bit two's complement integer `a' to 525*08daf0c2Sdrahn the single-precision floating-point format. The conversion is performed 526*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 527*08daf0c2Sdrahn ------------------------------------------------------------------------------- 528*08daf0c2Sdrahn */ 529*08daf0c2Sdrahn float32 int32_to_float32( int32 a ) 530*08daf0c2Sdrahn { 531*08daf0c2Sdrahn flag zSign; 532*08daf0c2Sdrahn 533*08daf0c2Sdrahn if ( a == 0 ) return 0; 534*08daf0c2Sdrahn if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); 535*08daf0c2Sdrahn zSign = ( a < 0 ); 536*08daf0c2Sdrahn return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a ); 537*08daf0c2Sdrahn 538*08daf0c2Sdrahn } 539*08daf0c2Sdrahn 540*08daf0c2Sdrahn /* 541*08daf0c2Sdrahn ------------------------------------------------------------------------------- 542*08daf0c2Sdrahn Returns the result of converting the 32-bit two's complement integer `a' to 543*08daf0c2Sdrahn the double-precision floating-point format. The conversion is performed 544*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 545*08daf0c2Sdrahn ------------------------------------------------------------------------------- 546*08daf0c2Sdrahn */ 547*08daf0c2Sdrahn float64 int32_to_float64( int32 a ) 548*08daf0c2Sdrahn { 549*08daf0c2Sdrahn flag zSign; 550*08daf0c2Sdrahn bits32 absA; 551*08daf0c2Sdrahn int8 shiftCount; 552*08daf0c2Sdrahn bits32 zSig0, zSig1; 553*08daf0c2Sdrahn 554*08daf0c2Sdrahn if ( a == 0 ) return packFloat64( 0, 0, 0, 0 ); 555*08daf0c2Sdrahn zSign = ( a < 0 ); 556*08daf0c2Sdrahn absA = zSign ? - a : a; 557*08daf0c2Sdrahn shiftCount = countLeadingZeros32( absA ) - 11; 558*08daf0c2Sdrahn if ( 0 <= shiftCount ) { 559*08daf0c2Sdrahn zSig0 = absA<<shiftCount; 560*08daf0c2Sdrahn zSig1 = 0; 561*08daf0c2Sdrahn } 562*08daf0c2Sdrahn else { 563*08daf0c2Sdrahn shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 ); 564*08daf0c2Sdrahn } 565*08daf0c2Sdrahn return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 ); 566*08daf0c2Sdrahn 567*08daf0c2Sdrahn } 568*08daf0c2Sdrahn 569*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC 570*08daf0c2Sdrahn /* 571*08daf0c2Sdrahn ------------------------------------------------------------------------------- 572*08daf0c2Sdrahn Returns the result of converting the single-precision floating-point value 573*08daf0c2Sdrahn `a' to the 32-bit two's complement integer format. The conversion is 574*08daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point 575*08daf0c2Sdrahn Arithmetic---which means in particular that the conversion is rounded 576*08daf0c2Sdrahn according to the current rounding mode. If `a' is a NaN, the largest 577*08daf0c2Sdrahn positive integer is returned. Otherwise, if the conversion overflows, the 578*08daf0c2Sdrahn largest integer with the same sign as `a' is returned. 579*08daf0c2Sdrahn ------------------------------------------------------------------------------- 580*08daf0c2Sdrahn */ 581*08daf0c2Sdrahn int32 float32_to_int32( float32 a ) 582*08daf0c2Sdrahn { 583*08daf0c2Sdrahn flag aSign; 584*08daf0c2Sdrahn int16 aExp, shiftCount; 585*08daf0c2Sdrahn bits32 aSig, aSigExtra; 586*08daf0c2Sdrahn int32 z; 587*08daf0c2Sdrahn int8 roundingMode; 588*08daf0c2Sdrahn 589*08daf0c2Sdrahn aSig = extractFloat32Frac( a ); 590*08daf0c2Sdrahn aExp = extractFloat32Exp( a ); 591*08daf0c2Sdrahn aSign = extractFloat32Sign( a ); 592*08daf0c2Sdrahn shiftCount = aExp - 0x96; 593*08daf0c2Sdrahn if ( 0 <= shiftCount ) { 594*08daf0c2Sdrahn if ( 0x9E <= aExp ) { 595*08daf0c2Sdrahn if ( a != 0xCF000000 ) { 596*08daf0c2Sdrahn float_raise( float_flag_invalid ); 597*08daf0c2Sdrahn if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { 598*08daf0c2Sdrahn return 0x7FFFFFFF; 599*08daf0c2Sdrahn } 600*08daf0c2Sdrahn } 601*08daf0c2Sdrahn return (sbits32) 0x80000000; 602*08daf0c2Sdrahn } 603*08daf0c2Sdrahn z = ( aSig | 0x00800000 )<<shiftCount; 604*08daf0c2Sdrahn if ( aSign ) z = - z; 605*08daf0c2Sdrahn } 606*08daf0c2Sdrahn else { 607*08daf0c2Sdrahn if ( aExp < 0x7E ) { 608*08daf0c2Sdrahn aSigExtra = aExp | aSig; 609*08daf0c2Sdrahn z = 0; 610*08daf0c2Sdrahn } 611*08daf0c2Sdrahn else { 612*08daf0c2Sdrahn aSig |= 0x00800000; 613*08daf0c2Sdrahn aSigExtra = aSig<<( shiftCount & 31 ); 614*08daf0c2Sdrahn z = aSig>>( - shiftCount ); 615*08daf0c2Sdrahn } 616*08daf0c2Sdrahn if ( aSigExtra ) float_exception_flags |= float_flag_inexact; 617*08daf0c2Sdrahn roundingMode = float_rounding_mode; 618*08daf0c2Sdrahn if ( roundingMode == float_round_nearest_even ) { 619*08daf0c2Sdrahn if ( (sbits32) aSigExtra < 0 ) { 620*08daf0c2Sdrahn ++z; 621*08daf0c2Sdrahn if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1; 622*08daf0c2Sdrahn } 623*08daf0c2Sdrahn if ( aSign ) z = - z; 624*08daf0c2Sdrahn } 625*08daf0c2Sdrahn else { 626*08daf0c2Sdrahn aSigExtra = ( aSigExtra != 0 ); 627*08daf0c2Sdrahn if ( aSign ) { 628*08daf0c2Sdrahn z += ( roundingMode == float_round_down ) & aSigExtra; 629*08daf0c2Sdrahn z = - z; 630*08daf0c2Sdrahn } 631*08daf0c2Sdrahn else { 632*08daf0c2Sdrahn z += ( roundingMode == float_round_up ) & aSigExtra; 633*08daf0c2Sdrahn } 634*08daf0c2Sdrahn } 635*08daf0c2Sdrahn } 636*08daf0c2Sdrahn return z; 637*08daf0c2Sdrahn 638*08daf0c2Sdrahn } 639*08daf0c2Sdrahn #endif 640*08daf0c2Sdrahn 641*08daf0c2Sdrahn /* 642*08daf0c2Sdrahn ------------------------------------------------------------------------------- 643*08daf0c2Sdrahn Returns the result of converting the single-precision floating-point value 644*08daf0c2Sdrahn `a' to the 32-bit two's complement integer format. The conversion is 645*08daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point 646*08daf0c2Sdrahn Arithmetic, except that the conversion is always rounded toward zero. 647*08daf0c2Sdrahn If `a' is a NaN, the largest positive integer is returned. Otherwise, if 648*08daf0c2Sdrahn the conversion overflows, the largest integer with the same sign as `a' is 649*08daf0c2Sdrahn returned. 650*08daf0c2Sdrahn ------------------------------------------------------------------------------- 651*08daf0c2Sdrahn */ 652*08daf0c2Sdrahn int32 float32_to_int32_round_to_zero( float32 a ) 653*08daf0c2Sdrahn { 654*08daf0c2Sdrahn flag aSign; 655*08daf0c2Sdrahn int16 aExp, shiftCount; 656*08daf0c2Sdrahn bits32 aSig; 657*08daf0c2Sdrahn int32 z; 658*08daf0c2Sdrahn 659*08daf0c2Sdrahn aSig = extractFloat32Frac( a ); 660*08daf0c2Sdrahn aExp = extractFloat32Exp( a ); 661*08daf0c2Sdrahn aSign = extractFloat32Sign( a ); 662*08daf0c2Sdrahn shiftCount = aExp - 0x9E; 663*08daf0c2Sdrahn if ( 0 <= shiftCount ) { 664*08daf0c2Sdrahn if ( a != 0xCF000000 ) { 665*08daf0c2Sdrahn float_raise( float_flag_invalid ); 666*08daf0c2Sdrahn if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; 667*08daf0c2Sdrahn } 668*08daf0c2Sdrahn return (sbits32) 0x80000000; 669*08daf0c2Sdrahn } 670*08daf0c2Sdrahn else if ( aExp <= 0x7E ) { 671*08daf0c2Sdrahn if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; 672*08daf0c2Sdrahn return 0; 673*08daf0c2Sdrahn } 674*08daf0c2Sdrahn aSig = ( aSig | 0x00800000 )<<8; 675*08daf0c2Sdrahn z = aSig>>( - shiftCount ); 676*08daf0c2Sdrahn if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { 677*08daf0c2Sdrahn float_exception_flags |= float_flag_inexact; 678*08daf0c2Sdrahn } 679*08daf0c2Sdrahn if ( aSign ) z = - z; 680*08daf0c2Sdrahn return z; 681*08daf0c2Sdrahn 682*08daf0c2Sdrahn } 683*08daf0c2Sdrahn 684*08daf0c2Sdrahn /* 685*08daf0c2Sdrahn ------------------------------------------------------------------------------- 686*08daf0c2Sdrahn Returns the result of converting the single-precision floating-point value 687*08daf0c2Sdrahn `a' to the double-precision floating-point format. The conversion is 688*08daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point 689*08daf0c2Sdrahn Arithmetic. 690*08daf0c2Sdrahn ------------------------------------------------------------------------------- 691*08daf0c2Sdrahn */ 692*08daf0c2Sdrahn float64 float32_to_float64( float32 a ) 693*08daf0c2Sdrahn { 694*08daf0c2Sdrahn flag aSign; 695*08daf0c2Sdrahn int16 aExp; 696*08daf0c2Sdrahn bits32 aSig, zSig0, zSig1; 697*08daf0c2Sdrahn 698*08daf0c2Sdrahn aSig = extractFloat32Frac( a ); 699*08daf0c2Sdrahn aExp = extractFloat32Exp( a ); 700*08daf0c2Sdrahn aSign = extractFloat32Sign( a ); 701*08daf0c2Sdrahn if ( aExp == 0xFF ) { 702*08daf0c2Sdrahn if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); 703*08daf0c2Sdrahn return packFloat64( aSign, 0x7FF, 0, 0 ); 704*08daf0c2Sdrahn } 705*08daf0c2Sdrahn if ( aExp == 0 ) { 706*08daf0c2Sdrahn if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 ); 707*08daf0c2Sdrahn normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 708*08daf0c2Sdrahn --aExp; 709*08daf0c2Sdrahn } 710*08daf0c2Sdrahn shift64Right( aSig, 0, 3, &zSig0, &zSig1 ); 711*08daf0c2Sdrahn return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 ); 712*08daf0c2Sdrahn 713*08daf0c2Sdrahn } 714*08daf0c2Sdrahn 715*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC 716*08daf0c2Sdrahn /* 717*08daf0c2Sdrahn ------------------------------------------------------------------------------- 718*08daf0c2Sdrahn Rounds the single-precision floating-point value `a' to an integer, 719*08daf0c2Sdrahn and returns the result as a single-precision floating-point value. The 720*08daf0c2Sdrahn operation is performed according to the IEC/IEEE Standard for Binary 721*08daf0c2Sdrahn Floating-Point Arithmetic. 722*08daf0c2Sdrahn ------------------------------------------------------------------------------- 723*08daf0c2Sdrahn */ 724*08daf0c2Sdrahn float32 float32_round_to_int( float32 a ) 725*08daf0c2Sdrahn { 726*08daf0c2Sdrahn flag aSign; 727*08daf0c2Sdrahn int16 aExp; 728*08daf0c2Sdrahn bits32 lastBitMask, roundBitsMask; 729*08daf0c2Sdrahn int8 roundingMode; 730*08daf0c2Sdrahn float32 z; 731*08daf0c2Sdrahn 732*08daf0c2Sdrahn aExp = extractFloat32Exp( a ); 733*08daf0c2Sdrahn if ( 0x96 <= aExp ) { 734*08daf0c2Sdrahn if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { 735*08daf0c2Sdrahn return propagateFloat32NaN( a, a ); 736*08daf0c2Sdrahn } 737*08daf0c2Sdrahn return a; 738*08daf0c2Sdrahn } 739*08daf0c2Sdrahn if ( aExp <= 0x7E ) { 740*08daf0c2Sdrahn if ( (bits32) ( a<<1 ) == 0 ) return a; 741*08daf0c2Sdrahn float_exception_flags |= float_flag_inexact; 742*08daf0c2Sdrahn aSign = extractFloat32Sign( a ); 743*08daf0c2Sdrahn switch ( float_rounding_mode ) { 744*08daf0c2Sdrahn case float_round_nearest_even: 745*08daf0c2Sdrahn if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { 746*08daf0c2Sdrahn return packFloat32( aSign, 0x7F, 0 ); 747*08daf0c2Sdrahn } 748*08daf0c2Sdrahn break; 749*08daf0c2Sdrahn case float_round_to_zero: 750*08daf0c2Sdrahn break; 751*08daf0c2Sdrahn case float_round_down: 752*08daf0c2Sdrahn return aSign ? 0xBF800000 : 0; 753*08daf0c2Sdrahn case float_round_up: 754*08daf0c2Sdrahn return aSign ? 0x80000000 : 0x3F800000; 755*08daf0c2Sdrahn } 756*08daf0c2Sdrahn return packFloat32( aSign, 0, 0 ); 757*08daf0c2Sdrahn } 758*08daf0c2Sdrahn lastBitMask = 1; 759*08daf0c2Sdrahn lastBitMask <<= 0x96 - aExp; 760*08daf0c2Sdrahn roundBitsMask = lastBitMask - 1; 761*08daf0c2Sdrahn z = a; 762*08daf0c2Sdrahn roundingMode = float_rounding_mode; 763*08daf0c2Sdrahn if ( roundingMode == float_round_nearest_even ) { 764*08daf0c2Sdrahn z += lastBitMask>>1; 765*08daf0c2Sdrahn if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; 766*08daf0c2Sdrahn } 767*08daf0c2Sdrahn else if ( roundingMode != float_round_to_zero ) { 768*08daf0c2Sdrahn if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) { 769*08daf0c2Sdrahn z += roundBitsMask; 770*08daf0c2Sdrahn } 771*08daf0c2Sdrahn } 772*08daf0c2Sdrahn z &= ~ roundBitsMask; 773*08daf0c2Sdrahn if ( z != a ) float_exception_flags |= float_flag_inexact; 774*08daf0c2Sdrahn return z; 775*08daf0c2Sdrahn 776*08daf0c2Sdrahn } 777*08daf0c2Sdrahn #endif 778*08daf0c2Sdrahn 779*08daf0c2Sdrahn /* 780*08daf0c2Sdrahn ------------------------------------------------------------------------------- 781*08daf0c2Sdrahn Returns the result of adding the absolute values of the single-precision 782*08daf0c2Sdrahn floating-point values `a' and `b'. If `zSign' is 1, the sum is negated 783*08daf0c2Sdrahn before being returned. `zSign' is ignored if the result is a NaN. 784*08daf0c2Sdrahn The addition is performed according to the IEC/IEEE Standard for Binary 785*08daf0c2Sdrahn Floating-Point Arithmetic. 786*08daf0c2Sdrahn ------------------------------------------------------------------------------- 787*08daf0c2Sdrahn */ 788*08daf0c2Sdrahn static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) 789*08daf0c2Sdrahn { 790*08daf0c2Sdrahn int16 aExp, bExp, zExp; 791*08daf0c2Sdrahn bits32 aSig, bSig, zSig; 792*08daf0c2Sdrahn int16 expDiff; 793*08daf0c2Sdrahn 794*08daf0c2Sdrahn aSig = extractFloat32Frac( a ); 795*08daf0c2Sdrahn aExp = extractFloat32Exp( a ); 796*08daf0c2Sdrahn bSig = extractFloat32Frac( b ); 797*08daf0c2Sdrahn bExp = extractFloat32Exp( b ); 798*08daf0c2Sdrahn expDiff = aExp - bExp; 799*08daf0c2Sdrahn aSig <<= 6; 800*08daf0c2Sdrahn bSig <<= 6; 801*08daf0c2Sdrahn if ( 0 < expDiff ) { 802*08daf0c2Sdrahn if ( aExp == 0xFF ) { 803*08daf0c2Sdrahn if ( aSig ) return propagateFloat32NaN( a, b ); 804*08daf0c2Sdrahn return a; 805*08daf0c2Sdrahn } 806*08daf0c2Sdrahn if ( bExp == 0 ) { 807*08daf0c2Sdrahn --expDiff; 808*08daf0c2Sdrahn } 809*08daf0c2Sdrahn else { 810*08daf0c2Sdrahn bSig |= 0x20000000; 811*08daf0c2Sdrahn } 812*08daf0c2Sdrahn shift32RightJamming( bSig, expDiff, &bSig ); 813*08daf0c2Sdrahn zExp = aExp; 814*08daf0c2Sdrahn } 815*08daf0c2Sdrahn else if ( expDiff < 0 ) { 816*08daf0c2Sdrahn if ( bExp == 0xFF ) { 817*08daf0c2Sdrahn if ( bSig ) return propagateFloat32NaN( a, b ); 818*08daf0c2Sdrahn return packFloat32( zSign, 0xFF, 0 ); 819*08daf0c2Sdrahn } 820*08daf0c2Sdrahn if ( aExp == 0 ) { 821*08daf0c2Sdrahn ++expDiff; 822*08daf0c2Sdrahn } 823*08daf0c2Sdrahn else { 824*08daf0c2Sdrahn aSig |= 0x20000000; 825*08daf0c2Sdrahn } 826*08daf0c2Sdrahn shift32RightJamming( aSig, - expDiff, &aSig ); 827*08daf0c2Sdrahn zExp = bExp; 828*08daf0c2Sdrahn } 829*08daf0c2Sdrahn else { 830*08daf0c2Sdrahn if ( aExp == 0xFF ) { 831*08daf0c2Sdrahn if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 832*08daf0c2Sdrahn return a; 833*08daf0c2Sdrahn } 834*08daf0c2Sdrahn if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); 835*08daf0c2Sdrahn zSig = 0x40000000 + aSig + bSig; 836*08daf0c2Sdrahn zExp = aExp; 837*08daf0c2Sdrahn goto roundAndPack; 838*08daf0c2Sdrahn } 839*08daf0c2Sdrahn aSig |= 0x20000000; 840*08daf0c2Sdrahn zSig = ( aSig + bSig )<<1; 841*08daf0c2Sdrahn --zExp; 842*08daf0c2Sdrahn if ( (sbits32) zSig < 0 ) { 843*08daf0c2Sdrahn zSig = aSig + bSig; 844*08daf0c2Sdrahn ++zExp; 845*08daf0c2Sdrahn } 846*08daf0c2Sdrahn roundAndPack: 847*08daf0c2Sdrahn return roundAndPackFloat32( zSign, zExp, zSig ); 848*08daf0c2Sdrahn 849*08daf0c2Sdrahn } 850*08daf0c2Sdrahn 851*08daf0c2Sdrahn /* 852*08daf0c2Sdrahn ------------------------------------------------------------------------------- 853*08daf0c2Sdrahn Returns the result of subtracting the absolute values of the single- 854*08daf0c2Sdrahn precision floating-point values `a' and `b'. If `zSign' is 1, the 855*08daf0c2Sdrahn difference is negated before being returned. `zSign' is ignored if the 856*08daf0c2Sdrahn result is a NaN. The subtraction is performed according to the IEC/IEEE 857*08daf0c2Sdrahn Standard for Binary Floating-Point Arithmetic. 858*08daf0c2Sdrahn ------------------------------------------------------------------------------- 859*08daf0c2Sdrahn */ 860*08daf0c2Sdrahn static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) 861*08daf0c2Sdrahn { 862*08daf0c2Sdrahn int16 aExp, bExp, zExp; 863*08daf0c2Sdrahn bits32 aSig, bSig, zSig; 864*08daf0c2Sdrahn int16 expDiff; 865*08daf0c2Sdrahn 866*08daf0c2Sdrahn aSig = extractFloat32Frac( a ); 867*08daf0c2Sdrahn aExp = extractFloat32Exp( a ); 868*08daf0c2Sdrahn bSig = extractFloat32Frac( b ); 869*08daf0c2Sdrahn bExp = extractFloat32Exp( b ); 870*08daf0c2Sdrahn expDiff = aExp - bExp; 871*08daf0c2Sdrahn aSig <<= 7; 872*08daf0c2Sdrahn bSig <<= 7; 873*08daf0c2Sdrahn if ( 0 < expDiff ) goto aExpBigger; 874*08daf0c2Sdrahn if ( expDiff < 0 ) goto bExpBigger; 875*08daf0c2Sdrahn if ( aExp == 0xFF ) { 876*08daf0c2Sdrahn if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 877*08daf0c2Sdrahn float_raise( float_flag_invalid ); 878*08daf0c2Sdrahn return float32_default_nan; 879*08daf0c2Sdrahn } 880*08daf0c2Sdrahn if ( aExp == 0 ) { 881*08daf0c2Sdrahn aExp = 1; 882*08daf0c2Sdrahn bExp = 1; 883*08daf0c2Sdrahn } 884*08daf0c2Sdrahn if ( bSig < aSig ) goto aBigger; 885*08daf0c2Sdrahn if ( aSig < bSig ) goto bBigger; 886*08daf0c2Sdrahn return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); 887*08daf0c2Sdrahn bExpBigger: 888*08daf0c2Sdrahn if ( bExp == 0xFF ) { 889*08daf0c2Sdrahn if ( bSig ) return propagateFloat32NaN( a, b ); 890*08daf0c2Sdrahn return packFloat32( zSign ^ 1, 0xFF, 0 ); 891*08daf0c2Sdrahn } 892*08daf0c2Sdrahn if ( aExp == 0 ) { 893*08daf0c2Sdrahn ++expDiff; 894*08daf0c2Sdrahn } 895*08daf0c2Sdrahn else { 896*08daf0c2Sdrahn aSig |= 0x40000000; 897*08daf0c2Sdrahn } 898*08daf0c2Sdrahn shift32RightJamming( aSig, - expDiff, &aSig ); 899*08daf0c2Sdrahn bSig |= 0x40000000; 900*08daf0c2Sdrahn bBigger: 901*08daf0c2Sdrahn zSig = bSig - aSig; 902*08daf0c2Sdrahn zExp = bExp; 903*08daf0c2Sdrahn zSign ^= 1; 904*08daf0c2Sdrahn goto normalizeRoundAndPack; 905*08daf0c2Sdrahn aExpBigger: 906*08daf0c2Sdrahn if ( aExp == 0xFF ) { 907*08daf0c2Sdrahn if ( aSig ) return propagateFloat32NaN( a, b ); 908*08daf0c2Sdrahn return a; 909*08daf0c2Sdrahn } 910*08daf0c2Sdrahn if ( bExp == 0 ) { 911*08daf0c2Sdrahn --expDiff; 912*08daf0c2Sdrahn } 913*08daf0c2Sdrahn else { 914*08daf0c2Sdrahn bSig |= 0x40000000; 915*08daf0c2Sdrahn } 916*08daf0c2Sdrahn shift32RightJamming( bSig, expDiff, &bSig ); 917*08daf0c2Sdrahn aSig |= 0x40000000; 918*08daf0c2Sdrahn aBigger: 919*08daf0c2Sdrahn zSig = aSig - bSig; 920*08daf0c2Sdrahn zExp = aExp; 921*08daf0c2Sdrahn normalizeRoundAndPack: 922*08daf0c2Sdrahn --zExp; 923*08daf0c2Sdrahn return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); 924*08daf0c2Sdrahn 925*08daf0c2Sdrahn } 926*08daf0c2Sdrahn 927*08daf0c2Sdrahn /* 928*08daf0c2Sdrahn ------------------------------------------------------------------------------- 929*08daf0c2Sdrahn Returns the result of adding the single-precision floating-point values `a' 930*08daf0c2Sdrahn and `b'. The operation is performed according to the IEC/IEEE Standard for 931*08daf0c2Sdrahn Binary Floating-Point Arithmetic. 932*08daf0c2Sdrahn ------------------------------------------------------------------------------- 933*08daf0c2Sdrahn */ 934*08daf0c2Sdrahn float32 float32_add( float32 a, float32 b ) 935*08daf0c2Sdrahn { 936*08daf0c2Sdrahn flag aSign, bSign; 937*08daf0c2Sdrahn 938*08daf0c2Sdrahn aSign = extractFloat32Sign( a ); 939*08daf0c2Sdrahn bSign = extractFloat32Sign( b ); 940*08daf0c2Sdrahn if ( aSign == bSign ) { 941*08daf0c2Sdrahn return addFloat32Sigs( a, b, aSign ); 942*08daf0c2Sdrahn } 943*08daf0c2Sdrahn else { 944*08daf0c2Sdrahn return subFloat32Sigs( a, b, aSign ); 945*08daf0c2Sdrahn } 946*08daf0c2Sdrahn 947*08daf0c2Sdrahn } 948*08daf0c2Sdrahn 949*08daf0c2Sdrahn /* 950*08daf0c2Sdrahn ------------------------------------------------------------------------------- 951*08daf0c2Sdrahn Returns the result of subtracting the single-precision floating-point values 952*08daf0c2Sdrahn `a' and `b'. The operation is performed according to the IEC/IEEE Standard 953*08daf0c2Sdrahn for Binary Floating-Point Arithmetic. 954*08daf0c2Sdrahn ------------------------------------------------------------------------------- 955*08daf0c2Sdrahn */ 956*08daf0c2Sdrahn float32 float32_sub( float32 a, float32 b ) 957*08daf0c2Sdrahn { 958*08daf0c2Sdrahn flag aSign, bSign; 959*08daf0c2Sdrahn 960*08daf0c2Sdrahn aSign = extractFloat32Sign( a ); 961*08daf0c2Sdrahn bSign = extractFloat32Sign( b ); 962*08daf0c2Sdrahn if ( aSign == bSign ) { 963*08daf0c2Sdrahn return subFloat32Sigs( a, b, aSign ); 964*08daf0c2Sdrahn } 965*08daf0c2Sdrahn else { 966*08daf0c2Sdrahn return addFloat32Sigs( a, b, aSign ); 967*08daf0c2Sdrahn } 968*08daf0c2Sdrahn 969*08daf0c2Sdrahn } 970*08daf0c2Sdrahn 971*08daf0c2Sdrahn /* 972*08daf0c2Sdrahn ------------------------------------------------------------------------------- 973*08daf0c2Sdrahn Returns the result of multiplying the single-precision floating-point values 974*08daf0c2Sdrahn `a' and `b'. The operation is performed according to the IEC/IEEE Standard 975*08daf0c2Sdrahn for Binary Floating-Point Arithmetic. 976*08daf0c2Sdrahn ------------------------------------------------------------------------------- 977*08daf0c2Sdrahn */ 978*08daf0c2Sdrahn float32 float32_mul( float32 a, float32 b ) 979*08daf0c2Sdrahn { 980*08daf0c2Sdrahn flag aSign, bSign, zSign; 981*08daf0c2Sdrahn int16 aExp, bExp, zExp; 982*08daf0c2Sdrahn bits32 aSig, bSig, zSig0, zSig1; 983*08daf0c2Sdrahn 984*08daf0c2Sdrahn aSig = extractFloat32Frac( a ); 985*08daf0c2Sdrahn aExp = extractFloat32Exp( a ); 986*08daf0c2Sdrahn aSign = extractFloat32Sign( a ); 987*08daf0c2Sdrahn bSig = extractFloat32Frac( b ); 988*08daf0c2Sdrahn bExp = extractFloat32Exp( b ); 989*08daf0c2Sdrahn bSign = extractFloat32Sign( b ); 990*08daf0c2Sdrahn zSign = aSign ^ bSign; 991*08daf0c2Sdrahn if ( aExp == 0xFF ) { 992*08daf0c2Sdrahn if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 993*08daf0c2Sdrahn return propagateFloat32NaN( a, b ); 994*08daf0c2Sdrahn } 995*08daf0c2Sdrahn if ( ( bExp | bSig ) == 0 ) { 996*08daf0c2Sdrahn float_raise( float_flag_invalid ); 997*08daf0c2Sdrahn return float32_default_nan; 998*08daf0c2Sdrahn } 999*08daf0c2Sdrahn return packFloat32( zSign, 0xFF, 0 ); 1000*08daf0c2Sdrahn } 1001*08daf0c2Sdrahn if ( bExp == 0xFF ) { 1002*08daf0c2Sdrahn if ( bSig ) return propagateFloat32NaN( a, b ); 1003*08daf0c2Sdrahn if ( ( aExp | aSig ) == 0 ) { 1004*08daf0c2Sdrahn float_raise( float_flag_invalid ); 1005*08daf0c2Sdrahn return float32_default_nan; 1006*08daf0c2Sdrahn } 1007*08daf0c2Sdrahn return packFloat32( zSign, 0xFF, 0 ); 1008*08daf0c2Sdrahn } 1009*08daf0c2Sdrahn if ( aExp == 0 ) { 1010*08daf0c2Sdrahn if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); 1011*08daf0c2Sdrahn normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1012*08daf0c2Sdrahn } 1013*08daf0c2Sdrahn if ( bExp == 0 ) { 1014*08daf0c2Sdrahn if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); 1015*08daf0c2Sdrahn normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1016*08daf0c2Sdrahn } 1017*08daf0c2Sdrahn zExp = aExp + bExp - 0x7F; 1018*08daf0c2Sdrahn aSig = ( aSig | 0x00800000 )<<7; 1019*08daf0c2Sdrahn bSig = ( bSig | 0x00800000 )<<8; 1020*08daf0c2Sdrahn mul32To64( aSig, bSig, &zSig0, &zSig1 ); 1021*08daf0c2Sdrahn zSig0 |= ( zSig1 != 0 ); 1022*08daf0c2Sdrahn if ( 0 <= (sbits32) ( zSig0<<1 ) ) { 1023*08daf0c2Sdrahn zSig0 <<= 1; 1024*08daf0c2Sdrahn --zExp; 1025*08daf0c2Sdrahn } 1026*08daf0c2Sdrahn return roundAndPackFloat32( zSign, zExp, zSig0 ); 1027*08daf0c2Sdrahn 1028*08daf0c2Sdrahn } 1029*08daf0c2Sdrahn 1030*08daf0c2Sdrahn /* 1031*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1032*08daf0c2Sdrahn Returns the result of dividing the single-precision floating-point value `a' 1033*08daf0c2Sdrahn by the corresponding value `b'. The operation is performed according to the 1034*08daf0c2Sdrahn IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1035*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1036*08daf0c2Sdrahn */ 1037*08daf0c2Sdrahn float32 float32_div( float32 a, float32 b ) 1038*08daf0c2Sdrahn { 1039*08daf0c2Sdrahn flag aSign, bSign, zSign; 1040*08daf0c2Sdrahn int16 aExp, bExp, zExp; 1041*08daf0c2Sdrahn bits32 aSig, bSig, zSig, rem0, rem1, term0, term1; 1042*08daf0c2Sdrahn 1043*08daf0c2Sdrahn aSig = extractFloat32Frac( a ); 1044*08daf0c2Sdrahn aExp = extractFloat32Exp( a ); 1045*08daf0c2Sdrahn aSign = extractFloat32Sign( a ); 1046*08daf0c2Sdrahn bSig = extractFloat32Frac( b ); 1047*08daf0c2Sdrahn bExp = extractFloat32Exp( b ); 1048*08daf0c2Sdrahn bSign = extractFloat32Sign( b ); 1049*08daf0c2Sdrahn zSign = aSign ^ bSign; 1050*08daf0c2Sdrahn if ( aExp == 0xFF ) { 1051*08daf0c2Sdrahn if ( aSig ) return propagateFloat32NaN( a, b ); 1052*08daf0c2Sdrahn if ( bExp == 0xFF ) { 1053*08daf0c2Sdrahn if ( bSig ) return propagateFloat32NaN( a, b ); 1054*08daf0c2Sdrahn float_raise( float_flag_invalid ); 1055*08daf0c2Sdrahn return float32_default_nan; 1056*08daf0c2Sdrahn } 1057*08daf0c2Sdrahn return packFloat32( zSign, 0xFF, 0 ); 1058*08daf0c2Sdrahn } 1059*08daf0c2Sdrahn if ( bExp == 0xFF ) { 1060*08daf0c2Sdrahn if ( bSig ) return propagateFloat32NaN( a, b ); 1061*08daf0c2Sdrahn return packFloat32( zSign, 0, 0 ); 1062*08daf0c2Sdrahn } 1063*08daf0c2Sdrahn if ( bExp == 0 ) { 1064*08daf0c2Sdrahn if ( bSig == 0 ) { 1065*08daf0c2Sdrahn if ( ( aExp | aSig ) == 0 ) { 1066*08daf0c2Sdrahn float_raise( float_flag_invalid ); 1067*08daf0c2Sdrahn return float32_default_nan; 1068*08daf0c2Sdrahn } 1069*08daf0c2Sdrahn float_raise( float_flag_divbyzero ); 1070*08daf0c2Sdrahn return packFloat32( zSign, 0xFF, 0 ); 1071*08daf0c2Sdrahn } 1072*08daf0c2Sdrahn normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1073*08daf0c2Sdrahn } 1074*08daf0c2Sdrahn if ( aExp == 0 ) { 1075*08daf0c2Sdrahn if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); 1076*08daf0c2Sdrahn normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1077*08daf0c2Sdrahn } 1078*08daf0c2Sdrahn zExp = aExp - bExp + 0x7D; 1079*08daf0c2Sdrahn aSig = ( aSig | 0x00800000 )<<7; 1080*08daf0c2Sdrahn bSig = ( bSig | 0x00800000 )<<8; 1081*08daf0c2Sdrahn if ( bSig <= ( aSig + aSig ) ) { 1082*08daf0c2Sdrahn aSig >>= 1; 1083*08daf0c2Sdrahn ++zExp; 1084*08daf0c2Sdrahn } 1085*08daf0c2Sdrahn zSig = estimateDiv64To32( aSig, 0, bSig ); 1086*08daf0c2Sdrahn if ( ( zSig & 0x3F ) <= 2 ) { 1087*08daf0c2Sdrahn mul32To64( bSig, zSig, &term0, &term1 ); 1088*08daf0c2Sdrahn sub64( aSig, 0, term0, term1, &rem0, &rem1 ); 1089*08daf0c2Sdrahn while ( (sbits32) rem0 < 0 ) { 1090*08daf0c2Sdrahn --zSig; 1091*08daf0c2Sdrahn add64( rem0, rem1, 0, bSig, &rem0, &rem1 ); 1092*08daf0c2Sdrahn } 1093*08daf0c2Sdrahn zSig |= ( rem1 != 0 ); 1094*08daf0c2Sdrahn } 1095*08daf0c2Sdrahn return roundAndPackFloat32( zSign, zExp, zSig ); 1096*08daf0c2Sdrahn 1097*08daf0c2Sdrahn } 1098*08daf0c2Sdrahn 1099*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC 1100*08daf0c2Sdrahn /* 1101*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1102*08daf0c2Sdrahn Returns the remainder of the single-precision floating-point value `a' 1103*08daf0c2Sdrahn with respect to the corresponding value `b'. The operation is performed 1104*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1105*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1106*08daf0c2Sdrahn */ 1107*08daf0c2Sdrahn float32 float32_rem( float32 a, float32 b ) 1108*08daf0c2Sdrahn { 1109*08daf0c2Sdrahn flag aSign, bSign, zSign; 1110*08daf0c2Sdrahn int16 aExp, bExp, expDiff; 1111*08daf0c2Sdrahn bits32 aSig, bSig, q, allZero, alternateASig; 1112*08daf0c2Sdrahn sbits32 sigMean; 1113*08daf0c2Sdrahn 1114*08daf0c2Sdrahn aSig = extractFloat32Frac( a ); 1115*08daf0c2Sdrahn aExp = extractFloat32Exp( a ); 1116*08daf0c2Sdrahn aSign = extractFloat32Sign( a ); 1117*08daf0c2Sdrahn bSig = extractFloat32Frac( b ); 1118*08daf0c2Sdrahn bExp = extractFloat32Exp( b ); 1119*08daf0c2Sdrahn bSign = extractFloat32Sign( b ); 1120*08daf0c2Sdrahn if ( aExp == 0xFF ) { 1121*08daf0c2Sdrahn if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 1122*08daf0c2Sdrahn return propagateFloat32NaN( a, b ); 1123*08daf0c2Sdrahn } 1124*08daf0c2Sdrahn float_raise( float_flag_invalid ); 1125*08daf0c2Sdrahn return float32_default_nan; 1126*08daf0c2Sdrahn } 1127*08daf0c2Sdrahn if ( bExp == 0xFF ) { 1128*08daf0c2Sdrahn if ( bSig ) return propagateFloat32NaN( a, b ); 1129*08daf0c2Sdrahn return a; 1130*08daf0c2Sdrahn } 1131*08daf0c2Sdrahn if ( bExp == 0 ) { 1132*08daf0c2Sdrahn if ( bSig == 0 ) { 1133*08daf0c2Sdrahn float_raise( float_flag_invalid ); 1134*08daf0c2Sdrahn return float32_default_nan; 1135*08daf0c2Sdrahn } 1136*08daf0c2Sdrahn normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1137*08daf0c2Sdrahn } 1138*08daf0c2Sdrahn if ( aExp == 0 ) { 1139*08daf0c2Sdrahn if ( aSig == 0 ) return a; 1140*08daf0c2Sdrahn normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1141*08daf0c2Sdrahn } 1142*08daf0c2Sdrahn expDiff = aExp - bExp; 1143*08daf0c2Sdrahn aSig = ( aSig | 0x00800000 )<<8; 1144*08daf0c2Sdrahn bSig = ( bSig | 0x00800000 )<<8; 1145*08daf0c2Sdrahn if ( expDiff < 0 ) { 1146*08daf0c2Sdrahn if ( expDiff < -1 ) return a; 1147*08daf0c2Sdrahn aSig >>= 1; 1148*08daf0c2Sdrahn } 1149*08daf0c2Sdrahn q = ( bSig <= aSig ); 1150*08daf0c2Sdrahn if ( q ) aSig -= bSig; 1151*08daf0c2Sdrahn expDiff -= 32; 1152*08daf0c2Sdrahn while ( 0 < expDiff ) { 1153*08daf0c2Sdrahn q = estimateDiv64To32( aSig, 0, bSig ); 1154*08daf0c2Sdrahn q = ( 2 < q ) ? q - 2 : 0; 1155*08daf0c2Sdrahn aSig = - ( ( bSig>>2 ) * q ); 1156*08daf0c2Sdrahn expDiff -= 30; 1157*08daf0c2Sdrahn } 1158*08daf0c2Sdrahn expDiff += 32; 1159*08daf0c2Sdrahn if ( 0 < expDiff ) { 1160*08daf0c2Sdrahn q = estimateDiv64To32( aSig, 0, bSig ); 1161*08daf0c2Sdrahn q = ( 2 < q ) ? q - 2 : 0; 1162*08daf0c2Sdrahn q >>= 32 - expDiff; 1163*08daf0c2Sdrahn bSig >>= 2; 1164*08daf0c2Sdrahn aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; 1165*08daf0c2Sdrahn } 1166*08daf0c2Sdrahn else { 1167*08daf0c2Sdrahn aSig >>= 2; 1168*08daf0c2Sdrahn bSig >>= 2; 1169*08daf0c2Sdrahn } 1170*08daf0c2Sdrahn do { 1171*08daf0c2Sdrahn alternateASig = aSig; 1172*08daf0c2Sdrahn ++q; 1173*08daf0c2Sdrahn aSig -= bSig; 1174*08daf0c2Sdrahn } while ( 0 <= (sbits32) aSig ); 1175*08daf0c2Sdrahn sigMean = aSig + alternateASig; 1176*08daf0c2Sdrahn if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { 1177*08daf0c2Sdrahn aSig = alternateASig; 1178*08daf0c2Sdrahn } 1179*08daf0c2Sdrahn zSign = ( (sbits32) aSig < 0 ); 1180*08daf0c2Sdrahn if ( zSign ) aSig = - aSig; 1181*08daf0c2Sdrahn return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); 1182*08daf0c2Sdrahn 1183*08daf0c2Sdrahn } 1184*08daf0c2Sdrahn #endif 1185*08daf0c2Sdrahn 1186*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC 1187*08daf0c2Sdrahn /* 1188*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1189*08daf0c2Sdrahn Returns the square root of the single-precision floating-point value `a'. 1190*08daf0c2Sdrahn The operation is performed according to the IEC/IEEE Standard for Binary 1191*08daf0c2Sdrahn Floating-Point Arithmetic. 1192*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1193*08daf0c2Sdrahn */ 1194*08daf0c2Sdrahn float32 float32_sqrt( float32 a ) 1195*08daf0c2Sdrahn { 1196*08daf0c2Sdrahn flag aSign; 1197*08daf0c2Sdrahn int16 aExp, zExp; 1198*08daf0c2Sdrahn bits32 aSig, zSig, rem0, rem1, term0, term1; 1199*08daf0c2Sdrahn 1200*08daf0c2Sdrahn aSig = extractFloat32Frac( a ); 1201*08daf0c2Sdrahn aExp = extractFloat32Exp( a ); 1202*08daf0c2Sdrahn aSign = extractFloat32Sign( a ); 1203*08daf0c2Sdrahn if ( aExp == 0xFF ) { 1204*08daf0c2Sdrahn if ( aSig ) return propagateFloat32NaN( a, 0 ); 1205*08daf0c2Sdrahn if ( ! aSign ) return a; 1206*08daf0c2Sdrahn float_raise( float_flag_invalid ); 1207*08daf0c2Sdrahn return float32_default_nan; 1208*08daf0c2Sdrahn } 1209*08daf0c2Sdrahn if ( aSign ) { 1210*08daf0c2Sdrahn if ( ( aExp | aSig ) == 0 ) return a; 1211*08daf0c2Sdrahn float_raise( float_flag_invalid ); 1212*08daf0c2Sdrahn return float32_default_nan; 1213*08daf0c2Sdrahn } 1214*08daf0c2Sdrahn if ( aExp == 0 ) { 1215*08daf0c2Sdrahn if ( aSig == 0 ) return 0; 1216*08daf0c2Sdrahn normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1217*08daf0c2Sdrahn } 1218*08daf0c2Sdrahn zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; 1219*08daf0c2Sdrahn aSig = ( aSig | 0x00800000 )<<8; 1220*08daf0c2Sdrahn zSig = estimateSqrt32( aExp, aSig ) + 2; 1221*08daf0c2Sdrahn if ( ( zSig & 0x7F ) <= 5 ) { 1222*08daf0c2Sdrahn if ( zSig < 2 ) { 1223*08daf0c2Sdrahn zSig = 0x7FFFFFFF; 1224*08daf0c2Sdrahn goto roundAndPack; 1225*08daf0c2Sdrahn } 1226*08daf0c2Sdrahn else { 1227*08daf0c2Sdrahn aSig >>= aExp & 1; 1228*08daf0c2Sdrahn mul32To64( zSig, zSig, &term0, &term1 ); 1229*08daf0c2Sdrahn sub64( aSig, 0, term0, term1, &rem0, &rem1 ); 1230*08daf0c2Sdrahn while ( (sbits32) rem0 < 0 ) { 1231*08daf0c2Sdrahn --zSig; 1232*08daf0c2Sdrahn shortShift64Left( 0, zSig, 1, &term0, &term1 ); 1233*08daf0c2Sdrahn term1 |= 1; 1234*08daf0c2Sdrahn add64( rem0, rem1, term0, term1, &rem0, &rem1 ); 1235*08daf0c2Sdrahn } 1236*08daf0c2Sdrahn zSig |= ( ( rem0 | rem1 ) != 0 ); 1237*08daf0c2Sdrahn } 1238*08daf0c2Sdrahn } 1239*08daf0c2Sdrahn shift32RightJamming( zSig, 1, &zSig ); 1240*08daf0c2Sdrahn roundAndPack: 1241*08daf0c2Sdrahn return roundAndPackFloat32( 0, zExp, zSig ); 1242*08daf0c2Sdrahn 1243*08daf0c2Sdrahn } 1244*08daf0c2Sdrahn #endif 1245*08daf0c2Sdrahn 1246*08daf0c2Sdrahn /* 1247*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1248*08daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is equal to 1249*08daf0c2Sdrahn the corresponding value `b', and 0 otherwise. The comparison is performed 1250*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1251*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1252*08daf0c2Sdrahn */ 1253*08daf0c2Sdrahn flag float32_eq( float32 a, float32 b ) 1254*08daf0c2Sdrahn { 1255*08daf0c2Sdrahn 1256*08daf0c2Sdrahn if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1257*08daf0c2Sdrahn || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1258*08daf0c2Sdrahn ) { 1259*08daf0c2Sdrahn if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 1260*08daf0c2Sdrahn float_raise( float_flag_invalid ); 1261*08daf0c2Sdrahn } 1262*08daf0c2Sdrahn return 0; 1263*08daf0c2Sdrahn } 1264*08daf0c2Sdrahn return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1265*08daf0c2Sdrahn 1266*08daf0c2Sdrahn } 1267*08daf0c2Sdrahn 1268*08daf0c2Sdrahn /* 1269*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1270*08daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is less than 1271*08daf0c2Sdrahn or equal to the corresponding value `b', and 0 otherwise. The comparison 1272*08daf0c2Sdrahn is performed according to the IEC/IEEE Standard for Binary Floating-Point 1273*08daf0c2Sdrahn Arithmetic. 1274*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1275*08daf0c2Sdrahn */ 1276*08daf0c2Sdrahn flag float32_le( float32 a, float32 b ) 1277*08daf0c2Sdrahn { 1278*08daf0c2Sdrahn flag aSign, bSign; 1279*08daf0c2Sdrahn 1280*08daf0c2Sdrahn if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1281*08daf0c2Sdrahn || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1282*08daf0c2Sdrahn ) { 1283*08daf0c2Sdrahn float_raise( float_flag_invalid ); 1284*08daf0c2Sdrahn return 0; 1285*08daf0c2Sdrahn } 1286*08daf0c2Sdrahn aSign = extractFloat32Sign( a ); 1287*08daf0c2Sdrahn bSign = extractFloat32Sign( b ); 1288*08daf0c2Sdrahn if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1289*08daf0c2Sdrahn return ( a == b ) || ( aSign ^ ( a < b ) ); 1290*08daf0c2Sdrahn 1291*08daf0c2Sdrahn } 1292*08daf0c2Sdrahn 1293*08daf0c2Sdrahn /* 1294*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1295*08daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is less than 1296*08daf0c2Sdrahn the corresponding value `b', and 0 otherwise. The comparison is performed 1297*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1298*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1299*08daf0c2Sdrahn */ 1300*08daf0c2Sdrahn flag float32_lt( float32 a, float32 b ) 1301*08daf0c2Sdrahn { 1302*08daf0c2Sdrahn flag aSign, bSign; 1303*08daf0c2Sdrahn 1304*08daf0c2Sdrahn if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1305*08daf0c2Sdrahn || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1306*08daf0c2Sdrahn ) { 1307*08daf0c2Sdrahn float_raise( float_flag_invalid ); 1308*08daf0c2Sdrahn return 0; 1309*08daf0c2Sdrahn } 1310*08daf0c2Sdrahn aSign = extractFloat32Sign( a ); 1311*08daf0c2Sdrahn bSign = extractFloat32Sign( b ); 1312*08daf0c2Sdrahn if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); 1313*08daf0c2Sdrahn return ( a != b ) && ( aSign ^ ( a < b ) ); 1314*08daf0c2Sdrahn 1315*08daf0c2Sdrahn } 1316*08daf0c2Sdrahn 1317*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1318*08daf0c2Sdrahn /* 1319*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1320*08daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is equal to 1321*08daf0c2Sdrahn the corresponding value `b', and 0 otherwise. The invalid exception is 1322*08daf0c2Sdrahn raised if either operand is a NaN. Otherwise, the comparison is performed 1323*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1324*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1325*08daf0c2Sdrahn */ 1326*08daf0c2Sdrahn flag float32_eq_signaling( float32 a, float32 b ) 1327*08daf0c2Sdrahn { 1328*08daf0c2Sdrahn 1329*08daf0c2Sdrahn if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1330*08daf0c2Sdrahn || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1331*08daf0c2Sdrahn ) { 1332*08daf0c2Sdrahn float_raise( float_flag_invalid ); 1333*08daf0c2Sdrahn return 0; 1334*08daf0c2Sdrahn } 1335*08daf0c2Sdrahn return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1336*08daf0c2Sdrahn 1337*08daf0c2Sdrahn } 1338*08daf0c2Sdrahn 1339*08daf0c2Sdrahn /* 1340*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1341*08daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is less than or 1342*08daf0c2Sdrahn equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 1343*08daf0c2Sdrahn cause an exception. Otherwise, the comparison is performed according to the 1344*08daf0c2Sdrahn IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1345*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1346*08daf0c2Sdrahn */ 1347*08daf0c2Sdrahn flag float32_le_quiet( float32 a, float32 b ) 1348*08daf0c2Sdrahn { 1349*08daf0c2Sdrahn flag aSign, bSign; 1350*08daf0c2Sdrahn int16 aExp, bExp; 1351*08daf0c2Sdrahn 1352*08daf0c2Sdrahn if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1353*08daf0c2Sdrahn || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1354*08daf0c2Sdrahn ) { 1355*08daf0c2Sdrahn if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 1356*08daf0c2Sdrahn float_raise( float_flag_invalid ); 1357*08daf0c2Sdrahn } 1358*08daf0c2Sdrahn return 0; 1359*08daf0c2Sdrahn } 1360*08daf0c2Sdrahn aSign = extractFloat32Sign( a ); 1361*08daf0c2Sdrahn bSign = extractFloat32Sign( b ); 1362*08daf0c2Sdrahn if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1363*08daf0c2Sdrahn return ( a == b ) || ( aSign ^ ( a < b ) ); 1364*08daf0c2Sdrahn 1365*08daf0c2Sdrahn } 1366*08daf0c2Sdrahn 1367*08daf0c2Sdrahn /* 1368*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1369*08daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is less than 1370*08daf0c2Sdrahn the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 1371*08daf0c2Sdrahn exception. Otherwise, the comparison is performed according to the IEC/IEEE 1372*08daf0c2Sdrahn Standard for Binary Floating-Point Arithmetic. 1373*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1374*08daf0c2Sdrahn */ 1375*08daf0c2Sdrahn flag float32_lt_quiet( float32 a, float32 b ) 1376*08daf0c2Sdrahn { 1377*08daf0c2Sdrahn flag aSign, bSign; 1378*08daf0c2Sdrahn 1379*08daf0c2Sdrahn if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1380*08daf0c2Sdrahn || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1381*08daf0c2Sdrahn ) { 1382*08daf0c2Sdrahn if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 1383*08daf0c2Sdrahn float_raise( float_flag_invalid ); 1384*08daf0c2Sdrahn } 1385*08daf0c2Sdrahn return 0; 1386*08daf0c2Sdrahn } 1387*08daf0c2Sdrahn aSign = extractFloat32Sign( a ); 1388*08daf0c2Sdrahn bSign = extractFloat32Sign( b ); 1389*08daf0c2Sdrahn if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); 1390*08daf0c2Sdrahn return ( a != b ) && ( aSign ^ ( a < b ) ); 1391*08daf0c2Sdrahn 1392*08daf0c2Sdrahn } 1393*08daf0c2Sdrahn #endif /* !SOFTFLOAT_FOR_GCC */ 1394*08daf0c2Sdrahn 1395*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1396*08daf0c2Sdrahn /* 1397*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1398*08daf0c2Sdrahn Returns the result of converting the double-precision floating-point value 1399*08daf0c2Sdrahn `a' to the 32-bit two's complement integer format. The conversion is 1400*08daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point 1401*08daf0c2Sdrahn Arithmetic---which means in particular that the conversion is rounded 1402*08daf0c2Sdrahn according to the current rounding mode. If `a' is a NaN, the largest 1403*08daf0c2Sdrahn positive integer is returned. Otherwise, if the conversion overflows, the 1404*08daf0c2Sdrahn largest integer with the same sign as `a' is returned. 1405*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1406*08daf0c2Sdrahn */ 1407*08daf0c2Sdrahn int32 float64_to_int32( float64 a ) 1408*08daf0c2Sdrahn { 1409*08daf0c2Sdrahn flag aSign; 1410*08daf0c2Sdrahn int16 aExp, shiftCount; 1411*08daf0c2Sdrahn bits32 aSig0, aSig1, absZ, aSigExtra; 1412*08daf0c2Sdrahn int32 z; 1413*08daf0c2Sdrahn int8 roundingMode; 1414*08daf0c2Sdrahn 1415*08daf0c2Sdrahn aSig1 = extractFloat64Frac1( a ); 1416*08daf0c2Sdrahn aSig0 = extractFloat64Frac0( a ); 1417*08daf0c2Sdrahn aExp = extractFloat64Exp( a ); 1418*08daf0c2Sdrahn aSign = extractFloat64Sign( a ); 1419*08daf0c2Sdrahn shiftCount = aExp - 0x413; 1420*08daf0c2Sdrahn if ( 0 <= shiftCount ) { 1421*08daf0c2Sdrahn if ( 0x41E < aExp ) { 1422*08daf0c2Sdrahn if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0; 1423*08daf0c2Sdrahn goto invalid; 1424*08daf0c2Sdrahn } 1425*08daf0c2Sdrahn shortShift64Left( 1426*08daf0c2Sdrahn aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra ); 1427*08daf0c2Sdrahn if ( 0x80000000 < absZ ) goto invalid; 1428*08daf0c2Sdrahn } 1429*08daf0c2Sdrahn else { 1430*08daf0c2Sdrahn aSig1 = ( aSig1 != 0 ); 1431*08daf0c2Sdrahn if ( aExp < 0x3FE ) { 1432*08daf0c2Sdrahn aSigExtra = aExp | aSig0 | aSig1; 1433*08daf0c2Sdrahn absZ = 0; 1434*08daf0c2Sdrahn } 1435*08daf0c2Sdrahn else { 1436*08daf0c2Sdrahn aSig0 |= 0x00100000; 1437*08daf0c2Sdrahn aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1; 1438*08daf0c2Sdrahn absZ = aSig0>>( - shiftCount ); 1439*08daf0c2Sdrahn } 1440*08daf0c2Sdrahn } 1441*08daf0c2Sdrahn roundingMode = float_rounding_mode; 1442*08daf0c2Sdrahn if ( roundingMode == float_round_nearest_even ) { 1443*08daf0c2Sdrahn if ( (sbits32) aSigExtra < 0 ) { 1444*08daf0c2Sdrahn ++absZ; 1445*08daf0c2Sdrahn if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1; 1446*08daf0c2Sdrahn } 1447*08daf0c2Sdrahn z = aSign ? - absZ : absZ; 1448*08daf0c2Sdrahn } 1449*08daf0c2Sdrahn else { 1450*08daf0c2Sdrahn aSigExtra = ( aSigExtra != 0 ); 1451*08daf0c2Sdrahn if ( aSign ) { 1452*08daf0c2Sdrahn z = - ( absZ 1453*08daf0c2Sdrahn + ( ( roundingMode == float_round_down ) & aSigExtra ) ); 1454*08daf0c2Sdrahn } 1455*08daf0c2Sdrahn else { 1456*08daf0c2Sdrahn z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra ); 1457*08daf0c2Sdrahn } 1458*08daf0c2Sdrahn } 1459*08daf0c2Sdrahn if ( ( aSign ^ ( z < 0 ) ) && z ) { 1460*08daf0c2Sdrahn invalid: 1461*08daf0c2Sdrahn float_raise( float_flag_invalid ); 1462*08daf0c2Sdrahn return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 1463*08daf0c2Sdrahn } 1464*08daf0c2Sdrahn if ( aSigExtra ) float_exception_flags |= float_flag_inexact; 1465*08daf0c2Sdrahn return z; 1466*08daf0c2Sdrahn 1467*08daf0c2Sdrahn } 1468*08daf0c2Sdrahn #endif /* !SOFTFLOAT_FOR_GCC */ 1469*08daf0c2Sdrahn 1470*08daf0c2Sdrahn /* 1471*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1472*08daf0c2Sdrahn Returns the result of converting the double-precision floating-point value 1473*08daf0c2Sdrahn `a' to the 32-bit two's complement integer format. The conversion is 1474*08daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point 1475*08daf0c2Sdrahn Arithmetic, except that the conversion is always rounded toward zero. 1476*08daf0c2Sdrahn If `a' is a NaN, the largest positive integer is returned. Otherwise, if 1477*08daf0c2Sdrahn the conversion overflows, the largest integer with the same sign as `a' is 1478*08daf0c2Sdrahn returned. 1479*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1480*08daf0c2Sdrahn */ 1481*08daf0c2Sdrahn int32 float64_to_int32_round_to_zero( float64 a ) 1482*08daf0c2Sdrahn { 1483*08daf0c2Sdrahn flag aSign; 1484*08daf0c2Sdrahn int16 aExp, shiftCount; 1485*08daf0c2Sdrahn bits32 aSig0, aSig1, absZ, aSigExtra; 1486*08daf0c2Sdrahn int32 z; 1487*08daf0c2Sdrahn 1488*08daf0c2Sdrahn aSig1 = extractFloat64Frac1( a ); 1489*08daf0c2Sdrahn aSig0 = extractFloat64Frac0( a ); 1490*08daf0c2Sdrahn aExp = extractFloat64Exp( a ); 1491*08daf0c2Sdrahn aSign = extractFloat64Sign( a ); 1492*08daf0c2Sdrahn shiftCount = aExp - 0x413; 1493*08daf0c2Sdrahn if ( 0 <= shiftCount ) { 1494*08daf0c2Sdrahn if ( 0x41E < aExp ) { 1495*08daf0c2Sdrahn if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0; 1496*08daf0c2Sdrahn goto invalid; 1497*08daf0c2Sdrahn } 1498*08daf0c2Sdrahn shortShift64Left( 1499*08daf0c2Sdrahn aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra ); 1500*08daf0c2Sdrahn } 1501*08daf0c2Sdrahn else { 1502*08daf0c2Sdrahn if ( aExp < 0x3FF ) { 1503*08daf0c2Sdrahn if ( aExp | aSig0 | aSig1 ) { 1504*08daf0c2Sdrahn float_exception_flags |= float_flag_inexact; 1505*08daf0c2Sdrahn } 1506*08daf0c2Sdrahn return 0; 1507*08daf0c2Sdrahn } 1508*08daf0c2Sdrahn aSig0 |= 0x00100000; 1509*08daf0c2Sdrahn aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1; 1510*08daf0c2Sdrahn absZ = aSig0>>( - shiftCount ); 1511*08daf0c2Sdrahn } 1512*08daf0c2Sdrahn z = aSign ? - absZ : absZ; 1513*08daf0c2Sdrahn if ( ( aSign ^ ( z < 0 ) ) && z ) { 1514*08daf0c2Sdrahn invalid: 1515*08daf0c2Sdrahn float_raise( float_flag_invalid ); 1516*08daf0c2Sdrahn return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 1517*08daf0c2Sdrahn } 1518*08daf0c2Sdrahn if ( aSigExtra ) float_exception_flags |= float_flag_inexact; 1519*08daf0c2Sdrahn return z; 1520*08daf0c2Sdrahn 1521*08daf0c2Sdrahn } 1522*08daf0c2Sdrahn 1523*08daf0c2Sdrahn /* 1524*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1525*08daf0c2Sdrahn Returns the result of converting the double-precision floating-point value 1526*08daf0c2Sdrahn `a' to the single-precision floating-point format. The conversion is 1527*08daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point 1528*08daf0c2Sdrahn Arithmetic. 1529*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1530*08daf0c2Sdrahn */ 1531*08daf0c2Sdrahn float32 float64_to_float32( float64 a ) 1532*08daf0c2Sdrahn { 1533*08daf0c2Sdrahn flag aSign; 1534*08daf0c2Sdrahn int16 aExp; 1535*08daf0c2Sdrahn bits32 aSig0, aSig1, zSig; 1536*08daf0c2Sdrahn bits32 allZero; 1537*08daf0c2Sdrahn 1538*08daf0c2Sdrahn aSig1 = extractFloat64Frac1( a ); 1539*08daf0c2Sdrahn aSig0 = extractFloat64Frac0( a ); 1540*08daf0c2Sdrahn aExp = extractFloat64Exp( a ); 1541*08daf0c2Sdrahn aSign = extractFloat64Sign( a ); 1542*08daf0c2Sdrahn if ( aExp == 0x7FF ) { 1543*08daf0c2Sdrahn if ( aSig0 | aSig1 ) { 1544*08daf0c2Sdrahn return commonNaNToFloat32( float64ToCommonNaN( a ) ); 1545*08daf0c2Sdrahn } 1546*08daf0c2Sdrahn return packFloat32( aSign, 0xFF, 0 ); 1547*08daf0c2Sdrahn } 1548*08daf0c2Sdrahn shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig ); 1549*08daf0c2Sdrahn if ( aExp ) zSig |= 0x40000000; 1550*08daf0c2Sdrahn return roundAndPackFloat32( aSign, aExp - 0x381, zSig ); 1551*08daf0c2Sdrahn 1552*08daf0c2Sdrahn } 1553*08daf0c2Sdrahn 1554*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC 1555*08daf0c2Sdrahn /* 1556*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1557*08daf0c2Sdrahn Rounds the double-precision floating-point value `a' to an integer, 1558*08daf0c2Sdrahn and returns the result as a double-precision floating-point value. The 1559*08daf0c2Sdrahn operation is performed according to the IEC/IEEE Standard for Binary 1560*08daf0c2Sdrahn Floating-Point Arithmetic. 1561*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1562*08daf0c2Sdrahn */ 1563*08daf0c2Sdrahn float64 float64_round_to_int( float64 a ) 1564*08daf0c2Sdrahn { 1565*08daf0c2Sdrahn flag aSign; 1566*08daf0c2Sdrahn int16 aExp; 1567*08daf0c2Sdrahn bits32 lastBitMask, roundBitsMask; 1568*08daf0c2Sdrahn int8 roundingMode; 1569*08daf0c2Sdrahn float64 z; 1570*08daf0c2Sdrahn 1571*08daf0c2Sdrahn aExp = extractFloat64Exp( a ); 1572*08daf0c2Sdrahn if ( 0x413 <= aExp ) { 1573*08daf0c2Sdrahn if ( 0x433 <= aExp ) { 1574*08daf0c2Sdrahn if ( ( aExp == 0x7FF ) 1575*08daf0c2Sdrahn && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) { 1576*08daf0c2Sdrahn return propagateFloat64NaN( a, a ); 1577*08daf0c2Sdrahn } 1578*08daf0c2Sdrahn return a; 1579*08daf0c2Sdrahn } 1580*08daf0c2Sdrahn lastBitMask = 1; 1581*08daf0c2Sdrahn lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1; 1582*08daf0c2Sdrahn roundBitsMask = lastBitMask - 1; 1583*08daf0c2Sdrahn z = a; 1584*08daf0c2Sdrahn roundingMode = float_rounding_mode; 1585*08daf0c2Sdrahn if ( roundingMode == float_round_nearest_even ) { 1586*08daf0c2Sdrahn if ( lastBitMask ) { 1587*08daf0c2Sdrahn add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); 1588*08daf0c2Sdrahn if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; 1589*08daf0c2Sdrahn } 1590*08daf0c2Sdrahn else { 1591*08daf0c2Sdrahn if ( (sbits32) z.low < 0 ) { 1592*08daf0c2Sdrahn ++z.high; 1593*08daf0c2Sdrahn if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1; 1594*08daf0c2Sdrahn } 1595*08daf0c2Sdrahn } 1596*08daf0c2Sdrahn } 1597*08daf0c2Sdrahn else if ( roundingMode != float_round_to_zero ) { 1598*08daf0c2Sdrahn if ( extractFloat64Sign( z ) 1599*08daf0c2Sdrahn ^ ( roundingMode == float_round_up ) ) { 1600*08daf0c2Sdrahn add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); 1601*08daf0c2Sdrahn } 1602*08daf0c2Sdrahn } 1603*08daf0c2Sdrahn z.low &= ~ roundBitsMask; 1604*08daf0c2Sdrahn } 1605*08daf0c2Sdrahn else { 1606*08daf0c2Sdrahn if ( aExp <= 0x3FE ) { 1607*08daf0c2Sdrahn if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a; 1608*08daf0c2Sdrahn float_exception_flags |= float_flag_inexact; 1609*08daf0c2Sdrahn aSign = extractFloat64Sign( a ); 1610*08daf0c2Sdrahn switch ( float_rounding_mode ) { 1611*08daf0c2Sdrahn case float_round_nearest_even: 1612*08daf0c2Sdrahn if ( ( aExp == 0x3FE ) 1613*08daf0c2Sdrahn && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) 1614*08daf0c2Sdrahn ) { 1615*08daf0c2Sdrahn return packFloat64( aSign, 0x3FF, 0, 0 ); 1616*08daf0c2Sdrahn } 1617*08daf0c2Sdrahn break; 1618*08daf0c2Sdrahn case float_round_down: 1619*08daf0c2Sdrahn return 1620*08daf0c2Sdrahn aSign ? packFloat64( 1, 0x3FF, 0, 0 ) 1621*08daf0c2Sdrahn : packFloat64( 0, 0, 0, 0 ); 1622*08daf0c2Sdrahn case float_round_up: 1623*08daf0c2Sdrahn return 1624*08daf0c2Sdrahn aSign ? packFloat64( 1, 0, 0, 0 ) 1625*08daf0c2Sdrahn : packFloat64( 0, 0x3FF, 0, 0 ); 1626*08daf0c2Sdrahn } 1627*08daf0c2Sdrahn return packFloat64( aSign, 0, 0, 0 ); 1628*08daf0c2Sdrahn } 1629*08daf0c2Sdrahn lastBitMask = 1; 1630*08daf0c2Sdrahn lastBitMask <<= 0x413 - aExp; 1631*08daf0c2Sdrahn roundBitsMask = lastBitMask - 1; 1632*08daf0c2Sdrahn z.low = 0; 1633*08daf0c2Sdrahn z.high = a.high; 1634*08daf0c2Sdrahn roundingMode = float_rounding_mode; 1635*08daf0c2Sdrahn if ( roundingMode == float_round_nearest_even ) { 1636*08daf0c2Sdrahn z.high += lastBitMask>>1; 1637*08daf0c2Sdrahn if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { 1638*08daf0c2Sdrahn z.high &= ~ lastBitMask; 1639*08daf0c2Sdrahn } 1640*08daf0c2Sdrahn } 1641*08daf0c2Sdrahn else if ( roundingMode != float_round_to_zero ) { 1642*08daf0c2Sdrahn if ( extractFloat64Sign( z ) 1643*08daf0c2Sdrahn ^ ( roundingMode == float_round_up ) ) { 1644*08daf0c2Sdrahn z.high |= ( a.low != 0 ); 1645*08daf0c2Sdrahn z.high += roundBitsMask; 1646*08daf0c2Sdrahn } 1647*08daf0c2Sdrahn } 1648*08daf0c2Sdrahn z.high &= ~ roundBitsMask; 1649*08daf0c2Sdrahn } 1650*08daf0c2Sdrahn if ( ( z.low != a.low ) || ( z.high != a.high ) ) { 1651*08daf0c2Sdrahn float_exception_flags |= float_flag_inexact; 1652*08daf0c2Sdrahn } 1653*08daf0c2Sdrahn return z; 1654*08daf0c2Sdrahn 1655*08daf0c2Sdrahn } 1656*08daf0c2Sdrahn #endif 1657*08daf0c2Sdrahn 1658*08daf0c2Sdrahn /* 1659*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1660*08daf0c2Sdrahn Returns the result of adding the absolute values of the double-precision 1661*08daf0c2Sdrahn floating-point values `a' and `b'. If `zSign' is 1, the sum is negated 1662*08daf0c2Sdrahn before being returned. `zSign' is ignored if the result is a NaN. 1663*08daf0c2Sdrahn The addition is performed according to the IEC/IEEE Standard for Binary 1664*08daf0c2Sdrahn Floating-Point Arithmetic. 1665*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1666*08daf0c2Sdrahn */ 1667*08daf0c2Sdrahn static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) 1668*08daf0c2Sdrahn { 1669*08daf0c2Sdrahn int16 aExp, bExp, zExp; 1670*08daf0c2Sdrahn bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 1671*08daf0c2Sdrahn int16 expDiff; 1672*08daf0c2Sdrahn 1673*08daf0c2Sdrahn aSig1 = extractFloat64Frac1( a ); 1674*08daf0c2Sdrahn aSig0 = extractFloat64Frac0( a ); 1675*08daf0c2Sdrahn aExp = extractFloat64Exp( a ); 1676*08daf0c2Sdrahn bSig1 = extractFloat64Frac1( b ); 1677*08daf0c2Sdrahn bSig0 = extractFloat64Frac0( b ); 1678*08daf0c2Sdrahn bExp = extractFloat64Exp( b ); 1679*08daf0c2Sdrahn expDiff = aExp - bExp; 1680*08daf0c2Sdrahn if ( 0 < expDiff ) { 1681*08daf0c2Sdrahn if ( aExp == 0x7FF ) { 1682*08daf0c2Sdrahn if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); 1683*08daf0c2Sdrahn return a; 1684*08daf0c2Sdrahn } 1685*08daf0c2Sdrahn if ( bExp == 0 ) { 1686*08daf0c2Sdrahn --expDiff; 1687*08daf0c2Sdrahn } 1688*08daf0c2Sdrahn else { 1689*08daf0c2Sdrahn bSig0 |= 0x00100000; 1690*08daf0c2Sdrahn } 1691*08daf0c2Sdrahn shift64ExtraRightJamming( 1692*08daf0c2Sdrahn bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); 1693*08daf0c2Sdrahn zExp = aExp; 1694*08daf0c2Sdrahn } 1695*08daf0c2Sdrahn else if ( expDiff < 0 ) { 1696*08daf0c2Sdrahn if ( bExp == 0x7FF ) { 1697*08daf0c2Sdrahn if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1698*08daf0c2Sdrahn return packFloat64( zSign, 0x7FF, 0, 0 ); 1699*08daf0c2Sdrahn } 1700*08daf0c2Sdrahn if ( aExp == 0 ) { 1701*08daf0c2Sdrahn ++expDiff; 1702*08daf0c2Sdrahn } 1703*08daf0c2Sdrahn else { 1704*08daf0c2Sdrahn aSig0 |= 0x00100000; 1705*08daf0c2Sdrahn } 1706*08daf0c2Sdrahn shift64ExtraRightJamming( 1707*08daf0c2Sdrahn aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); 1708*08daf0c2Sdrahn zExp = bExp; 1709*08daf0c2Sdrahn } 1710*08daf0c2Sdrahn else { 1711*08daf0c2Sdrahn if ( aExp == 0x7FF ) { 1712*08daf0c2Sdrahn if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 1713*08daf0c2Sdrahn return propagateFloat64NaN( a, b ); 1714*08daf0c2Sdrahn } 1715*08daf0c2Sdrahn return a; 1716*08daf0c2Sdrahn } 1717*08daf0c2Sdrahn add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 1718*08daf0c2Sdrahn if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 ); 1719*08daf0c2Sdrahn zSig2 = 0; 1720*08daf0c2Sdrahn zSig0 |= 0x00200000; 1721*08daf0c2Sdrahn zExp = aExp; 1722*08daf0c2Sdrahn goto shiftRight1; 1723*08daf0c2Sdrahn } 1724*08daf0c2Sdrahn aSig0 |= 0x00100000; 1725*08daf0c2Sdrahn add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 1726*08daf0c2Sdrahn --zExp; 1727*08daf0c2Sdrahn if ( zSig0 < 0x00200000 ) goto roundAndPack; 1728*08daf0c2Sdrahn ++zExp; 1729*08daf0c2Sdrahn shiftRight1: 1730*08daf0c2Sdrahn shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 1731*08daf0c2Sdrahn roundAndPack: 1732*08daf0c2Sdrahn return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 1733*08daf0c2Sdrahn 1734*08daf0c2Sdrahn } 1735*08daf0c2Sdrahn 1736*08daf0c2Sdrahn /* 1737*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1738*08daf0c2Sdrahn Returns the result of subtracting the absolute values of the double- 1739*08daf0c2Sdrahn precision floating-point values `a' and `b'. If `zSign' is 1, the 1740*08daf0c2Sdrahn difference is negated before being returned. `zSign' is ignored if the 1741*08daf0c2Sdrahn result is a NaN. The subtraction is performed according to the IEC/IEEE 1742*08daf0c2Sdrahn Standard for Binary Floating-Point Arithmetic. 1743*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1744*08daf0c2Sdrahn */ 1745*08daf0c2Sdrahn static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) 1746*08daf0c2Sdrahn { 1747*08daf0c2Sdrahn int16 aExp, bExp, zExp; 1748*08daf0c2Sdrahn bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; 1749*08daf0c2Sdrahn int16 expDiff; 1750*08daf0c2Sdrahn 1751*08daf0c2Sdrahn aSig1 = extractFloat64Frac1( a ); 1752*08daf0c2Sdrahn aSig0 = extractFloat64Frac0( a ); 1753*08daf0c2Sdrahn aExp = extractFloat64Exp( a ); 1754*08daf0c2Sdrahn bSig1 = extractFloat64Frac1( b ); 1755*08daf0c2Sdrahn bSig0 = extractFloat64Frac0( b ); 1756*08daf0c2Sdrahn bExp = extractFloat64Exp( b ); 1757*08daf0c2Sdrahn expDiff = aExp - bExp; 1758*08daf0c2Sdrahn shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 ); 1759*08daf0c2Sdrahn shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 ); 1760*08daf0c2Sdrahn if ( 0 < expDiff ) goto aExpBigger; 1761*08daf0c2Sdrahn if ( expDiff < 0 ) goto bExpBigger; 1762*08daf0c2Sdrahn if ( aExp == 0x7FF ) { 1763*08daf0c2Sdrahn if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 1764*08daf0c2Sdrahn return propagateFloat64NaN( a, b ); 1765*08daf0c2Sdrahn } 1766*08daf0c2Sdrahn float_raise( float_flag_invalid ); 1767*08daf0c2Sdrahn return float64_default_nan; 1768*08daf0c2Sdrahn } 1769*08daf0c2Sdrahn if ( aExp == 0 ) { 1770*08daf0c2Sdrahn aExp = 1; 1771*08daf0c2Sdrahn bExp = 1; 1772*08daf0c2Sdrahn } 1773*08daf0c2Sdrahn if ( bSig0 < aSig0 ) goto aBigger; 1774*08daf0c2Sdrahn if ( aSig0 < bSig0 ) goto bBigger; 1775*08daf0c2Sdrahn if ( bSig1 < aSig1 ) goto aBigger; 1776*08daf0c2Sdrahn if ( aSig1 < bSig1 ) goto bBigger; 1777*08daf0c2Sdrahn return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 ); 1778*08daf0c2Sdrahn bExpBigger: 1779*08daf0c2Sdrahn if ( bExp == 0x7FF ) { 1780*08daf0c2Sdrahn if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1781*08daf0c2Sdrahn return packFloat64( zSign ^ 1, 0x7FF, 0, 0 ); 1782*08daf0c2Sdrahn } 1783*08daf0c2Sdrahn if ( aExp == 0 ) { 1784*08daf0c2Sdrahn ++expDiff; 1785*08daf0c2Sdrahn } 1786*08daf0c2Sdrahn else { 1787*08daf0c2Sdrahn aSig0 |= 0x40000000; 1788*08daf0c2Sdrahn } 1789*08daf0c2Sdrahn shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); 1790*08daf0c2Sdrahn bSig0 |= 0x40000000; 1791*08daf0c2Sdrahn bBigger: 1792*08daf0c2Sdrahn sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); 1793*08daf0c2Sdrahn zExp = bExp; 1794*08daf0c2Sdrahn zSign ^= 1; 1795*08daf0c2Sdrahn goto normalizeRoundAndPack; 1796*08daf0c2Sdrahn aExpBigger: 1797*08daf0c2Sdrahn if ( aExp == 0x7FF ) { 1798*08daf0c2Sdrahn if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); 1799*08daf0c2Sdrahn return a; 1800*08daf0c2Sdrahn } 1801*08daf0c2Sdrahn if ( bExp == 0 ) { 1802*08daf0c2Sdrahn --expDiff; 1803*08daf0c2Sdrahn } 1804*08daf0c2Sdrahn else { 1805*08daf0c2Sdrahn bSig0 |= 0x40000000; 1806*08daf0c2Sdrahn } 1807*08daf0c2Sdrahn shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); 1808*08daf0c2Sdrahn aSig0 |= 0x40000000; 1809*08daf0c2Sdrahn aBigger: 1810*08daf0c2Sdrahn sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 1811*08daf0c2Sdrahn zExp = aExp; 1812*08daf0c2Sdrahn normalizeRoundAndPack: 1813*08daf0c2Sdrahn --zExp; 1814*08daf0c2Sdrahn return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 ); 1815*08daf0c2Sdrahn 1816*08daf0c2Sdrahn } 1817*08daf0c2Sdrahn 1818*08daf0c2Sdrahn /* 1819*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1820*08daf0c2Sdrahn Returns the result of adding the double-precision floating-point values `a' 1821*08daf0c2Sdrahn and `b'. The operation is performed according to the IEC/IEEE Standard for 1822*08daf0c2Sdrahn Binary Floating-Point Arithmetic. 1823*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1824*08daf0c2Sdrahn */ 1825*08daf0c2Sdrahn float64 float64_add( float64 a, float64 b ) 1826*08daf0c2Sdrahn { 1827*08daf0c2Sdrahn flag aSign, bSign; 1828*08daf0c2Sdrahn 1829*08daf0c2Sdrahn aSign = extractFloat64Sign( a ); 1830*08daf0c2Sdrahn bSign = extractFloat64Sign( b ); 1831*08daf0c2Sdrahn if ( aSign == bSign ) { 1832*08daf0c2Sdrahn return addFloat64Sigs( a, b, aSign ); 1833*08daf0c2Sdrahn } 1834*08daf0c2Sdrahn else { 1835*08daf0c2Sdrahn return subFloat64Sigs( a, b, aSign ); 1836*08daf0c2Sdrahn } 1837*08daf0c2Sdrahn 1838*08daf0c2Sdrahn } 1839*08daf0c2Sdrahn 1840*08daf0c2Sdrahn /* 1841*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1842*08daf0c2Sdrahn Returns the result of subtracting the double-precision floating-point values 1843*08daf0c2Sdrahn `a' and `b'. The operation is performed according to the IEC/IEEE Standard 1844*08daf0c2Sdrahn for Binary Floating-Point Arithmetic. 1845*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1846*08daf0c2Sdrahn */ 1847*08daf0c2Sdrahn float64 float64_sub( float64 a, float64 b ) 1848*08daf0c2Sdrahn { 1849*08daf0c2Sdrahn flag aSign, bSign; 1850*08daf0c2Sdrahn 1851*08daf0c2Sdrahn aSign = extractFloat64Sign( a ); 1852*08daf0c2Sdrahn bSign = extractFloat64Sign( b ); 1853*08daf0c2Sdrahn if ( aSign == bSign ) { 1854*08daf0c2Sdrahn return subFloat64Sigs( a, b, aSign ); 1855*08daf0c2Sdrahn } 1856*08daf0c2Sdrahn else { 1857*08daf0c2Sdrahn return addFloat64Sigs( a, b, aSign ); 1858*08daf0c2Sdrahn } 1859*08daf0c2Sdrahn 1860*08daf0c2Sdrahn } 1861*08daf0c2Sdrahn 1862*08daf0c2Sdrahn /* 1863*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1864*08daf0c2Sdrahn Returns the result of multiplying the double-precision floating-point values 1865*08daf0c2Sdrahn `a' and `b'. The operation is performed according to the IEC/IEEE Standard 1866*08daf0c2Sdrahn for Binary Floating-Point Arithmetic. 1867*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1868*08daf0c2Sdrahn */ 1869*08daf0c2Sdrahn float64 float64_mul( float64 a, float64 b ) 1870*08daf0c2Sdrahn { 1871*08daf0c2Sdrahn flag aSign, bSign, zSign; 1872*08daf0c2Sdrahn int16 aExp, bExp, zExp; 1873*08daf0c2Sdrahn bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; 1874*08daf0c2Sdrahn 1875*08daf0c2Sdrahn aSig1 = extractFloat64Frac1( a ); 1876*08daf0c2Sdrahn aSig0 = extractFloat64Frac0( a ); 1877*08daf0c2Sdrahn aExp = extractFloat64Exp( a ); 1878*08daf0c2Sdrahn aSign = extractFloat64Sign( a ); 1879*08daf0c2Sdrahn bSig1 = extractFloat64Frac1( b ); 1880*08daf0c2Sdrahn bSig0 = extractFloat64Frac0( b ); 1881*08daf0c2Sdrahn bExp = extractFloat64Exp( b ); 1882*08daf0c2Sdrahn bSign = extractFloat64Sign( b ); 1883*08daf0c2Sdrahn zSign = aSign ^ bSign; 1884*08daf0c2Sdrahn if ( aExp == 0x7FF ) { 1885*08daf0c2Sdrahn if ( ( aSig0 | aSig1 ) 1886*08daf0c2Sdrahn || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) { 1887*08daf0c2Sdrahn return propagateFloat64NaN( a, b ); 1888*08daf0c2Sdrahn } 1889*08daf0c2Sdrahn if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; 1890*08daf0c2Sdrahn return packFloat64( zSign, 0x7FF, 0, 0 ); 1891*08daf0c2Sdrahn } 1892*08daf0c2Sdrahn if ( bExp == 0x7FF ) { 1893*08daf0c2Sdrahn if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1894*08daf0c2Sdrahn if ( ( aExp | aSig0 | aSig1 ) == 0 ) { 1895*08daf0c2Sdrahn invalid: 1896*08daf0c2Sdrahn float_raise( float_flag_invalid ); 1897*08daf0c2Sdrahn return float64_default_nan; 1898*08daf0c2Sdrahn } 1899*08daf0c2Sdrahn return packFloat64( zSign, 0x7FF, 0, 0 ); 1900*08daf0c2Sdrahn } 1901*08daf0c2Sdrahn if ( aExp == 0 ) { 1902*08daf0c2Sdrahn if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); 1903*08daf0c2Sdrahn normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 1904*08daf0c2Sdrahn } 1905*08daf0c2Sdrahn if ( bExp == 0 ) { 1906*08daf0c2Sdrahn if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); 1907*08daf0c2Sdrahn normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 1908*08daf0c2Sdrahn } 1909*08daf0c2Sdrahn zExp = aExp + bExp - 0x400; 1910*08daf0c2Sdrahn aSig0 |= 0x00100000; 1911*08daf0c2Sdrahn shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 ); 1912*08daf0c2Sdrahn mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); 1913*08daf0c2Sdrahn add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); 1914*08daf0c2Sdrahn zSig2 |= ( zSig3 != 0 ); 1915*08daf0c2Sdrahn if ( 0x00200000 <= zSig0 ) { 1916*08daf0c2Sdrahn shift64ExtraRightJamming( 1917*08daf0c2Sdrahn zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 1918*08daf0c2Sdrahn ++zExp; 1919*08daf0c2Sdrahn } 1920*08daf0c2Sdrahn return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 1921*08daf0c2Sdrahn 1922*08daf0c2Sdrahn } 1923*08daf0c2Sdrahn 1924*08daf0c2Sdrahn /* 1925*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1926*08daf0c2Sdrahn Returns the result of dividing the double-precision floating-point value `a' 1927*08daf0c2Sdrahn by the corresponding value `b'. The operation is performed according to the 1928*08daf0c2Sdrahn IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1929*08daf0c2Sdrahn ------------------------------------------------------------------------------- 1930*08daf0c2Sdrahn */ 1931*08daf0c2Sdrahn float64 float64_div( float64 a, float64 b ) 1932*08daf0c2Sdrahn { 1933*08daf0c2Sdrahn flag aSign, bSign, zSign; 1934*08daf0c2Sdrahn int16 aExp, bExp, zExp; 1935*08daf0c2Sdrahn bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 1936*08daf0c2Sdrahn bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 1937*08daf0c2Sdrahn 1938*08daf0c2Sdrahn aSig1 = extractFloat64Frac1( a ); 1939*08daf0c2Sdrahn aSig0 = extractFloat64Frac0( a ); 1940*08daf0c2Sdrahn aExp = extractFloat64Exp( a ); 1941*08daf0c2Sdrahn aSign = extractFloat64Sign( a ); 1942*08daf0c2Sdrahn bSig1 = extractFloat64Frac1( b ); 1943*08daf0c2Sdrahn bSig0 = extractFloat64Frac0( b ); 1944*08daf0c2Sdrahn bExp = extractFloat64Exp( b ); 1945*08daf0c2Sdrahn bSign = extractFloat64Sign( b ); 1946*08daf0c2Sdrahn zSign = aSign ^ bSign; 1947*08daf0c2Sdrahn if ( aExp == 0x7FF ) { 1948*08daf0c2Sdrahn if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); 1949*08daf0c2Sdrahn if ( bExp == 0x7FF ) { 1950*08daf0c2Sdrahn if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1951*08daf0c2Sdrahn goto invalid; 1952*08daf0c2Sdrahn } 1953*08daf0c2Sdrahn return packFloat64( zSign, 0x7FF, 0, 0 ); 1954*08daf0c2Sdrahn } 1955*08daf0c2Sdrahn if ( bExp == 0x7FF ) { 1956*08daf0c2Sdrahn if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1957*08daf0c2Sdrahn return packFloat64( zSign, 0, 0, 0 ); 1958*08daf0c2Sdrahn } 1959*08daf0c2Sdrahn if ( bExp == 0 ) { 1960*08daf0c2Sdrahn if ( ( bSig0 | bSig1 ) == 0 ) { 1961*08daf0c2Sdrahn if ( ( aExp | aSig0 | aSig1 ) == 0 ) { 1962*08daf0c2Sdrahn invalid: 1963*08daf0c2Sdrahn float_raise( float_flag_invalid ); 1964*08daf0c2Sdrahn return float64_default_nan; 1965*08daf0c2Sdrahn } 1966*08daf0c2Sdrahn float_raise( float_flag_divbyzero ); 1967*08daf0c2Sdrahn return packFloat64( zSign, 0x7FF, 0, 0 ); 1968*08daf0c2Sdrahn } 1969*08daf0c2Sdrahn normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 1970*08daf0c2Sdrahn } 1971*08daf0c2Sdrahn if ( aExp == 0 ) { 1972*08daf0c2Sdrahn if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); 1973*08daf0c2Sdrahn normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 1974*08daf0c2Sdrahn } 1975*08daf0c2Sdrahn zExp = aExp - bExp + 0x3FD; 1976*08daf0c2Sdrahn shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 ); 1977*08daf0c2Sdrahn shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 ); 1978*08daf0c2Sdrahn if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) { 1979*08daf0c2Sdrahn shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); 1980*08daf0c2Sdrahn ++zExp; 1981*08daf0c2Sdrahn } 1982*08daf0c2Sdrahn zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 ); 1983*08daf0c2Sdrahn mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); 1984*08daf0c2Sdrahn sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); 1985*08daf0c2Sdrahn while ( (sbits32) rem0 < 0 ) { 1986*08daf0c2Sdrahn --zSig0; 1987*08daf0c2Sdrahn add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); 1988*08daf0c2Sdrahn } 1989*08daf0c2Sdrahn zSig1 = estimateDiv64To32( rem1, rem2, bSig0 ); 1990*08daf0c2Sdrahn if ( ( zSig1 & 0x3FF ) <= 4 ) { 1991*08daf0c2Sdrahn mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); 1992*08daf0c2Sdrahn sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); 1993*08daf0c2Sdrahn while ( (sbits32) rem1 < 0 ) { 1994*08daf0c2Sdrahn --zSig1; 1995*08daf0c2Sdrahn add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); 1996*08daf0c2Sdrahn } 1997*08daf0c2Sdrahn zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 1998*08daf0c2Sdrahn } 1999*08daf0c2Sdrahn shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 ); 2000*08daf0c2Sdrahn return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 2001*08daf0c2Sdrahn 2002*08daf0c2Sdrahn } 2003*08daf0c2Sdrahn 2004*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC 2005*08daf0c2Sdrahn /* 2006*08daf0c2Sdrahn ------------------------------------------------------------------------------- 2007*08daf0c2Sdrahn Returns the remainder of the double-precision floating-point value `a' 2008*08daf0c2Sdrahn with respect to the corresponding value `b'. The operation is performed 2009*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2010*08daf0c2Sdrahn ------------------------------------------------------------------------------- 2011*08daf0c2Sdrahn */ 2012*08daf0c2Sdrahn float64 float64_rem( float64 a, float64 b ) 2013*08daf0c2Sdrahn { 2014*08daf0c2Sdrahn flag aSign, bSign, zSign; 2015*08daf0c2Sdrahn int16 aExp, bExp, expDiff; 2016*08daf0c2Sdrahn bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; 2017*08daf0c2Sdrahn bits32 allZero, alternateASig0, alternateASig1, sigMean1; 2018*08daf0c2Sdrahn sbits32 sigMean0; 2019*08daf0c2Sdrahn float64 z; 2020*08daf0c2Sdrahn 2021*08daf0c2Sdrahn aSig1 = extractFloat64Frac1( a ); 2022*08daf0c2Sdrahn aSig0 = extractFloat64Frac0( a ); 2023*08daf0c2Sdrahn aExp = extractFloat64Exp( a ); 2024*08daf0c2Sdrahn aSign = extractFloat64Sign( a ); 2025*08daf0c2Sdrahn bSig1 = extractFloat64Frac1( b ); 2026*08daf0c2Sdrahn bSig0 = extractFloat64Frac0( b ); 2027*08daf0c2Sdrahn bExp = extractFloat64Exp( b ); 2028*08daf0c2Sdrahn bSign = extractFloat64Sign( b ); 2029*08daf0c2Sdrahn if ( aExp == 0x7FF ) { 2030*08daf0c2Sdrahn if ( ( aSig0 | aSig1 ) 2031*08daf0c2Sdrahn || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) { 2032*08daf0c2Sdrahn return propagateFloat64NaN( a, b ); 2033*08daf0c2Sdrahn } 2034*08daf0c2Sdrahn goto invalid; 2035*08daf0c2Sdrahn } 2036*08daf0c2Sdrahn if ( bExp == 0x7FF ) { 2037*08daf0c2Sdrahn if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 2038*08daf0c2Sdrahn return a; 2039*08daf0c2Sdrahn } 2040*08daf0c2Sdrahn if ( bExp == 0 ) { 2041*08daf0c2Sdrahn if ( ( bSig0 | bSig1 ) == 0 ) { 2042*08daf0c2Sdrahn invalid: 2043*08daf0c2Sdrahn float_raise( float_flag_invalid ); 2044*08daf0c2Sdrahn return float64_default_nan; 2045*08daf0c2Sdrahn } 2046*08daf0c2Sdrahn normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 2047*08daf0c2Sdrahn } 2048*08daf0c2Sdrahn if ( aExp == 0 ) { 2049*08daf0c2Sdrahn if ( ( aSig0 | aSig1 ) == 0 ) return a; 2050*08daf0c2Sdrahn normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 2051*08daf0c2Sdrahn } 2052*08daf0c2Sdrahn expDiff = aExp - bExp; 2053*08daf0c2Sdrahn if ( expDiff < -1 ) return a; 2054*08daf0c2Sdrahn shortShift64Left( 2055*08daf0c2Sdrahn aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 ); 2056*08daf0c2Sdrahn shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 ); 2057*08daf0c2Sdrahn q = le64( bSig0, bSig1, aSig0, aSig1 ); 2058*08daf0c2Sdrahn if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); 2059*08daf0c2Sdrahn expDiff -= 32; 2060*08daf0c2Sdrahn while ( 0 < expDiff ) { 2061*08daf0c2Sdrahn q = estimateDiv64To32( aSig0, aSig1, bSig0 ); 2062*08daf0c2Sdrahn q = ( 4 < q ) ? q - 4 : 0; 2063*08daf0c2Sdrahn mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 ); 2064*08daf0c2Sdrahn shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero ); 2065*08daf0c2Sdrahn shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero ); 2066*08daf0c2Sdrahn sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 ); 2067*08daf0c2Sdrahn expDiff -= 29; 2068*08daf0c2Sdrahn } 2069*08daf0c2Sdrahn if ( -32 < expDiff ) { 2070*08daf0c2Sdrahn q = estimateDiv64To32( aSig0, aSig1, bSig0 ); 2071*08daf0c2Sdrahn q = ( 4 < q ) ? q - 4 : 0; 2072*08daf0c2Sdrahn q >>= - expDiff; 2073*08daf0c2Sdrahn shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 ); 2074*08daf0c2Sdrahn expDiff += 24; 2075*08daf0c2Sdrahn if ( expDiff < 0 ) { 2076*08daf0c2Sdrahn shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); 2077*08daf0c2Sdrahn } 2078*08daf0c2Sdrahn else { 2079*08daf0c2Sdrahn shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); 2080*08daf0c2Sdrahn } 2081*08daf0c2Sdrahn mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 ); 2082*08daf0c2Sdrahn sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); 2083*08daf0c2Sdrahn } 2084*08daf0c2Sdrahn else { 2085*08daf0c2Sdrahn shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 ); 2086*08daf0c2Sdrahn shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 ); 2087*08daf0c2Sdrahn } 2088*08daf0c2Sdrahn do { 2089*08daf0c2Sdrahn alternateASig0 = aSig0; 2090*08daf0c2Sdrahn alternateASig1 = aSig1; 2091*08daf0c2Sdrahn ++q; 2092*08daf0c2Sdrahn sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); 2093*08daf0c2Sdrahn } while ( 0 <= (sbits32) aSig0 ); 2094*08daf0c2Sdrahn add64( 2095*08daf0c2Sdrahn aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 ); 2096*08daf0c2Sdrahn if ( ( sigMean0 < 0 ) 2097*08daf0c2Sdrahn || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { 2098*08daf0c2Sdrahn aSig0 = alternateASig0; 2099*08daf0c2Sdrahn aSig1 = alternateASig1; 2100*08daf0c2Sdrahn } 2101*08daf0c2Sdrahn zSign = ( (sbits32) aSig0 < 0 ); 2102*08daf0c2Sdrahn if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); 2103*08daf0c2Sdrahn return 2104*08daf0c2Sdrahn normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 ); 2105*08daf0c2Sdrahn 2106*08daf0c2Sdrahn } 2107*08daf0c2Sdrahn #endif 2108*08daf0c2Sdrahn 2109*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC 2110*08daf0c2Sdrahn /* 2111*08daf0c2Sdrahn ------------------------------------------------------------------------------- 2112*08daf0c2Sdrahn Returns the square root of the double-precision floating-point value `a'. 2113*08daf0c2Sdrahn The operation is performed according to the IEC/IEEE Standard for Binary 2114*08daf0c2Sdrahn Floating-Point Arithmetic. 2115*08daf0c2Sdrahn ------------------------------------------------------------------------------- 2116*08daf0c2Sdrahn */ 2117*08daf0c2Sdrahn float64 float64_sqrt( float64 a ) 2118*08daf0c2Sdrahn { 2119*08daf0c2Sdrahn flag aSign; 2120*08daf0c2Sdrahn int16 aExp, zExp; 2121*08daf0c2Sdrahn bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; 2122*08daf0c2Sdrahn bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 2123*08daf0c2Sdrahn float64 z; 2124*08daf0c2Sdrahn 2125*08daf0c2Sdrahn aSig1 = extractFloat64Frac1( a ); 2126*08daf0c2Sdrahn aSig0 = extractFloat64Frac0( a ); 2127*08daf0c2Sdrahn aExp = extractFloat64Exp( a ); 2128*08daf0c2Sdrahn aSign = extractFloat64Sign( a ); 2129*08daf0c2Sdrahn if ( aExp == 0x7FF ) { 2130*08daf0c2Sdrahn if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a ); 2131*08daf0c2Sdrahn if ( ! aSign ) return a; 2132*08daf0c2Sdrahn goto invalid; 2133*08daf0c2Sdrahn } 2134*08daf0c2Sdrahn if ( aSign ) { 2135*08daf0c2Sdrahn if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; 2136*08daf0c2Sdrahn invalid: 2137*08daf0c2Sdrahn float_raise( float_flag_invalid ); 2138*08daf0c2Sdrahn return float64_default_nan; 2139*08daf0c2Sdrahn } 2140*08daf0c2Sdrahn if ( aExp == 0 ) { 2141*08daf0c2Sdrahn if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 ); 2142*08daf0c2Sdrahn normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 2143*08daf0c2Sdrahn } 2144*08daf0c2Sdrahn zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; 2145*08daf0c2Sdrahn aSig0 |= 0x00100000; 2146*08daf0c2Sdrahn shortShift64Left( aSig0, aSig1, 11, &term0, &term1 ); 2147*08daf0c2Sdrahn zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1; 2148*08daf0c2Sdrahn if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF; 2149*08daf0c2Sdrahn doubleZSig0 = zSig0 + zSig0; 2150*08daf0c2Sdrahn shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 ); 2151*08daf0c2Sdrahn mul32To64( zSig0, zSig0, &term0, &term1 ); 2152*08daf0c2Sdrahn sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 ); 2153*08daf0c2Sdrahn while ( (sbits32) rem0 < 0 ) { 2154*08daf0c2Sdrahn --zSig0; 2155*08daf0c2Sdrahn doubleZSig0 -= 2; 2156*08daf0c2Sdrahn add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 ); 2157*08daf0c2Sdrahn } 2158*08daf0c2Sdrahn zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 ); 2159*08daf0c2Sdrahn if ( ( zSig1 & 0x1FF ) <= 5 ) { 2160*08daf0c2Sdrahn if ( zSig1 == 0 ) zSig1 = 1; 2161*08daf0c2Sdrahn mul32To64( doubleZSig0, zSig1, &term1, &term2 ); 2162*08daf0c2Sdrahn sub64( rem1, 0, term1, term2, &rem1, &rem2 ); 2163*08daf0c2Sdrahn mul32To64( zSig1, zSig1, &term2, &term3 ); 2164*08daf0c2Sdrahn sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); 2165*08daf0c2Sdrahn while ( (sbits32) rem1 < 0 ) { 2166*08daf0c2Sdrahn --zSig1; 2167*08daf0c2Sdrahn shortShift64Left( 0, zSig1, 1, &term2, &term3 ); 2168*08daf0c2Sdrahn term3 |= 1; 2169*08daf0c2Sdrahn term2 |= doubleZSig0; 2170*08daf0c2Sdrahn add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); 2171*08daf0c2Sdrahn } 2172*08daf0c2Sdrahn zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 2173*08daf0c2Sdrahn } 2174*08daf0c2Sdrahn shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 ); 2175*08daf0c2Sdrahn return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 ); 2176*08daf0c2Sdrahn 2177*08daf0c2Sdrahn } 2178*08daf0c2Sdrahn #endif 2179*08daf0c2Sdrahn 2180*08daf0c2Sdrahn /* 2181*08daf0c2Sdrahn ------------------------------------------------------------------------------- 2182*08daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is equal to 2183*08daf0c2Sdrahn the corresponding value `b', and 0 otherwise. The comparison is performed 2184*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2185*08daf0c2Sdrahn ------------------------------------------------------------------------------- 2186*08daf0c2Sdrahn */ 2187*08daf0c2Sdrahn flag float64_eq( float64 a, float64 b ) 2188*08daf0c2Sdrahn { 2189*08daf0c2Sdrahn 2190*08daf0c2Sdrahn if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2191*08daf0c2Sdrahn && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2192*08daf0c2Sdrahn || ( ( extractFloat64Exp( b ) == 0x7FF ) 2193*08daf0c2Sdrahn && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2194*08daf0c2Sdrahn ) { 2195*08daf0c2Sdrahn if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 2196*08daf0c2Sdrahn float_raise( float_flag_invalid ); 2197*08daf0c2Sdrahn } 2198*08daf0c2Sdrahn return 0; 2199*08daf0c2Sdrahn } 2200*08daf0c2Sdrahn return ( a == b ) || 2201*08daf0c2Sdrahn ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 ); 2202*08daf0c2Sdrahn 2203*08daf0c2Sdrahn } 2204*08daf0c2Sdrahn 2205*08daf0c2Sdrahn /* 2206*08daf0c2Sdrahn ------------------------------------------------------------------------------- 2207*08daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is less than 2208*08daf0c2Sdrahn or equal to the corresponding value `b', and 0 otherwise. The comparison 2209*08daf0c2Sdrahn is performed according to the IEC/IEEE Standard for Binary Floating-Point 2210*08daf0c2Sdrahn Arithmetic. 2211*08daf0c2Sdrahn ------------------------------------------------------------------------------- 2212*08daf0c2Sdrahn */ 2213*08daf0c2Sdrahn flag float64_le( float64 a, float64 b ) 2214*08daf0c2Sdrahn { 2215*08daf0c2Sdrahn flag aSign, bSign; 2216*08daf0c2Sdrahn 2217*08daf0c2Sdrahn if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2218*08daf0c2Sdrahn && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2219*08daf0c2Sdrahn || ( ( extractFloat64Exp( b ) == 0x7FF ) 2220*08daf0c2Sdrahn && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2221*08daf0c2Sdrahn ) { 2222*08daf0c2Sdrahn float_raise( float_flag_invalid ); 2223*08daf0c2Sdrahn return 0; 2224*08daf0c2Sdrahn } 2225*08daf0c2Sdrahn aSign = extractFloat64Sign( a ); 2226*08daf0c2Sdrahn bSign = extractFloat64Sign( b ); 2227*08daf0c2Sdrahn if ( aSign != bSign ) 2228*08daf0c2Sdrahn return aSign || 2229*08daf0c2Sdrahn ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 2230*08daf0c2Sdrahn 0 ); 2231*08daf0c2Sdrahn return ( a == b ) || 2232*08daf0c2Sdrahn ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); 2233*08daf0c2Sdrahn } 2234*08daf0c2Sdrahn 2235*08daf0c2Sdrahn /* 2236*08daf0c2Sdrahn ------------------------------------------------------------------------------- 2237*08daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is less than 2238*08daf0c2Sdrahn the corresponding value `b', and 0 otherwise. The comparison is performed 2239*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2240*08daf0c2Sdrahn ------------------------------------------------------------------------------- 2241*08daf0c2Sdrahn */ 2242*08daf0c2Sdrahn flag float64_lt( float64 a, float64 b ) 2243*08daf0c2Sdrahn { 2244*08daf0c2Sdrahn flag aSign, bSign; 2245*08daf0c2Sdrahn 2246*08daf0c2Sdrahn if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2247*08daf0c2Sdrahn && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2248*08daf0c2Sdrahn || ( ( extractFloat64Exp( b ) == 0x7FF ) 2249*08daf0c2Sdrahn && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2250*08daf0c2Sdrahn ) { 2251*08daf0c2Sdrahn float_raise( float_flag_invalid ); 2252*08daf0c2Sdrahn return 0; 2253*08daf0c2Sdrahn } 2254*08daf0c2Sdrahn aSign = extractFloat64Sign( a ); 2255*08daf0c2Sdrahn bSign = extractFloat64Sign( b ); 2256*08daf0c2Sdrahn if ( aSign != bSign ) 2257*08daf0c2Sdrahn return aSign && 2258*08daf0c2Sdrahn ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) != 2259*08daf0c2Sdrahn 0 ); 2260*08daf0c2Sdrahn return ( a != b ) && 2261*08daf0c2Sdrahn ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); 2262*08daf0c2Sdrahn 2263*08daf0c2Sdrahn } 2264*08daf0c2Sdrahn 2265*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC 2266*08daf0c2Sdrahn /* 2267*08daf0c2Sdrahn ------------------------------------------------------------------------------- 2268*08daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is equal to 2269*08daf0c2Sdrahn the corresponding value `b', and 0 otherwise. The invalid exception is 2270*08daf0c2Sdrahn raised if either operand is a NaN. Otherwise, the comparison is performed 2271*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2272*08daf0c2Sdrahn ------------------------------------------------------------------------------- 2273*08daf0c2Sdrahn */ 2274*08daf0c2Sdrahn flag float64_eq_signaling( float64 a, float64 b ) 2275*08daf0c2Sdrahn { 2276*08daf0c2Sdrahn 2277*08daf0c2Sdrahn if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2278*08daf0c2Sdrahn && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2279*08daf0c2Sdrahn || ( ( extractFloat64Exp( b ) == 0x7FF ) 2280*08daf0c2Sdrahn && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2281*08daf0c2Sdrahn ) { 2282*08daf0c2Sdrahn float_raise( float_flag_invalid ); 2283*08daf0c2Sdrahn return 0; 2284*08daf0c2Sdrahn } 2285*08daf0c2Sdrahn return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); 2286*08daf0c2Sdrahn 2287*08daf0c2Sdrahn } 2288*08daf0c2Sdrahn 2289*08daf0c2Sdrahn /* 2290*08daf0c2Sdrahn ------------------------------------------------------------------------------- 2291*08daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is less than or 2292*08daf0c2Sdrahn equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 2293*08daf0c2Sdrahn cause an exception. Otherwise, the comparison is performed according to the 2294*08daf0c2Sdrahn IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2295*08daf0c2Sdrahn ------------------------------------------------------------------------------- 2296*08daf0c2Sdrahn */ 2297*08daf0c2Sdrahn flag float64_le_quiet( float64 a, float64 b ) 2298*08daf0c2Sdrahn { 2299*08daf0c2Sdrahn flag aSign, bSign; 2300*08daf0c2Sdrahn 2301*08daf0c2Sdrahn if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2302*08daf0c2Sdrahn && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2303*08daf0c2Sdrahn || ( ( extractFloat64Exp( b ) == 0x7FF ) 2304*08daf0c2Sdrahn && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2305*08daf0c2Sdrahn ) { 2306*08daf0c2Sdrahn if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 2307*08daf0c2Sdrahn float_raise( float_flag_invalid ); 2308*08daf0c2Sdrahn } 2309*08daf0c2Sdrahn return 0; 2310*08daf0c2Sdrahn } 2311*08daf0c2Sdrahn aSign = extractFloat64Sign( a ); 2312*08daf0c2Sdrahn bSign = extractFloat64Sign( b ); 2313*08daf0c2Sdrahn if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); 2314*08daf0c2Sdrahn return ( a == b ) || ( aSign ^ ( a < b ) ); 2315*08daf0c2Sdrahn 2316*08daf0c2Sdrahn } 2317*08daf0c2Sdrahn 2318*08daf0c2Sdrahn /* 2319*08daf0c2Sdrahn ------------------------------------------------------------------------------- 2320*08daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is less than 2321*08daf0c2Sdrahn the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 2322*08daf0c2Sdrahn exception. Otherwise, the comparison is performed according to the IEC/IEEE 2323*08daf0c2Sdrahn Standard for Binary Floating-Point Arithmetic. 2324*08daf0c2Sdrahn ------------------------------------------------------------------------------- 2325*08daf0c2Sdrahn */ 2326*08daf0c2Sdrahn flag float64_lt_quiet( float64 a, float64 b ) 2327*08daf0c2Sdrahn { 2328*08daf0c2Sdrahn flag aSign, bSign; 2329*08daf0c2Sdrahn 2330*08daf0c2Sdrahn if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2331*08daf0c2Sdrahn && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2332*08daf0c2Sdrahn || ( ( extractFloat64Exp( b ) == 0x7FF ) 2333*08daf0c2Sdrahn && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2334*08daf0c2Sdrahn ) { 2335*08daf0c2Sdrahn if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 2336*08daf0c2Sdrahn float_raise( float_flag_invalid ); 2337*08daf0c2Sdrahn } 2338*08daf0c2Sdrahn return 0; 2339*08daf0c2Sdrahn } 2340*08daf0c2Sdrahn aSign = extractFloat64Sign( a ); 2341*08daf0c2Sdrahn bSign = extractFloat64Sign( b ); 2342*08daf0c2Sdrahn if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); 2343*08daf0c2Sdrahn return ( a != b ) && ( aSign ^ ( a < b ) ); 2344*08daf0c2Sdrahn 2345*08daf0c2Sdrahn } 2346*08daf0c2Sdrahn 2347*08daf0c2Sdrahn #endif 2348