1*fcc6486eSmiod /* $OpenBSD: softfloat.c,v 1.3 2015/09/13 14:21:46 miod Exp $ */
208daf0c2Sdrahn /* $NetBSD: softfloat.c,v 1.1 2002/05/21 23:51:07 bjh21 Exp $ */
308daf0c2Sdrahn
408daf0c2Sdrahn /*
508daf0c2Sdrahn * This version hacked for use with gcc -msoft-float by bjh21.
608daf0c2Sdrahn * (Mostly a case of #ifdefing out things GCC doesn't need or provides
708daf0c2Sdrahn * itself).
808daf0c2Sdrahn */
908daf0c2Sdrahn
1008daf0c2Sdrahn /*
1108daf0c2Sdrahn * Things you may want to define:
1208daf0c2Sdrahn *
1308daf0c2Sdrahn * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
1408daf0c2Sdrahn * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them
1508daf0c2Sdrahn * properly renamed.
1608daf0c2Sdrahn */
1708daf0c2Sdrahn
1808daf0c2Sdrahn /*
1908daf0c2Sdrahn * This differs from the standard bits32/softfloat.c in that float64
2008daf0c2Sdrahn * is defined to be a 64-bit integer rather than a structure. The
2108daf0c2Sdrahn * structure is float64s, with translation between the two going via
2208daf0c2Sdrahn * float64u.
2308daf0c2Sdrahn */
2408daf0c2Sdrahn
2508daf0c2Sdrahn /*
2608daf0c2Sdrahn ===============================================================================
2708daf0c2Sdrahn
2808daf0c2Sdrahn This C source file is part of the SoftFloat IEC/IEEE Floating-Point
2908daf0c2Sdrahn Arithmetic Package, Release 2a.
3008daf0c2Sdrahn
3108daf0c2Sdrahn Written by John R. Hauser. This work was made possible in part by the
3208daf0c2Sdrahn International Computer Science Institute, located at Suite 600, 1947 Center
3308daf0c2Sdrahn Street, Berkeley, California 94704. Funding was partially provided by the
3408daf0c2Sdrahn National Science Foundation under grant MIP-9311980. The original version
3508daf0c2Sdrahn of this code was written as part of a project to build a fixed-point vector
3608daf0c2Sdrahn processor in collaboration with the University of California at Berkeley,
3708daf0c2Sdrahn overseen by Profs. Nelson Morgan and John Wawrzynek. More information
3808daf0c2Sdrahn is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
3908daf0c2Sdrahn arithmetic/SoftFloat.html'.
4008daf0c2Sdrahn
4108daf0c2Sdrahn THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
4208daf0c2Sdrahn has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
4308daf0c2Sdrahn TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
4408daf0c2Sdrahn PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
4508daf0c2Sdrahn AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
4608daf0c2Sdrahn
4708daf0c2Sdrahn Derivative works are acceptable, even for commercial purposes, so long as
4808daf0c2Sdrahn (1) they include prominent notice that the work is derivative, and (2) they
4908daf0c2Sdrahn include prominent notice akin to these four paragraphs for those parts of
5008daf0c2Sdrahn this code that are retained.
5108daf0c2Sdrahn
5208daf0c2Sdrahn ===============================================================================
5308daf0c2Sdrahn */
5408daf0c2Sdrahn
5508daf0c2Sdrahn #ifdef SOFTFLOAT_FOR_GCC
5608daf0c2Sdrahn #include "softfloat-for-gcc.h"
5708daf0c2Sdrahn #endif
5808daf0c2Sdrahn
5908daf0c2Sdrahn #include "milieu.h"
60*fcc6486eSmiod #include <softfloat.h>
6108daf0c2Sdrahn
6208daf0c2Sdrahn /*
6308daf0c2Sdrahn * Conversions between floats as stored in memory and floats as
6408daf0c2Sdrahn * SoftFloat uses them
6508daf0c2Sdrahn */
6608daf0c2Sdrahn #ifndef FLOAT64_DEMANGLE
6708daf0c2Sdrahn #define FLOAT64_DEMANGLE(a) (a)
6808daf0c2Sdrahn #endif
6908daf0c2Sdrahn #ifndef FLOAT64_MANGLE
7008daf0c2Sdrahn #define FLOAT64_MANGLE(a) (a)
7108daf0c2Sdrahn #endif
7208daf0c2Sdrahn
7308daf0c2Sdrahn /*
7408daf0c2Sdrahn -------------------------------------------------------------------------------
7508daf0c2Sdrahn Primitive arithmetic functions, including multi-word arithmetic, and
7608daf0c2Sdrahn division and square root approximations. (Can be specialized to target if
7708daf0c2Sdrahn desired.)
7808daf0c2Sdrahn -------------------------------------------------------------------------------
7908daf0c2Sdrahn */
8008daf0c2Sdrahn #include "softfloat-macros.h"
8108daf0c2Sdrahn
8208daf0c2Sdrahn /*
8308daf0c2Sdrahn -------------------------------------------------------------------------------
8408daf0c2Sdrahn Functions and definitions to determine: (1) whether tininess for underflow
8508daf0c2Sdrahn is detected before or after rounding by default, (2) what (if anything)
8608daf0c2Sdrahn happens when exceptions are raised, (3) how signaling NaNs are distinguished
8708daf0c2Sdrahn from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
8808daf0c2Sdrahn are propagated from function inputs to output. These details are target-
8908daf0c2Sdrahn specific.
9008daf0c2Sdrahn -------------------------------------------------------------------------------
9108daf0c2Sdrahn */
9208daf0c2Sdrahn #include "softfloat-specialize.h"
9308daf0c2Sdrahn
9408daf0c2Sdrahn /*
9508daf0c2Sdrahn -------------------------------------------------------------------------------
9608daf0c2Sdrahn Floating-point rounding mode and exception flags.
9708daf0c2Sdrahn -------------------------------------------------------------------------------
9808daf0c2Sdrahn */
9908daf0c2Sdrahn fp_rnd float_rounding_mode = float_round_nearest_even;
10008daf0c2Sdrahn fp_except float_exception_flags = 0;
10108daf0c2Sdrahn
10208daf0c2Sdrahn /*
10308daf0c2Sdrahn -------------------------------------------------------------------------------
10408daf0c2Sdrahn Returns the fraction bits of the single-precision floating-point value `a'.
10508daf0c2Sdrahn -------------------------------------------------------------------------------
10608daf0c2Sdrahn */
extractFloat32Frac(float32 a)10708daf0c2Sdrahn INLINE bits32 extractFloat32Frac( float32 a )
10808daf0c2Sdrahn {
10908daf0c2Sdrahn
11008daf0c2Sdrahn return a & 0x007FFFFF;
11108daf0c2Sdrahn
11208daf0c2Sdrahn }
11308daf0c2Sdrahn
11408daf0c2Sdrahn /*
11508daf0c2Sdrahn -------------------------------------------------------------------------------
11608daf0c2Sdrahn Returns the exponent bits of the single-precision floating-point value `a'.
11708daf0c2Sdrahn -------------------------------------------------------------------------------
11808daf0c2Sdrahn */
extractFloat32Exp(float32 a)11908daf0c2Sdrahn INLINE int16 extractFloat32Exp( float32 a )
12008daf0c2Sdrahn {
12108daf0c2Sdrahn
12208daf0c2Sdrahn return ( a>>23 ) & 0xFF;
12308daf0c2Sdrahn
12408daf0c2Sdrahn }
12508daf0c2Sdrahn
12608daf0c2Sdrahn /*
12708daf0c2Sdrahn -------------------------------------------------------------------------------
12808daf0c2Sdrahn Returns the sign bit of the single-precision floating-point value `a'.
12908daf0c2Sdrahn -------------------------------------------------------------------------------
13008daf0c2Sdrahn */
extractFloat32Sign(float32 a)13108daf0c2Sdrahn INLINE flag extractFloat32Sign( float32 a )
13208daf0c2Sdrahn {
13308daf0c2Sdrahn
13408daf0c2Sdrahn return a>>31;
13508daf0c2Sdrahn
13608daf0c2Sdrahn }
13708daf0c2Sdrahn
13808daf0c2Sdrahn /*
13908daf0c2Sdrahn -------------------------------------------------------------------------------
14008daf0c2Sdrahn Normalizes the subnormal single-precision floating-point value represented
14108daf0c2Sdrahn by the denormalized significand `aSig'. The normalized exponent and
14208daf0c2Sdrahn significand are stored at the locations pointed to by `zExpPtr' and
14308daf0c2Sdrahn `zSigPtr', respectively.
14408daf0c2Sdrahn -------------------------------------------------------------------------------
14508daf0c2Sdrahn */
14608daf0c2Sdrahn static void
normalizeFloat32Subnormal(bits32 aSig,int16 * zExpPtr,bits32 * zSigPtr)14708daf0c2Sdrahn normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
14808daf0c2Sdrahn {
14908daf0c2Sdrahn int8 shiftCount;
15008daf0c2Sdrahn
15108daf0c2Sdrahn shiftCount = countLeadingZeros32( aSig ) - 8;
15208daf0c2Sdrahn *zSigPtr = aSig<<shiftCount;
15308daf0c2Sdrahn *zExpPtr = 1 - shiftCount;
15408daf0c2Sdrahn
15508daf0c2Sdrahn }
15608daf0c2Sdrahn
15708daf0c2Sdrahn /*
15808daf0c2Sdrahn -------------------------------------------------------------------------------
15908daf0c2Sdrahn Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
16008daf0c2Sdrahn single-precision floating-point value, returning the result. After being
16108daf0c2Sdrahn shifted into the proper positions, the three fields are simply added
16208daf0c2Sdrahn together to form the result. This means that any integer portion of `zSig'
16308daf0c2Sdrahn will be added into the exponent. Since a properly normalized significand
16408daf0c2Sdrahn will have an integer portion equal to 1, the `zExp' input should be 1 less
16508daf0c2Sdrahn than the desired result exponent whenever `zSig' is a complete, normalized
16608daf0c2Sdrahn significand.
16708daf0c2Sdrahn -------------------------------------------------------------------------------
16808daf0c2Sdrahn */
packFloat32(flag zSign,int16 zExp,bits32 zSig)16908daf0c2Sdrahn INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
17008daf0c2Sdrahn {
17108daf0c2Sdrahn
17208daf0c2Sdrahn return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
17308daf0c2Sdrahn
17408daf0c2Sdrahn }
17508daf0c2Sdrahn
17608daf0c2Sdrahn /*
17708daf0c2Sdrahn -------------------------------------------------------------------------------
17808daf0c2Sdrahn Takes an abstract floating-point value having sign `zSign', exponent `zExp',
17908daf0c2Sdrahn and significand `zSig', and returns the proper single-precision floating-
18008daf0c2Sdrahn point value corresponding to the abstract input. Ordinarily, the abstract
18108daf0c2Sdrahn value is simply rounded and packed into the single-precision format, with
18208daf0c2Sdrahn the inexact exception raised if the abstract input cannot be represented
18308daf0c2Sdrahn exactly. However, if the abstract value is too large, the overflow and
18408daf0c2Sdrahn inexact exceptions are raised and an infinity or maximal finite value is
18508daf0c2Sdrahn returned. If the abstract value is too small, the input value is rounded to
18608daf0c2Sdrahn a subnormal number, and the underflow and inexact exceptions are raised if
18708daf0c2Sdrahn the abstract input cannot be represented exactly as a subnormal single-
18808daf0c2Sdrahn precision floating-point number.
18908daf0c2Sdrahn The input significand `zSig' has its binary point between bits 30
19008daf0c2Sdrahn and 29, which is 7 bits to the left of the usual location. This shifted
19108daf0c2Sdrahn significand must be normalized or smaller. If `zSig' is not normalized,
19208daf0c2Sdrahn `zExp' must be 0; in that case, the result returned is a subnormal number,
19308daf0c2Sdrahn and it must not require rounding. In the usual case that `zSig' is
19408daf0c2Sdrahn normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
19508daf0c2Sdrahn The handling of underflow and overflow follows the IEC/IEEE Standard for
19608daf0c2Sdrahn Binary Floating-Point Arithmetic.
19708daf0c2Sdrahn -------------------------------------------------------------------------------
19808daf0c2Sdrahn */
roundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)19908daf0c2Sdrahn static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
20008daf0c2Sdrahn {
20108daf0c2Sdrahn int8 roundingMode;
20208daf0c2Sdrahn flag roundNearestEven;
20308daf0c2Sdrahn int8 roundIncrement, roundBits;
20408daf0c2Sdrahn flag isTiny;
20508daf0c2Sdrahn
20608daf0c2Sdrahn roundingMode = float_rounding_mode;
20708daf0c2Sdrahn roundNearestEven = roundingMode == float_round_nearest_even;
20808daf0c2Sdrahn roundIncrement = 0x40;
20908daf0c2Sdrahn if ( ! roundNearestEven ) {
21008daf0c2Sdrahn if ( roundingMode == float_round_to_zero ) {
21108daf0c2Sdrahn roundIncrement = 0;
21208daf0c2Sdrahn }
21308daf0c2Sdrahn else {
21408daf0c2Sdrahn roundIncrement = 0x7F;
21508daf0c2Sdrahn if ( zSign ) {
21608daf0c2Sdrahn if ( roundingMode == float_round_up ) roundIncrement = 0;
21708daf0c2Sdrahn }
21808daf0c2Sdrahn else {
21908daf0c2Sdrahn if ( roundingMode == float_round_down ) roundIncrement = 0;
22008daf0c2Sdrahn }
22108daf0c2Sdrahn }
22208daf0c2Sdrahn }
22308daf0c2Sdrahn roundBits = zSig & 0x7F;
22408daf0c2Sdrahn if ( 0xFD <= (bits16) zExp ) {
22508daf0c2Sdrahn if ( ( 0xFD < zExp )
22608daf0c2Sdrahn || ( ( zExp == 0xFD )
22708daf0c2Sdrahn && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
22808daf0c2Sdrahn ) {
22908daf0c2Sdrahn float_raise( float_flag_overflow | float_flag_inexact );
23008daf0c2Sdrahn return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
23108daf0c2Sdrahn }
23208daf0c2Sdrahn if ( zExp < 0 ) {
23308daf0c2Sdrahn isTiny =
23408daf0c2Sdrahn ( float_detect_tininess == float_tininess_before_rounding )
23508daf0c2Sdrahn || ( zExp < -1 )
23608daf0c2Sdrahn || ( zSig + roundIncrement < 0x80000000 );
23708daf0c2Sdrahn shift32RightJamming( zSig, - zExp, &zSig );
23808daf0c2Sdrahn zExp = 0;
23908daf0c2Sdrahn roundBits = zSig & 0x7F;
24008daf0c2Sdrahn if ( isTiny && roundBits ) float_raise( float_flag_underflow );
24108daf0c2Sdrahn }
24208daf0c2Sdrahn }
24308daf0c2Sdrahn if ( roundBits ) float_exception_flags |= float_flag_inexact;
24408daf0c2Sdrahn zSig = ( zSig + roundIncrement )>>7;
24508daf0c2Sdrahn zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
24608daf0c2Sdrahn if ( zSig == 0 ) zExp = 0;
24708daf0c2Sdrahn return packFloat32( zSign, zExp, zSig );
24808daf0c2Sdrahn
24908daf0c2Sdrahn }
25008daf0c2Sdrahn
25108daf0c2Sdrahn /*
25208daf0c2Sdrahn -------------------------------------------------------------------------------
25308daf0c2Sdrahn Takes an abstract floating-point value having sign `zSign', exponent `zExp',
25408daf0c2Sdrahn and significand `zSig', and returns the proper single-precision floating-
25508daf0c2Sdrahn point value corresponding to the abstract input. This routine is just like
25608daf0c2Sdrahn `roundAndPackFloat32' except that `zSig' does not have to be normalized.
25708daf0c2Sdrahn Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
25808daf0c2Sdrahn floating-point exponent.
25908daf0c2Sdrahn -------------------------------------------------------------------------------
26008daf0c2Sdrahn */
26108daf0c2Sdrahn static float32
normalizeRoundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)26208daf0c2Sdrahn normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
26308daf0c2Sdrahn {
26408daf0c2Sdrahn int8 shiftCount;
26508daf0c2Sdrahn
26608daf0c2Sdrahn shiftCount = countLeadingZeros32( zSig ) - 1;
26708daf0c2Sdrahn return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
26808daf0c2Sdrahn
26908daf0c2Sdrahn }
27008daf0c2Sdrahn
27108daf0c2Sdrahn /*
27208daf0c2Sdrahn -------------------------------------------------------------------------------
27308daf0c2Sdrahn Returns the least-significant 32 fraction bits of the double-precision
27408daf0c2Sdrahn floating-point value `a'.
27508daf0c2Sdrahn -------------------------------------------------------------------------------
27608daf0c2Sdrahn */
extractFloat64Frac1(float64 a)27708daf0c2Sdrahn INLINE bits32 extractFloat64Frac1( float64 a )
27808daf0c2Sdrahn {
27908daf0c2Sdrahn
28008daf0c2Sdrahn return FLOAT64_DEMANGLE(a) & LIT64( 0x00000000FFFFFFFF );
28108daf0c2Sdrahn
28208daf0c2Sdrahn }
28308daf0c2Sdrahn
28408daf0c2Sdrahn /*
28508daf0c2Sdrahn -------------------------------------------------------------------------------
28608daf0c2Sdrahn Returns the most-significant 20 fraction bits of the double-precision
28708daf0c2Sdrahn floating-point value `a'.
28808daf0c2Sdrahn -------------------------------------------------------------------------------
28908daf0c2Sdrahn */
extractFloat64Frac0(float64 a)29008daf0c2Sdrahn INLINE bits32 extractFloat64Frac0( float64 a )
29108daf0c2Sdrahn {
29208daf0c2Sdrahn
29308daf0c2Sdrahn return ( FLOAT64_DEMANGLE(a)>>32 ) & 0x000FFFFF;
29408daf0c2Sdrahn
29508daf0c2Sdrahn }
29608daf0c2Sdrahn
29708daf0c2Sdrahn /*
29808daf0c2Sdrahn -------------------------------------------------------------------------------
29908daf0c2Sdrahn Returns the exponent bits of the double-precision floating-point value `a'.
30008daf0c2Sdrahn -------------------------------------------------------------------------------
30108daf0c2Sdrahn */
extractFloat64Exp(float64 a)30208daf0c2Sdrahn INLINE int16 extractFloat64Exp( float64 a )
30308daf0c2Sdrahn {
30408daf0c2Sdrahn
30508daf0c2Sdrahn return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
30608daf0c2Sdrahn
30708daf0c2Sdrahn }
30808daf0c2Sdrahn
30908daf0c2Sdrahn /*
31008daf0c2Sdrahn -------------------------------------------------------------------------------
31108daf0c2Sdrahn Returns the sign bit of the double-precision floating-point value `a'.
31208daf0c2Sdrahn -------------------------------------------------------------------------------
31308daf0c2Sdrahn */
extractFloat64Sign(float64 a)31408daf0c2Sdrahn INLINE flag extractFloat64Sign( float64 a )
31508daf0c2Sdrahn {
31608daf0c2Sdrahn
31708daf0c2Sdrahn return FLOAT64_DEMANGLE(a)>>63;
31808daf0c2Sdrahn
31908daf0c2Sdrahn }
32008daf0c2Sdrahn
32108daf0c2Sdrahn /*
32208daf0c2Sdrahn -------------------------------------------------------------------------------
32308daf0c2Sdrahn Normalizes the subnormal double-precision floating-point value represented
32408daf0c2Sdrahn by the denormalized significand formed by the concatenation of `aSig0' and
32508daf0c2Sdrahn `aSig1'. The normalized exponent is stored at the location pointed to by
32608daf0c2Sdrahn `zExpPtr'. The most significant 21 bits of the normalized significand are
32708daf0c2Sdrahn stored at the location pointed to by `zSig0Ptr', and the least significant
32808daf0c2Sdrahn 32 bits of the normalized significand are stored at the location pointed to
32908daf0c2Sdrahn by `zSig1Ptr'.
33008daf0c2Sdrahn -------------------------------------------------------------------------------
33108daf0c2Sdrahn */
33208daf0c2Sdrahn static void
normalizeFloat64Subnormal(bits32 aSig0,bits32 aSig1,int16 * zExpPtr,bits32 * zSig0Ptr,bits32 * zSig1Ptr)33308daf0c2Sdrahn normalizeFloat64Subnormal(
33408daf0c2Sdrahn bits32 aSig0,
33508daf0c2Sdrahn bits32 aSig1,
33608daf0c2Sdrahn int16 *zExpPtr,
33708daf0c2Sdrahn bits32 *zSig0Ptr,
33808daf0c2Sdrahn bits32 *zSig1Ptr
33908daf0c2Sdrahn )
34008daf0c2Sdrahn {
34108daf0c2Sdrahn int8 shiftCount;
34208daf0c2Sdrahn
34308daf0c2Sdrahn if ( aSig0 == 0 ) {
34408daf0c2Sdrahn shiftCount = countLeadingZeros32( aSig1 ) - 11;
34508daf0c2Sdrahn if ( shiftCount < 0 ) {
34608daf0c2Sdrahn *zSig0Ptr = aSig1>>( - shiftCount );
34708daf0c2Sdrahn *zSig1Ptr = aSig1<<( shiftCount & 31 );
34808daf0c2Sdrahn }
34908daf0c2Sdrahn else {
35008daf0c2Sdrahn *zSig0Ptr = aSig1<<shiftCount;
35108daf0c2Sdrahn *zSig1Ptr = 0;
35208daf0c2Sdrahn }
35308daf0c2Sdrahn *zExpPtr = - shiftCount - 31;
35408daf0c2Sdrahn }
35508daf0c2Sdrahn else {
35608daf0c2Sdrahn shiftCount = countLeadingZeros32( aSig0 ) - 11;
35708daf0c2Sdrahn shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
35808daf0c2Sdrahn *zExpPtr = 1 - shiftCount;
35908daf0c2Sdrahn }
36008daf0c2Sdrahn
36108daf0c2Sdrahn }
36208daf0c2Sdrahn
36308daf0c2Sdrahn /*
36408daf0c2Sdrahn -------------------------------------------------------------------------------
36508daf0c2Sdrahn Packs the sign `zSign', the exponent `zExp', and the significand formed by
36608daf0c2Sdrahn the concatenation of `zSig0' and `zSig1' into a double-precision floating-
36708daf0c2Sdrahn point value, returning the result. After being shifted into the proper
36808daf0c2Sdrahn positions, the three fields `zSign', `zExp', and `zSig0' are simply added
36908daf0c2Sdrahn together to form the most significant 32 bits of the result. This means
37008daf0c2Sdrahn that any integer portion of `zSig0' will be added into the exponent. Since
37108daf0c2Sdrahn a properly normalized significand will have an integer portion equal to 1,
37208daf0c2Sdrahn the `zExp' input should be 1 less than the desired result exponent whenever
37308daf0c2Sdrahn `zSig0' and `zSig1' concatenated form a complete, normalized significand.
37408daf0c2Sdrahn -------------------------------------------------------------------------------
37508daf0c2Sdrahn */
37608daf0c2Sdrahn INLINE float64
packFloat64(flag zSign,int16 zExp,bits32 zSig0,bits32 zSig1)37708daf0c2Sdrahn packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
37808daf0c2Sdrahn {
37908daf0c2Sdrahn
38008daf0c2Sdrahn return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
38108daf0c2Sdrahn ( ( (bits64) zExp )<<52 ) +
38208daf0c2Sdrahn ( ( (bits64) zSig0 )<<32 ) + zSig1 );
38308daf0c2Sdrahn
38408daf0c2Sdrahn
38508daf0c2Sdrahn }
38608daf0c2Sdrahn
38708daf0c2Sdrahn /*
38808daf0c2Sdrahn -------------------------------------------------------------------------------
38908daf0c2Sdrahn Takes an abstract floating-point value having sign `zSign', exponent `zExp',
39008daf0c2Sdrahn and extended significand formed by the concatenation of `zSig0', `zSig1',
39108daf0c2Sdrahn and `zSig2', and returns the proper double-precision floating-point value
39208daf0c2Sdrahn corresponding to the abstract input. Ordinarily, the abstract value is
39308daf0c2Sdrahn simply rounded and packed into the double-precision format, with the inexact
39408daf0c2Sdrahn exception raised if the abstract input cannot be represented exactly.
39508daf0c2Sdrahn However, if the abstract value is too large, the overflow and inexact
39608daf0c2Sdrahn exceptions are raised and an infinity or maximal finite value is returned.
39708daf0c2Sdrahn If the abstract value is too small, the input value is rounded to a
39808daf0c2Sdrahn subnormal number, and the underflow and inexact exceptions are raised if the
39908daf0c2Sdrahn abstract input cannot be represented exactly as a subnormal double-precision
40008daf0c2Sdrahn floating-point number.
40108daf0c2Sdrahn The input significand must be normalized or smaller. If the input
40208daf0c2Sdrahn significand is not normalized, `zExp' must be 0; in that case, the result
40308daf0c2Sdrahn returned is a subnormal number, and it must not require rounding. In the
40408daf0c2Sdrahn usual case that the input significand is normalized, `zExp' must be 1 less
40508daf0c2Sdrahn than the ``true'' floating-point exponent. The handling of underflow and
40608daf0c2Sdrahn overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
40708daf0c2Sdrahn -------------------------------------------------------------------------------
40808daf0c2Sdrahn */
40908daf0c2Sdrahn static float64
roundAndPackFloat64(flag zSign,int16 zExp,bits32 zSig0,bits32 zSig1,bits32 zSig2)41008daf0c2Sdrahn roundAndPackFloat64(
41108daf0c2Sdrahn flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )
41208daf0c2Sdrahn {
41308daf0c2Sdrahn int8 roundingMode;
41408daf0c2Sdrahn flag roundNearestEven, increment, isTiny;
41508daf0c2Sdrahn
41608daf0c2Sdrahn roundingMode = float_rounding_mode;
41708daf0c2Sdrahn roundNearestEven = ( roundingMode == float_round_nearest_even );
41808daf0c2Sdrahn increment = ( (sbits32) zSig2 < 0 );
41908daf0c2Sdrahn if ( ! roundNearestEven ) {
42008daf0c2Sdrahn if ( roundingMode == float_round_to_zero ) {
42108daf0c2Sdrahn increment = 0;
42208daf0c2Sdrahn }
42308daf0c2Sdrahn else {
42408daf0c2Sdrahn if ( zSign ) {
42508daf0c2Sdrahn increment = ( roundingMode == float_round_down ) && zSig2;
42608daf0c2Sdrahn }
42708daf0c2Sdrahn else {
42808daf0c2Sdrahn increment = ( roundingMode == float_round_up ) && zSig2;
42908daf0c2Sdrahn }
43008daf0c2Sdrahn }
43108daf0c2Sdrahn }
43208daf0c2Sdrahn if ( 0x7FD <= (bits16) zExp ) {
43308daf0c2Sdrahn if ( ( 0x7FD < zExp )
43408daf0c2Sdrahn || ( ( zExp == 0x7FD )
43508daf0c2Sdrahn && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
43608daf0c2Sdrahn && increment
43708daf0c2Sdrahn )
43808daf0c2Sdrahn ) {
43908daf0c2Sdrahn float_raise( float_flag_overflow | float_flag_inexact );
44008daf0c2Sdrahn if ( ( roundingMode == float_round_to_zero )
44108daf0c2Sdrahn || ( zSign && ( roundingMode == float_round_up ) )
44208daf0c2Sdrahn || ( ! zSign && ( roundingMode == float_round_down ) )
44308daf0c2Sdrahn ) {
44408daf0c2Sdrahn return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );
44508daf0c2Sdrahn }
44608daf0c2Sdrahn return packFloat64( zSign, 0x7FF, 0, 0 );
44708daf0c2Sdrahn }
44808daf0c2Sdrahn if ( zExp < 0 ) {
44908daf0c2Sdrahn isTiny =
45008daf0c2Sdrahn ( float_detect_tininess == float_tininess_before_rounding )
45108daf0c2Sdrahn || ( zExp < -1 )
45208daf0c2Sdrahn || ! increment
45308daf0c2Sdrahn || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
45408daf0c2Sdrahn shift64ExtraRightJamming(
45508daf0c2Sdrahn zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
45608daf0c2Sdrahn zExp = 0;
45708daf0c2Sdrahn if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
45808daf0c2Sdrahn if ( roundNearestEven ) {
45908daf0c2Sdrahn increment = ( (sbits32) zSig2 < 0 );
46008daf0c2Sdrahn }
46108daf0c2Sdrahn else {
46208daf0c2Sdrahn if ( zSign ) {
46308daf0c2Sdrahn increment = ( roundingMode == float_round_down ) && zSig2;
46408daf0c2Sdrahn }
46508daf0c2Sdrahn else {
46608daf0c2Sdrahn increment = ( roundingMode == float_round_up ) && zSig2;
46708daf0c2Sdrahn }
46808daf0c2Sdrahn }
46908daf0c2Sdrahn }
47008daf0c2Sdrahn }
47108daf0c2Sdrahn if ( zSig2 ) float_exception_flags |= float_flag_inexact;
47208daf0c2Sdrahn if ( increment ) {
47308daf0c2Sdrahn add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
47408daf0c2Sdrahn zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
47508daf0c2Sdrahn }
47608daf0c2Sdrahn else {
47708daf0c2Sdrahn if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
47808daf0c2Sdrahn }
47908daf0c2Sdrahn return packFloat64( zSign, zExp, zSig0, zSig1 );
48008daf0c2Sdrahn
48108daf0c2Sdrahn }
48208daf0c2Sdrahn
48308daf0c2Sdrahn /*
48408daf0c2Sdrahn -------------------------------------------------------------------------------
48508daf0c2Sdrahn Takes an abstract floating-point value having sign `zSign', exponent `zExp',
48608daf0c2Sdrahn and significand formed by the concatenation of `zSig0' and `zSig1', and
48708daf0c2Sdrahn returns the proper double-precision floating-point value corresponding
48808daf0c2Sdrahn to the abstract input. This routine is just like `roundAndPackFloat64'
48908daf0c2Sdrahn except that the input significand has fewer bits and does not have to be
49008daf0c2Sdrahn normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
49108daf0c2Sdrahn point exponent.
49208daf0c2Sdrahn -------------------------------------------------------------------------------
49308daf0c2Sdrahn */
49408daf0c2Sdrahn static float64
normalizeRoundAndPackFloat64(flag zSign,int16 zExp,bits32 zSig0,bits32 zSig1)49508daf0c2Sdrahn normalizeRoundAndPackFloat64(
49608daf0c2Sdrahn flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
49708daf0c2Sdrahn {
49808daf0c2Sdrahn int8 shiftCount;
49908daf0c2Sdrahn bits32 zSig2;
50008daf0c2Sdrahn
50108daf0c2Sdrahn if ( zSig0 == 0 ) {
50208daf0c2Sdrahn zSig0 = zSig1;
50308daf0c2Sdrahn zSig1 = 0;
50408daf0c2Sdrahn zExp -= 32;
50508daf0c2Sdrahn }
50608daf0c2Sdrahn shiftCount = countLeadingZeros32( zSig0 ) - 11;
50708daf0c2Sdrahn if ( 0 <= shiftCount ) {
50808daf0c2Sdrahn zSig2 = 0;
50908daf0c2Sdrahn shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
51008daf0c2Sdrahn }
51108daf0c2Sdrahn else {
51208daf0c2Sdrahn shift64ExtraRightJamming(
51308daf0c2Sdrahn zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
51408daf0c2Sdrahn }
51508daf0c2Sdrahn zExp -= shiftCount;
51608daf0c2Sdrahn return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
51708daf0c2Sdrahn
51808daf0c2Sdrahn }
51908daf0c2Sdrahn
52008daf0c2Sdrahn /*
52108daf0c2Sdrahn -------------------------------------------------------------------------------
52208daf0c2Sdrahn Returns the result of converting the 32-bit two's complement integer `a' to
52308daf0c2Sdrahn the single-precision floating-point format. The conversion is performed
52408daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
52508daf0c2Sdrahn -------------------------------------------------------------------------------
52608daf0c2Sdrahn */
int32_to_float32(int32 a)52708daf0c2Sdrahn float32 int32_to_float32( int32 a )
52808daf0c2Sdrahn {
52908daf0c2Sdrahn flag zSign;
53008daf0c2Sdrahn
53108daf0c2Sdrahn if ( a == 0 ) return 0;
53208daf0c2Sdrahn if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
53308daf0c2Sdrahn zSign = ( a < 0 );
53408daf0c2Sdrahn return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
53508daf0c2Sdrahn
53608daf0c2Sdrahn }
53708daf0c2Sdrahn
53808daf0c2Sdrahn /*
53908daf0c2Sdrahn -------------------------------------------------------------------------------
54008daf0c2Sdrahn Returns the result of converting the 32-bit two's complement integer `a' to
54108daf0c2Sdrahn the double-precision floating-point format. The conversion is performed
54208daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
54308daf0c2Sdrahn -------------------------------------------------------------------------------
54408daf0c2Sdrahn */
int32_to_float64(int32 a)54508daf0c2Sdrahn float64 int32_to_float64( int32 a )
54608daf0c2Sdrahn {
54708daf0c2Sdrahn flag zSign;
54808daf0c2Sdrahn bits32 absA;
54908daf0c2Sdrahn int8 shiftCount;
55008daf0c2Sdrahn bits32 zSig0, zSig1;
55108daf0c2Sdrahn
55208daf0c2Sdrahn if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );
55308daf0c2Sdrahn zSign = ( a < 0 );
55408daf0c2Sdrahn absA = zSign ? - a : a;
55508daf0c2Sdrahn shiftCount = countLeadingZeros32( absA ) - 11;
55608daf0c2Sdrahn if ( 0 <= shiftCount ) {
55708daf0c2Sdrahn zSig0 = absA<<shiftCount;
55808daf0c2Sdrahn zSig1 = 0;
55908daf0c2Sdrahn }
56008daf0c2Sdrahn else {
56108daf0c2Sdrahn shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );
56208daf0c2Sdrahn }
56308daf0c2Sdrahn return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );
56408daf0c2Sdrahn
56508daf0c2Sdrahn }
56608daf0c2Sdrahn
56708daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
56808daf0c2Sdrahn /*
56908daf0c2Sdrahn -------------------------------------------------------------------------------
57008daf0c2Sdrahn Returns the result of converting the single-precision floating-point value
57108daf0c2Sdrahn `a' to the 32-bit two's complement integer format. The conversion is
57208daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point
57308daf0c2Sdrahn Arithmetic---which means in particular that the conversion is rounded
57408daf0c2Sdrahn according to the current rounding mode. If `a' is a NaN, the largest
57508daf0c2Sdrahn positive integer is returned. Otherwise, if the conversion overflows, the
57608daf0c2Sdrahn largest integer with the same sign as `a' is returned.
57708daf0c2Sdrahn -------------------------------------------------------------------------------
57808daf0c2Sdrahn */
float32_to_int32(float32 a)57908daf0c2Sdrahn int32 float32_to_int32( float32 a )
58008daf0c2Sdrahn {
58108daf0c2Sdrahn flag aSign;
58208daf0c2Sdrahn int16 aExp, shiftCount;
58308daf0c2Sdrahn bits32 aSig, aSigExtra;
58408daf0c2Sdrahn int32 z;
58508daf0c2Sdrahn int8 roundingMode;
58608daf0c2Sdrahn
58708daf0c2Sdrahn aSig = extractFloat32Frac( a );
58808daf0c2Sdrahn aExp = extractFloat32Exp( a );
58908daf0c2Sdrahn aSign = extractFloat32Sign( a );
59008daf0c2Sdrahn shiftCount = aExp - 0x96;
59108daf0c2Sdrahn if ( 0 <= shiftCount ) {
59208daf0c2Sdrahn if ( 0x9E <= aExp ) {
59308daf0c2Sdrahn if ( a != 0xCF000000 ) {
59408daf0c2Sdrahn float_raise( float_flag_invalid );
59508daf0c2Sdrahn if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
59608daf0c2Sdrahn return 0x7FFFFFFF;
59708daf0c2Sdrahn }
59808daf0c2Sdrahn }
59908daf0c2Sdrahn return (sbits32) 0x80000000;
60008daf0c2Sdrahn }
60108daf0c2Sdrahn z = ( aSig | 0x00800000 )<<shiftCount;
60208daf0c2Sdrahn if ( aSign ) z = - z;
60308daf0c2Sdrahn }
60408daf0c2Sdrahn else {
60508daf0c2Sdrahn if ( aExp < 0x7E ) {
60608daf0c2Sdrahn aSigExtra = aExp | aSig;
60708daf0c2Sdrahn z = 0;
60808daf0c2Sdrahn }
60908daf0c2Sdrahn else {
61008daf0c2Sdrahn aSig |= 0x00800000;
61108daf0c2Sdrahn aSigExtra = aSig<<( shiftCount & 31 );
61208daf0c2Sdrahn z = aSig>>( - shiftCount );
61308daf0c2Sdrahn }
61408daf0c2Sdrahn if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
61508daf0c2Sdrahn roundingMode = float_rounding_mode;
61608daf0c2Sdrahn if ( roundingMode == float_round_nearest_even ) {
61708daf0c2Sdrahn if ( (sbits32) aSigExtra < 0 ) {
61808daf0c2Sdrahn ++z;
61908daf0c2Sdrahn if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;
62008daf0c2Sdrahn }
62108daf0c2Sdrahn if ( aSign ) z = - z;
62208daf0c2Sdrahn }
62308daf0c2Sdrahn else {
62408daf0c2Sdrahn aSigExtra = ( aSigExtra != 0 );
62508daf0c2Sdrahn if ( aSign ) {
62608daf0c2Sdrahn z += ( roundingMode == float_round_down ) & aSigExtra;
62708daf0c2Sdrahn z = - z;
62808daf0c2Sdrahn }
62908daf0c2Sdrahn else {
63008daf0c2Sdrahn z += ( roundingMode == float_round_up ) & aSigExtra;
63108daf0c2Sdrahn }
63208daf0c2Sdrahn }
63308daf0c2Sdrahn }
63408daf0c2Sdrahn return z;
63508daf0c2Sdrahn
63608daf0c2Sdrahn }
63708daf0c2Sdrahn #endif
63808daf0c2Sdrahn
63908daf0c2Sdrahn /*
64008daf0c2Sdrahn -------------------------------------------------------------------------------
64108daf0c2Sdrahn Returns the result of converting the single-precision floating-point value
64208daf0c2Sdrahn `a' to the 32-bit two's complement integer format. The conversion is
64308daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point
64408daf0c2Sdrahn Arithmetic, except that the conversion is always rounded toward zero.
64508daf0c2Sdrahn If `a' is a NaN, the largest positive integer is returned. Otherwise, if
64608daf0c2Sdrahn the conversion overflows, the largest integer with the same sign as `a' is
64708daf0c2Sdrahn returned.
64808daf0c2Sdrahn -------------------------------------------------------------------------------
64908daf0c2Sdrahn */
float32_to_int32_round_to_zero(float32 a)65008daf0c2Sdrahn int32 float32_to_int32_round_to_zero( float32 a )
65108daf0c2Sdrahn {
65208daf0c2Sdrahn flag aSign;
65308daf0c2Sdrahn int16 aExp, shiftCount;
65408daf0c2Sdrahn bits32 aSig;
65508daf0c2Sdrahn int32 z;
65608daf0c2Sdrahn
65708daf0c2Sdrahn aSig = extractFloat32Frac( a );
65808daf0c2Sdrahn aExp = extractFloat32Exp( a );
65908daf0c2Sdrahn aSign = extractFloat32Sign( a );
66008daf0c2Sdrahn shiftCount = aExp - 0x9E;
66108daf0c2Sdrahn if ( 0 <= shiftCount ) {
66208daf0c2Sdrahn if ( a != 0xCF000000 ) {
66308daf0c2Sdrahn float_raise( float_flag_invalid );
66408daf0c2Sdrahn if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
66508daf0c2Sdrahn }
66608daf0c2Sdrahn return (sbits32) 0x80000000;
66708daf0c2Sdrahn }
66808daf0c2Sdrahn else if ( aExp <= 0x7E ) {
66908daf0c2Sdrahn if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
67008daf0c2Sdrahn return 0;
67108daf0c2Sdrahn }
67208daf0c2Sdrahn aSig = ( aSig | 0x00800000 )<<8;
67308daf0c2Sdrahn z = aSig>>( - shiftCount );
67408daf0c2Sdrahn if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
67508daf0c2Sdrahn float_exception_flags |= float_flag_inexact;
67608daf0c2Sdrahn }
67708daf0c2Sdrahn if ( aSign ) z = - z;
67808daf0c2Sdrahn return z;
67908daf0c2Sdrahn
68008daf0c2Sdrahn }
68108daf0c2Sdrahn
68208daf0c2Sdrahn /*
68308daf0c2Sdrahn -------------------------------------------------------------------------------
68408daf0c2Sdrahn Returns the result of converting the single-precision floating-point value
68508daf0c2Sdrahn `a' to the double-precision floating-point format. The conversion is
68608daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point
68708daf0c2Sdrahn Arithmetic.
68808daf0c2Sdrahn -------------------------------------------------------------------------------
68908daf0c2Sdrahn */
float32_to_float64(float32 a)69008daf0c2Sdrahn float64 float32_to_float64( float32 a )
69108daf0c2Sdrahn {
69208daf0c2Sdrahn flag aSign;
69308daf0c2Sdrahn int16 aExp;
69408daf0c2Sdrahn bits32 aSig, zSig0, zSig1;
69508daf0c2Sdrahn
69608daf0c2Sdrahn aSig = extractFloat32Frac( a );
69708daf0c2Sdrahn aExp = extractFloat32Exp( a );
69808daf0c2Sdrahn aSign = extractFloat32Sign( a );
69908daf0c2Sdrahn if ( aExp == 0xFF ) {
70008daf0c2Sdrahn if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
70108daf0c2Sdrahn return packFloat64( aSign, 0x7FF, 0, 0 );
70208daf0c2Sdrahn }
70308daf0c2Sdrahn if ( aExp == 0 ) {
70408daf0c2Sdrahn if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );
70508daf0c2Sdrahn normalizeFloat32Subnormal( aSig, &aExp, &aSig );
70608daf0c2Sdrahn --aExp;
70708daf0c2Sdrahn }
70808daf0c2Sdrahn shift64Right( aSig, 0, 3, &zSig0, &zSig1 );
70908daf0c2Sdrahn return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );
71008daf0c2Sdrahn
71108daf0c2Sdrahn }
71208daf0c2Sdrahn
71308daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
71408daf0c2Sdrahn /*
71508daf0c2Sdrahn -------------------------------------------------------------------------------
71608daf0c2Sdrahn Rounds the single-precision floating-point value `a' to an integer,
71708daf0c2Sdrahn and returns the result as a single-precision floating-point value. The
71808daf0c2Sdrahn operation is performed according to the IEC/IEEE Standard for Binary
71908daf0c2Sdrahn Floating-Point Arithmetic.
72008daf0c2Sdrahn -------------------------------------------------------------------------------
72108daf0c2Sdrahn */
float32_round_to_int(float32 a)72208daf0c2Sdrahn float32 float32_round_to_int( float32 a )
72308daf0c2Sdrahn {
72408daf0c2Sdrahn flag aSign;
72508daf0c2Sdrahn int16 aExp;
72608daf0c2Sdrahn bits32 lastBitMask, roundBitsMask;
72708daf0c2Sdrahn int8 roundingMode;
72808daf0c2Sdrahn float32 z;
72908daf0c2Sdrahn
73008daf0c2Sdrahn aExp = extractFloat32Exp( a );
73108daf0c2Sdrahn if ( 0x96 <= aExp ) {
73208daf0c2Sdrahn if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
73308daf0c2Sdrahn return propagateFloat32NaN( a, a );
73408daf0c2Sdrahn }
73508daf0c2Sdrahn return a;
73608daf0c2Sdrahn }
73708daf0c2Sdrahn if ( aExp <= 0x7E ) {
73808daf0c2Sdrahn if ( (bits32) ( a<<1 ) == 0 ) return a;
73908daf0c2Sdrahn float_exception_flags |= float_flag_inexact;
74008daf0c2Sdrahn aSign = extractFloat32Sign( a );
74108daf0c2Sdrahn switch ( float_rounding_mode ) {
74208daf0c2Sdrahn case float_round_nearest_even:
74308daf0c2Sdrahn if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
74408daf0c2Sdrahn return packFloat32( aSign, 0x7F, 0 );
74508daf0c2Sdrahn }
74608daf0c2Sdrahn break;
74708daf0c2Sdrahn case float_round_to_zero:
74808daf0c2Sdrahn break;
74908daf0c2Sdrahn case float_round_down:
75008daf0c2Sdrahn return aSign ? 0xBF800000 : 0;
75108daf0c2Sdrahn case float_round_up:
75208daf0c2Sdrahn return aSign ? 0x80000000 : 0x3F800000;
75308daf0c2Sdrahn }
75408daf0c2Sdrahn return packFloat32( aSign, 0, 0 );
75508daf0c2Sdrahn }
75608daf0c2Sdrahn lastBitMask = 1;
75708daf0c2Sdrahn lastBitMask <<= 0x96 - aExp;
75808daf0c2Sdrahn roundBitsMask = lastBitMask - 1;
75908daf0c2Sdrahn z = a;
76008daf0c2Sdrahn roundingMode = float_rounding_mode;
76108daf0c2Sdrahn if ( roundingMode == float_round_nearest_even ) {
76208daf0c2Sdrahn z += lastBitMask>>1;
76308daf0c2Sdrahn if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
76408daf0c2Sdrahn }
76508daf0c2Sdrahn else if ( roundingMode != float_round_to_zero ) {
76608daf0c2Sdrahn if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
76708daf0c2Sdrahn z += roundBitsMask;
76808daf0c2Sdrahn }
76908daf0c2Sdrahn }
77008daf0c2Sdrahn z &= ~ roundBitsMask;
77108daf0c2Sdrahn if ( z != a ) float_exception_flags |= float_flag_inexact;
77208daf0c2Sdrahn return z;
77308daf0c2Sdrahn
77408daf0c2Sdrahn }
77508daf0c2Sdrahn #endif
77608daf0c2Sdrahn
77708daf0c2Sdrahn /*
77808daf0c2Sdrahn -------------------------------------------------------------------------------
77908daf0c2Sdrahn Returns the result of adding the absolute values of the single-precision
78008daf0c2Sdrahn floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
78108daf0c2Sdrahn before being returned. `zSign' is ignored if the result is a NaN.
78208daf0c2Sdrahn The addition is performed according to the IEC/IEEE Standard for Binary
78308daf0c2Sdrahn Floating-Point Arithmetic.
78408daf0c2Sdrahn -------------------------------------------------------------------------------
78508daf0c2Sdrahn */
addFloat32Sigs(float32 a,float32 b,flag zSign)78608daf0c2Sdrahn static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
78708daf0c2Sdrahn {
78808daf0c2Sdrahn int16 aExp, bExp, zExp;
78908daf0c2Sdrahn bits32 aSig, bSig, zSig;
79008daf0c2Sdrahn int16 expDiff;
79108daf0c2Sdrahn
79208daf0c2Sdrahn aSig = extractFloat32Frac( a );
79308daf0c2Sdrahn aExp = extractFloat32Exp( a );
79408daf0c2Sdrahn bSig = extractFloat32Frac( b );
79508daf0c2Sdrahn bExp = extractFloat32Exp( b );
79608daf0c2Sdrahn expDiff = aExp - bExp;
79708daf0c2Sdrahn aSig <<= 6;
79808daf0c2Sdrahn bSig <<= 6;
79908daf0c2Sdrahn if ( 0 < expDiff ) {
80008daf0c2Sdrahn if ( aExp == 0xFF ) {
80108daf0c2Sdrahn if ( aSig ) return propagateFloat32NaN( a, b );
80208daf0c2Sdrahn return a;
80308daf0c2Sdrahn }
80408daf0c2Sdrahn if ( bExp == 0 ) {
80508daf0c2Sdrahn --expDiff;
80608daf0c2Sdrahn }
80708daf0c2Sdrahn else {
80808daf0c2Sdrahn bSig |= 0x20000000;
80908daf0c2Sdrahn }
81008daf0c2Sdrahn shift32RightJamming( bSig, expDiff, &bSig );
81108daf0c2Sdrahn zExp = aExp;
81208daf0c2Sdrahn }
81308daf0c2Sdrahn else if ( expDiff < 0 ) {
81408daf0c2Sdrahn if ( bExp == 0xFF ) {
81508daf0c2Sdrahn if ( bSig ) return propagateFloat32NaN( a, b );
81608daf0c2Sdrahn return packFloat32( zSign, 0xFF, 0 );
81708daf0c2Sdrahn }
81808daf0c2Sdrahn if ( aExp == 0 ) {
81908daf0c2Sdrahn ++expDiff;
82008daf0c2Sdrahn }
82108daf0c2Sdrahn else {
82208daf0c2Sdrahn aSig |= 0x20000000;
82308daf0c2Sdrahn }
82408daf0c2Sdrahn shift32RightJamming( aSig, - expDiff, &aSig );
82508daf0c2Sdrahn zExp = bExp;
82608daf0c2Sdrahn }
82708daf0c2Sdrahn else {
82808daf0c2Sdrahn if ( aExp == 0xFF ) {
82908daf0c2Sdrahn if ( aSig | bSig ) return propagateFloat32NaN( a, b );
83008daf0c2Sdrahn return a;
83108daf0c2Sdrahn }
83208daf0c2Sdrahn if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
83308daf0c2Sdrahn zSig = 0x40000000 + aSig + bSig;
83408daf0c2Sdrahn zExp = aExp;
83508daf0c2Sdrahn goto roundAndPack;
83608daf0c2Sdrahn }
83708daf0c2Sdrahn aSig |= 0x20000000;
83808daf0c2Sdrahn zSig = ( aSig + bSig )<<1;
83908daf0c2Sdrahn --zExp;
84008daf0c2Sdrahn if ( (sbits32) zSig < 0 ) {
84108daf0c2Sdrahn zSig = aSig + bSig;
84208daf0c2Sdrahn ++zExp;
84308daf0c2Sdrahn }
84408daf0c2Sdrahn roundAndPack:
84508daf0c2Sdrahn return roundAndPackFloat32( zSign, zExp, zSig );
84608daf0c2Sdrahn
84708daf0c2Sdrahn }
84808daf0c2Sdrahn
84908daf0c2Sdrahn /*
85008daf0c2Sdrahn -------------------------------------------------------------------------------
85108daf0c2Sdrahn Returns the result of subtracting the absolute values of the single-
85208daf0c2Sdrahn precision floating-point values `a' and `b'. If `zSign' is 1, the
85308daf0c2Sdrahn difference is negated before being returned. `zSign' is ignored if the
85408daf0c2Sdrahn result is a NaN. The subtraction is performed according to the IEC/IEEE
85508daf0c2Sdrahn Standard for Binary Floating-Point Arithmetic.
85608daf0c2Sdrahn -------------------------------------------------------------------------------
85708daf0c2Sdrahn */
subFloat32Sigs(float32 a,float32 b,flag zSign)85808daf0c2Sdrahn static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
85908daf0c2Sdrahn {
86008daf0c2Sdrahn int16 aExp, bExp, zExp;
86108daf0c2Sdrahn bits32 aSig, bSig, zSig;
86208daf0c2Sdrahn int16 expDiff;
86308daf0c2Sdrahn
86408daf0c2Sdrahn aSig = extractFloat32Frac( a );
86508daf0c2Sdrahn aExp = extractFloat32Exp( a );
86608daf0c2Sdrahn bSig = extractFloat32Frac( b );
86708daf0c2Sdrahn bExp = extractFloat32Exp( b );
86808daf0c2Sdrahn expDiff = aExp - bExp;
86908daf0c2Sdrahn aSig <<= 7;
87008daf0c2Sdrahn bSig <<= 7;
87108daf0c2Sdrahn if ( 0 < expDiff ) goto aExpBigger;
87208daf0c2Sdrahn if ( expDiff < 0 ) goto bExpBigger;
87308daf0c2Sdrahn if ( aExp == 0xFF ) {
87408daf0c2Sdrahn if ( aSig | bSig ) return propagateFloat32NaN( a, b );
87508daf0c2Sdrahn float_raise( float_flag_invalid );
87608daf0c2Sdrahn return float32_default_nan;
87708daf0c2Sdrahn }
87808daf0c2Sdrahn if ( aExp == 0 ) {
87908daf0c2Sdrahn aExp = 1;
88008daf0c2Sdrahn bExp = 1;
88108daf0c2Sdrahn }
88208daf0c2Sdrahn if ( bSig < aSig ) goto aBigger;
88308daf0c2Sdrahn if ( aSig < bSig ) goto bBigger;
88408daf0c2Sdrahn return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
88508daf0c2Sdrahn bExpBigger:
88608daf0c2Sdrahn if ( bExp == 0xFF ) {
88708daf0c2Sdrahn if ( bSig ) return propagateFloat32NaN( a, b );
88808daf0c2Sdrahn return packFloat32( zSign ^ 1, 0xFF, 0 );
88908daf0c2Sdrahn }
89008daf0c2Sdrahn if ( aExp == 0 ) {
89108daf0c2Sdrahn ++expDiff;
89208daf0c2Sdrahn }
89308daf0c2Sdrahn else {
89408daf0c2Sdrahn aSig |= 0x40000000;
89508daf0c2Sdrahn }
89608daf0c2Sdrahn shift32RightJamming( aSig, - expDiff, &aSig );
89708daf0c2Sdrahn bSig |= 0x40000000;
89808daf0c2Sdrahn bBigger:
89908daf0c2Sdrahn zSig = bSig - aSig;
90008daf0c2Sdrahn zExp = bExp;
90108daf0c2Sdrahn zSign ^= 1;
90208daf0c2Sdrahn goto normalizeRoundAndPack;
90308daf0c2Sdrahn aExpBigger:
90408daf0c2Sdrahn if ( aExp == 0xFF ) {
90508daf0c2Sdrahn if ( aSig ) return propagateFloat32NaN( a, b );
90608daf0c2Sdrahn return a;
90708daf0c2Sdrahn }
90808daf0c2Sdrahn if ( bExp == 0 ) {
90908daf0c2Sdrahn --expDiff;
91008daf0c2Sdrahn }
91108daf0c2Sdrahn else {
91208daf0c2Sdrahn bSig |= 0x40000000;
91308daf0c2Sdrahn }
91408daf0c2Sdrahn shift32RightJamming( bSig, expDiff, &bSig );
91508daf0c2Sdrahn aSig |= 0x40000000;
91608daf0c2Sdrahn aBigger:
91708daf0c2Sdrahn zSig = aSig - bSig;
91808daf0c2Sdrahn zExp = aExp;
91908daf0c2Sdrahn normalizeRoundAndPack:
92008daf0c2Sdrahn --zExp;
92108daf0c2Sdrahn return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
92208daf0c2Sdrahn
92308daf0c2Sdrahn }
92408daf0c2Sdrahn
92508daf0c2Sdrahn /*
92608daf0c2Sdrahn -------------------------------------------------------------------------------
92708daf0c2Sdrahn Returns the result of adding the single-precision floating-point values `a'
92808daf0c2Sdrahn and `b'. The operation is performed according to the IEC/IEEE Standard for
92908daf0c2Sdrahn Binary Floating-Point Arithmetic.
93008daf0c2Sdrahn -------------------------------------------------------------------------------
93108daf0c2Sdrahn */
float32_add(float32 a,float32 b)93208daf0c2Sdrahn float32 float32_add( float32 a, float32 b )
93308daf0c2Sdrahn {
93408daf0c2Sdrahn flag aSign, bSign;
93508daf0c2Sdrahn
93608daf0c2Sdrahn aSign = extractFloat32Sign( a );
93708daf0c2Sdrahn bSign = extractFloat32Sign( b );
93808daf0c2Sdrahn if ( aSign == bSign ) {
93908daf0c2Sdrahn return addFloat32Sigs( a, b, aSign );
94008daf0c2Sdrahn }
94108daf0c2Sdrahn else {
94208daf0c2Sdrahn return subFloat32Sigs( a, b, aSign );
94308daf0c2Sdrahn }
94408daf0c2Sdrahn
94508daf0c2Sdrahn }
94608daf0c2Sdrahn
94708daf0c2Sdrahn /*
94808daf0c2Sdrahn -------------------------------------------------------------------------------
94908daf0c2Sdrahn Returns the result of subtracting the single-precision floating-point values
95008daf0c2Sdrahn `a' and `b'. The operation is performed according to the IEC/IEEE Standard
95108daf0c2Sdrahn for Binary Floating-Point Arithmetic.
95208daf0c2Sdrahn -------------------------------------------------------------------------------
95308daf0c2Sdrahn */
float32_sub(float32 a,float32 b)95408daf0c2Sdrahn float32 float32_sub( float32 a, float32 b )
95508daf0c2Sdrahn {
95608daf0c2Sdrahn flag aSign, bSign;
95708daf0c2Sdrahn
95808daf0c2Sdrahn aSign = extractFloat32Sign( a );
95908daf0c2Sdrahn bSign = extractFloat32Sign( b );
96008daf0c2Sdrahn if ( aSign == bSign ) {
96108daf0c2Sdrahn return subFloat32Sigs( a, b, aSign );
96208daf0c2Sdrahn }
96308daf0c2Sdrahn else {
96408daf0c2Sdrahn return addFloat32Sigs( a, b, aSign );
96508daf0c2Sdrahn }
96608daf0c2Sdrahn
96708daf0c2Sdrahn }
96808daf0c2Sdrahn
96908daf0c2Sdrahn /*
97008daf0c2Sdrahn -------------------------------------------------------------------------------
97108daf0c2Sdrahn Returns the result of multiplying the single-precision floating-point values
97208daf0c2Sdrahn `a' and `b'. The operation is performed according to the IEC/IEEE Standard
97308daf0c2Sdrahn for Binary Floating-Point Arithmetic.
97408daf0c2Sdrahn -------------------------------------------------------------------------------
97508daf0c2Sdrahn */
float32_mul(float32 a,float32 b)97608daf0c2Sdrahn float32 float32_mul( float32 a, float32 b )
97708daf0c2Sdrahn {
97808daf0c2Sdrahn flag aSign, bSign, zSign;
97908daf0c2Sdrahn int16 aExp, bExp, zExp;
98008daf0c2Sdrahn bits32 aSig, bSig, zSig0, zSig1;
98108daf0c2Sdrahn
98208daf0c2Sdrahn aSig = extractFloat32Frac( a );
98308daf0c2Sdrahn aExp = extractFloat32Exp( a );
98408daf0c2Sdrahn aSign = extractFloat32Sign( a );
98508daf0c2Sdrahn bSig = extractFloat32Frac( b );
98608daf0c2Sdrahn bExp = extractFloat32Exp( b );
98708daf0c2Sdrahn bSign = extractFloat32Sign( b );
98808daf0c2Sdrahn zSign = aSign ^ bSign;
98908daf0c2Sdrahn if ( aExp == 0xFF ) {
99008daf0c2Sdrahn if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
99108daf0c2Sdrahn return propagateFloat32NaN( a, b );
99208daf0c2Sdrahn }
99308daf0c2Sdrahn if ( ( bExp | bSig ) == 0 ) {
99408daf0c2Sdrahn float_raise( float_flag_invalid );
99508daf0c2Sdrahn return float32_default_nan;
99608daf0c2Sdrahn }
99708daf0c2Sdrahn return packFloat32( zSign, 0xFF, 0 );
99808daf0c2Sdrahn }
99908daf0c2Sdrahn if ( bExp == 0xFF ) {
100008daf0c2Sdrahn if ( bSig ) return propagateFloat32NaN( a, b );
100108daf0c2Sdrahn if ( ( aExp | aSig ) == 0 ) {
100208daf0c2Sdrahn float_raise( float_flag_invalid );
100308daf0c2Sdrahn return float32_default_nan;
100408daf0c2Sdrahn }
100508daf0c2Sdrahn return packFloat32( zSign, 0xFF, 0 );
100608daf0c2Sdrahn }
100708daf0c2Sdrahn if ( aExp == 0 ) {
100808daf0c2Sdrahn if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
100908daf0c2Sdrahn normalizeFloat32Subnormal( aSig, &aExp, &aSig );
101008daf0c2Sdrahn }
101108daf0c2Sdrahn if ( bExp == 0 ) {
101208daf0c2Sdrahn if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
101308daf0c2Sdrahn normalizeFloat32Subnormal( bSig, &bExp, &bSig );
101408daf0c2Sdrahn }
101508daf0c2Sdrahn zExp = aExp + bExp - 0x7F;
101608daf0c2Sdrahn aSig = ( aSig | 0x00800000 )<<7;
101708daf0c2Sdrahn bSig = ( bSig | 0x00800000 )<<8;
101808daf0c2Sdrahn mul32To64( aSig, bSig, &zSig0, &zSig1 );
101908daf0c2Sdrahn zSig0 |= ( zSig1 != 0 );
102008daf0c2Sdrahn if ( 0 <= (sbits32) ( zSig0<<1 ) ) {
102108daf0c2Sdrahn zSig0 <<= 1;
102208daf0c2Sdrahn --zExp;
102308daf0c2Sdrahn }
102408daf0c2Sdrahn return roundAndPackFloat32( zSign, zExp, zSig0 );
102508daf0c2Sdrahn
102608daf0c2Sdrahn }
102708daf0c2Sdrahn
102808daf0c2Sdrahn /*
102908daf0c2Sdrahn -------------------------------------------------------------------------------
103008daf0c2Sdrahn Returns the result of dividing the single-precision floating-point value `a'
103108daf0c2Sdrahn by the corresponding value `b'. The operation is performed according to the
103208daf0c2Sdrahn IEC/IEEE Standard for Binary Floating-Point Arithmetic.
103308daf0c2Sdrahn -------------------------------------------------------------------------------
103408daf0c2Sdrahn */
float32_div(float32 a,float32 b)103508daf0c2Sdrahn float32 float32_div( float32 a, float32 b )
103608daf0c2Sdrahn {
103708daf0c2Sdrahn flag aSign, bSign, zSign;
103808daf0c2Sdrahn int16 aExp, bExp, zExp;
103908daf0c2Sdrahn bits32 aSig, bSig, zSig, rem0, rem1, term0, term1;
104008daf0c2Sdrahn
104108daf0c2Sdrahn aSig = extractFloat32Frac( a );
104208daf0c2Sdrahn aExp = extractFloat32Exp( a );
104308daf0c2Sdrahn aSign = extractFloat32Sign( a );
104408daf0c2Sdrahn bSig = extractFloat32Frac( b );
104508daf0c2Sdrahn bExp = extractFloat32Exp( b );
104608daf0c2Sdrahn bSign = extractFloat32Sign( b );
104708daf0c2Sdrahn zSign = aSign ^ bSign;
104808daf0c2Sdrahn if ( aExp == 0xFF ) {
104908daf0c2Sdrahn if ( aSig ) return propagateFloat32NaN( a, b );
105008daf0c2Sdrahn if ( bExp == 0xFF ) {
105108daf0c2Sdrahn if ( bSig ) return propagateFloat32NaN( a, b );
105208daf0c2Sdrahn float_raise( float_flag_invalid );
105308daf0c2Sdrahn return float32_default_nan;
105408daf0c2Sdrahn }
105508daf0c2Sdrahn return packFloat32( zSign, 0xFF, 0 );
105608daf0c2Sdrahn }
105708daf0c2Sdrahn if ( bExp == 0xFF ) {
105808daf0c2Sdrahn if ( bSig ) return propagateFloat32NaN( a, b );
105908daf0c2Sdrahn return packFloat32( zSign, 0, 0 );
106008daf0c2Sdrahn }
106108daf0c2Sdrahn if ( bExp == 0 ) {
106208daf0c2Sdrahn if ( bSig == 0 ) {
106308daf0c2Sdrahn if ( ( aExp | aSig ) == 0 ) {
106408daf0c2Sdrahn float_raise( float_flag_invalid );
106508daf0c2Sdrahn return float32_default_nan;
106608daf0c2Sdrahn }
106708daf0c2Sdrahn float_raise( float_flag_divbyzero );
106808daf0c2Sdrahn return packFloat32( zSign, 0xFF, 0 );
106908daf0c2Sdrahn }
107008daf0c2Sdrahn normalizeFloat32Subnormal( bSig, &bExp, &bSig );
107108daf0c2Sdrahn }
107208daf0c2Sdrahn if ( aExp == 0 ) {
107308daf0c2Sdrahn if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
107408daf0c2Sdrahn normalizeFloat32Subnormal( aSig, &aExp, &aSig );
107508daf0c2Sdrahn }
107608daf0c2Sdrahn zExp = aExp - bExp + 0x7D;
107708daf0c2Sdrahn aSig = ( aSig | 0x00800000 )<<7;
107808daf0c2Sdrahn bSig = ( bSig | 0x00800000 )<<8;
107908daf0c2Sdrahn if ( bSig <= ( aSig + aSig ) ) {
108008daf0c2Sdrahn aSig >>= 1;
108108daf0c2Sdrahn ++zExp;
108208daf0c2Sdrahn }
108308daf0c2Sdrahn zSig = estimateDiv64To32( aSig, 0, bSig );
108408daf0c2Sdrahn if ( ( zSig & 0x3F ) <= 2 ) {
108508daf0c2Sdrahn mul32To64( bSig, zSig, &term0, &term1 );
108608daf0c2Sdrahn sub64( aSig, 0, term0, term1, &rem0, &rem1 );
108708daf0c2Sdrahn while ( (sbits32) rem0 < 0 ) {
108808daf0c2Sdrahn --zSig;
108908daf0c2Sdrahn add64( rem0, rem1, 0, bSig, &rem0, &rem1 );
109008daf0c2Sdrahn }
109108daf0c2Sdrahn zSig |= ( rem1 != 0 );
109208daf0c2Sdrahn }
109308daf0c2Sdrahn return roundAndPackFloat32( zSign, zExp, zSig );
109408daf0c2Sdrahn
109508daf0c2Sdrahn }
109608daf0c2Sdrahn
109708daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
109808daf0c2Sdrahn /*
109908daf0c2Sdrahn -------------------------------------------------------------------------------
110008daf0c2Sdrahn Returns the remainder of the single-precision floating-point value `a'
110108daf0c2Sdrahn with respect to the corresponding value `b'. The operation is performed
110208daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
110308daf0c2Sdrahn -------------------------------------------------------------------------------
110408daf0c2Sdrahn */
float32_rem(float32 a,float32 b)110508daf0c2Sdrahn float32 float32_rem( float32 a, float32 b )
110608daf0c2Sdrahn {
110708daf0c2Sdrahn flag aSign, bSign, zSign;
110808daf0c2Sdrahn int16 aExp, bExp, expDiff;
110908daf0c2Sdrahn bits32 aSig, bSig, q, allZero, alternateASig;
111008daf0c2Sdrahn sbits32 sigMean;
111108daf0c2Sdrahn
111208daf0c2Sdrahn aSig = extractFloat32Frac( a );
111308daf0c2Sdrahn aExp = extractFloat32Exp( a );
111408daf0c2Sdrahn aSign = extractFloat32Sign( a );
111508daf0c2Sdrahn bSig = extractFloat32Frac( b );
111608daf0c2Sdrahn bExp = extractFloat32Exp( b );
111708daf0c2Sdrahn bSign = extractFloat32Sign( b );
111808daf0c2Sdrahn if ( aExp == 0xFF ) {
111908daf0c2Sdrahn if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
112008daf0c2Sdrahn return propagateFloat32NaN( a, b );
112108daf0c2Sdrahn }
112208daf0c2Sdrahn float_raise( float_flag_invalid );
112308daf0c2Sdrahn return float32_default_nan;
112408daf0c2Sdrahn }
112508daf0c2Sdrahn if ( bExp == 0xFF ) {
112608daf0c2Sdrahn if ( bSig ) return propagateFloat32NaN( a, b );
112708daf0c2Sdrahn return a;
112808daf0c2Sdrahn }
112908daf0c2Sdrahn if ( bExp == 0 ) {
113008daf0c2Sdrahn if ( bSig == 0 ) {
113108daf0c2Sdrahn float_raise( float_flag_invalid );
113208daf0c2Sdrahn return float32_default_nan;
113308daf0c2Sdrahn }
113408daf0c2Sdrahn normalizeFloat32Subnormal( bSig, &bExp, &bSig );
113508daf0c2Sdrahn }
113608daf0c2Sdrahn if ( aExp == 0 ) {
113708daf0c2Sdrahn if ( aSig == 0 ) return a;
113808daf0c2Sdrahn normalizeFloat32Subnormal( aSig, &aExp, &aSig );
113908daf0c2Sdrahn }
114008daf0c2Sdrahn expDiff = aExp - bExp;
114108daf0c2Sdrahn aSig = ( aSig | 0x00800000 )<<8;
114208daf0c2Sdrahn bSig = ( bSig | 0x00800000 )<<8;
114308daf0c2Sdrahn if ( expDiff < 0 ) {
114408daf0c2Sdrahn if ( expDiff < -1 ) return a;
114508daf0c2Sdrahn aSig >>= 1;
114608daf0c2Sdrahn }
114708daf0c2Sdrahn q = ( bSig <= aSig );
114808daf0c2Sdrahn if ( q ) aSig -= bSig;
114908daf0c2Sdrahn expDiff -= 32;
115008daf0c2Sdrahn while ( 0 < expDiff ) {
115108daf0c2Sdrahn q = estimateDiv64To32( aSig, 0, bSig );
115208daf0c2Sdrahn q = ( 2 < q ) ? q - 2 : 0;
115308daf0c2Sdrahn aSig = - ( ( bSig>>2 ) * q );
115408daf0c2Sdrahn expDiff -= 30;
115508daf0c2Sdrahn }
115608daf0c2Sdrahn expDiff += 32;
115708daf0c2Sdrahn if ( 0 < expDiff ) {
115808daf0c2Sdrahn q = estimateDiv64To32( aSig, 0, bSig );
115908daf0c2Sdrahn q = ( 2 < q ) ? q - 2 : 0;
116008daf0c2Sdrahn q >>= 32 - expDiff;
116108daf0c2Sdrahn bSig >>= 2;
116208daf0c2Sdrahn aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
116308daf0c2Sdrahn }
116408daf0c2Sdrahn else {
116508daf0c2Sdrahn aSig >>= 2;
116608daf0c2Sdrahn bSig >>= 2;
116708daf0c2Sdrahn }
116808daf0c2Sdrahn do {
116908daf0c2Sdrahn alternateASig = aSig;
117008daf0c2Sdrahn ++q;
117108daf0c2Sdrahn aSig -= bSig;
117208daf0c2Sdrahn } while ( 0 <= (sbits32) aSig );
117308daf0c2Sdrahn sigMean = aSig + alternateASig;
117408daf0c2Sdrahn if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
117508daf0c2Sdrahn aSig = alternateASig;
117608daf0c2Sdrahn }
117708daf0c2Sdrahn zSign = ( (sbits32) aSig < 0 );
117808daf0c2Sdrahn if ( zSign ) aSig = - aSig;
117908daf0c2Sdrahn return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
118008daf0c2Sdrahn
118108daf0c2Sdrahn }
118208daf0c2Sdrahn #endif
118308daf0c2Sdrahn
118408daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
118508daf0c2Sdrahn /*
118608daf0c2Sdrahn -------------------------------------------------------------------------------
118708daf0c2Sdrahn Returns the square root of the single-precision floating-point value `a'.
118808daf0c2Sdrahn The operation is performed according to the IEC/IEEE Standard for Binary
118908daf0c2Sdrahn Floating-Point Arithmetic.
119008daf0c2Sdrahn -------------------------------------------------------------------------------
119108daf0c2Sdrahn */
float32_sqrt(float32 a)119208daf0c2Sdrahn float32 float32_sqrt( float32 a )
119308daf0c2Sdrahn {
119408daf0c2Sdrahn flag aSign;
119508daf0c2Sdrahn int16 aExp, zExp;
119608daf0c2Sdrahn bits32 aSig, zSig, rem0, rem1, term0, term1;
119708daf0c2Sdrahn
119808daf0c2Sdrahn aSig = extractFloat32Frac( a );
119908daf0c2Sdrahn aExp = extractFloat32Exp( a );
120008daf0c2Sdrahn aSign = extractFloat32Sign( a );
120108daf0c2Sdrahn if ( aExp == 0xFF ) {
120208daf0c2Sdrahn if ( aSig ) return propagateFloat32NaN( a, 0 );
120308daf0c2Sdrahn if ( ! aSign ) return a;
120408daf0c2Sdrahn float_raise( float_flag_invalid );
120508daf0c2Sdrahn return float32_default_nan;
120608daf0c2Sdrahn }
120708daf0c2Sdrahn if ( aSign ) {
120808daf0c2Sdrahn if ( ( aExp | aSig ) == 0 ) return a;
120908daf0c2Sdrahn float_raise( float_flag_invalid );
121008daf0c2Sdrahn return float32_default_nan;
121108daf0c2Sdrahn }
121208daf0c2Sdrahn if ( aExp == 0 ) {
121308daf0c2Sdrahn if ( aSig == 0 ) return 0;
121408daf0c2Sdrahn normalizeFloat32Subnormal( aSig, &aExp, &aSig );
121508daf0c2Sdrahn }
121608daf0c2Sdrahn zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
121708daf0c2Sdrahn aSig = ( aSig | 0x00800000 )<<8;
121808daf0c2Sdrahn zSig = estimateSqrt32( aExp, aSig ) + 2;
121908daf0c2Sdrahn if ( ( zSig & 0x7F ) <= 5 ) {
122008daf0c2Sdrahn if ( zSig < 2 ) {
122108daf0c2Sdrahn zSig = 0x7FFFFFFF;
122208daf0c2Sdrahn goto roundAndPack;
122308daf0c2Sdrahn }
122408daf0c2Sdrahn else {
122508daf0c2Sdrahn aSig >>= aExp & 1;
122608daf0c2Sdrahn mul32To64( zSig, zSig, &term0, &term1 );
122708daf0c2Sdrahn sub64( aSig, 0, term0, term1, &rem0, &rem1 );
122808daf0c2Sdrahn while ( (sbits32) rem0 < 0 ) {
122908daf0c2Sdrahn --zSig;
123008daf0c2Sdrahn shortShift64Left( 0, zSig, 1, &term0, &term1 );
123108daf0c2Sdrahn term1 |= 1;
123208daf0c2Sdrahn add64( rem0, rem1, term0, term1, &rem0, &rem1 );
123308daf0c2Sdrahn }
123408daf0c2Sdrahn zSig |= ( ( rem0 | rem1 ) != 0 );
123508daf0c2Sdrahn }
123608daf0c2Sdrahn }
123708daf0c2Sdrahn shift32RightJamming( zSig, 1, &zSig );
123808daf0c2Sdrahn roundAndPack:
123908daf0c2Sdrahn return roundAndPackFloat32( 0, zExp, zSig );
124008daf0c2Sdrahn
124108daf0c2Sdrahn }
124208daf0c2Sdrahn #endif
124308daf0c2Sdrahn
124408daf0c2Sdrahn /*
124508daf0c2Sdrahn -------------------------------------------------------------------------------
124608daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is equal to
124708daf0c2Sdrahn the corresponding value `b', and 0 otherwise. The comparison is performed
124808daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
124908daf0c2Sdrahn -------------------------------------------------------------------------------
125008daf0c2Sdrahn */
float32_eq(float32 a,float32 b)125108daf0c2Sdrahn flag float32_eq( float32 a, float32 b )
125208daf0c2Sdrahn {
125308daf0c2Sdrahn
125408daf0c2Sdrahn if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
125508daf0c2Sdrahn || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
125608daf0c2Sdrahn ) {
125708daf0c2Sdrahn if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
125808daf0c2Sdrahn float_raise( float_flag_invalid );
125908daf0c2Sdrahn }
126008daf0c2Sdrahn return 0;
126108daf0c2Sdrahn }
126208daf0c2Sdrahn return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
126308daf0c2Sdrahn
126408daf0c2Sdrahn }
126508daf0c2Sdrahn
126608daf0c2Sdrahn /*
126708daf0c2Sdrahn -------------------------------------------------------------------------------
126808daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is less than
126908daf0c2Sdrahn or equal to the corresponding value `b', and 0 otherwise. The comparison
127008daf0c2Sdrahn is performed according to the IEC/IEEE Standard for Binary Floating-Point
127108daf0c2Sdrahn Arithmetic.
127208daf0c2Sdrahn -------------------------------------------------------------------------------
127308daf0c2Sdrahn */
float32_le(float32 a,float32 b)127408daf0c2Sdrahn flag float32_le( float32 a, float32 b )
127508daf0c2Sdrahn {
127608daf0c2Sdrahn flag aSign, bSign;
127708daf0c2Sdrahn
127808daf0c2Sdrahn if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
127908daf0c2Sdrahn || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
128008daf0c2Sdrahn ) {
128108daf0c2Sdrahn float_raise( float_flag_invalid );
128208daf0c2Sdrahn return 0;
128308daf0c2Sdrahn }
128408daf0c2Sdrahn aSign = extractFloat32Sign( a );
128508daf0c2Sdrahn bSign = extractFloat32Sign( b );
128608daf0c2Sdrahn if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
128708daf0c2Sdrahn return ( a == b ) || ( aSign ^ ( a < b ) );
128808daf0c2Sdrahn
128908daf0c2Sdrahn }
129008daf0c2Sdrahn
129108daf0c2Sdrahn /*
129208daf0c2Sdrahn -------------------------------------------------------------------------------
129308daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is less than
129408daf0c2Sdrahn the corresponding value `b', and 0 otherwise. The comparison is performed
129508daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129608daf0c2Sdrahn -------------------------------------------------------------------------------
129708daf0c2Sdrahn */
float32_lt(float32 a,float32 b)129808daf0c2Sdrahn flag float32_lt( float32 a, float32 b )
129908daf0c2Sdrahn {
130008daf0c2Sdrahn flag aSign, bSign;
130108daf0c2Sdrahn
130208daf0c2Sdrahn if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
130308daf0c2Sdrahn || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
130408daf0c2Sdrahn ) {
130508daf0c2Sdrahn float_raise( float_flag_invalid );
130608daf0c2Sdrahn return 0;
130708daf0c2Sdrahn }
130808daf0c2Sdrahn aSign = extractFloat32Sign( a );
130908daf0c2Sdrahn bSign = extractFloat32Sign( b );
131008daf0c2Sdrahn if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
131108daf0c2Sdrahn return ( a != b ) && ( aSign ^ ( a < b ) );
131208daf0c2Sdrahn
131308daf0c2Sdrahn }
131408daf0c2Sdrahn
131508daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
131608daf0c2Sdrahn /*
131708daf0c2Sdrahn -------------------------------------------------------------------------------
131808daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is equal to
131908daf0c2Sdrahn the corresponding value `b', and 0 otherwise. The invalid exception is
132008daf0c2Sdrahn raised if either operand is a NaN. Otherwise, the comparison is performed
132108daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
132208daf0c2Sdrahn -------------------------------------------------------------------------------
132308daf0c2Sdrahn */
float32_eq_signaling(float32 a,float32 b)132408daf0c2Sdrahn flag float32_eq_signaling( float32 a, float32 b )
132508daf0c2Sdrahn {
132608daf0c2Sdrahn
132708daf0c2Sdrahn if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
132808daf0c2Sdrahn || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
132908daf0c2Sdrahn ) {
133008daf0c2Sdrahn float_raise( float_flag_invalid );
133108daf0c2Sdrahn return 0;
133208daf0c2Sdrahn }
133308daf0c2Sdrahn return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
133408daf0c2Sdrahn
133508daf0c2Sdrahn }
133608daf0c2Sdrahn
133708daf0c2Sdrahn /*
133808daf0c2Sdrahn -------------------------------------------------------------------------------
133908daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is less than or
134008daf0c2Sdrahn equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
134108daf0c2Sdrahn cause an exception. Otherwise, the comparison is performed according to the
134208daf0c2Sdrahn IEC/IEEE Standard for Binary Floating-Point Arithmetic.
134308daf0c2Sdrahn -------------------------------------------------------------------------------
134408daf0c2Sdrahn */
float32_le_quiet(float32 a,float32 b)134508daf0c2Sdrahn flag float32_le_quiet( float32 a, float32 b )
134608daf0c2Sdrahn {
134708daf0c2Sdrahn flag aSign, bSign;
134808daf0c2Sdrahn int16 aExp, bExp;
134908daf0c2Sdrahn
135008daf0c2Sdrahn if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
135108daf0c2Sdrahn || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
135208daf0c2Sdrahn ) {
135308daf0c2Sdrahn if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
135408daf0c2Sdrahn float_raise( float_flag_invalid );
135508daf0c2Sdrahn }
135608daf0c2Sdrahn return 0;
135708daf0c2Sdrahn }
135808daf0c2Sdrahn aSign = extractFloat32Sign( a );
135908daf0c2Sdrahn bSign = extractFloat32Sign( b );
136008daf0c2Sdrahn if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
136108daf0c2Sdrahn return ( a == b ) || ( aSign ^ ( a < b ) );
136208daf0c2Sdrahn
136308daf0c2Sdrahn }
136408daf0c2Sdrahn
136508daf0c2Sdrahn /*
136608daf0c2Sdrahn -------------------------------------------------------------------------------
136708daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is less than
136808daf0c2Sdrahn the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
136908daf0c2Sdrahn exception. Otherwise, the comparison is performed according to the IEC/IEEE
137008daf0c2Sdrahn Standard for Binary Floating-Point Arithmetic.
137108daf0c2Sdrahn -------------------------------------------------------------------------------
137208daf0c2Sdrahn */
float32_lt_quiet(float32 a,float32 b)137308daf0c2Sdrahn flag float32_lt_quiet( float32 a, float32 b )
137408daf0c2Sdrahn {
137508daf0c2Sdrahn flag aSign, bSign;
137608daf0c2Sdrahn
137708daf0c2Sdrahn if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
137808daf0c2Sdrahn || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
137908daf0c2Sdrahn ) {
138008daf0c2Sdrahn if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
138108daf0c2Sdrahn float_raise( float_flag_invalid );
138208daf0c2Sdrahn }
138308daf0c2Sdrahn return 0;
138408daf0c2Sdrahn }
138508daf0c2Sdrahn aSign = extractFloat32Sign( a );
138608daf0c2Sdrahn bSign = extractFloat32Sign( b );
138708daf0c2Sdrahn if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
138808daf0c2Sdrahn return ( a != b ) && ( aSign ^ ( a < b ) );
138908daf0c2Sdrahn
139008daf0c2Sdrahn }
139108daf0c2Sdrahn #endif /* !SOFTFLOAT_FOR_GCC */
139208daf0c2Sdrahn
139308daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
139408daf0c2Sdrahn /*
139508daf0c2Sdrahn -------------------------------------------------------------------------------
139608daf0c2Sdrahn Returns the result of converting the double-precision floating-point value
139708daf0c2Sdrahn `a' to the 32-bit two's complement integer format. The conversion is
139808daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point
139908daf0c2Sdrahn Arithmetic---which means in particular that the conversion is rounded
140008daf0c2Sdrahn according to the current rounding mode. If `a' is a NaN, the largest
140108daf0c2Sdrahn positive integer is returned. Otherwise, if the conversion overflows, the
140208daf0c2Sdrahn largest integer with the same sign as `a' is returned.
140308daf0c2Sdrahn -------------------------------------------------------------------------------
140408daf0c2Sdrahn */
float64_to_int32(float64 a)140508daf0c2Sdrahn int32 float64_to_int32( float64 a )
140608daf0c2Sdrahn {
140708daf0c2Sdrahn flag aSign;
140808daf0c2Sdrahn int16 aExp, shiftCount;
140908daf0c2Sdrahn bits32 aSig0, aSig1, absZ, aSigExtra;
141008daf0c2Sdrahn int32 z;
141108daf0c2Sdrahn int8 roundingMode;
141208daf0c2Sdrahn
141308daf0c2Sdrahn aSig1 = extractFloat64Frac1( a );
141408daf0c2Sdrahn aSig0 = extractFloat64Frac0( a );
141508daf0c2Sdrahn aExp = extractFloat64Exp( a );
141608daf0c2Sdrahn aSign = extractFloat64Sign( a );
141708daf0c2Sdrahn shiftCount = aExp - 0x413;
141808daf0c2Sdrahn if ( 0 <= shiftCount ) {
141908daf0c2Sdrahn if ( 0x41E < aExp ) {
142008daf0c2Sdrahn if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
142108daf0c2Sdrahn goto invalid;
142208daf0c2Sdrahn }
142308daf0c2Sdrahn shortShift64Left(
142408daf0c2Sdrahn aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
142508daf0c2Sdrahn if ( 0x80000000 < absZ ) goto invalid;
142608daf0c2Sdrahn }
142708daf0c2Sdrahn else {
142808daf0c2Sdrahn aSig1 = ( aSig1 != 0 );
142908daf0c2Sdrahn if ( aExp < 0x3FE ) {
143008daf0c2Sdrahn aSigExtra = aExp | aSig0 | aSig1;
143108daf0c2Sdrahn absZ = 0;
143208daf0c2Sdrahn }
143308daf0c2Sdrahn else {
143408daf0c2Sdrahn aSig0 |= 0x00100000;
143508daf0c2Sdrahn aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
143608daf0c2Sdrahn absZ = aSig0>>( - shiftCount );
143708daf0c2Sdrahn }
143808daf0c2Sdrahn }
143908daf0c2Sdrahn roundingMode = float_rounding_mode;
144008daf0c2Sdrahn if ( roundingMode == float_round_nearest_even ) {
144108daf0c2Sdrahn if ( (sbits32) aSigExtra < 0 ) {
144208daf0c2Sdrahn ++absZ;
144308daf0c2Sdrahn if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
144408daf0c2Sdrahn }
144508daf0c2Sdrahn z = aSign ? - absZ : absZ;
144608daf0c2Sdrahn }
144708daf0c2Sdrahn else {
144808daf0c2Sdrahn aSigExtra = ( aSigExtra != 0 );
144908daf0c2Sdrahn if ( aSign ) {
145008daf0c2Sdrahn z = - ( absZ
145108daf0c2Sdrahn + ( ( roundingMode == float_round_down ) & aSigExtra ) );
145208daf0c2Sdrahn }
145308daf0c2Sdrahn else {
145408daf0c2Sdrahn z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
145508daf0c2Sdrahn }
145608daf0c2Sdrahn }
145708daf0c2Sdrahn if ( ( aSign ^ ( z < 0 ) ) && z ) {
145808daf0c2Sdrahn invalid:
145908daf0c2Sdrahn float_raise( float_flag_invalid );
146008daf0c2Sdrahn return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
146108daf0c2Sdrahn }
146208daf0c2Sdrahn if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
146308daf0c2Sdrahn return z;
146408daf0c2Sdrahn
146508daf0c2Sdrahn }
146608daf0c2Sdrahn #endif /* !SOFTFLOAT_FOR_GCC */
146708daf0c2Sdrahn
146808daf0c2Sdrahn /*
146908daf0c2Sdrahn -------------------------------------------------------------------------------
147008daf0c2Sdrahn Returns the result of converting the double-precision floating-point value
147108daf0c2Sdrahn `a' to the 32-bit two's complement integer format. The conversion is
147208daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point
147308daf0c2Sdrahn Arithmetic, except that the conversion is always rounded toward zero.
147408daf0c2Sdrahn If `a' is a NaN, the largest positive integer is returned. Otherwise, if
147508daf0c2Sdrahn the conversion overflows, the largest integer with the same sign as `a' is
147608daf0c2Sdrahn returned.
147708daf0c2Sdrahn -------------------------------------------------------------------------------
147808daf0c2Sdrahn */
float64_to_int32_round_to_zero(float64 a)147908daf0c2Sdrahn int32 float64_to_int32_round_to_zero( float64 a )
148008daf0c2Sdrahn {
148108daf0c2Sdrahn flag aSign;
148208daf0c2Sdrahn int16 aExp, shiftCount;
148308daf0c2Sdrahn bits32 aSig0, aSig1, absZ, aSigExtra;
148408daf0c2Sdrahn int32 z;
148508daf0c2Sdrahn
148608daf0c2Sdrahn aSig1 = extractFloat64Frac1( a );
148708daf0c2Sdrahn aSig0 = extractFloat64Frac0( a );
148808daf0c2Sdrahn aExp = extractFloat64Exp( a );
148908daf0c2Sdrahn aSign = extractFloat64Sign( a );
149008daf0c2Sdrahn shiftCount = aExp - 0x413;
149108daf0c2Sdrahn if ( 0 <= shiftCount ) {
149208daf0c2Sdrahn if ( 0x41E < aExp ) {
149308daf0c2Sdrahn if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
149408daf0c2Sdrahn goto invalid;
149508daf0c2Sdrahn }
149608daf0c2Sdrahn shortShift64Left(
149708daf0c2Sdrahn aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
149808daf0c2Sdrahn }
149908daf0c2Sdrahn else {
150008daf0c2Sdrahn if ( aExp < 0x3FF ) {
150108daf0c2Sdrahn if ( aExp | aSig0 | aSig1 ) {
150208daf0c2Sdrahn float_exception_flags |= float_flag_inexact;
150308daf0c2Sdrahn }
150408daf0c2Sdrahn return 0;
150508daf0c2Sdrahn }
150608daf0c2Sdrahn aSig0 |= 0x00100000;
150708daf0c2Sdrahn aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
150808daf0c2Sdrahn absZ = aSig0>>( - shiftCount );
150908daf0c2Sdrahn }
151008daf0c2Sdrahn z = aSign ? - absZ : absZ;
151108daf0c2Sdrahn if ( ( aSign ^ ( z < 0 ) ) && z ) {
151208daf0c2Sdrahn invalid:
151308daf0c2Sdrahn float_raise( float_flag_invalid );
151408daf0c2Sdrahn return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
151508daf0c2Sdrahn }
151608daf0c2Sdrahn if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
151708daf0c2Sdrahn return z;
151808daf0c2Sdrahn
151908daf0c2Sdrahn }
152008daf0c2Sdrahn
152108daf0c2Sdrahn /*
152208daf0c2Sdrahn -------------------------------------------------------------------------------
152308daf0c2Sdrahn Returns the result of converting the double-precision floating-point value
152408daf0c2Sdrahn `a' to the single-precision floating-point format. The conversion is
152508daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point
152608daf0c2Sdrahn Arithmetic.
152708daf0c2Sdrahn -------------------------------------------------------------------------------
152808daf0c2Sdrahn */
float64_to_float32(float64 a)152908daf0c2Sdrahn float32 float64_to_float32( float64 a )
153008daf0c2Sdrahn {
153108daf0c2Sdrahn flag aSign;
153208daf0c2Sdrahn int16 aExp;
153308daf0c2Sdrahn bits32 aSig0, aSig1, zSig;
153408daf0c2Sdrahn bits32 allZero;
153508daf0c2Sdrahn
153608daf0c2Sdrahn aSig1 = extractFloat64Frac1( a );
153708daf0c2Sdrahn aSig0 = extractFloat64Frac0( a );
153808daf0c2Sdrahn aExp = extractFloat64Exp( a );
153908daf0c2Sdrahn aSign = extractFloat64Sign( a );
154008daf0c2Sdrahn if ( aExp == 0x7FF ) {
154108daf0c2Sdrahn if ( aSig0 | aSig1 ) {
154208daf0c2Sdrahn return commonNaNToFloat32( float64ToCommonNaN( a ) );
154308daf0c2Sdrahn }
154408daf0c2Sdrahn return packFloat32( aSign, 0xFF, 0 );
154508daf0c2Sdrahn }
154608daf0c2Sdrahn shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
154708daf0c2Sdrahn if ( aExp ) zSig |= 0x40000000;
154808daf0c2Sdrahn return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
154908daf0c2Sdrahn
155008daf0c2Sdrahn }
155108daf0c2Sdrahn
155208daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
155308daf0c2Sdrahn /*
155408daf0c2Sdrahn -------------------------------------------------------------------------------
155508daf0c2Sdrahn Rounds the double-precision floating-point value `a' to an integer,
155608daf0c2Sdrahn and returns the result as a double-precision floating-point value. The
155708daf0c2Sdrahn operation is performed according to the IEC/IEEE Standard for Binary
155808daf0c2Sdrahn Floating-Point Arithmetic.
155908daf0c2Sdrahn -------------------------------------------------------------------------------
156008daf0c2Sdrahn */
float64_round_to_int(float64 a)156108daf0c2Sdrahn float64 float64_round_to_int( float64 a )
156208daf0c2Sdrahn {
156308daf0c2Sdrahn flag aSign;
156408daf0c2Sdrahn int16 aExp;
156508daf0c2Sdrahn bits32 lastBitMask, roundBitsMask;
156608daf0c2Sdrahn int8 roundingMode;
156708daf0c2Sdrahn float64 z;
156808daf0c2Sdrahn
156908daf0c2Sdrahn aExp = extractFloat64Exp( a );
157008daf0c2Sdrahn if ( 0x413 <= aExp ) {
157108daf0c2Sdrahn if ( 0x433 <= aExp ) {
157208daf0c2Sdrahn if ( ( aExp == 0x7FF )
157308daf0c2Sdrahn && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
157408daf0c2Sdrahn return propagateFloat64NaN( a, a );
157508daf0c2Sdrahn }
157608daf0c2Sdrahn return a;
157708daf0c2Sdrahn }
157808daf0c2Sdrahn lastBitMask = 1;
157908daf0c2Sdrahn lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;
158008daf0c2Sdrahn roundBitsMask = lastBitMask - 1;
158108daf0c2Sdrahn z = a;
158208daf0c2Sdrahn roundingMode = float_rounding_mode;
158308daf0c2Sdrahn if ( roundingMode == float_round_nearest_even ) {
158408daf0c2Sdrahn if ( lastBitMask ) {
158508daf0c2Sdrahn add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
158608daf0c2Sdrahn if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
158708daf0c2Sdrahn }
158808daf0c2Sdrahn else {
158908daf0c2Sdrahn if ( (sbits32) z.low < 0 ) {
159008daf0c2Sdrahn ++z.high;
159108daf0c2Sdrahn if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
159208daf0c2Sdrahn }
159308daf0c2Sdrahn }
159408daf0c2Sdrahn }
159508daf0c2Sdrahn else if ( roundingMode != float_round_to_zero ) {
159608daf0c2Sdrahn if ( extractFloat64Sign( z )
159708daf0c2Sdrahn ^ ( roundingMode == float_round_up ) ) {
159808daf0c2Sdrahn add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
159908daf0c2Sdrahn }
160008daf0c2Sdrahn }
160108daf0c2Sdrahn z.low &= ~ roundBitsMask;
160208daf0c2Sdrahn }
160308daf0c2Sdrahn else {
160408daf0c2Sdrahn if ( aExp <= 0x3FE ) {
160508daf0c2Sdrahn if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
160608daf0c2Sdrahn float_exception_flags |= float_flag_inexact;
160708daf0c2Sdrahn aSign = extractFloat64Sign( a );
160808daf0c2Sdrahn switch ( float_rounding_mode ) {
160908daf0c2Sdrahn case float_round_nearest_even:
161008daf0c2Sdrahn if ( ( aExp == 0x3FE )
161108daf0c2Sdrahn && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
161208daf0c2Sdrahn ) {
161308daf0c2Sdrahn return packFloat64( aSign, 0x3FF, 0, 0 );
161408daf0c2Sdrahn }
161508daf0c2Sdrahn break;
161608daf0c2Sdrahn case float_round_down:
161708daf0c2Sdrahn return
161808daf0c2Sdrahn aSign ? packFloat64( 1, 0x3FF, 0, 0 )
161908daf0c2Sdrahn : packFloat64( 0, 0, 0, 0 );
162008daf0c2Sdrahn case float_round_up:
162108daf0c2Sdrahn return
162208daf0c2Sdrahn aSign ? packFloat64( 1, 0, 0, 0 )
162308daf0c2Sdrahn : packFloat64( 0, 0x3FF, 0, 0 );
162408daf0c2Sdrahn }
162508daf0c2Sdrahn return packFloat64( aSign, 0, 0, 0 );
162608daf0c2Sdrahn }
162708daf0c2Sdrahn lastBitMask = 1;
162808daf0c2Sdrahn lastBitMask <<= 0x413 - aExp;
162908daf0c2Sdrahn roundBitsMask = lastBitMask - 1;
163008daf0c2Sdrahn z.low = 0;
163108daf0c2Sdrahn z.high = a.high;
163208daf0c2Sdrahn roundingMode = float_rounding_mode;
163308daf0c2Sdrahn if ( roundingMode == float_round_nearest_even ) {
163408daf0c2Sdrahn z.high += lastBitMask>>1;
163508daf0c2Sdrahn if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
163608daf0c2Sdrahn z.high &= ~ lastBitMask;
163708daf0c2Sdrahn }
163808daf0c2Sdrahn }
163908daf0c2Sdrahn else if ( roundingMode != float_round_to_zero ) {
164008daf0c2Sdrahn if ( extractFloat64Sign( z )
164108daf0c2Sdrahn ^ ( roundingMode == float_round_up ) ) {
164208daf0c2Sdrahn z.high |= ( a.low != 0 );
164308daf0c2Sdrahn z.high += roundBitsMask;
164408daf0c2Sdrahn }
164508daf0c2Sdrahn }
164608daf0c2Sdrahn z.high &= ~ roundBitsMask;
164708daf0c2Sdrahn }
164808daf0c2Sdrahn if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
164908daf0c2Sdrahn float_exception_flags |= float_flag_inexact;
165008daf0c2Sdrahn }
165108daf0c2Sdrahn return z;
165208daf0c2Sdrahn
165308daf0c2Sdrahn }
165408daf0c2Sdrahn #endif
165508daf0c2Sdrahn
165608daf0c2Sdrahn /*
165708daf0c2Sdrahn -------------------------------------------------------------------------------
165808daf0c2Sdrahn Returns the result of adding the absolute values of the double-precision
165908daf0c2Sdrahn floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
166008daf0c2Sdrahn before being returned. `zSign' is ignored if the result is a NaN.
166108daf0c2Sdrahn The addition is performed according to the IEC/IEEE Standard for Binary
166208daf0c2Sdrahn Floating-Point Arithmetic.
166308daf0c2Sdrahn -------------------------------------------------------------------------------
166408daf0c2Sdrahn */
addFloat64Sigs(float64 a,float64 b,flag zSign)166508daf0c2Sdrahn static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
166608daf0c2Sdrahn {
166708daf0c2Sdrahn int16 aExp, bExp, zExp;
166808daf0c2Sdrahn bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
166908daf0c2Sdrahn int16 expDiff;
167008daf0c2Sdrahn
167108daf0c2Sdrahn aSig1 = extractFloat64Frac1( a );
167208daf0c2Sdrahn aSig0 = extractFloat64Frac0( a );
167308daf0c2Sdrahn aExp = extractFloat64Exp( a );
167408daf0c2Sdrahn bSig1 = extractFloat64Frac1( b );
167508daf0c2Sdrahn bSig0 = extractFloat64Frac0( b );
167608daf0c2Sdrahn bExp = extractFloat64Exp( b );
167708daf0c2Sdrahn expDiff = aExp - bExp;
167808daf0c2Sdrahn if ( 0 < expDiff ) {
167908daf0c2Sdrahn if ( aExp == 0x7FF ) {
168008daf0c2Sdrahn if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
168108daf0c2Sdrahn return a;
168208daf0c2Sdrahn }
168308daf0c2Sdrahn if ( bExp == 0 ) {
168408daf0c2Sdrahn --expDiff;
168508daf0c2Sdrahn }
168608daf0c2Sdrahn else {
168708daf0c2Sdrahn bSig0 |= 0x00100000;
168808daf0c2Sdrahn }
168908daf0c2Sdrahn shift64ExtraRightJamming(
169008daf0c2Sdrahn bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
169108daf0c2Sdrahn zExp = aExp;
169208daf0c2Sdrahn }
169308daf0c2Sdrahn else if ( expDiff < 0 ) {
169408daf0c2Sdrahn if ( bExp == 0x7FF ) {
169508daf0c2Sdrahn if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
169608daf0c2Sdrahn return packFloat64( zSign, 0x7FF, 0, 0 );
169708daf0c2Sdrahn }
169808daf0c2Sdrahn if ( aExp == 0 ) {
169908daf0c2Sdrahn ++expDiff;
170008daf0c2Sdrahn }
170108daf0c2Sdrahn else {
170208daf0c2Sdrahn aSig0 |= 0x00100000;
170308daf0c2Sdrahn }
170408daf0c2Sdrahn shift64ExtraRightJamming(
170508daf0c2Sdrahn aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
170608daf0c2Sdrahn zExp = bExp;
170708daf0c2Sdrahn }
170808daf0c2Sdrahn else {
170908daf0c2Sdrahn if ( aExp == 0x7FF ) {
171008daf0c2Sdrahn if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
171108daf0c2Sdrahn return propagateFloat64NaN( a, b );
171208daf0c2Sdrahn }
171308daf0c2Sdrahn return a;
171408daf0c2Sdrahn }
171508daf0c2Sdrahn add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
171608daf0c2Sdrahn if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
171708daf0c2Sdrahn zSig2 = 0;
171808daf0c2Sdrahn zSig0 |= 0x00200000;
171908daf0c2Sdrahn zExp = aExp;
172008daf0c2Sdrahn goto shiftRight1;
172108daf0c2Sdrahn }
172208daf0c2Sdrahn aSig0 |= 0x00100000;
172308daf0c2Sdrahn add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
172408daf0c2Sdrahn --zExp;
172508daf0c2Sdrahn if ( zSig0 < 0x00200000 ) goto roundAndPack;
172608daf0c2Sdrahn ++zExp;
172708daf0c2Sdrahn shiftRight1:
172808daf0c2Sdrahn shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
172908daf0c2Sdrahn roundAndPack:
173008daf0c2Sdrahn return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
173108daf0c2Sdrahn
173208daf0c2Sdrahn }
173308daf0c2Sdrahn
173408daf0c2Sdrahn /*
173508daf0c2Sdrahn -------------------------------------------------------------------------------
173608daf0c2Sdrahn Returns the result of subtracting the absolute values of the double-
173708daf0c2Sdrahn precision floating-point values `a' and `b'. If `zSign' is 1, the
173808daf0c2Sdrahn difference is negated before being returned. `zSign' is ignored if the
173908daf0c2Sdrahn result is a NaN. The subtraction is performed according to the IEC/IEEE
174008daf0c2Sdrahn Standard for Binary Floating-Point Arithmetic.
174108daf0c2Sdrahn -------------------------------------------------------------------------------
174208daf0c2Sdrahn */
subFloat64Sigs(float64 a,float64 b,flag zSign)174308daf0c2Sdrahn static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
174408daf0c2Sdrahn {
174508daf0c2Sdrahn int16 aExp, bExp, zExp;
174608daf0c2Sdrahn bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
174708daf0c2Sdrahn int16 expDiff;
174808daf0c2Sdrahn
174908daf0c2Sdrahn aSig1 = extractFloat64Frac1( a );
175008daf0c2Sdrahn aSig0 = extractFloat64Frac0( a );
175108daf0c2Sdrahn aExp = extractFloat64Exp( a );
175208daf0c2Sdrahn bSig1 = extractFloat64Frac1( b );
175308daf0c2Sdrahn bSig0 = extractFloat64Frac0( b );
175408daf0c2Sdrahn bExp = extractFloat64Exp( b );
175508daf0c2Sdrahn expDiff = aExp - bExp;
175608daf0c2Sdrahn shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );
175708daf0c2Sdrahn shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
175808daf0c2Sdrahn if ( 0 < expDiff ) goto aExpBigger;
175908daf0c2Sdrahn if ( expDiff < 0 ) goto bExpBigger;
176008daf0c2Sdrahn if ( aExp == 0x7FF ) {
176108daf0c2Sdrahn if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
176208daf0c2Sdrahn return propagateFloat64NaN( a, b );
176308daf0c2Sdrahn }
176408daf0c2Sdrahn float_raise( float_flag_invalid );
176508daf0c2Sdrahn return float64_default_nan;
176608daf0c2Sdrahn }
176708daf0c2Sdrahn if ( aExp == 0 ) {
176808daf0c2Sdrahn aExp = 1;
176908daf0c2Sdrahn bExp = 1;
177008daf0c2Sdrahn }
177108daf0c2Sdrahn if ( bSig0 < aSig0 ) goto aBigger;
177208daf0c2Sdrahn if ( aSig0 < bSig0 ) goto bBigger;
177308daf0c2Sdrahn if ( bSig1 < aSig1 ) goto aBigger;
177408daf0c2Sdrahn if ( aSig1 < bSig1 ) goto bBigger;
177508daf0c2Sdrahn return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
177608daf0c2Sdrahn bExpBigger:
177708daf0c2Sdrahn if ( bExp == 0x7FF ) {
177808daf0c2Sdrahn if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
177908daf0c2Sdrahn return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
178008daf0c2Sdrahn }
178108daf0c2Sdrahn if ( aExp == 0 ) {
178208daf0c2Sdrahn ++expDiff;
178308daf0c2Sdrahn }
178408daf0c2Sdrahn else {
178508daf0c2Sdrahn aSig0 |= 0x40000000;
178608daf0c2Sdrahn }
178708daf0c2Sdrahn shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
178808daf0c2Sdrahn bSig0 |= 0x40000000;
178908daf0c2Sdrahn bBigger:
179008daf0c2Sdrahn sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
179108daf0c2Sdrahn zExp = bExp;
179208daf0c2Sdrahn zSign ^= 1;
179308daf0c2Sdrahn goto normalizeRoundAndPack;
179408daf0c2Sdrahn aExpBigger:
179508daf0c2Sdrahn if ( aExp == 0x7FF ) {
179608daf0c2Sdrahn if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
179708daf0c2Sdrahn return a;
179808daf0c2Sdrahn }
179908daf0c2Sdrahn if ( bExp == 0 ) {
180008daf0c2Sdrahn --expDiff;
180108daf0c2Sdrahn }
180208daf0c2Sdrahn else {
180308daf0c2Sdrahn bSig0 |= 0x40000000;
180408daf0c2Sdrahn }
180508daf0c2Sdrahn shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
180608daf0c2Sdrahn aSig0 |= 0x40000000;
180708daf0c2Sdrahn aBigger:
180808daf0c2Sdrahn sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
180908daf0c2Sdrahn zExp = aExp;
181008daf0c2Sdrahn normalizeRoundAndPack:
181108daf0c2Sdrahn --zExp;
181208daf0c2Sdrahn return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
181308daf0c2Sdrahn
181408daf0c2Sdrahn }
181508daf0c2Sdrahn
181608daf0c2Sdrahn /*
181708daf0c2Sdrahn -------------------------------------------------------------------------------
181808daf0c2Sdrahn Returns the result of adding the double-precision floating-point values `a'
181908daf0c2Sdrahn and `b'. The operation is performed according to the IEC/IEEE Standard for
182008daf0c2Sdrahn Binary Floating-Point Arithmetic.
182108daf0c2Sdrahn -------------------------------------------------------------------------------
182208daf0c2Sdrahn */
float64_add(float64 a,float64 b)182308daf0c2Sdrahn float64 float64_add( float64 a, float64 b )
182408daf0c2Sdrahn {
182508daf0c2Sdrahn flag aSign, bSign;
182608daf0c2Sdrahn
182708daf0c2Sdrahn aSign = extractFloat64Sign( a );
182808daf0c2Sdrahn bSign = extractFloat64Sign( b );
182908daf0c2Sdrahn if ( aSign == bSign ) {
183008daf0c2Sdrahn return addFloat64Sigs( a, b, aSign );
183108daf0c2Sdrahn }
183208daf0c2Sdrahn else {
183308daf0c2Sdrahn return subFloat64Sigs( a, b, aSign );
183408daf0c2Sdrahn }
183508daf0c2Sdrahn
183608daf0c2Sdrahn }
183708daf0c2Sdrahn
183808daf0c2Sdrahn /*
183908daf0c2Sdrahn -------------------------------------------------------------------------------
184008daf0c2Sdrahn Returns the result of subtracting the double-precision floating-point values
184108daf0c2Sdrahn `a' and `b'. The operation is performed according to the IEC/IEEE Standard
184208daf0c2Sdrahn for Binary Floating-Point Arithmetic.
184308daf0c2Sdrahn -------------------------------------------------------------------------------
184408daf0c2Sdrahn */
float64_sub(float64 a,float64 b)184508daf0c2Sdrahn float64 float64_sub( float64 a, float64 b )
184608daf0c2Sdrahn {
184708daf0c2Sdrahn flag aSign, bSign;
184808daf0c2Sdrahn
184908daf0c2Sdrahn aSign = extractFloat64Sign( a );
185008daf0c2Sdrahn bSign = extractFloat64Sign( b );
185108daf0c2Sdrahn if ( aSign == bSign ) {
185208daf0c2Sdrahn return subFloat64Sigs( a, b, aSign );
185308daf0c2Sdrahn }
185408daf0c2Sdrahn else {
185508daf0c2Sdrahn return addFloat64Sigs( a, b, aSign );
185608daf0c2Sdrahn }
185708daf0c2Sdrahn
185808daf0c2Sdrahn }
185908daf0c2Sdrahn
186008daf0c2Sdrahn /*
186108daf0c2Sdrahn -------------------------------------------------------------------------------
186208daf0c2Sdrahn Returns the result of multiplying the double-precision floating-point values
186308daf0c2Sdrahn `a' and `b'. The operation is performed according to the IEC/IEEE Standard
186408daf0c2Sdrahn for Binary Floating-Point Arithmetic.
186508daf0c2Sdrahn -------------------------------------------------------------------------------
186608daf0c2Sdrahn */
float64_mul(float64 a,float64 b)186708daf0c2Sdrahn float64 float64_mul( float64 a, float64 b )
186808daf0c2Sdrahn {
186908daf0c2Sdrahn flag aSign, bSign, zSign;
187008daf0c2Sdrahn int16 aExp, bExp, zExp;
187108daf0c2Sdrahn bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
187208daf0c2Sdrahn
187308daf0c2Sdrahn aSig1 = extractFloat64Frac1( a );
187408daf0c2Sdrahn aSig0 = extractFloat64Frac0( a );
187508daf0c2Sdrahn aExp = extractFloat64Exp( a );
187608daf0c2Sdrahn aSign = extractFloat64Sign( a );
187708daf0c2Sdrahn bSig1 = extractFloat64Frac1( b );
187808daf0c2Sdrahn bSig0 = extractFloat64Frac0( b );
187908daf0c2Sdrahn bExp = extractFloat64Exp( b );
188008daf0c2Sdrahn bSign = extractFloat64Sign( b );
188108daf0c2Sdrahn zSign = aSign ^ bSign;
188208daf0c2Sdrahn if ( aExp == 0x7FF ) {
188308daf0c2Sdrahn if ( ( aSig0 | aSig1 )
188408daf0c2Sdrahn || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
188508daf0c2Sdrahn return propagateFloat64NaN( a, b );
188608daf0c2Sdrahn }
188708daf0c2Sdrahn if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
188808daf0c2Sdrahn return packFloat64( zSign, 0x7FF, 0, 0 );
188908daf0c2Sdrahn }
189008daf0c2Sdrahn if ( bExp == 0x7FF ) {
189108daf0c2Sdrahn if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
189208daf0c2Sdrahn if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
189308daf0c2Sdrahn invalid:
189408daf0c2Sdrahn float_raise( float_flag_invalid );
189508daf0c2Sdrahn return float64_default_nan;
189608daf0c2Sdrahn }
189708daf0c2Sdrahn return packFloat64( zSign, 0x7FF, 0, 0 );
189808daf0c2Sdrahn }
189908daf0c2Sdrahn if ( aExp == 0 ) {
190008daf0c2Sdrahn if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
190108daf0c2Sdrahn normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
190208daf0c2Sdrahn }
190308daf0c2Sdrahn if ( bExp == 0 ) {
190408daf0c2Sdrahn if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
190508daf0c2Sdrahn normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
190608daf0c2Sdrahn }
190708daf0c2Sdrahn zExp = aExp + bExp - 0x400;
190808daf0c2Sdrahn aSig0 |= 0x00100000;
190908daf0c2Sdrahn shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );
191008daf0c2Sdrahn mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
191108daf0c2Sdrahn add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
191208daf0c2Sdrahn zSig2 |= ( zSig3 != 0 );
191308daf0c2Sdrahn if ( 0x00200000 <= zSig0 ) {
191408daf0c2Sdrahn shift64ExtraRightJamming(
191508daf0c2Sdrahn zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
191608daf0c2Sdrahn ++zExp;
191708daf0c2Sdrahn }
191808daf0c2Sdrahn return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
191908daf0c2Sdrahn
192008daf0c2Sdrahn }
192108daf0c2Sdrahn
192208daf0c2Sdrahn /*
192308daf0c2Sdrahn -------------------------------------------------------------------------------
192408daf0c2Sdrahn Returns the result of dividing the double-precision floating-point value `a'
192508daf0c2Sdrahn by the corresponding value `b'. The operation is performed according to the
192608daf0c2Sdrahn IEC/IEEE Standard for Binary Floating-Point Arithmetic.
192708daf0c2Sdrahn -------------------------------------------------------------------------------
192808daf0c2Sdrahn */
float64_div(float64 a,float64 b)192908daf0c2Sdrahn float64 float64_div( float64 a, float64 b )
193008daf0c2Sdrahn {
193108daf0c2Sdrahn flag aSign, bSign, zSign;
193208daf0c2Sdrahn int16 aExp, bExp, zExp;
193308daf0c2Sdrahn bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
193408daf0c2Sdrahn bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
193508daf0c2Sdrahn
193608daf0c2Sdrahn aSig1 = extractFloat64Frac1( a );
193708daf0c2Sdrahn aSig0 = extractFloat64Frac0( a );
193808daf0c2Sdrahn aExp = extractFloat64Exp( a );
193908daf0c2Sdrahn aSign = extractFloat64Sign( a );
194008daf0c2Sdrahn bSig1 = extractFloat64Frac1( b );
194108daf0c2Sdrahn bSig0 = extractFloat64Frac0( b );
194208daf0c2Sdrahn bExp = extractFloat64Exp( b );
194308daf0c2Sdrahn bSign = extractFloat64Sign( b );
194408daf0c2Sdrahn zSign = aSign ^ bSign;
194508daf0c2Sdrahn if ( aExp == 0x7FF ) {
194608daf0c2Sdrahn if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
194708daf0c2Sdrahn if ( bExp == 0x7FF ) {
194808daf0c2Sdrahn if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
194908daf0c2Sdrahn goto invalid;
195008daf0c2Sdrahn }
195108daf0c2Sdrahn return packFloat64( zSign, 0x7FF, 0, 0 );
195208daf0c2Sdrahn }
195308daf0c2Sdrahn if ( bExp == 0x7FF ) {
195408daf0c2Sdrahn if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
195508daf0c2Sdrahn return packFloat64( zSign, 0, 0, 0 );
195608daf0c2Sdrahn }
195708daf0c2Sdrahn if ( bExp == 0 ) {
195808daf0c2Sdrahn if ( ( bSig0 | bSig1 ) == 0 ) {
195908daf0c2Sdrahn if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
196008daf0c2Sdrahn invalid:
196108daf0c2Sdrahn float_raise( float_flag_invalid );
196208daf0c2Sdrahn return float64_default_nan;
196308daf0c2Sdrahn }
196408daf0c2Sdrahn float_raise( float_flag_divbyzero );
196508daf0c2Sdrahn return packFloat64( zSign, 0x7FF, 0, 0 );
196608daf0c2Sdrahn }
196708daf0c2Sdrahn normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
196808daf0c2Sdrahn }
196908daf0c2Sdrahn if ( aExp == 0 ) {
197008daf0c2Sdrahn if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
197108daf0c2Sdrahn normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
197208daf0c2Sdrahn }
197308daf0c2Sdrahn zExp = aExp - bExp + 0x3FD;
197408daf0c2Sdrahn shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );
197508daf0c2Sdrahn shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
197608daf0c2Sdrahn if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {
197708daf0c2Sdrahn shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
197808daf0c2Sdrahn ++zExp;
197908daf0c2Sdrahn }
198008daf0c2Sdrahn zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );
198108daf0c2Sdrahn mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
198208daf0c2Sdrahn sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
198308daf0c2Sdrahn while ( (sbits32) rem0 < 0 ) {
198408daf0c2Sdrahn --zSig0;
198508daf0c2Sdrahn add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
198608daf0c2Sdrahn }
198708daf0c2Sdrahn zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );
198808daf0c2Sdrahn if ( ( zSig1 & 0x3FF ) <= 4 ) {
198908daf0c2Sdrahn mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
199008daf0c2Sdrahn sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
199108daf0c2Sdrahn while ( (sbits32) rem1 < 0 ) {
199208daf0c2Sdrahn --zSig1;
199308daf0c2Sdrahn add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
199408daf0c2Sdrahn }
199508daf0c2Sdrahn zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
199608daf0c2Sdrahn }
199708daf0c2Sdrahn shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
199808daf0c2Sdrahn return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
199908daf0c2Sdrahn
200008daf0c2Sdrahn }
200108daf0c2Sdrahn
200208daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
200308daf0c2Sdrahn /*
200408daf0c2Sdrahn -------------------------------------------------------------------------------
200508daf0c2Sdrahn Returns the remainder of the double-precision floating-point value `a'
200608daf0c2Sdrahn with respect to the corresponding value `b'. The operation is performed
200708daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
200808daf0c2Sdrahn -------------------------------------------------------------------------------
200908daf0c2Sdrahn */
float64_rem(float64 a,float64 b)201008daf0c2Sdrahn float64 float64_rem( float64 a, float64 b )
201108daf0c2Sdrahn {
201208daf0c2Sdrahn flag aSign, bSign, zSign;
201308daf0c2Sdrahn int16 aExp, bExp, expDiff;
201408daf0c2Sdrahn bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
201508daf0c2Sdrahn bits32 allZero, alternateASig0, alternateASig1, sigMean1;
201608daf0c2Sdrahn sbits32 sigMean0;
201708daf0c2Sdrahn float64 z;
201808daf0c2Sdrahn
201908daf0c2Sdrahn aSig1 = extractFloat64Frac1( a );
202008daf0c2Sdrahn aSig0 = extractFloat64Frac0( a );
202108daf0c2Sdrahn aExp = extractFloat64Exp( a );
202208daf0c2Sdrahn aSign = extractFloat64Sign( a );
202308daf0c2Sdrahn bSig1 = extractFloat64Frac1( b );
202408daf0c2Sdrahn bSig0 = extractFloat64Frac0( b );
202508daf0c2Sdrahn bExp = extractFloat64Exp( b );
202608daf0c2Sdrahn bSign = extractFloat64Sign( b );
202708daf0c2Sdrahn if ( aExp == 0x7FF ) {
202808daf0c2Sdrahn if ( ( aSig0 | aSig1 )
202908daf0c2Sdrahn || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
203008daf0c2Sdrahn return propagateFloat64NaN( a, b );
203108daf0c2Sdrahn }
203208daf0c2Sdrahn goto invalid;
203308daf0c2Sdrahn }
203408daf0c2Sdrahn if ( bExp == 0x7FF ) {
203508daf0c2Sdrahn if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
203608daf0c2Sdrahn return a;
203708daf0c2Sdrahn }
203808daf0c2Sdrahn if ( bExp == 0 ) {
203908daf0c2Sdrahn if ( ( bSig0 | bSig1 ) == 0 ) {
204008daf0c2Sdrahn invalid:
204108daf0c2Sdrahn float_raise( float_flag_invalid );
204208daf0c2Sdrahn return float64_default_nan;
204308daf0c2Sdrahn }
204408daf0c2Sdrahn normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
204508daf0c2Sdrahn }
204608daf0c2Sdrahn if ( aExp == 0 ) {
204708daf0c2Sdrahn if ( ( aSig0 | aSig1 ) == 0 ) return a;
204808daf0c2Sdrahn normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
204908daf0c2Sdrahn }
205008daf0c2Sdrahn expDiff = aExp - bExp;
205108daf0c2Sdrahn if ( expDiff < -1 ) return a;
205208daf0c2Sdrahn shortShift64Left(
205308daf0c2Sdrahn aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );
205408daf0c2Sdrahn shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
205508daf0c2Sdrahn q = le64( bSig0, bSig1, aSig0, aSig1 );
205608daf0c2Sdrahn if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
205708daf0c2Sdrahn expDiff -= 32;
205808daf0c2Sdrahn while ( 0 < expDiff ) {
205908daf0c2Sdrahn q = estimateDiv64To32( aSig0, aSig1, bSig0 );
206008daf0c2Sdrahn q = ( 4 < q ) ? q - 4 : 0;
206108daf0c2Sdrahn mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
206208daf0c2Sdrahn shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );
206308daf0c2Sdrahn shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );
206408daf0c2Sdrahn sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );
206508daf0c2Sdrahn expDiff -= 29;
206608daf0c2Sdrahn }
206708daf0c2Sdrahn if ( -32 < expDiff ) {
206808daf0c2Sdrahn q = estimateDiv64To32( aSig0, aSig1, bSig0 );
206908daf0c2Sdrahn q = ( 4 < q ) ? q - 4 : 0;
207008daf0c2Sdrahn q >>= - expDiff;
207108daf0c2Sdrahn shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
207208daf0c2Sdrahn expDiff += 24;
207308daf0c2Sdrahn if ( expDiff < 0 ) {
207408daf0c2Sdrahn shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
207508daf0c2Sdrahn }
207608daf0c2Sdrahn else {
207708daf0c2Sdrahn shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
207808daf0c2Sdrahn }
207908daf0c2Sdrahn mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
208008daf0c2Sdrahn sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
208108daf0c2Sdrahn }
208208daf0c2Sdrahn else {
208308daf0c2Sdrahn shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );
208408daf0c2Sdrahn shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
208508daf0c2Sdrahn }
208608daf0c2Sdrahn do {
208708daf0c2Sdrahn alternateASig0 = aSig0;
208808daf0c2Sdrahn alternateASig1 = aSig1;
208908daf0c2Sdrahn ++q;
209008daf0c2Sdrahn sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
209108daf0c2Sdrahn } while ( 0 <= (sbits32) aSig0 );
209208daf0c2Sdrahn add64(
209308daf0c2Sdrahn aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
209408daf0c2Sdrahn if ( ( sigMean0 < 0 )
209508daf0c2Sdrahn || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
209608daf0c2Sdrahn aSig0 = alternateASig0;
209708daf0c2Sdrahn aSig1 = alternateASig1;
209808daf0c2Sdrahn }
209908daf0c2Sdrahn zSign = ( (sbits32) aSig0 < 0 );
210008daf0c2Sdrahn if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
210108daf0c2Sdrahn return
210208daf0c2Sdrahn normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
210308daf0c2Sdrahn
210408daf0c2Sdrahn }
210508daf0c2Sdrahn #endif
210608daf0c2Sdrahn
210708daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
210808daf0c2Sdrahn /*
210908daf0c2Sdrahn -------------------------------------------------------------------------------
211008daf0c2Sdrahn Returns the square root of the double-precision floating-point value `a'.
211108daf0c2Sdrahn The operation is performed according to the IEC/IEEE Standard for Binary
211208daf0c2Sdrahn Floating-Point Arithmetic.
211308daf0c2Sdrahn -------------------------------------------------------------------------------
211408daf0c2Sdrahn */
float64_sqrt(float64 a)211508daf0c2Sdrahn float64 float64_sqrt( float64 a )
211608daf0c2Sdrahn {
211708daf0c2Sdrahn flag aSign;
211808daf0c2Sdrahn int16 aExp, zExp;
211908daf0c2Sdrahn bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
212008daf0c2Sdrahn bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
212108daf0c2Sdrahn float64 z;
212208daf0c2Sdrahn
212308daf0c2Sdrahn aSig1 = extractFloat64Frac1( a );
212408daf0c2Sdrahn aSig0 = extractFloat64Frac0( a );
212508daf0c2Sdrahn aExp = extractFloat64Exp( a );
212608daf0c2Sdrahn aSign = extractFloat64Sign( a );
212708daf0c2Sdrahn if ( aExp == 0x7FF ) {
212808daf0c2Sdrahn if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
212908daf0c2Sdrahn if ( ! aSign ) return a;
213008daf0c2Sdrahn goto invalid;
213108daf0c2Sdrahn }
213208daf0c2Sdrahn if ( aSign ) {
213308daf0c2Sdrahn if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
213408daf0c2Sdrahn invalid:
213508daf0c2Sdrahn float_raise( float_flag_invalid );
213608daf0c2Sdrahn return float64_default_nan;
213708daf0c2Sdrahn }
213808daf0c2Sdrahn if ( aExp == 0 ) {
213908daf0c2Sdrahn if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
214008daf0c2Sdrahn normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
214108daf0c2Sdrahn }
214208daf0c2Sdrahn zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
214308daf0c2Sdrahn aSig0 |= 0x00100000;
214408daf0c2Sdrahn shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
214508daf0c2Sdrahn zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
214608daf0c2Sdrahn if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;
214708daf0c2Sdrahn doubleZSig0 = zSig0 + zSig0;
214808daf0c2Sdrahn shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
214908daf0c2Sdrahn mul32To64( zSig0, zSig0, &term0, &term1 );
215008daf0c2Sdrahn sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
215108daf0c2Sdrahn while ( (sbits32) rem0 < 0 ) {
215208daf0c2Sdrahn --zSig0;
215308daf0c2Sdrahn doubleZSig0 -= 2;
215408daf0c2Sdrahn add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
215508daf0c2Sdrahn }
215608daf0c2Sdrahn zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
215708daf0c2Sdrahn if ( ( zSig1 & 0x1FF ) <= 5 ) {
215808daf0c2Sdrahn if ( zSig1 == 0 ) zSig1 = 1;
215908daf0c2Sdrahn mul32To64( doubleZSig0, zSig1, &term1, &term2 );
216008daf0c2Sdrahn sub64( rem1, 0, term1, term2, &rem1, &rem2 );
216108daf0c2Sdrahn mul32To64( zSig1, zSig1, &term2, &term3 );
216208daf0c2Sdrahn sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
216308daf0c2Sdrahn while ( (sbits32) rem1 < 0 ) {
216408daf0c2Sdrahn --zSig1;
216508daf0c2Sdrahn shortShift64Left( 0, zSig1, 1, &term2, &term3 );
216608daf0c2Sdrahn term3 |= 1;
216708daf0c2Sdrahn term2 |= doubleZSig0;
216808daf0c2Sdrahn add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
216908daf0c2Sdrahn }
217008daf0c2Sdrahn zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
217108daf0c2Sdrahn }
217208daf0c2Sdrahn shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
217308daf0c2Sdrahn return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
217408daf0c2Sdrahn
217508daf0c2Sdrahn }
217608daf0c2Sdrahn #endif
217708daf0c2Sdrahn
217808daf0c2Sdrahn /*
217908daf0c2Sdrahn -------------------------------------------------------------------------------
218008daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is equal to
218108daf0c2Sdrahn the corresponding value `b', and 0 otherwise. The comparison is performed
218208daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
218308daf0c2Sdrahn -------------------------------------------------------------------------------
218408daf0c2Sdrahn */
float64_eq(float64 a,float64 b)218508daf0c2Sdrahn flag float64_eq( float64 a, float64 b )
218608daf0c2Sdrahn {
218708daf0c2Sdrahn
218808daf0c2Sdrahn if ( ( ( extractFloat64Exp( a ) == 0x7FF )
218908daf0c2Sdrahn && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
219008daf0c2Sdrahn || ( ( extractFloat64Exp( b ) == 0x7FF )
219108daf0c2Sdrahn && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
219208daf0c2Sdrahn ) {
219308daf0c2Sdrahn if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
219408daf0c2Sdrahn float_raise( float_flag_invalid );
219508daf0c2Sdrahn }
219608daf0c2Sdrahn return 0;
219708daf0c2Sdrahn }
219808daf0c2Sdrahn return ( a == b ) ||
219908daf0c2Sdrahn ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
220008daf0c2Sdrahn
220108daf0c2Sdrahn }
220208daf0c2Sdrahn
220308daf0c2Sdrahn /*
220408daf0c2Sdrahn -------------------------------------------------------------------------------
220508daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is less than
220608daf0c2Sdrahn or equal to the corresponding value `b', and 0 otherwise. The comparison
220708daf0c2Sdrahn is performed according to the IEC/IEEE Standard for Binary Floating-Point
220808daf0c2Sdrahn Arithmetic.
220908daf0c2Sdrahn -------------------------------------------------------------------------------
221008daf0c2Sdrahn */
float64_le(float64 a,float64 b)221108daf0c2Sdrahn flag float64_le( float64 a, float64 b )
221208daf0c2Sdrahn {
221308daf0c2Sdrahn flag aSign, bSign;
221408daf0c2Sdrahn
221508daf0c2Sdrahn if ( ( ( extractFloat64Exp( a ) == 0x7FF )
221608daf0c2Sdrahn && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
221708daf0c2Sdrahn || ( ( extractFloat64Exp( b ) == 0x7FF )
221808daf0c2Sdrahn && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
221908daf0c2Sdrahn ) {
222008daf0c2Sdrahn float_raise( float_flag_invalid );
222108daf0c2Sdrahn return 0;
222208daf0c2Sdrahn }
222308daf0c2Sdrahn aSign = extractFloat64Sign( a );
222408daf0c2Sdrahn bSign = extractFloat64Sign( b );
222508daf0c2Sdrahn if ( aSign != bSign )
222608daf0c2Sdrahn return aSign ||
222708daf0c2Sdrahn ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
222808daf0c2Sdrahn 0 );
222908daf0c2Sdrahn return ( a == b ) ||
223008daf0c2Sdrahn ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
223108daf0c2Sdrahn }
223208daf0c2Sdrahn
223308daf0c2Sdrahn /*
223408daf0c2Sdrahn -------------------------------------------------------------------------------
223508daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is less than
223608daf0c2Sdrahn the corresponding value `b', and 0 otherwise. The comparison is performed
223708daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
223808daf0c2Sdrahn -------------------------------------------------------------------------------
223908daf0c2Sdrahn */
float64_lt(float64 a,float64 b)224008daf0c2Sdrahn flag float64_lt( float64 a, float64 b )
224108daf0c2Sdrahn {
224208daf0c2Sdrahn flag aSign, bSign;
224308daf0c2Sdrahn
224408daf0c2Sdrahn if ( ( ( extractFloat64Exp( a ) == 0x7FF )
224508daf0c2Sdrahn && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
224608daf0c2Sdrahn || ( ( extractFloat64Exp( b ) == 0x7FF )
224708daf0c2Sdrahn && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
224808daf0c2Sdrahn ) {
224908daf0c2Sdrahn float_raise( float_flag_invalid );
225008daf0c2Sdrahn return 0;
225108daf0c2Sdrahn }
225208daf0c2Sdrahn aSign = extractFloat64Sign( a );
225308daf0c2Sdrahn bSign = extractFloat64Sign( b );
225408daf0c2Sdrahn if ( aSign != bSign )
225508daf0c2Sdrahn return aSign &&
225608daf0c2Sdrahn ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
225708daf0c2Sdrahn 0 );
225808daf0c2Sdrahn return ( a != b ) &&
225908daf0c2Sdrahn ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
226008daf0c2Sdrahn
226108daf0c2Sdrahn }
226208daf0c2Sdrahn
226308daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
226408daf0c2Sdrahn /*
226508daf0c2Sdrahn -------------------------------------------------------------------------------
226608daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is equal to
226708daf0c2Sdrahn the corresponding value `b', and 0 otherwise. The invalid exception is
226808daf0c2Sdrahn raised if either operand is a NaN. Otherwise, the comparison is performed
226908daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
227008daf0c2Sdrahn -------------------------------------------------------------------------------
227108daf0c2Sdrahn */
float64_eq_signaling(float64 a,float64 b)227208daf0c2Sdrahn flag float64_eq_signaling( float64 a, float64 b )
227308daf0c2Sdrahn {
227408daf0c2Sdrahn
227508daf0c2Sdrahn if ( ( ( extractFloat64Exp( a ) == 0x7FF )
227608daf0c2Sdrahn && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
227708daf0c2Sdrahn || ( ( extractFloat64Exp( b ) == 0x7FF )
227808daf0c2Sdrahn && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
227908daf0c2Sdrahn ) {
228008daf0c2Sdrahn float_raise( float_flag_invalid );
228108daf0c2Sdrahn return 0;
228208daf0c2Sdrahn }
228308daf0c2Sdrahn return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
228408daf0c2Sdrahn
228508daf0c2Sdrahn }
228608daf0c2Sdrahn
228708daf0c2Sdrahn /*
228808daf0c2Sdrahn -------------------------------------------------------------------------------
228908daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is less than or
229008daf0c2Sdrahn equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
229108daf0c2Sdrahn cause an exception. Otherwise, the comparison is performed according to the
229208daf0c2Sdrahn IEC/IEEE Standard for Binary Floating-Point Arithmetic.
229308daf0c2Sdrahn -------------------------------------------------------------------------------
229408daf0c2Sdrahn */
float64_le_quiet(float64 a,float64 b)229508daf0c2Sdrahn flag float64_le_quiet( float64 a, float64 b )
229608daf0c2Sdrahn {
229708daf0c2Sdrahn flag aSign, bSign;
229808daf0c2Sdrahn
229908daf0c2Sdrahn if ( ( ( extractFloat64Exp( a ) == 0x7FF )
230008daf0c2Sdrahn && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
230108daf0c2Sdrahn || ( ( extractFloat64Exp( b ) == 0x7FF )
230208daf0c2Sdrahn && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
230308daf0c2Sdrahn ) {
230408daf0c2Sdrahn if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
230508daf0c2Sdrahn float_raise( float_flag_invalid );
230608daf0c2Sdrahn }
230708daf0c2Sdrahn return 0;
230808daf0c2Sdrahn }
230908daf0c2Sdrahn aSign = extractFloat64Sign( a );
231008daf0c2Sdrahn bSign = extractFloat64Sign( b );
231108daf0c2Sdrahn if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
231208daf0c2Sdrahn return ( a == b ) || ( aSign ^ ( a < b ) );
231308daf0c2Sdrahn
231408daf0c2Sdrahn }
231508daf0c2Sdrahn
231608daf0c2Sdrahn /*
231708daf0c2Sdrahn -------------------------------------------------------------------------------
231808daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is less than
231908daf0c2Sdrahn the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
232008daf0c2Sdrahn exception. Otherwise, the comparison is performed according to the IEC/IEEE
232108daf0c2Sdrahn Standard for Binary Floating-Point Arithmetic.
232208daf0c2Sdrahn -------------------------------------------------------------------------------
232308daf0c2Sdrahn */
float64_lt_quiet(float64 a,float64 b)232408daf0c2Sdrahn flag float64_lt_quiet( float64 a, float64 b )
232508daf0c2Sdrahn {
232608daf0c2Sdrahn flag aSign, bSign;
232708daf0c2Sdrahn
232808daf0c2Sdrahn if ( ( ( extractFloat64Exp( a ) == 0x7FF )
232908daf0c2Sdrahn && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
233008daf0c2Sdrahn || ( ( extractFloat64Exp( b ) == 0x7FF )
233108daf0c2Sdrahn && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
233208daf0c2Sdrahn ) {
233308daf0c2Sdrahn if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
233408daf0c2Sdrahn float_raise( float_flag_invalid );
233508daf0c2Sdrahn }
233608daf0c2Sdrahn return 0;
233708daf0c2Sdrahn }
233808daf0c2Sdrahn aSign = extractFloat64Sign( a );
233908daf0c2Sdrahn bSign = extractFloat64Sign( b );
234008daf0c2Sdrahn if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
234108daf0c2Sdrahn return ( a != b ) && ( aSign ^ ( a < b ) );
234208daf0c2Sdrahn
234308daf0c2Sdrahn }
234408daf0c2Sdrahn
234508daf0c2Sdrahn #endif
2346