xref: /openbsd/lib/libc/softfloat/softfloat.c (revision fcc6486e)
1*fcc6486eSmiod /*	$OpenBSD: softfloat.c,v 1.3 2015/09/13 14:21:46 miod Exp $	*/
208daf0c2Sdrahn /* $NetBSD: softfloat.c,v 1.1 2002/05/21 23:51:07 bjh21 Exp $ */
308daf0c2Sdrahn 
408daf0c2Sdrahn /*
508daf0c2Sdrahn  * This version hacked for use with gcc -msoft-float by bjh21.
608daf0c2Sdrahn  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
708daf0c2Sdrahn  *  itself).
808daf0c2Sdrahn  */
908daf0c2Sdrahn 
1008daf0c2Sdrahn /*
1108daf0c2Sdrahn  * Things you may want to define:
1208daf0c2Sdrahn  *
1308daf0c2Sdrahn  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
1408daf0c2Sdrahn  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
1508daf0c2Sdrahn  *   properly renamed.
1608daf0c2Sdrahn  */
1708daf0c2Sdrahn 
1808daf0c2Sdrahn /*
1908daf0c2Sdrahn  * This differs from the standard bits32/softfloat.c in that float64
2008daf0c2Sdrahn  * is defined to be a 64-bit integer rather than a structure.  The
2108daf0c2Sdrahn  * structure is float64s, with translation between the two going via
2208daf0c2Sdrahn  * float64u.
2308daf0c2Sdrahn  */
2408daf0c2Sdrahn 
2508daf0c2Sdrahn /*
2608daf0c2Sdrahn ===============================================================================
2708daf0c2Sdrahn 
2808daf0c2Sdrahn This C source file is part of the SoftFloat IEC/IEEE Floating-Point
2908daf0c2Sdrahn Arithmetic Package, Release 2a.
3008daf0c2Sdrahn 
3108daf0c2Sdrahn Written by John R. Hauser.  This work was made possible in part by the
3208daf0c2Sdrahn International Computer Science Institute, located at Suite 600, 1947 Center
3308daf0c2Sdrahn Street, Berkeley, California 94704.  Funding was partially provided by the
3408daf0c2Sdrahn National Science Foundation under grant MIP-9311980.  The original version
3508daf0c2Sdrahn of this code was written as part of a project to build a fixed-point vector
3608daf0c2Sdrahn processor in collaboration with the University of California at Berkeley,
3708daf0c2Sdrahn overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
3808daf0c2Sdrahn is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
3908daf0c2Sdrahn arithmetic/SoftFloat.html'.
4008daf0c2Sdrahn 
4108daf0c2Sdrahn THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
4208daf0c2Sdrahn has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
4308daf0c2Sdrahn TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
4408daf0c2Sdrahn PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
4508daf0c2Sdrahn AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
4608daf0c2Sdrahn 
4708daf0c2Sdrahn Derivative works are acceptable, even for commercial purposes, so long as
4808daf0c2Sdrahn (1) they include prominent notice that the work is derivative, and (2) they
4908daf0c2Sdrahn include prominent notice akin to these four paragraphs for those parts of
5008daf0c2Sdrahn this code that are retained.
5108daf0c2Sdrahn 
5208daf0c2Sdrahn ===============================================================================
5308daf0c2Sdrahn */
5408daf0c2Sdrahn 
5508daf0c2Sdrahn #ifdef SOFTFLOAT_FOR_GCC
5608daf0c2Sdrahn #include "softfloat-for-gcc.h"
5708daf0c2Sdrahn #endif
5808daf0c2Sdrahn 
5908daf0c2Sdrahn #include "milieu.h"
60*fcc6486eSmiod #include <softfloat.h>
6108daf0c2Sdrahn 
6208daf0c2Sdrahn /*
6308daf0c2Sdrahn  * Conversions between floats as stored in memory and floats as
6408daf0c2Sdrahn  * SoftFloat uses them
6508daf0c2Sdrahn  */
6608daf0c2Sdrahn #ifndef FLOAT64_DEMANGLE
6708daf0c2Sdrahn #define FLOAT64_DEMANGLE(a)	(a)
6808daf0c2Sdrahn #endif
6908daf0c2Sdrahn #ifndef FLOAT64_MANGLE
7008daf0c2Sdrahn #define FLOAT64_MANGLE(a)	(a)
7108daf0c2Sdrahn #endif
7208daf0c2Sdrahn 
7308daf0c2Sdrahn /*
7408daf0c2Sdrahn -------------------------------------------------------------------------------
7508daf0c2Sdrahn Primitive arithmetic functions, including multi-word arithmetic, and
7608daf0c2Sdrahn division and square root approximations.  (Can be specialized to target if
7708daf0c2Sdrahn desired.)
7808daf0c2Sdrahn -------------------------------------------------------------------------------
7908daf0c2Sdrahn */
8008daf0c2Sdrahn #include "softfloat-macros.h"
8108daf0c2Sdrahn 
8208daf0c2Sdrahn /*
8308daf0c2Sdrahn -------------------------------------------------------------------------------
8408daf0c2Sdrahn Functions and definitions to determine:  (1) whether tininess for underflow
8508daf0c2Sdrahn is detected before or after rounding by default, (2) what (if anything)
8608daf0c2Sdrahn happens when exceptions are raised, (3) how signaling NaNs are distinguished
8708daf0c2Sdrahn from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
8808daf0c2Sdrahn are propagated from function inputs to output.  These details are target-
8908daf0c2Sdrahn specific.
9008daf0c2Sdrahn -------------------------------------------------------------------------------
9108daf0c2Sdrahn */
9208daf0c2Sdrahn #include "softfloat-specialize.h"
9308daf0c2Sdrahn 
9408daf0c2Sdrahn /*
9508daf0c2Sdrahn -------------------------------------------------------------------------------
9608daf0c2Sdrahn Floating-point rounding mode and exception flags.
9708daf0c2Sdrahn -------------------------------------------------------------------------------
9808daf0c2Sdrahn */
9908daf0c2Sdrahn fp_rnd float_rounding_mode = float_round_nearest_even;
10008daf0c2Sdrahn fp_except float_exception_flags = 0;
10108daf0c2Sdrahn 
10208daf0c2Sdrahn /*
10308daf0c2Sdrahn -------------------------------------------------------------------------------
10408daf0c2Sdrahn Returns the fraction bits of the single-precision floating-point value `a'.
10508daf0c2Sdrahn -------------------------------------------------------------------------------
10608daf0c2Sdrahn */
extractFloat32Frac(float32 a)10708daf0c2Sdrahn INLINE bits32 extractFloat32Frac( float32 a )
10808daf0c2Sdrahn {
10908daf0c2Sdrahn 
11008daf0c2Sdrahn     return a & 0x007FFFFF;
11108daf0c2Sdrahn 
11208daf0c2Sdrahn }
11308daf0c2Sdrahn 
11408daf0c2Sdrahn /*
11508daf0c2Sdrahn -------------------------------------------------------------------------------
11608daf0c2Sdrahn Returns the exponent bits of the single-precision floating-point value `a'.
11708daf0c2Sdrahn -------------------------------------------------------------------------------
11808daf0c2Sdrahn */
extractFloat32Exp(float32 a)11908daf0c2Sdrahn INLINE int16 extractFloat32Exp( float32 a )
12008daf0c2Sdrahn {
12108daf0c2Sdrahn 
12208daf0c2Sdrahn     return ( a>>23 ) & 0xFF;
12308daf0c2Sdrahn 
12408daf0c2Sdrahn }
12508daf0c2Sdrahn 
12608daf0c2Sdrahn /*
12708daf0c2Sdrahn -------------------------------------------------------------------------------
12808daf0c2Sdrahn Returns the sign bit of the single-precision floating-point value `a'.
12908daf0c2Sdrahn -------------------------------------------------------------------------------
13008daf0c2Sdrahn */
extractFloat32Sign(float32 a)13108daf0c2Sdrahn INLINE flag extractFloat32Sign( float32 a )
13208daf0c2Sdrahn {
13308daf0c2Sdrahn 
13408daf0c2Sdrahn     return a>>31;
13508daf0c2Sdrahn 
13608daf0c2Sdrahn }
13708daf0c2Sdrahn 
13808daf0c2Sdrahn /*
13908daf0c2Sdrahn -------------------------------------------------------------------------------
14008daf0c2Sdrahn Normalizes the subnormal single-precision floating-point value represented
14108daf0c2Sdrahn by the denormalized significand `aSig'.  The normalized exponent and
14208daf0c2Sdrahn significand are stored at the locations pointed to by `zExpPtr' and
14308daf0c2Sdrahn `zSigPtr', respectively.
14408daf0c2Sdrahn -------------------------------------------------------------------------------
14508daf0c2Sdrahn */
14608daf0c2Sdrahn static void
normalizeFloat32Subnormal(bits32 aSig,int16 * zExpPtr,bits32 * zSigPtr)14708daf0c2Sdrahn  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
14808daf0c2Sdrahn {
14908daf0c2Sdrahn     int8 shiftCount;
15008daf0c2Sdrahn 
15108daf0c2Sdrahn     shiftCount = countLeadingZeros32( aSig ) - 8;
15208daf0c2Sdrahn     *zSigPtr = aSig<<shiftCount;
15308daf0c2Sdrahn     *zExpPtr = 1 - shiftCount;
15408daf0c2Sdrahn 
15508daf0c2Sdrahn }
15608daf0c2Sdrahn 
15708daf0c2Sdrahn /*
15808daf0c2Sdrahn -------------------------------------------------------------------------------
15908daf0c2Sdrahn Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
16008daf0c2Sdrahn single-precision floating-point value, returning the result.  After being
16108daf0c2Sdrahn shifted into the proper positions, the three fields are simply added
16208daf0c2Sdrahn together to form the result.  This means that any integer portion of `zSig'
16308daf0c2Sdrahn will be added into the exponent.  Since a properly normalized significand
16408daf0c2Sdrahn will have an integer portion equal to 1, the `zExp' input should be 1 less
16508daf0c2Sdrahn than the desired result exponent whenever `zSig' is a complete, normalized
16608daf0c2Sdrahn significand.
16708daf0c2Sdrahn -------------------------------------------------------------------------------
16808daf0c2Sdrahn */
packFloat32(flag zSign,int16 zExp,bits32 zSig)16908daf0c2Sdrahn INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
17008daf0c2Sdrahn {
17108daf0c2Sdrahn 
17208daf0c2Sdrahn     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
17308daf0c2Sdrahn 
17408daf0c2Sdrahn }
17508daf0c2Sdrahn 
17608daf0c2Sdrahn /*
17708daf0c2Sdrahn -------------------------------------------------------------------------------
17808daf0c2Sdrahn Takes an abstract floating-point value having sign `zSign', exponent `zExp',
17908daf0c2Sdrahn and significand `zSig', and returns the proper single-precision floating-
18008daf0c2Sdrahn point value corresponding to the abstract input.  Ordinarily, the abstract
18108daf0c2Sdrahn value is simply rounded and packed into the single-precision format, with
18208daf0c2Sdrahn the inexact exception raised if the abstract input cannot be represented
18308daf0c2Sdrahn exactly.  However, if the abstract value is too large, the overflow and
18408daf0c2Sdrahn inexact exceptions are raised and an infinity or maximal finite value is
18508daf0c2Sdrahn returned.  If the abstract value is too small, the input value is rounded to
18608daf0c2Sdrahn a subnormal number, and the underflow and inexact exceptions are raised if
18708daf0c2Sdrahn the abstract input cannot be represented exactly as a subnormal single-
18808daf0c2Sdrahn precision floating-point number.
18908daf0c2Sdrahn     The input significand `zSig' has its binary point between bits 30
19008daf0c2Sdrahn and 29, which is 7 bits to the left of the usual location.  This shifted
19108daf0c2Sdrahn significand must be normalized or smaller.  If `zSig' is not normalized,
19208daf0c2Sdrahn `zExp' must be 0; in that case, the result returned is a subnormal number,
19308daf0c2Sdrahn and it must not require rounding.  In the usual case that `zSig' is
19408daf0c2Sdrahn normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
19508daf0c2Sdrahn The handling of underflow and overflow follows the IEC/IEEE Standard for
19608daf0c2Sdrahn Binary Floating-Point Arithmetic.
19708daf0c2Sdrahn -------------------------------------------------------------------------------
19808daf0c2Sdrahn */
roundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)19908daf0c2Sdrahn static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
20008daf0c2Sdrahn {
20108daf0c2Sdrahn     int8 roundingMode;
20208daf0c2Sdrahn     flag roundNearestEven;
20308daf0c2Sdrahn     int8 roundIncrement, roundBits;
20408daf0c2Sdrahn     flag isTiny;
20508daf0c2Sdrahn 
20608daf0c2Sdrahn     roundingMode = float_rounding_mode;
20708daf0c2Sdrahn     roundNearestEven = roundingMode == float_round_nearest_even;
20808daf0c2Sdrahn     roundIncrement = 0x40;
20908daf0c2Sdrahn     if ( ! roundNearestEven ) {
21008daf0c2Sdrahn         if ( roundingMode == float_round_to_zero ) {
21108daf0c2Sdrahn             roundIncrement = 0;
21208daf0c2Sdrahn         }
21308daf0c2Sdrahn         else {
21408daf0c2Sdrahn             roundIncrement = 0x7F;
21508daf0c2Sdrahn             if ( zSign ) {
21608daf0c2Sdrahn                 if ( roundingMode == float_round_up ) roundIncrement = 0;
21708daf0c2Sdrahn             }
21808daf0c2Sdrahn             else {
21908daf0c2Sdrahn                 if ( roundingMode == float_round_down ) roundIncrement = 0;
22008daf0c2Sdrahn             }
22108daf0c2Sdrahn         }
22208daf0c2Sdrahn     }
22308daf0c2Sdrahn     roundBits = zSig & 0x7F;
22408daf0c2Sdrahn     if ( 0xFD <= (bits16) zExp ) {
22508daf0c2Sdrahn         if (    ( 0xFD < zExp )
22608daf0c2Sdrahn              || (    ( zExp == 0xFD )
22708daf0c2Sdrahn                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
22808daf0c2Sdrahn            ) {
22908daf0c2Sdrahn             float_raise( float_flag_overflow | float_flag_inexact );
23008daf0c2Sdrahn             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
23108daf0c2Sdrahn         }
23208daf0c2Sdrahn         if ( zExp < 0 ) {
23308daf0c2Sdrahn             isTiny =
23408daf0c2Sdrahn                    ( float_detect_tininess == float_tininess_before_rounding )
23508daf0c2Sdrahn                 || ( zExp < -1 )
23608daf0c2Sdrahn                 || ( zSig + roundIncrement < 0x80000000 );
23708daf0c2Sdrahn             shift32RightJamming( zSig, - zExp, &zSig );
23808daf0c2Sdrahn             zExp = 0;
23908daf0c2Sdrahn             roundBits = zSig & 0x7F;
24008daf0c2Sdrahn             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
24108daf0c2Sdrahn         }
24208daf0c2Sdrahn     }
24308daf0c2Sdrahn     if ( roundBits ) float_exception_flags |= float_flag_inexact;
24408daf0c2Sdrahn     zSig = ( zSig + roundIncrement )>>7;
24508daf0c2Sdrahn     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
24608daf0c2Sdrahn     if ( zSig == 0 ) zExp = 0;
24708daf0c2Sdrahn     return packFloat32( zSign, zExp, zSig );
24808daf0c2Sdrahn 
24908daf0c2Sdrahn }
25008daf0c2Sdrahn 
25108daf0c2Sdrahn /*
25208daf0c2Sdrahn -------------------------------------------------------------------------------
25308daf0c2Sdrahn Takes an abstract floating-point value having sign `zSign', exponent `zExp',
25408daf0c2Sdrahn and significand `zSig', and returns the proper single-precision floating-
25508daf0c2Sdrahn point value corresponding to the abstract input.  This routine is just like
25608daf0c2Sdrahn `roundAndPackFloat32' except that `zSig' does not have to be normalized.
25708daf0c2Sdrahn Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
25808daf0c2Sdrahn floating-point exponent.
25908daf0c2Sdrahn -------------------------------------------------------------------------------
26008daf0c2Sdrahn */
26108daf0c2Sdrahn static float32
normalizeRoundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)26208daf0c2Sdrahn  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
26308daf0c2Sdrahn {
26408daf0c2Sdrahn     int8 shiftCount;
26508daf0c2Sdrahn 
26608daf0c2Sdrahn     shiftCount = countLeadingZeros32( zSig ) - 1;
26708daf0c2Sdrahn     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
26808daf0c2Sdrahn 
26908daf0c2Sdrahn }
27008daf0c2Sdrahn 
27108daf0c2Sdrahn /*
27208daf0c2Sdrahn -------------------------------------------------------------------------------
27308daf0c2Sdrahn Returns the least-significant 32 fraction bits of the double-precision
27408daf0c2Sdrahn floating-point value `a'.
27508daf0c2Sdrahn -------------------------------------------------------------------------------
27608daf0c2Sdrahn */
extractFloat64Frac1(float64 a)27708daf0c2Sdrahn INLINE bits32 extractFloat64Frac1( float64 a )
27808daf0c2Sdrahn {
27908daf0c2Sdrahn 
28008daf0c2Sdrahn     return FLOAT64_DEMANGLE(a) & LIT64( 0x00000000FFFFFFFF );
28108daf0c2Sdrahn 
28208daf0c2Sdrahn }
28308daf0c2Sdrahn 
28408daf0c2Sdrahn /*
28508daf0c2Sdrahn -------------------------------------------------------------------------------
28608daf0c2Sdrahn Returns the most-significant 20 fraction bits of the double-precision
28708daf0c2Sdrahn floating-point value `a'.
28808daf0c2Sdrahn -------------------------------------------------------------------------------
28908daf0c2Sdrahn */
extractFloat64Frac0(float64 a)29008daf0c2Sdrahn INLINE bits32 extractFloat64Frac0( float64 a )
29108daf0c2Sdrahn {
29208daf0c2Sdrahn 
29308daf0c2Sdrahn     return ( FLOAT64_DEMANGLE(a)>>32 ) & 0x000FFFFF;
29408daf0c2Sdrahn 
29508daf0c2Sdrahn }
29608daf0c2Sdrahn 
29708daf0c2Sdrahn /*
29808daf0c2Sdrahn -------------------------------------------------------------------------------
29908daf0c2Sdrahn Returns the exponent bits of the double-precision floating-point value `a'.
30008daf0c2Sdrahn -------------------------------------------------------------------------------
30108daf0c2Sdrahn */
extractFloat64Exp(float64 a)30208daf0c2Sdrahn INLINE int16 extractFloat64Exp( float64 a )
30308daf0c2Sdrahn {
30408daf0c2Sdrahn 
30508daf0c2Sdrahn     return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
30608daf0c2Sdrahn 
30708daf0c2Sdrahn }
30808daf0c2Sdrahn 
30908daf0c2Sdrahn /*
31008daf0c2Sdrahn -------------------------------------------------------------------------------
31108daf0c2Sdrahn Returns the sign bit of the double-precision floating-point value `a'.
31208daf0c2Sdrahn -------------------------------------------------------------------------------
31308daf0c2Sdrahn */
extractFloat64Sign(float64 a)31408daf0c2Sdrahn INLINE flag extractFloat64Sign( float64 a )
31508daf0c2Sdrahn {
31608daf0c2Sdrahn 
31708daf0c2Sdrahn     return FLOAT64_DEMANGLE(a)>>63;
31808daf0c2Sdrahn 
31908daf0c2Sdrahn }
32008daf0c2Sdrahn 
32108daf0c2Sdrahn /*
32208daf0c2Sdrahn -------------------------------------------------------------------------------
32308daf0c2Sdrahn Normalizes the subnormal double-precision floating-point value represented
32408daf0c2Sdrahn by the denormalized significand formed by the concatenation of `aSig0' and
32508daf0c2Sdrahn `aSig1'.  The normalized exponent is stored at the location pointed to by
32608daf0c2Sdrahn `zExpPtr'.  The most significant 21 bits of the normalized significand are
32708daf0c2Sdrahn stored at the location pointed to by `zSig0Ptr', and the least significant
32808daf0c2Sdrahn 32 bits of the normalized significand are stored at the location pointed to
32908daf0c2Sdrahn by `zSig1Ptr'.
33008daf0c2Sdrahn -------------------------------------------------------------------------------
33108daf0c2Sdrahn */
33208daf0c2Sdrahn static void
normalizeFloat64Subnormal(bits32 aSig0,bits32 aSig1,int16 * zExpPtr,bits32 * zSig0Ptr,bits32 * zSig1Ptr)33308daf0c2Sdrahn  normalizeFloat64Subnormal(
33408daf0c2Sdrahn      bits32 aSig0,
33508daf0c2Sdrahn      bits32 aSig1,
33608daf0c2Sdrahn      int16 *zExpPtr,
33708daf0c2Sdrahn      bits32 *zSig0Ptr,
33808daf0c2Sdrahn      bits32 *zSig1Ptr
33908daf0c2Sdrahn  )
34008daf0c2Sdrahn {
34108daf0c2Sdrahn     int8 shiftCount;
34208daf0c2Sdrahn 
34308daf0c2Sdrahn     if ( aSig0 == 0 ) {
34408daf0c2Sdrahn         shiftCount = countLeadingZeros32( aSig1 ) - 11;
34508daf0c2Sdrahn         if ( shiftCount < 0 ) {
34608daf0c2Sdrahn             *zSig0Ptr = aSig1>>( - shiftCount );
34708daf0c2Sdrahn             *zSig1Ptr = aSig1<<( shiftCount & 31 );
34808daf0c2Sdrahn         }
34908daf0c2Sdrahn         else {
35008daf0c2Sdrahn             *zSig0Ptr = aSig1<<shiftCount;
35108daf0c2Sdrahn             *zSig1Ptr = 0;
35208daf0c2Sdrahn         }
35308daf0c2Sdrahn         *zExpPtr = - shiftCount - 31;
35408daf0c2Sdrahn     }
35508daf0c2Sdrahn     else {
35608daf0c2Sdrahn         shiftCount = countLeadingZeros32( aSig0 ) - 11;
35708daf0c2Sdrahn         shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
35808daf0c2Sdrahn         *zExpPtr = 1 - shiftCount;
35908daf0c2Sdrahn     }
36008daf0c2Sdrahn 
36108daf0c2Sdrahn }
36208daf0c2Sdrahn 
36308daf0c2Sdrahn /*
36408daf0c2Sdrahn -------------------------------------------------------------------------------
36508daf0c2Sdrahn Packs the sign `zSign', the exponent `zExp', and the significand formed by
36608daf0c2Sdrahn the concatenation of `zSig0' and `zSig1' into a double-precision floating-
36708daf0c2Sdrahn point value, returning the result.  After being shifted into the proper
36808daf0c2Sdrahn positions, the three fields `zSign', `zExp', and `zSig0' are simply added
36908daf0c2Sdrahn together to form the most significant 32 bits of the result.  This means
37008daf0c2Sdrahn that any integer portion of `zSig0' will be added into the exponent.  Since
37108daf0c2Sdrahn a properly normalized significand will have an integer portion equal to 1,
37208daf0c2Sdrahn the `zExp' input should be 1 less than the desired result exponent whenever
37308daf0c2Sdrahn `zSig0' and `zSig1' concatenated form a complete, normalized significand.
37408daf0c2Sdrahn -------------------------------------------------------------------------------
37508daf0c2Sdrahn */
37608daf0c2Sdrahn INLINE float64
packFloat64(flag zSign,int16 zExp,bits32 zSig0,bits32 zSig1)37708daf0c2Sdrahn  packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
37808daf0c2Sdrahn {
37908daf0c2Sdrahn 
38008daf0c2Sdrahn     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
38108daf0c2Sdrahn 			   ( ( (bits64) zExp )<<52 ) +
38208daf0c2Sdrahn 			   ( ( (bits64) zSig0 )<<32 ) + zSig1 );
38308daf0c2Sdrahn 
38408daf0c2Sdrahn 
38508daf0c2Sdrahn }
38608daf0c2Sdrahn 
38708daf0c2Sdrahn /*
38808daf0c2Sdrahn -------------------------------------------------------------------------------
38908daf0c2Sdrahn Takes an abstract floating-point value having sign `zSign', exponent `zExp',
39008daf0c2Sdrahn and extended significand formed by the concatenation of `zSig0', `zSig1',
39108daf0c2Sdrahn and `zSig2', and returns the proper double-precision floating-point value
39208daf0c2Sdrahn corresponding to the abstract input.  Ordinarily, the abstract value is
39308daf0c2Sdrahn simply rounded and packed into the double-precision format, with the inexact
39408daf0c2Sdrahn exception raised if the abstract input cannot be represented exactly.
39508daf0c2Sdrahn However, if the abstract value is too large, the overflow and inexact
39608daf0c2Sdrahn exceptions are raised and an infinity or maximal finite value is returned.
39708daf0c2Sdrahn If the abstract value is too small, the input value is rounded to a
39808daf0c2Sdrahn subnormal number, and the underflow and inexact exceptions are raised if the
39908daf0c2Sdrahn abstract input cannot be represented exactly as a subnormal double-precision
40008daf0c2Sdrahn floating-point number.
40108daf0c2Sdrahn     The input significand must be normalized or smaller.  If the input
40208daf0c2Sdrahn significand is not normalized, `zExp' must be 0; in that case, the result
40308daf0c2Sdrahn returned is a subnormal number, and it must not require rounding.  In the
40408daf0c2Sdrahn usual case that the input significand is normalized, `zExp' must be 1 less
40508daf0c2Sdrahn than the ``true'' floating-point exponent.  The handling of underflow and
40608daf0c2Sdrahn overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
40708daf0c2Sdrahn -------------------------------------------------------------------------------
40808daf0c2Sdrahn */
40908daf0c2Sdrahn static float64
roundAndPackFloat64(flag zSign,int16 zExp,bits32 zSig0,bits32 zSig1,bits32 zSig2)41008daf0c2Sdrahn  roundAndPackFloat64(
41108daf0c2Sdrahn      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )
41208daf0c2Sdrahn {
41308daf0c2Sdrahn     int8 roundingMode;
41408daf0c2Sdrahn     flag roundNearestEven, increment, isTiny;
41508daf0c2Sdrahn 
41608daf0c2Sdrahn     roundingMode = float_rounding_mode;
41708daf0c2Sdrahn     roundNearestEven = ( roundingMode == float_round_nearest_even );
41808daf0c2Sdrahn     increment = ( (sbits32) zSig2 < 0 );
41908daf0c2Sdrahn     if ( ! roundNearestEven ) {
42008daf0c2Sdrahn         if ( roundingMode == float_round_to_zero ) {
42108daf0c2Sdrahn             increment = 0;
42208daf0c2Sdrahn         }
42308daf0c2Sdrahn         else {
42408daf0c2Sdrahn             if ( zSign ) {
42508daf0c2Sdrahn                 increment = ( roundingMode == float_round_down ) && zSig2;
42608daf0c2Sdrahn             }
42708daf0c2Sdrahn             else {
42808daf0c2Sdrahn                 increment = ( roundingMode == float_round_up ) && zSig2;
42908daf0c2Sdrahn             }
43008daf0c2Sdrahn         }
43108daf0c2Sdrahn     }
43208daf0c2Sdrahn     if ( 0x7FD <= (bits16) zExp ) {
43308daf0c2Sdrahn         if (    ( 0x7FD < zExp )
43408daf0c2Sdrahn              || (    ( zExp == 0x7FD )
43508daf0c2Sdrahn                   && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
43608daf0c2Sdrahn                   && increment
43708daf0c2Sdrahn                 )
43808daf0c2Sdrahn            ) {
43908daf0c2Sdrahn             float_raise( float_flag_overflow | float_flag_inexact );
44008daf0c2Sdrahn             if (    ( roundingMode == float_round_to_zero )
44108daf0c2Sdrahn                  || ( zSign && ( roundingMode == float_round_up ) )
44208daf0c2Sdrahn                  || ( ! zSign && ( roundingMode == float_round_down ) )
44308daf0c2Sdrahn                ) {
44408daf0c2Sdrahn                 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );
44508daf0c2Sdrahn             }
44608daf0c2Sdrahn             return packFloat64( zSign, 0x7FF, 0, 0 );
44708daf0c2Sdrahn         }
44808daf0c2Sdrahn         if ( zExp < 0 ) {
44908daf0c2Sdrahn             isTiny =
45008daf0c2Sdrahn                    ( float_detect_tininess == float_tininess_before_rounding )
45108daf0c2Sdrahn                 || ( zExp < -1 )
45208daf0c2Sdrahn                 || ! increment
45308daf0c2Sdrahn                 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
45408daf0c2Sdrahn             shift64ExtraRightJamming(
45508daf0c2Sdrahn                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
45608daf0c2Sdrahn             zExp = 0;
45708daf0c2Sdrahn             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
45808daf0c2Sdrahn             if ( roundNearestEven ) {
45908daf0c2Sdrahn                 increment = ( (sbits32) zSig2 < 0 );
46008daf0c2Sdrahn             }
46108daf0c2Sdrahn             else {
46208daf0c2Sdrahn                 if ( zSign ) {
46308daf0c2Sdrahn                     increment = ( roundingMode == float_round_down ) && zSig2;
46408daf0c2Sdrahn                 }
46508daf0c2Sdrahn                 else {
46608daf0c2Sdrahn                     increment = ( roundingMode == float_round_up ) && zSig2;
46708daf0c2Sdrahn                 }
46808daf0c2Sdrahn             }
46908daf0c2Sdrahn         }
47008daf0c2Sdrahn     }
47108daf0c2Sdrahn     if ( zSig2 ) float_exception_flags |= float_flag_inexact;
47208daf0c2Sdrahn     if ( increment ) {
47308daf0c2Sdrahn         add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
47408daf0c2Sdrahn         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
47508daf0c2Sdrahn     }
47608daf0c2Sdrahn     else {
47708daf0c2Sdrahn         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
47808daf0c2Sdrahn     }
47908daf0c2Sdrahn     return packFloat64( zSign, zExp, zSig0, zSig1 );
48008daf0c2Sdrahn 
48108daf0c2Sdrahn }
48208daf0c2Sdrahn 
48308daf0c2Sdrahn /*
48408daf0c2Sdrahn -------------------------------------------------------------------------------
48508daf0c2Sdrahn Takes an abstract floating-point value having sign `zSign', exponent `zExp',
48608daf0c2Sdrahn and significand formed by the concatenation of `zSig0' and `zSig1', and
48708daf0c2Sdrahn returns the proper double-precision floating-point value corresponding
48808daf0c2Sdrahn to the abstract input.  This routine is just like `roundAndPackFloat64'
48908daf0c2Sdrahn except that the input significand has fewer bits and does not have to be
49008daf0c2Sdrahn normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
49108daf0c2Sdrahn point exponent.
49208daf0c2Sdrahn -------------------------------------------------------------------------------
49308daf0c2Sdrahn */
49408daf0c2Sdrahn static float64
normalizeRoundAndPackFloat64(flag zSign,int16 zExp,bits32 zSig0,bits32 zSig1)49508daf0c2Sdrahn  normalizeRoundAndPackFloat64(
49608daf0c2Sdrahn      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
49708daf0c2Sdrahn {
49808daf0c2Sdrahn     int8 shiftCount;
49908daf0c2Sdrahn     bits32 zSig2;
50008daf0c2Sdrahn 
50108daf0c2Sdrahn     if ( zSig0 == 0 ) {
50208daf0c2Sdrahn         zSig0 = zSig1;
50308daf0c2Sdrahn         zSig1 = 0;
50408daf0c2Sdrahn         zExp -= 32;
50508daf0c2Sdrahn     }
50608daf0c2Sdrahn     shiftCount = countLeadingZeros32( zSig0 ) - 11;
50708daf0c2Sdrahn     if ( 0 <= shiftCount ) {
50808daf0c2Sdrahn         zSig2 = 0;
50908daf0c2Sdrahn         shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
51008daf0c2Sdrahn     }
51108daf0c2Sdrahn     else {
51208daf0c2Sdrahn         shift64ExtraRightJamming(
51308daf0c2Sdrahn             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
51408daf0c2Sdrahn     }
51508daf0c2Sdrahn     zExp -= shiftCount;
51608daf0c2Sdrahn     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
51708daf0c2Sdrahn 
51808daf0c2Sdrahn }
51908daf0c2Sdrahn 
52008daf0c2Sdrahn /*
52108daf0c2Sdrahn -------------------------------------------------------------------------------
52208daf0c2Sdrahn Returns the result of converting the 32-bit two's complement integer `a' to
52308daf0c2Sdrahn the single-precision floating-point format.  The conversion is performed
52408daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
52508daf0c2Sdrahn -------------------------------------------------------------------------------
52608daf0c2Sdrahn */
int32_to_float32(int32 a)52708daf0c2Sdrahn float32 int32_to_float32( int32 a )
52808daf0c2Sdrahn {
52908daf0c2Sdrahn     flag zSign;
53008daf0c2Sdrahn 
53108daf0c2Sdrahn     if ( a == 0 ) return 0;
53208daf0c2Sdrahn     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
53308daf0c2Sdrahn     zSign = ( a < 0 );
53408daf0c2Sdrahn     return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
53508daf0c2Sdrahn 
53608daf0c2Sdrahn }
53708daf0c2Sdrahn 
53808daf0c2Sdrahn /*
53908daf0c2Sdrahn -------------------------------------------------------------------------------
54008daf0c2Sdrahn Returns the result of converting the 32-bit two's complement integer `a' to
54108daf0c2Sdrahn the double-precision floating-point format.  The conversion is performed
54208daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
54308daf0c2Sdrahn -------------------------------------------------------------------------------
54408daf0c2Sdrahn */
int32_to_float64(int32 a)54508daf0c2Sdrahn float64 int32_to_float64( int32 a )
54608daf0c2Sdrahn {
54708daf0c2Sdrahn     flag zSign;
54808daf0c2Sdrahn     bits32 absA;
54908daf0c2Sdrahn     int8 shiftCount;
55008daf0c2Sdrahn     bits32 zSig0, zSig1;
55108daf0c2Sdrahn 
55208daf0c2Sdrahn     if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );
55308daf0c2Sdrahn     zSign = ( a < 0 );
55408daf0c2Sdrahn     absA = zSign ? - a : a;
55508daf0c2Sdrahn     shiftCount = countLeadingZeros32( absA ) - 11;
55608daf0c2Sdrahn     if ( 0 <= shiftCount ) {
55708daf0c2Sdrahn         zSig0 = absA<<shiftCount;
55808daf0c2Sdrahn         zSig1 = 0;
55908daf0c2Sdrahn     }
56008daf0c2Sdrahn     else {
56108daf0c2Sdrahn         shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );
56208daf0c2Sdrahn     }
56308daf0c2Sdrahn     return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );
56408daf0c2Sdrahn 
56508daf0c2Sdrahn }
56608daf0c2Sdrahn 
56708daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
56808daf0c2Sdrahn /*
56908daf0c2Sdrahn -------------------------------------------------------------------------------
57008daf0c2Sdrahn Returns the result of converting the single-precision floating-point value
57108daf0c2Sdrahn `a' to the 32-bit two's complement integer format.  The conversion is
57208daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point
57308daf0c2Sdrahn Arithmetic---which means in particular that the conversion is rounded
57408daf0c2Sdrahn according to the current rounding mode.  If `a' is a NaN, the largest
57508daf0c2Sdrahn positive integer is returned.  Otherwise, if the conversion overflows, the
57608daf0c2Sdrahn largest integer with the same sign as `a' is returned.
57708daf0c2Sdrahn -------------------------------------------------------------------------------
57808daf0c2Sdrahn */
float32_to_int32(float32 a)57908daf0c2Sdrahn int32 float32_to_int32( float32 a )
58008daf0c2Sdrahn {
58108daf0c2Sdrahn     flag aSign;
58208daf0c2Sdrahn     int16 aExp, shiftCount;
58308daf0c2Sdrahn     bits32 aSig, aSigExtra;
58408daf0c2Sdrahn     int32 z;
58508daf0c2Sdrahn     int8 roundingMode;
58608daf0c2Sdrahn 
58708daf0c2Sdrahn     aSig = extractFloat32Frac( a );
58808daf0c2Sdrahn     aExp = extractFloat32Exp( a );
58908daf0c2Sdrahn     aSign = extractFloat32Sign( a );
59008daf0c2Sdrahn     shiftCount = aExp - 0x96;
59108daf0c2Sdrahn     if ( 0 <= shiftCount ) {
59208daf0c2Sdrahn         if ( 0x9E <= aExp ) {
59308daf0c2Sdrahn             if ( a != 0xCF000000 ) {
59408daf0c2Sdrahn                 float_raise( float_flag_invalid );
59508daf0c2Sdrahn                 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
59608daf0c2Sdrahn                     return 0x7FFFFFFF;
59708daf0c2Sdrahn                 }
59808daf0c2Sdrahn             }
59908daf0c2Sdrahn             return (sbits32) 0x80000000;
60008daf0c2Sdrahn         }
60108daf0c2Sdrahn         z = ( aSig | 0x00800000 )<<shiftCount;
60208daf0c2Sdrahn         if ( aSign ) z = - z;
60308daf0c2Sdrahn     }
60408daf0c2Sdrahn     else {
60508daf0c2Sdrahn         if ( aExp < 0x7E ) {
60608daf0c2Sdrahn             aSigExtra = aExp | aSig;
60708daf0c2Sdrahn             z = 0;
60808daf0c2Sdrahn         }
60908daf0c2Sdrahn         else {
61008daf0c2Sdrahn             aSig |= 0x00800000;
61108daf0c2Sdrahn             aSigExtra = aSig<<( shiftCount & 31 );
61208daf0c2Sdrahn             z = aSig>>( - shiftCount );
61308daf0c2Sdrahn         }
61408daf0c2Sdrahn         if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
61508daf0c2Sdrahn         roundingMode = float_rounding_mode;
61608daf0c2Sdrahn         if ( roundingMode == float_round_nearest_even ) {
61708daf0c2Sdrahn             if ( (sbits32) aSigExtra < 0 ) {
61808daf0c2Sdrahn                 ++z;
61908daf0c2Sdrahn                 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;
62008daf0c2Sdrahn             }
62108daf0c2Sdrahn             if ( aSign ) z = - z;
62208daf0c2Sdrahn         }
62308daf0c2Sdrahn         else {
62408daf0c2Sdrahn             aSigExtra = ( aSigExtra != 0 );
62508daf0c2Sdrahn             if ( aSign ) {
62608daf0c2Sdrahn                 z += ( roundingMode == float_round_down ) & aSigExtra;
62708daf0c2Sdrahn                 z = - z;
62808daf0c2Sdrahn             }
62908daf0c2Sdrahn             else {
63008daf0c2Sdrahn                 z += ( roundingMode == float_round_up ) & aSigExtra;
63108daf0c2Sdrahn             }
63208daf0c2Sdrahn         }
63308daf0c2Sdrahn     }
63408daf0c2Sdrahn     return z;
63508daf0c2Sdrahn 
63608daf0c2Sdrahn }
63708daf0c2Sdrahn #endif
63808daf0c2Sdrahn 
63908daf0c2Sdrahn /*
64008daf0c2Sdrahn -------------------------------------------------------------------------------
64108daf0c2Sdrahn Returns the result of converting the single-precision floating-point value
64208daf0c2Sdrahn `a' to the 32-bit two's complement integer format.  The conversion is
64308daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point
64408daf0c2Sdrahn Arithmetic, except that the conversion is always rounded toward zero.
64508daf0c2Sdrahn If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
64608daf0c2Sdrahn the conversion overflows, the largest integer with the same sign as `a' is
64708daf0c2Sdrahn returned.
64808daf0c2Sdrahn -------------------------------------------------------------------------------
64908daf0c2Sdrahn */
float32_to_int32_round_to_zero(float32 a)65008daf0c2Sdrahn int32 float32_to_int32_round_to_zero( float32 a )
65108daf0c2Sdrahn {
65208daf0c2Sdrahn     flag aSign;
65308daf0c2Sdrahn     int16 aExp, shiftCount;
65408daf0c2Sdrahn     bits32 aSig;
65508daf0c2Sdrahn     int32 z;
65608daf0c2Sdrahn 
65708daf0c2Sdrahn     aSig = extractFloat32Frac( a );
65808daf0c2Sdrahn     aExp = extractFloat32Exp( a );
65908daf0c2Sdrahn     aSign = extractFloat32Sign( a );
66008daf0c2Sdrahn     shiftCount = aExp - 0x9E;
66108daf0c2Sdrahn     if ( 0 <= shiftCount ) {
66208daf0c2Sdrahn         if ( a != 0xCF000000 ) {
66308daf0c2Sdrahn             float_raise( float_flag_invalid );
66408daf0c2Sdrahn             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
66508daf0c2Sdrahn         }
66608daf0c2Sdrahn         return (sbits32) 0x80000000;
66708daf0c2Sdrahn     }
66808daf0c2Sdrahn     else if ( aExp <= 0x7E ) {
66908daf0c2Sdrahn         if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
67008daf0c2Sdrahn         return 0;
67108daf0c2Sdrahn     }
67208daf0c2Sdrahn     aSig = ( aSig | 0x00800000 )<<8;
67308daf0c2Sdrahn     z = aSig>>( - shiftCount );
67408daf0c2Sdrahn     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
67508daf0c2Sdrahn         float_exception_flags |= float_flag_inexact;
67608daf0c2Sdrahn     }
67708daf0c2Sdrahn     if ( aSign ) z = - z;
67808daf0c2Sdrahn     return z;
67908daf0c2Sdrahn 
68008daf0c2Sdrahn }
68108daf0c2Sdrahn 
68208daf0c2Sdrahn /*
68308daf0c2Sdrahn -------------------------------------------------------------------------------
68408daf0c2Sdrahn Returns the result of converting the single-precision floating-point value
68508daf0c2Sdrahn `a' to the double-precision floating-point format.  The conversion is
68608daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point
68708daf0c2Sdrahn Arithmetic.
68808daf0c2Sdrahn -------------------------------------------------------------------------------
68908daf0c2Sdrahn */
float32_to_float64(float32 a)69008daf0c2Sdrahn float64 float32_to_float64( float32 a )
69108daf0c2Sdrahn {
69208daf0c2Sdrahn     flag aSign;
69308daf0c2Sdrahn     int16 aExp;
69408daf0c2Sdrahn     bits32 aSig, zSig0, zSig1;
69508daf0c2Sdrahn 
69608daf0c2Sdrahn     aSig = extractFloat32Frac( a );
69708daf0c2Sdrahn     aExp = extractFloat32Exp( a );
69808daf0c2Sdrahn     aSign = extractFloat32Sign( a );
69908daf0c2Sdrahn     if ( aExp == 0xFF ) {
70008daf0c2Sdrahn         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
70108daf0c2Sdrahn         return packFloat64( aSign, 0x7FF, 0, 0 );
70208daf0c2Sdrahn     }
70308daf0c2Sdrahn     if ( aExp == 0 ) {
70408daf0c2Sdrahn         if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );
70508daf0c2Sdrahn         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
70608daf0c2Sdrahn         --aExp;
70708daf0c2Sdrahn     }
70808daf0c2Sdrahn     shift64Right( aSig, 0, 3, &zSig0, &zSig1 );
70908daf0c2Sdrahn     return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );
71008daf0c2Sdrahn 
71108daf0c2Sdrahn }
71208daf0c2Sdrahn 
71308daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
71408daf0c2Sdrahn /*
71508daf0c2Sdrahn -------------------------------------------------------------------------------
71608daf0c2Sdrahn Rounds the single-precision floating-point value `a' to an integer,
71708daf0c2Sdrahn and returns the result as a single-precision floating-point value.  The
71808daf0c2Sdrahn operation is performed according to the IEC/IEEE Standard for Binary
71908daf0c2Sdrahn Floating-Point Arithmetic.
72008daf0c2Sdrahn -------------------------------------------------------------------------------
72108daf0c2Sdrahn */
float32_round_to_int(float32 a)72208daf0c2Sdrahn float32 float32_round_to_int( float32 a )
72308daf0c2Sdrahn {
72408daf0c2Sdrahn     flag aSign;
72508daf0c2Sdrahn     int16 aExp;
72608daf0c2Sdrahn     bits32 lastBitMask, roundBitsMask;
72708daf0c2Sdrahn     int8 roundingMode;
72808daf0c2Sdrahn     float32 z;
72908daf0c2Sdrahn 
73008daf0c2Sdrahn     aExp = extractFloat32Exp( a );
73108daf0c2Sdrahn     if ( 0x96 <= aExp ) {
73208daf0c2Sdrahn         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
73308daf0c2Sdrahn             return propagateFloat32NaN( a, a );
73408daf0c2Sdrahn         }
73508daf0c2Sdrahn         return a;
73608daf0c2Sdrahn     }
73708daf0c2Sdrahn     if ( aExp <= 0x7E ) {
73808daf0c2Sdrahn         if ( (bits32) ( a<<1 ) == 0 ) return a;
73908daf0c2Sdrahn         float_exception_flags |= float_flag_inexact;
74008daf0c2Sdrahn         aSign = extractFloat32Sign( a );
74108daf0c2Sdrahn         switch ( float_rounding_mode ) {
74208daf0c2Sdrahn          case float_round_nearest_even:
74308daf0c2Sdrahn             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
74408daf0c2Sdrahn                 return packFloat32( aSign, 0x7F, 0 );
74508daf0c2Sdrahn             }
74608daf0c2Sdrahn             break;
74708daf0c2Sdrahn 	 case float_round_to_zero:
74808daf0c2Sdrahn 	    break;
74908daf0c2Sdrahn          case float_round_down:
75008daf0c2Sdrahn             return aSign ? 0xBF800000 : 0;
75108daf0c2Sdrahn          case float_round_up:
75208daf0c2Sdrahn             return aSign ? 0x80000000 : 0x3F800000;
75308daf0c2Sdrahn         }
75408daf0c2Sdrahn         return packFloat32( aSign, 0, 0 );
75508daf0c2Sdrahn     }
75608daf0c2Sdrahn     lastBitMask = 1;
75708daf0c2Sdrahn     lastBitMask <<= 0x96 - aExp;
75808daf0c2Sdrahn     roundBitsMask = lastBitMask - 1;
75908daf0c2Sdrahn     z = a;
76008daf0c2Sdrahn     roundingMode = float_rounding_mode;
76108daf0c2Sdrahn     if ( roundingMode == float_round_nearest_even ) {
76208daf0c2Sdrahn         z += lastBitMask>>1;
76308daf0c2Sdrahn         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
76408daf0c2Sdrahn     }
76508daf0c2Sdrahn     else if ( roundingMode != float_round_to_zero ) {
76608daf0c2Sdrahn         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
76708daf0c2Sdrahn             z += roundBitsMask;
76808daf0c2Sdrahn         }
76908daf0c2Sdrahn     }
77008daf0c2Sdrahn     z &= ~ roundBitsMask;
77108daf0c2Sdrahn     if ( z != a ) float_exception_flags |= float_flag_inexact;
77208daf0c2Sdrahn     return z;
77308daf0c2Sdrahn 
77408daf0c2Sdrahn }
77508daf0c2Sdrahn #endif
77608daf0c2Sdrahn 
77708daf0c2Sdrahn /*
77808daf0c2Sdrahn -------------------------------------------------------------------------------
77908daf0c2Sdrahn Returns the result of adding the absolute values of the single-precision
78008daf0c2Sdrahn floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
78108daf0c2Sdrahn before being returned.  `zSign' is ignored if the result is a NaN.
78208daf0c2Sdrahn The addition is performed according to the IEC/IEEE Standard for Binary
78308daf0c2Sdrahn Floating-Point Arithmetic.
78408daf0c2Sdrahn -------------------------------------------------------------------------------
78508daf0c2Sdrahn */
addFloat32Sigs(float32 a,float32 b,flag zSign)78608daf0c2Sdrahn static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
78708daf0c2Sdrahn {
78808daf0c2Sdrahn     int16 aExp, bExp, zExp;
78908daf0c2Sdrahn     bits32 aSig, bSig, zSig;
79008daf0c2Sdrahn     int16 expDiff;
79108daf0c2Sdrahn 
79208daf0c2Sdrahn     aSig = extractFloat32Frac( a );
79308daf0c2Sdrahn     aExp = extractFloat32Exp( a );
79408daf0c2Sdrahn     bSig = extractFloat32Frac( b );
79508daf0c2Sdrahn     bExp = extractFloat32Exp( b );
79608daf0c2Sdrahn     expDiff = aExp - bExp;
79708daf0c2Sdrahn     aSig <<= 6;
79808daf0c2Sdrahn     bSig <<= 6;
79908daf0c2Sdrahn     if ( 0 < expDiff ) {
80008daf0c2Sdrahn         if ( aExp == 0xFF ) {
80108daf0c2Sdrahn             if ( aSig ) return propagateFloat32NaN( a, b );
80208daf0c2Sdrahn             return a;
80308daf0c2Sdrahn         }
80408daf0c2Sdrahn         if ( bExp == 0 ) {
80508daf0c2Sdrahn             --expDiff;
80608daf0c2Sdrahn         }
80708daf0c2Sdrahn         else {
80808daf0c2Sdrahn             bSig |= 0x20000000;
80908daf0c2Sdrahn         }
81008daf0c2Sdrahn         shift32RightJamming( bSig, expDiff, &bSig );
81108daf0c2Sdrahn         zExp = aExp;
81208daf0c2Sdrahn     }
81308daf0c2Sdrahn     else if ( expDiff < 0 ) {
81408daf0c2Sdrahn         if ( bExp == 0xFF ) {
81508daf0c2Sdrahn             if ( bSig ) return propagateFloat32NaN( a, b );
81608daf0c2Sdrahn             return packFloat32( zSign, 0xFF, 0 );
81708daf0c2Sdrahn         }
81808daf0c2Sdrahn         if ( aExp == 0 ) {
81908daf0c2Sdrahn             ++expDiff;
82008daf0c2Sdrahn         }
82108daf0c2Sdrahn         else {
82208daf0c2Sdrahn             aSig |= 0x20000000;
82308daf0c2Sdrahn         }
82408daf0c2Sdrahn         shift32RightJamming( aSig, - expDiff, &aSig );
82508daf0c2Sdrahn         zExp = bExp;
82608daf0c2Sdrahn     }
82708daf0c2Sdrahn     else {
82808daf0c2Sdrahn         if ( aExp == 0xFF ) {
82908daf0c2Sdrahn             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
83008daf0c2Sdrahn             return a;
83108daf0c2Sdrahn         }
83208daf0c2Sdrahn         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
83308daf0c2Sdrahn         zSig = 0x40000000 + aSig + bSig;
83408daf0c2Sdrahn         zExp = aExp;
83508daf0c2Sdrahn         goto roundAndPack;
83608daf0c2Sdrahn     }
83708daf0c2Sdrahn     aSig |= 0x20000000;
83808daf0c2Sdrahn     zSig = ( aSig + bSig )<<1;
83908daf0c2Sdrahn     --zExp;
84008daf0c2Sdrahn     if ( (sbits32) zSig < 0 ) {
84108daf0c2Sdrahn         zSig = aSig + bSig;
84208daf0c2Sdrahn         ++zExp;
84308daf0c2Sdrahn     }
84408daf0c2Sdrahn  roundAndPack:
84508daf0c2Sdrahn     return roundAndPackFloat32( zSign, zExp, zSig );
84608daf0c2Sdrahn 
84708daf0c2Sdrahn }
84808daf0c2Sdrahn 
84908daf0c2Sdrahn /*
85008daf0c2Sdrahn -------------------------------------------------------------------------------
85108daf0c2Sdrahn Returns the result of subtracting the absolute values of the single-
85208daf0c2Sdrahn precision floating-point values `a' and `b'.  If `zSign' is 1, the
85308daf0c2Sdrahn difference is negated before being returned.  `zSign' is ignored if the
85408daf0c2Sdrahn result is a NaN.  The subtraction is performed according to the IEC/IEEE
85508daf0c2Sdrahn Standard for Binary Floating-Point Arithmetic.
85608daf0c2Sdrahn -------------------------------------------------------------------------------
85708daf0c2Sdrahn */
subFloat32Sigs(float32 a,float32 b,flag zSign)85808daf0c2Sdrahn static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
85908daf0c2Sdrahn {
86008daf0c2Sdrahn     int16 aExp, bExp, zExp;
86108daf0c2Sdrahn     bits32 aSig, bSig, zSig;
86208daf0c2Sdrahn     int16 expDiff;
86308daf0c2Sdrahn 
86408daf0c2Sdrahn     aSig = extractFloat32Frac( a );
86508daf0c2Sdrahn     aExp = extractFloat32Exp( a );
86608daf0c2Sdrahn     bSig = extractFloat32Frac( b );
86708daf0c2Sdrahn     bExp = extractFloat32Exp( b );
86808daf0c2Sdrahn     expDiff = aExp - bExp;
86908daf0c2Sdrahn     aSig <<= 7;
87008daf0c2Sdrahn     bSig <<= 7;
87108daf0c2Sdrahn     if ( 0 < expDiff ) goto aExpBigger;
87208daf0c2Sdrahn     if ( expDiff < 0 ) goto bExpBigger;
87308daf0c2Sdrahn     if ( aExp == 0xFF ) {
87408daf0c2Sdrahn         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
87508daf0c2Sdrahn         float_raise( float_flag_invalid );
87608daf0c2Sdrahn         return float32_default_nan;
87708daf0c2Sdrahn     }
87808daf0c2Sdrahn     if ( aExp == 0 ) {
87908daf0c2Sdrahn         aExp = 1;
88008daf0c2Sdrahn         bExp = 1;
88108daf0c2Sdrahn     }
88208daf0c2Sdrahn     if ( bSig < aSig ) goto aBigger;
88308daf0c2Sdrahn     if ( aSig < bSig ) goto bBigger;
88408daf0c2Sdrahn     return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
88508daf0c2Sdrahn  bExpBigger:
88608daf0c2Sdrahn     if ( bExp == 0xFF ) {
88708daf0c2Sdrahn         if ( bSig ) return propagateFloat32NaN( a, b );
88808daf0c2Sdrahn         return packFloat32( zSign ^ 1, 0xFF, 0 );
88908daf0c2Sdrahn     }
89008daf0c2Sdrahn     if ( aExp == 0 ) {
89108daf0c2Sdrahn         ++expDiff;
89208daf0c2Sdrahn     }
89308daf0c2Sdrahn     else {
89408daf0c2Sdrahn         aSig |= 0x40000000;
89508daf0c2Sdrahn     }
89608daf0c2Sdrahn     shift32RightJamming( aSig, - expDiff, &aSig );
89708daf0c2Sdrahn     bSig |= 0x40000000;
89808daf0c2Sdrahn  bBigger:
89908daf0c2Sdrahn     zSig = bSig - aSig;
90008daf0c2Sdrahn     zExp = bExp;
90108daf0c2Sdrahn     zSign ^= 1;
90208daf0c2Sdrahn     goto normalizeRoundAndPack;
90308daf0c2Sdrahn  aExpBigger:
90408daf0c2Sdrahn     if ( aExp == 0xFF ) {
90508daf0c2Sdrahn         if ( aSig ) return propagateFloat32NaN( a, b );
90608daf0c2Sdrahn         return a;
90708daf0c2Sdrahn     }
90808daf0c2Sdrahn     if ( bExp == 0 ) {
90908daf0c2Sdrahn         --expDiff;
91008daf0c2Sdrahn     }
91108daf0c2Sdrahn     else {
91208daf0c2Sdrahn         bSig |= 0x40000000;
91308daf0c2Sdrahn     }
91408daf0c2Sdrahn     shift32RightJamming( bSig, expDiff, &bSig );
91508daf0c2Sdrahn     aSig |= 0x40000000;
91608daf0c2Sdrahn  aBigger:
91708daf0c2Sdrahn     zSig = aSig - bSig;
91808daf0c2Sdrahn     zExp = aExp;
91908daf0c2Sdrahn  normalizeRoundAndPack:
92008daf0c2Sdrahn     --zExp;
92108daf0c2Sdrahn     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
92208daf0c2Sdrahn 
92308daf0c2Sdrahn }
92408daf0c2Sdrahn 
92508daf0c2Sdrahn /*
92608daf0c2Sdrahn -------------------------------------------------------------------------------
92708daf0c2Sdrahn Returns the result of adding the single-precision floating-point values `a'
92808daf0c2Sdrahn and `b'.  The operation is performed according to the IEC/IEEE Standard for
92908daf0c2Sdrahn Binary Floating-Point Arithmetic.
93008daf0c2Sdrahn -------------------------------------------------------------------------------
93108daf0c2Sdrahn */
float32_add(float32 a,float32 b)93208daf0c2Sdrahn float32 float32_add( float32 a, float32 b )
93308daf0c2Sdrahn {
93408daf0c2Sdrahn     flag aSign, bSign;
93508daf0c2Sdrahn 
93608daf0c2Sdrahn     aSign = extractFloat32Sign( a );
93708daf0c2Sdrahn     bSign = extractFloat32Sign( b );
93808daf0c2Sdrahn     if ( aSign == bSign ) {
93908daf0c2Sdrahn         return addFloat32Sigs( a, b, aSign );
94008daf0c2Sdrahn     }
94108daf0c2Sdrahn     else {
94208daf0c2Sdrahn         return subFloat32Sigs( a, b, aSign );
94308daf0c2Sdrahn     }
94408daf0c2Sdrahn 
94508daf0c2Sdrahn }
94608daf0c2Sdrahn 
94708daf0c2Sdrahn /*
94808daf0c2Sdrahn -------------------------------------------------------------------------------
94908daf0c2Sdrahn Returns the result of subtracting the single-precision floating-point values
95008daf0c2Sdrahn `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
95108daf0c2Sdrahn for Binary Floating-Point Arithmetic.
95208daf0c2Sdrahn -------------------------------------------------------------------------------
95308daf0c2Sdrahn */
float32_sub(float32 a,float32 b)95408daf0c2Sdrahn float32 float32_sub( float32 a, float32 b )
95508daf0c2Sdrahn {
95608daf0c2Sdrahn     flag aSign, bSign;
95708daf0c2Sdrahn 
95808daf0c2Sdrahn     aSign = extractFloat32Sign( a );
95908daf0c2Sdrahn     bSign = extractFloat32Sign( b );
96008daf0c2Sdrahn     if ( aSign == bSign ) {
96108daf0c2Sdrahn         return subFloat32Sigs( a, b, aSign );
96208daf0c2Sdrahn     }
96308daf0c2Sdrahn     else {
96408daf0c2Sdrahn         return addFloat32Sigs( a, b, aSign );
96508daf0c2Sdrahn     }
96608daf0c2Sdrahn 
96708daf0c2Sdrahn }
96808daf0c2Sdrahn 
96908daf0c2Sdrahn /*
97008daf0c2Sdrahn -------------------------------------------------------------------------------
97108daf0c2Sdrahn Returns the result of multiplying the single-precision floating-point values
97208daf0c2Sdrahn `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
97308daf0c2Sdrahn for Binary Floating-Point Arithmetic.
97408daf0c2Sdrahn -------------------------------------------------------------------------------
97508daf0c2Sdrahn */
float32_mul(float32 a,float32 b)97608daf0c2Sdrahn float32 float32_mul( float32 a, float32 b )
97708daf0c2Sdrahn {
97808daf0c2Sdrahn     flag aSign, bSign, zSign;
97908daf0c2Sdrahn     int16 aExp, bExp, zExp;
98008daf0c2Sdrahn     bits32 aSig, bSig, zSig0, zSig1;
98108daf0c2Sdrahn 
98208daf0c2Sdrahn     aSig = extractFloat32Frac( a );
98308daf0c2Sdrahn     aExp = extractFloat32Exp( a );
98408daf0c2Sdrahn     aSign = extractFloat32Sign( a );
98508daf0c2Sdrahn     bSig = extractFloat32Frac( b );
98608daf0c2Sdrahn     bExp = extractFloat32Exp( b );
98708daf0c2Sdrahn     bSign = extractFloat32Sign( b );
98808daf0c2Sdrahn     zSign = aSign ^ bSign;
98908daf0c2Sdrahn     if ( aExp == 0xFF ) {
99008daf0c2Sdrahn         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
99108daf0c2Sdrahn             return propagateFloat32NaN( a, b );
99208daf0c2Sdrahn         }
99308daf0c2Sdrahn         if ( ( bExp | bSig ) == 0 ) {
99408daf0c2Sdrahn             float_raise( float_flag_invalid );
99508daf0c2Sdrahn             return float32_default_nan;
99608daf0c2Sdrahn         }
99708daf0c2Sdrahn         return packFloat32( zSign, 0xFF, 0 );
99808daf0c2Sdrahn     }
99908daf0c2Sdrahn     if ( bExp == 0xFF ) {
100008daf0c2Sdrahn         if ( bSig ) return propagateFloat32NaN( a, b );
100108daf0c2Sdrahn         if ( ( aExp | aSig ) == 0 ) {
100208daf0c2Sdrahn             float_raise( float_flag_invalid );
100308daf0c2Sdrahn             return float32_default_nan;
100408daf0c2Sdrahn         }
100508daf0c2Sdrahn         return packFloat32( zSign, 0xFF, 0 );
100608daf0c2Sdrahn     }
100708daf0c2Sdrahn     if ( aExp == 0 ) {
100808daf0c2Sdrahn         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
100908daf0c2Sdrahn         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
101008daf0c2Sdrahn     }
101108daf0c2Sdrahn     if ( bExp == 0 ) {
101208daf0c2Sdrahn         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
101308daf0c2Sdrahn         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
101408daf0c2Sdrahn     }
101508daf0c2Sdrahn     zExp = aExp + bExp - 0x7F;
101608daf0c2Sdrahn     aSig = ( aSig | 0x00800000 )<<7;
101708daf0c2Sdrahn     bSig = ( bSig | 0x00800000 )<<8;
101808daf0c2Sdrahn     mul32To64( aSig, bSig, &zSig0, &zSig1 );
101908daf0c2Sdrahn     zSig0 |= ( zSig1 != 0 );
102008daf0c2Sdrahn     if ( 0 <= (sbits32) ( zSig0<<1 ) ) {
102108daf0c2Sdrahn         zSig0 <<= 1;
102208daf0c2Sdrahn         --zExp;
102308daf0c2Sdrahn     }
102408daf0c2Sdrahn     return roundAndPackFloat32( zSign, zExp, zSig0 );
102508daf0c2Sdrahn 
102608daf0c2Sdrahn }
102708daf0c2Sdrahn 
102808daf0c2Sdrahn /*
102908daf0c2Sdrahn -------------------------------------------------------------------------------
103008daf0c2Sdrahn Returns the result of dividing the single-precision floating-point value `a'
103108daf0c2Sdrahn by the corresponding value `b'.  The operation is performed according to the
103208daf0c2Sdrahn IEC/IEEE Standard for Binary Floating-Point Arithmetic.
103308daf0c2Sdrahn -------------------------------------------------------------------------------
103408daf0c2Sdrahn */
float32_div(float32 a,float32 b)103508daf0c2Sdrahn float32 float32_div( float32 a, float32 b )
103608daf0c2Sdrahn {
103708daf0c2Sdrahn     flag aSign, bSign, zSign;
103808daf0c2Sdrahn     int16 aExp, bExp, zExp;
103908daf0c2Sdrahn     bits32 aSig, bSig, zSig, rem0, rem1, term0, term1;
104008daf0c2Sdrahn 
104108daf0c2Sdrahn     aSig = extractFloat32Frac( a );
104208daf0c2Sdrahn     aExp = extractFloat32Exp( a );
104308daf0c2Sdrahn     aSign = extractFloat32Sign( a );
104408daf0c2Sdrahn     bSig = extractFloat32Frac( b );
104508daf0c2Sdrahn     bExp = extractFloat32Exp( b );
104608daf0c2Sdrahn     bSign = extractFloat32Sign( b );
104708daf0c2Sdrahn     zSign = aSign ^ bSign;
104808daf0c2Sdrahn     if ( aExp == 0xFF ) {
104908daf0c2Sdrahn         if ( aSig ) return propagateFloat32NaN( a, b );
105008daf0c2Sdrahn         if ( bExp == 0xFF ) {
105108daf0c2Sdrahn             if ( bSig ) return propagateFloat32NaN( a, b );
105208daf0c2Sdrahn             float_raise( float_flag_invalid );
105308daf0c2Sdrahn             return float32_default_nan;
105408daf0c2Sdrahn         }
105508daf0c2Sdrahn         return packFloat32( zSign, 0xFF, 0 );
105608daf0c2Sdrahn     }
105708daf0c2Sdrahn     if ( bExp == 0xFF ) {
105808daf0c2Sdrahn         if ( bSig ) return propagateFloat32NaN( a, b );
105908daf0c2Sdrahn         return packFloat32( zSign, 0, 0 );
106008daf0c2Sdrahn     }
106108daf0c2Sdrahn     if ( bExp == 0 ) {
106208daf0c2Sdrahn         if ( bSig == 0 ) {
106308daf0c2Sdrahn             if ( ( aExp | aSig ) == 0 ) {
106408daf0c2Sdrahn                 float_raise( float_flag_invalid );
106508daf0c2Sdrahn                 return float32_default_nan;
106608daf0c2Sdrahn             }
106708daf0c2Sdrahn             float_raise( float_flag_divbyzero );
106808daf0c2Sdrahn             return packFloat32( zSign, 0xFF, 0 );
106908daf0c2Sdrahn         }
107008daf0c2Sdrahn         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
107108daf0c2Sdrahn     }
107208daf0c2Sdrahn     if ( aExp == 0 ) {
107308daf0c2Sdrahn         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
107408daf0c2Sdrahn         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
107508daf0c2Sdrahn     }
107608daf0c2Sdrahn     zExp = aExp - bExp + 0x7D;
107708daf0c2Sdrahn     aSig = ( aSig | 0x00800000 )<<7;
107808daf0c2Sdrahn     bSig = ( bSig | 0x00800000 )<<8;
107908daf0c2Sdrahn     if ( bSig <= ( aSig + aSig ) ) {
108008daf0c2Sdrahn         aSig >>= 1;
108108daf0c2Sdrahn         ++zExp;
108208daf0c2Sdrahn     }
108308daf0c2Sdrahn     zSig = estimateDiv64To32( aSig, 0, bSig );
108408daf0c2Sdrahn     if ( ( zSig & 0x3F ) <= 2 ) {
108508daf0c2Sdrahn         mul32To64( bSig, zSig, &term0, &term1 );
108608daf0c2Sdrahn         sub64( aSig, 0, term0, term1, &rem0, &rem1 );
108708daf0c2Sdrahn         while ( (sbits32) rem0 < 0 ) {
108808daf0c2Sdrahn             --zSig;
108908daf0c2Sdrahn             add64( rem0, rem1, 0, bSig, &rem0, &rem1 );
109008daf0c2Sdrahn         }
109108daf0c2Sdrahn         zSig |= ( rem1 != 0 );
109208daf0c2Sdrahn     }
109308daf0c2Sdrahn     return roundAndPackFloat32( zSign, zExp, zSig );
109408daf0c2Sdrahn 
109508daf0c2Sdrahn }
109608daf0c2Sdrahn 
109708daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
109808daf0c2Sdrahn /*
109908daf0c2Sdrahn -------------------------------------------------------------------------------
110008daf0c2Sdrahn Returns the remainder of the single-precision floating-point value `a'
110108daf0c2Sdrahn with respect to the corresponding value `b'.  The operation is performed
110208daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
110308daf0c2Sdrahn -------------------------------------------------------------------------------
110408daf0c2Sdrahn */
float32_rem(float32 a,float32 b)110508daf0c2Sdrahn float32 float32_rem( float32 a, float32 b )
110608daf0c2Sdrahn {
110708daf0c2Sdrahn     flag aSign, bSign, zSign;
110808daf0c2Sdrahn     int16 aExp, bExp, expDiff;
110908daf0c2Sdrahn     bits32 aSig, bSig, q, allZero, alternateASig;
111008daf0c2Sdrahn     sbits32 sigMean;
111108daf0c2Sdrahn 
111208daf0c2Sdrahn     aSig = extractFloat32Frac( a );
111308daf0c2Sdrahn     aExp = extractFloat32Exp( a );
111408daf0c2Sdrahn     aSign = extractFloat32Sign( a );
111508daf0c2Sdrahn     bSig = extractFloat32Frac( b );
111608daf0c2Sdrahn     bExp = extractFloat32Exp( b );
111708daf0c2Sdrahn     bSign = extractFloat32Sign( b );
111808daf0c2Sdrahn     if ( aExp == 0xFF ) {
111908daf0c2Sdrahn         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
112008daf0c2Sdrahn             return propagateFloat32NaN( a, b );
112108daf0c2Sdrahn         }
112208daf0c2Sdrahn         float_raise( float_flag_invalid );
112308daf0c2Sdrahn         return float32_default_nan;
112408daf0c2Sdrahn     }
112508daf0c2Sdrahn     if ( bExp == 0xFF ) {
112608daf0c2Sdrahn         if ( bSig ) return propagateFloat32NaN( a, b );
112708daf0c2Sdrahn         return a;
112808daf0c2Sdrahn     }
112908daf0c2Sdrahn     if ( bExp == 0 ) {
113008daf0c2Sdrahn         if ( bSig == 0 ) {
113108daf0c2Sdrahn             float_raise( float_flag_invalid );
113208daf0c2Sdrahn             return float32_default_nan;
113308daf0c2Sdrahn         }
113408daf0c2Sdrahn         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
113508daf0c2Sdrahn     }
113608daf0c2Sdrahn     if ( aExp == 0 ) {
113708daf0c2Sdrahn         if ( aSig == 0 ) return a;
113808daf0c2Sdrahn         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
113908daf0c2Sdrahn     }
114008daf0c2Sdrahn     expDiff = aExp - bExp;
114108daf0c2Sdrahn     aSig = ( aSig | 0x00800000 )<<8;
114208daf0c2Sdrahn     bSig = ( bSig | 0x00800000 )<<8;
114308daf0c2Sdrahn     if ( expDiff < 0 ) {
114408daf0c2Sdrahn         if ( expDiff < -1 ) return a;
114508daf0c2Sdrahn         aSig >>= 1;
114608daf0c2Sdrahn     }
114708daf0c2Sdrahn     q = ( bSig <= aSig );
114808daf0c2Sdrahn     if ( q ) aSig -= bSig;
114908daf0c2Sdrahn     expDiff -= 32;
115008daf0c2Sdrahn     while ( 0 < expDiff ) {
115108daf0c2Sdrahn         q = estimateDiv64To32( aSig, 0, bSig );
115208daf0c2Sdrahn         q = ( 2 < q ) ? q - 2 : 0;
115308daf0c2Sdrahn         aSig = - ( ( bSig>>2 ) * q );
115408daf0c2Sdrahn         expDiff -= 30;
115508daf0c2Sdrahn     }
115608daf0c2Sdrahn     expDiff += 32;
115708daf0c2Sdrahn     if ( 0 < expDiff ) {
115808daf0c2Sdrahn         q = estimateDiv64To32( aSig, 0, bSig );
115908daf0c2Sdrahn         q = ( 2 < q ) ? q - 2 : 0;
116008daf0c2Sdrahn         q >>= 32 - expDiff;
116108daf0c2Sdrahn         bSig >>= 2;
116208daf0c2Sdrahn         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
116308daf0c2Sdrahn     }
116408daf0c2Sdrahn     else {
116508daf0c2Sdrahn         aSig >>= 2;
116608daf0c2Sdrahn         bSig >>= 2;
116708daf0c2Sdrahn     }
116808daf0c2Sdrahn     do {
116908daf0c2Sdrahn         alternateASig = aSig;
117008daf0c2Sdrahn         ++q;
117108daf0c2Sdrahn         aSig -= bSig;
117208daf0c2Sdrahn     } while ( 0 <= (sbits32) aSig );
117308daf0c2Sdrahn     sigMean = aSig + alternateASig;
117408daf0c2Sdrahn     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
117508daf0c2Sdrahn         aSig = alternateASig;
117608daf0c2Sdrahn     }
117708daf0c2Sdrahn     zSign = ( (sbits32) aSig < 0 );
117808daf0c2Sdrahn     if ( zSign ) aSig = - aSig;
117908daf0c2Sdrahn     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
118008daf0c2Sdrahn 
118108daf0c2Sdrahn }
118208daf0c2Sdrahn #endif
118308daf0c2Sdrahn 
118408daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
118508daf0c2Sdrahn /*
118608daf0c2Sdrahn -------------------------------------------------------------------------------
118708daf0c2Sdrahn Returns the square root of the single-precision floating-point value `a'.
118808daf0c2Sdrahn The operation is performed according to the IEC/IEEE Standard for Binary
118908daf0c2Sdrahn Floating-Point Arithmetic.
119008daf0c2Sdrahn -------------------------------------------------------------------------------
119108daf0c2Sdrahn */
float32_sqrt(float32 a)119208daf0c2Sdrahn float32 float32_sqrt( float32 a )
119308daf0c2Sdrahn {
119408daf0c2Sdrahn     flag aSign;
119508daf0c2Sdrahn     int16 aExp, zExp;
119608daf0c2Sdrahn     bits32 aSig, zSig, rem0, rem1, term0, term1;
119708daf0c2Sdrahn 
119808daf0c2Sdrahn     aSig = extractFloat32Frac( a );
119908daf0c2Sdrahn     aExp = extractFloat32Exp( a );
120008daf0c2Sdrahn     aSign = extractFloat32Sign( a );
120108daf0c2Sdrahn     if ( aExp == 0xFF ) {
120208daf0c2Sdrahn         if ( aSig ) return propagateFloat32NaN( a, 0 );
120308daf0c2Sdrahn         if ( ! aSign ) return a;
120408daf0c2Sdrahn         float_raise( float_flag_invalid );
120508daf0c2Sdrahn         return float32_default_nan;
120608daf0c2Sdrahn     }
120708daf0c2Sdrahn     if ( aSign ) {
120808daf0c2Sdrahn         if ( ( aExp | aSig ) == 0 ) return a;
120908daf0c2Sdrahn         float_raise( float_flag_invalid );
121008daf0c2Sdrahn         return float32_default_nan;
121108daf0c2Sdrahn     }
121208daf0c2Sdrahn     if ( aExp == 0 ) {
121308daf0c2Sdrahn         if ( aSig == 0 ) return 0;
121408daf0c2Sdrahn         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
121508daf0c2Sdrahn     }
121608daf0c2Sdrahn     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
121708daf0c2Sdrahn     aSig = ( aSig | 0x00800000 )<<8;
121808daf0c2Sdrahn     zSig = estimateSqrt32( aExp, aSig ) + 2;
121908daf0c2Sdrahn     if ( ( zSig & 0x7F ) <= 5 ) {
122008daf0c2Sdrahn         if ( zSig < 2 ) {
122108daf0c2Sdrahn             zSig = 0x7FFFFFFF;
122208daf0c2Sdrahn             goto roundAndPack;
122308daf0c2Sdrahn         }
122408daf0c2Sdrahn         else {
122508daf0c2Sdrahn             aSig >>= aExp & 1;
122608daf0c2Sdrahn             mul32To64( zSig, zSig, &term0, &term1 );
122708daf0c2Sdrahn             sub64( aSig, 0, term0, term1, &rem0, &rem1 );
122808daf0c2Sdrahn             while ( (sbits32) rem0 < 0 ) {
122908daf0c2Sdrahn                 --zSig;
123008daf0c2Sdrahn                 shortShift64Left( 0, zSig, 1, &term0, &term1 );
123108daf0c2Sdrahn                 term1 |= 1;
123208daf0c2Sdrahn                 add64( rem0, rem1, term0, term1, &rem0, &rem1 );
123308daf0c2Sdrahn             }
123408daf0c2Sdrahn             zSig |= ( ( rem0 | rem1 ) != 0 );
123508daf0c2Sdrahn         }
123608daf0c2Sdrahn     }
123708daf0c2Sdrahn     shift32RightJamming( zSig, 1, &zSig );
123808daf0c2Sdrahn  roundAndPack:
123908daf0c2Sdrahn     return roundAndPackFloat32( 0, zExp, zSig );
124008daf0c2Sdrahn 
124108daf0c2Sdrahn }
124208daf0c2Sdrahn #endif
124308daf0c2Sdrahn 
124408daf0c2Sdrahn /*
124508daf0c2Sdrahn -------------------------------------------------------------------------------
124608daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is equal to
124708daf0c2Sdrahn the corresponding value `b', and 0 otherwise.  The comparison is performed
124808daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
124908daf0c2Sdrahn -------------------------------------------------------------------------------
125008daf0c2Sdrahn */
float32_eq(float32 a,float32 b)125108daf0c2Sdrahn flag float32_eq( float32 a, float32 b )
125208daf0c2Sdrahn {
125308daf0c2Sdrahn 
125408daf0c2Sdrahn     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
125508daf0c2Sdrahn          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
125608daf0c2Sdrahn        ) {
125708daf0c2Sdrahn         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
125808daf0c2Sdrahn             float_raise( float_flag_invalid );
125908daf0c2Sdrahn         }
126008daf0c2Sdrahn         return 0;
126108daf0c2Sdrahn     }
126208daf0c2Sdrahn     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
126308daf0c2Sdrahn 
126408daf0c2Sdrahn }
126508daf0c2Sdrahn 
126608daf0c2Sdrahn /*
126708daf0c2Sdrahn -------------------------------------------------------------------------------
126808daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is less than
126908daf0c2Sdrahn or equal to the corresponding value `b', and 0 otherwise.  The comparison
127008daf0c2Sdrahn is performed according to the IEC/IEEE Standard for Binary Floating-Point
127108daf0c2Sdrahn Arithmetic.
127208daf0c2Sdrahn -------------------------------------------------------------------------------
127308daf0c2Sdrahn */
float32_le(float32 a,float32 b)127408daf0c2Sdrahn flag float32_le( float32 a, float32 b )
127508daf0c2Sdrahn {
127608daf0c2Sdrahn     flag aSign, bSign;
127708daf0c2Sdrahn 
127808daf0c2Sdrahn     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
127908daf0c2Sdrahn          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
128008daf0c2Sdrahn        ) {
128108daf0c2Sdrahn         float_raise( float_flag_invalid );
128208daf0c2Sdrahn         return 0;
128308daf0c2Sdrahn     }
128408daf0c2Sdrahn     aSign = extractFloat32Sign( a );
128508daf0c2Sdrahn     bSign = extractFloat32Sign( b );
128608daf0c2Sdrahn     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
128708daf0c2Sdrahn     return ( a == b ) || ( aSign ^ ( a < b ) );
128808daf0c2Sdrahn 
128908daf0c2Sdrahn }
129008daf0c2Sdrahn 
129108daf0c2Sdrahn /*
129208daf0c2Sdrahn -------------------------------------------------------------------------------
129308daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is less than
129408daf0c2Sdrahn the corresponding value `b', and 0 otherwise.  The comparison is performed
129508daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129608daf0c2Sdrahn -------------------------------------------------------------------------------
129708daf0c2Sdrahn */
float32_lt(float32 a,float32 b)129808daf0c2Sdrahn flag float32_lt( float32 a, float32 b )
129908daf0c2Sdrahn {
130008daf0c2Sdrahn     flag aSign, bSign;
130108daf0c2Sdrahn 
130208daf0c2Sdrahn     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
130308daf0c2Sdrahn          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
130408daf0c2Sdrahn        ) {
130508daf0c2Sdrahn         float_raise( float_flag_invalid );
130608daf0c2Sdrahn         return 0;
130708daf0c2Sdrahn     }
130808daf0c2Sdrahn     aSign = extractFloat32Sign( a );
130908daf0c2Sdrahn     bSign = extractFloat32Sign( b );
131008daf0c2Sdrahn     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
131108daf0c2Sdrahn     return ( a != b ) && ( aSign ^ ( a < b ) );
131208daf0c2Sdrahn 
131308daf0c2Sdrahn }
131408daf0c2Sdrahn 
131508daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
131608daf0c2Sdrahn /*
131708daf0c2Sdrahn -------------------------------------------------------------------------------
131808daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is equal to
131908daf0c2Sdrahn the corresponding value `b', and 0 otherwise.  The invalid exception is
132008daf0c2Sdrahn raised if either operand is a NaN.  Otherwise, the comparison is performed
132108daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
132208daf0c2Sdrahn -------------------------------------------------------------------------------
132308daf0c2Sdrahn */
float32_eq_signaling(float32 a,float32 b)132408daf0c2Sdrahn flag float32_eq_signaling( float32 a, float32 b )
132508daf0c2Sdrahn {
132608daf0c2Sdrahn 
132708daf0c2Sdrahn     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
132808daf0c2Sdrahn          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
132908daf0c2Sdrahn        ) {
133008daf0c2Sdrahn         float_raise( float_flag_invalid );
133108daf0c2Sdrahn         return 0;
133208daf0c2Sdrahn     }
133308daf0c2Sdrahn     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
133408daf0c2Sdrahn 
133508daf0c2Sdrahn }
133608daf0c2Sdrahn 
133708daf0c2Sdrahn /*
133808daf0c2Sdrahn -------------------------------------------------------------------------------
133908daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is less than or
134008daf0c2Sdrahn equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
134108daf0c2Sdrahn cause an exception.  Otherwise, the comparison is performed according to the
134208daf0c2Sdrahn IEC/IEEE Standard for Binary Floating-Point Arithmetic.
134308daf0c2Sdrahn -------------------------------------------------------------------------------
134408daf0c2Sdrahn */
float32_le_quiet(float32 a,float32 b)134508daf0c2Sdrahn flag float32_le_quiet( float32 a, float32 b )
134608daf0c2Sdrahn {
134708daf0c2Sdrahn     flag aSign, bSign;
134808daf0c2Sdrahn     int16 aExp, bExp;
134908daf0c2Sdrahn 
135008daf0c2Sdrahn     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
135108daf0c2Sdrahn          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
135208daf0c2Sdrahn        ) {
135308daf0c2Sdrahn         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
135408daf0c2Sdrahn             float_raise( float_flag_invalid );
135508daf0c2Sdrahn         }
135608daf0c2Sdrahn         return 0;
135708daf0c2Sdrahn     }
135808daf0c2Sdrahn     aSign = extractFloat32Sign( a );
135908daf0c2Sdrahn     bSign = extractFloat32Sign( b );
136008daf0c2Sdrahn     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
136108daf0c2Sdrahn     return ( a == b ) || ( aSign ^ ( a < b ) );
136208daf0c2Sdrahn 
136308daf0c2Sdrahn }
136408daf0c2Sdrahn 
136508daf0c2Sdrahn /*
136608daf0c2Sdrahn -------------------------------------------------------------------------------
136708daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is less than
136808daf0c2Sdrahn the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
136908daf0c2Sdrahn exception.  Otherwise, the comparison is performed according to the IEC/IEEE
137008daf0c2Sdrahn Standard for Binary Floating-Point Arithmetic.
137108daf0c2Sdrahn -------------------------------------------------------------------------------
137208daf0c2Sdrahn */
float32_lt_quiet(float32 a,float32 b)137308daf0c2Sdrahn flag float32_lt_quiet( float32 a, float32 b )
137408daf0c2Sdrahn {
137508daf0c2Sdrahn     flag aSign, bSign;
137608daf0c2Sdrahn 
137708daf0c2Sdrahn     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
137808daf0c2Sdrahn          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
137908daf0c2Sdrahn        ) {
138008daf0c2Sdrahn         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
138108daf0c2Sdrahn             float_raise( float_flag_invalid );
138208daf0c2Sdrahn         }
138308daf0c2Sdrahn         return 0;
138408daf0c2Sdrahn     }
138508daf0c2Sdrahn     aSign = extractFloat32Sign( a );
138608daf0c2Sdrahn     bSign = extractFloat32Sign( b );
138708daf0c2Sdrahn     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
138808daf0c2Sdrahn     return ( a != b ) && ( aSign ^ ( a < b ) );
138908daf0c2Sdrahn 
139008daf0c2Sdrahn }
139108daf0c2Sdrahn #endif /* !SOFTFLOAT_FOR_GCC */
139208daf0c2Sdrahn 
139308daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
139408daf0c2Sdrahn /*
139508daf0c2Sdrahn -------------------------------------------------------------------------------
139608daf0c2Sdrahn Returns the result of converting the double-precision floating-point value
139708daf0c2Sdrahn `a' to the 32-bit two's complement integer format.  The conversion is
139808daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point
139908daf0c2Sdrahn Arithmetic---which means in particular that the conversion is rounded
140008daf0c2Sdrahn according to the current rounding mode.  If `a' is a NaN, the largest
140108daf0c2Sdrahn positive integer is returned.  Otherwise, if the conversion overflows, the
140208daf0c2Sdrahn largest integer with the same sign as `a' is returned.
140308daf0c2Sdrahn -------------------------------------------------------------------------------
140408daf0c2Sdrahn */
float64_to_int32(float64 a)140508daf0c2Sdrahn int32 float64_to_int32( float64 a )
140608daf0c2Sdrahn {
140708daf0c2Sdrahn     flag aSign;
140808daf0c2Sdrahn     int16 aExp, shiftCount;
140908daf0c2Sdrahn     bits32 aSig0, aSig1, absZ, aSigExtra;
141008daf0c2Sdrahn     int32 z;
141108daf0c2Sdrahn     int8 roundingMode;
141208daf0c2Sdrahn 
141308daf0c2Sdrahn     aSig1 = extractFloat64Frac1( a );
141408daf0c2Sdrahn     aSig0 = extractFloat64Frac0( a );
141508daf0c2Sdrahn     aExp = extractFloat64Exp( a );
141608daf0c2Sdrahn     aSign = extractFloat64Sign( a );
141708daf0c2Sdrahn     shiftCount = aExp - 0x413;
141808daf0c2Sdrahn     if ( 0 <= shiftCount ) {
141908daf0c2Sdrahn         if ( 0x41E < aExp ) {
142008daf0c2Sdrahn             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
142108daf0c2Sdrahn             goto invalid;
142208daf0c2Sdrahn         }
142308daf0c2Sdrahn         shortShift64Left(
142408daf0c2Sdrahn             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
142508daf0c2Sdrahn         if ( 0x80000000 < absZ ) goto invalid;
142608daf0c2Sdrahn     }
142708daf0c2Sdrahn     else {
142808daf0c2Sdrahn         aSig1 = ( aSig1 != 0 );
142908daf0c2Sdrahn         if ( aExp < 0x3FE ) {
143008daf0c2Sdrahn             aSigExtra = aExp | aSig0 | aSig1;
143108daf0c2Sdrahn             absZ = 0;
143208daf0c2Sdrahn         }
143308daf0c2Sdrahn         else {
143408daf0c2Sdrahn             aSig0 |= 0x00100000;
143508daf0c2Sdrahn             aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
143608daf0c2Sdrahn             absZ = aSig0>>( - shiftCount );
143708daf0c2Sdrahn         }
143808daf0c2Sdrahn     }
143908daf0c2Sdrahn     roundingMode = float_rounding_mode;
144008daf0c2Sdrahn     if ( roundingMode == float_round_nearest_even ) {
144108daf0c2Sdrahn         if ( (sbits32) aSigExtra < 0 ) {
144208daf0c2Sdrahn             ++absZ;
144308daf0c2Sdrahn             if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
144408daf0c2Sdrahn         }
144508daf0c2Sdrahn         z = aSign ? - absZ : absZ;
144608daf0c2Sdrahn     }
144708daf0c2Sdrahn     else {
144808daf0c2Sdrahn         aSigExtra = ( aSigExtra != 0 );
144908daf0c2Sdrahn         if ( aSign ) {
145008daf0c2Sdrahn             z = - (   absZ
145108daf0c2Sdrahn                     + ( ( roundingMode == float_round_down ) & aSigExtra ) );
145208daf0c2Sdrahn         }
145308daf0c2Sdrahn         else {
145408daf0c2Sdrahn             z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
145508daf0c2Sdrahn         }
145608daf0c2Sdrahn     }
145708daf0c2Sdrahn     if ( ( aSign ^ ( z < 0 ) ) && z ) {
145808daf0c2Sdrahn  invalid:
145908daf0c2Sdrahn         float_raise( float_flag_invalid );
146008daf0c2Sdrahn         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
146108daf0c2Sdrahn     }
146208daf0c2Sdrahn     if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
146308daf0c2Sdrahn     return z;
146408daf0c2Sdrahn 
146508daf0c2Sdrahn }
146608daf0c2Sdrahn #endif /* !SOFTFLOAT_FOR_GCC */
146708daf0c2Sdrahn 
146808daf0c2Sdrahn /*
146908daf0c2Sdrahn -------------------------------------------------------------------------------
147008daf0c2Sdrahn Returns the result of converting the double-precision floating-point value
147108daf0c2Sdrahn `a' to the 32-bit two's complement integer format.  The conversion is
147208daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point
147308daf0c2Sdrahn Arithmetic, except that the conversion is always rounded toward zero.
147408daf0c2Sdrahn If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
147508daf0c2Sdrahn the conversion overflows, the largest integer with the same sign as `a' is
147608daf0c2Sdrahn returned.
147708daf0c2Sdrahn -------------------------------------------------------------------------------
147808daf0c2Sdrahn */
float64_to_int32_round_to_zero(float64 a)147908daf0c2Sdrahn int32 float64_to_int32_round_to_zero( float64 a )
148008daf0c2Sdrahn {
148108daf0c2Sdrahn     flag aSign;
148208daf0c2Sdrahn     int16 aExp, shiftCount;
148308daf0c2Sdrahn     bits32 aSig0, aSig1, absZ, aSigExtra;
148408daf0c2Sdrahn     int32 z;
148508daf0c2Sdrahn 
148608daf0c2Sdrahn     aSig1 = extractFloat64Frac1( a );
148708daf0c2Sdrahn     aSig0 = extractFloat64Frac0( a );
148808daf0c2Sdrahn     aExp = extractFloat64Exp( a );
148908daf0c2Sdrahn     aSign = extractFloat64Sign( a );
149008daf0c2Sdrahn     shiftCount = aExp - 0x413;
149108daf0c2Sdrahn     if ( 0 <= shiftCount ) {
149208daf0c2Sdrahn         if ( 0x41E < aExp ) {
149308daf0c2Sdrahn             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
149408daf0c2Sdrahn             goto invalid;
149508daf0c2Sdrahn         }
149608daf0c2Sdrahn         shortShift64Left(
149708daf0c2Sdrahn             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
149808daf0c2Sdrahn     }
149908daf0c2Sdrahn     else {
150008daf0c2Sdrahn         if ( aExp < 0x3FF ) {
150108daf0c2Sdrahn             if ( aExp | aSig0 | aSig1 ) {
150208daf0c2Sdrahn                 float_exception_flags |= float_flag_inexact;
150308daf0c2Sdrahn             }
150408daf0c2Sdrahn             return 0;
150508daf0c2Sdrahn         }
150608daf0c2Sdrahn         aSig0 |= 0x00100000;
150708daf0c2Sdrahn         aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
150808daf0c2Sdrahn         absZ = aSig0>>( - shiftCount );
150908daf0c2Sdrahn     }
151008daf0c2Sdrahn     z = aSign ? - absZ : absZ;
151108daf0c2Sdrahn     if ( ( aSign ^ ( z < 0 ) ) && z ) {
151208daf0c2Sdrahn  invalid:
151308daf0c2Sdrahn         float_raise( float_flag_invalid );
151408daf0c2Sdrahn         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
151508daf0c2Sdrahn     }
151608daf0c2Sdrahn     if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
151708daf0c2Sdrahn     return z;
151808daf0c2Sdrahn 
151908daf0c2Sdrahn }
152008daf0c2Sdrahn 
152108daf0c2Sdrahn /*
152208daf0c2Sdrahn -------------------------------------------------------------------------------
152308daf0c2Sdrahn Returns the result of converting the double-precision floating-point value
152408daf0c2Sdrahn `a' to the single-precision floating-point format.  The conversion is
152508daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point
152608daf0c2Sdrahn Arithmetic.
152708daf0c2Sdrahn -------------------------------------------------------------------------------
152808daf0c2Sdrahn */
float64_to_float32(float64 a)152908daf0c2Sdrahn float32 float64_to_float32( float64 a )
153008daf0c2Sdrahn {
153108daf0c2Sdrahn     flag aSign;
153208daf0c2Sdrahn     int16 aExp;
153308daf0c2Sdrahn     bits32 aSig0, aSig1, zSig;
153408daf0c2Sdrahn     bits32 allZero;
153508daf0c2Sdrahn 
153608daf0c2Sdrahn     aSig1 = extractFloat64Frac1( a );
153708daf0c2Sdrahn     aSig0 = extractFloat64Frac0( a );
153808daf0c2Sdrahn     aExp = extractFloat64Exp( a );
153908daf0c2Sdrahn     aSign = extractFloat64Sign( a );
154008daf0c2Sdrahn     if ( aExp == 0x7FF ) {
154108daf0c2Sdrahn         if ( aSig0 | aSig1 ) {
154208daf0c2Sdrahn             return commonNaNToFloat32( float64ToCommonNaN( a ) );
154308daf0c2Sdrahn         }
154408daf0c2Sdrahn         return packFloat32( aSign, 0xFF, 0 );
154508daf0c2Sdrahn     }
154608daf0c2Sdrahn     shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
154708daf0c2Sdrahn     if ( aExp ) zSig |= 0x40000000;
154808daf0c2Sdrahn     return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
154908daf0c2Sdrahn 
155008daf0c2Sdrahn }
155108daf0c2Sdrahn 
155208daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
155308daf0c2Sdrahn /*
155408daf0c2Sdrahn -------------------------------------------------------------------------------
155508daf0c2Sdrahn Rounds the double-precision floating-point value `a' to an integer,
155608daf0c2Sdrahn and returns the result as a double-precision floating-point value.  The
155708daf0c2Sdrahn operation is performed according to the IEC/IEEE Standard for Binary
155808daf0c2Sdrahn Floating-Point Arithmetic.
155908daf0c2Sdrahn -------------------------------------------------------------------------------
156008daf0c2Sdrahn */
float64_round_to_int(float64 a)156108daf0c2Sdrahn float64 float64_round_to_int( float64 a )
156208daf0c2Sdrahn {
156308daf0c2Sdrahn     flag aSign;
156408daf0c2Sdrahn     int16 aExp;
156508daf0c2Sdrahn     bits32 lastBitMask, roundBitsMask;
156608daf0c2Sdrahn     int8 roundingMode;
156708daf0c2Sdrahn     float64 z;
156808daf0c2Sdrahn 
156908daf0c2Sdrahn     aExp = extractFloat64Exp( a );
157008daf0c2Sdrahn     if ( 0x413 <= aExp ) {
157108daf0c2Sdrahn         if ( 0x433 <= aExp ) {
157208daf0c2Sdrahn             if (    ( aExp == 0x7FF )
157308daf0c2Sdrahn                  && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
157408daf0c2Sdrahn                 return propagateFloat64NaN( a, a );
157508daf0c2Sdrahn             }
157608daf0c2Sdrahn             return a;
157708daf0c2Sdrahn         }
157808daf0c2Sdrahn         lastBitMask = 1;
157908daf0c2Sdrahn         lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;
158008daf0c2Sdrahn         roundBitsMask = lastBitMask - 1;
158108daf0c2Sdrahn         z = a;
158208daf0c2Sdrahn         roundingMode = float_rounding_mode;
158308daf0c2Sdrahn         if ( roundingMode == float_round_nearest_even ) {
158408daf0c2Sdrahn             if ( lastBitMask ) {
158508daf0c2Sdrahn                 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
158608daf0c2Sdrahn                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
158708daf0c2Sdrahn             }
158808daf0c2Sdrahn             else {
158908daf0c2Sdrahn                 if ( (sbits32) z.low < 0 ) {
159008daf0c2Sdrahn                     ++z.high;
159108daf0c2Sdrahn                     if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
159208daf0c2Sdrahn                 }
159308daf0c2Sdrahn             }
159408daf0c2Sdrahn         }
159508daf0c2Sdrahn         else if ( roundingMode != float_round_to_zero ) {
159608daf0c2Sdrahn             if (   extractFloat64Sign( z )
159708daf0c2Sdrahn                  ^ ( roundingMode == float_round_up ) ) {
159808daf0c2Sdrahn                 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
159908daf0c2Sdrahn             }
160008daf0c2Sdrahn         }
160108daf0c2Sdrahn         z.low &= ~ roundBitsMask;
160208daf0c2Sdrahn     }
160308daf0c2Sdrahn     else {
160408daf0c2Sdrahn         if ( aExp <= 0x3FE ) {
160508daf0c2Sdrahn             if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
160608daf0c2Sdrahn             float_exception_flags |= float_flag_inexact;
160708daf0c2Sdrahn             aSign = extractFloat64Sign( a );
160808daf0c2Sdrahn             switch ( float_rounding_mode ) {
160908daf0c2Sdrahn              case float_round_nearest_even:
161008daf0c2Sdrahn                 if (    ( aExp == 0x3FE )
161108daf0c2Sdrahn                      && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
161208daf0c2Sdrahn                    ) {
161308daf0c2Sdrahn                     return packFloat64( aSign, 0x3FF, 0, 0 );
161408daf0c2Sdrahn                 }
161508daf0c2Sdrahn                 break;
161608daf0c2Sdrahn              case float_round_down:
161708daf0c2Sdrahn                 return
161808daf0c2Sdrahn                       aSign ? packFloat64( 1, 0x3FF, 0, 0 )
161908daf0c2Sdrahn                     : packFloat64( 0, 0, 0, 0 );
162008daf0c2Sdrahn              case float_round_up:
162108daf0c2Sdrahn                 return
162208daf0c2Sdrahn                       aSign ? packFloat64( 1, 0, 0, 0 )
162308daf0c2Sdrahn                     : packFloat64( 0, 0x3FF, 0, 0 );
162408daf0c2Sdrahn             }
162508daf0c2Sdrahn             return packFloat64( aSign, 0, 0, 0 );
162608daf0c2Sdrahn         }
162708daf0c2Sdrahn         lastBitMask = 1;
162808daf0c2Sdrahn         lastBitMask <<= 0x413 - aExp;
162908daf0c2Sdrahn         roundBitsMask = lastBitMask - 1;
163008daf0c2Sdrahn         z.low = 0;
163108daf0c2Sdrahn         z.high = a.high;
163208daf0c2Sdrahn         roundingMode = float_rounding_mode;
163308daf0c2Sdrahn         if ( roundingMode == float_round_nearest_even ) {
163408daf0c2Sdrahn             z.high += lastBitMask>>1;
163508daf0c2Sdrahn             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
163608daf0c2Sdrahn                 z.high &= ~ lastBitMask;
163708daf0c2Sdrahn             }
163808daf0c2Sdrahn         }
163908daf0c2Sdrahn         else if ( roundingMode != float_round_to_zero ) {
164008daf0c2Sdrahn             if (   extractFloat64Sign( z )
164108daf0c2Sdrahn                  ^ ( roundingMode == float_round_up ) ) {
164208daf0c2Sdrahn                 z.high |= ( a.low != 0 );
164308daf0c2Sdrahn                 z.high += roundBitsMask;
164408daf0c2Sdrahn             }
164508daf0c2Sdrahn         }
164608daf0c2Sdrahn         z.high &= ~ roundBitsMask;
164708daf0c2Sdrahn     }
164808daf0c2Sdrahn     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
164908daf0c2Sdrahn         float_exception_flags |= float_flag_inexact;
165008daf0c2Sdrahn     }
165108daf0c2Sdrahn     return z;
165208daf0c2Sdrahn 
165308daf0c2Sdrahn }
165408daf0c2Sdrahn #endif
165508daf0c2Sdrahn 
165608daf0c2Sdrahn /*
165708daf0c2Sdrahn -------------------------------------------------------------------------------
165808daf0c2Sdrahn Returns the result of adding the absolute values of the double-precision
165908daf0c2Sdrahn floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
166008daf0c2Sdrahn before being returned.  `zSign' is ignored if the result is a NaN.
166108daf0c2Sdrahn The addition is performed according to the IEC/IEEE Standard for Binary
166208daf0c2Sdrahn Floating-Point Arithmetic.
166308daf0c2Sdrahn -------------------------------------------------------------------------------
166408daf0c2Sdrahn */
addFloat64Sigs(float64 a,float64 b,flag zSign)166508daf0c2Sdrahn static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
166608daf0c2Sdrahn {
166708daf0c2Sdrahn     int16 aExp, bExp, zExp;
166808daf0c2Sdrahn     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
166908daf0c2Sdrahn     int16 expDiff;
167008daf0c2Sdrahn 
167108daf0c2Sdrahn     aSig1 = extractFloat64Frac1( a );
167208daf0c2Sdrahn     aSig0 = extractFloat64Frac0( a );
167308daf0c2Sdrahn     aExp = extractFloat64Exp( a );
167408daf0c2Sdrahn     bSig1 = extractFloat64Frac1( b );
167508daf0c2Sdrahn     bSig0 = extractFloat64Frac0( b );
167608daf0c2Sdrahn     bExp = extractFloat64Exp( b );
167708daf0c2Sdrahn     expDiff = aExp - bExp;
167808daf0c2Sdrahn     if ( 0 < expDiff ) {
167908daf0c2Sdrahn         if ( aExp == 0x7FF ) {
168008daf0c2Sdrahn             if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
168108daf0c2Sdrahn             return a;
168208daf0c2Sdrahn         }
168308daf0c2Sdrahn         if ( bExp == 0 ) {
168408daf0c2Sdrahn             --expDiff;
168508daf0c2Sdrahn         }
168608daf0c2Sdrahn         else {
168708daf0c2Sdrahn             bSig0 |= 0x00100000;
168808daf0c2Sdrahn         }
168908daf0c2Sdrahn         shift64ExtraRightJamming(
169008daf0c2Sdrahn             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
169108daf0c2Sdrahn         zExp = aExp;
169208daf0c2Sdrahn     }
169308daf0c2Sdrahn     else if ( expDiff < 0 ) {
169408daf0c2Sdrahn         if ( bExp == 0x7FF ) {
169508daf0c2Sdrahn             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
169608daf0c2Sdrahn             return packFloat64( zSign, 0x7FF, 0, 0 );
169708daf0c2Sdrahn         }
169808daf0c2Sdrahn         if ( aExp == 0 ) {
169908daf0c2Sdrahn             ++expDiff;
170008daf0c2Sdrahn         }
170108daf0c2Sdrahn         else {
170208daf0c2Sdrahn             aSig0 |= 0x00100000;
170308daf0c2Sdrahn         }
170408daf0c2Sdrahn         shift64ExtraRightJamming(
170508daf0c2Sdrahn             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
170608daf0c2Sdrahn         zExp = bExp;
170708daf0c2Sdrahn     }
170808daf0c2Sdrahn     else {
170908daf0c2Sdrahn         if ( aExp == 0x7FF ) {
171008daf0c2Sdrahn             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
171108daf0c2Sdrahn                 return propagateFloat64NaN( a, b );
171208daf0c2Sdrahn             }
171308daf0c2Sdrahn             return a;
171408daf0c2Sdrahn         }
171508daf0c2Sdrahn         add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
171608daf0c2Sdrahn         if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
171708daf0c2Sdrahn         zSig2 = 0;
171808daf0c2Sdrahn         zSig0 |= 0x00200000;
171908daf0c2Sdrahn         zExp = aExp;
172008daf0c2Sdrahn         goto shiftRight1;
172108daf0c2Sdrahn     }
172208daf0c2Sdrahn     aSig0 |= 0x00100000;
172308daf0c2Sdrahn     add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
172408daf0c2Sdrahn     --zExp;
172508daf0c2Sdrahn     if ( zSig0 < 0x00200000 ) goto roundAndPack;
172608daf0c2Sdrahn     ++zExp;
172708daf0c2Sdrahn  shiftRight1:
172808daf0c2Sdrahn     shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
172908daf0c2Sdrahn  roundAndPack:
173008daf0c2Sdrahn     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
173108daf0c2Sdrahn 
173208daf0c2Sdrahn }
173308daf0c2Sdrahn 
173408daf0c2Sdrahn /*
173508daf0c2Sdrahn -------------------------------------------------------------------------------
173608daf0c2Sdrahn Returns the result of subtracting the absolute values of the double-
173708daf0c2Sdrahn precision floating-point values `a' and `b'.  If `zSign' is 1, the
173808daf0c2Sdrahn difference is negated before being returned.  `zSign' is ignored if the
173908daf0c2Sdrahn result is a NaN.  The subtraction is performed according to the IEC/IEEE
174008daf0c2Sdrahn Standard for Binary Floating-Point Arithmetic.
174108daf0c2Sdrahn -------------------------------------------------------------------------------
174208daf0c2Sdrahn */
subFloat64Sigs(float64 a,float64 b,flag zSign)174308daf0c2Sdrahn static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
174408daf0c2Sdrahn {
174508daf0c2Sdrahn     int16 aExp, bExp, zExp;
174608daf0c2Sdrahn     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
174708daf0c2Sdrahn     int16 expDiff;
174808daf0c2Sdrahn 
174908daf0c2Sdrahn     aSig1 = extractFloat64Frac1( a );
175008daf0c2Sdrahn     aSig0 = extractFloat64Frac0( a );
175108daf0c2Sdrahn     aExp = extractFloat64Exp( a );
175208daf0c2Sdrahn     bSig1 = extractFloat64Frac1( b );
175308daf0c2Sdrahn     bSig0 = extractFloat64Frac0( b );
175408daf0c2Sdrahn     bExp = extractFloat64Exp( b );
175508daf0c2Sdrahn     expDiff = aExp - bExp;
175608daf0c2Sdrahn     shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );
175708daf0c2Sdrahn     shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
175808daf0c2Sdrahn     if ( 0 < expDiff ) goto aExpBigger;
175908daf0c2Sdrahn     if ( expDiff < 0 ) goto bExpBigger;
176008daf0c2Sdrahn     if ( aExp == 0x7FF ) {
176108daf0c2Sdrahn         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
176208daf0c2Sdrahn             return propagateFloat64NaN( a, b );
176308daf0c2Sdrahn         }
176408daf0c2Sdrahn         float_raise( float_flag_invalid );
176508daf0c2Sdrahn         return float64_default_nan;
176608daf0c2Sdrahn     }
176708daf0c2Sdrahn     if ( aExp == 0 ) {
176808daf0c2Sdrahn         aExp = 1;
176908daf0c2Sdrahn         bExp = 1;
177008daf0c2Sdrahn     }
177108daf0c2Sdrahn     if ( bSig0 < aSig0 ) goto aBigger;
177208daf0c2Sdrahn     if ( aSig0 < bSig0 ) goto bBigger;
177308daf0c2Sdrahn     if ( bSig1 < aSig1 ) goto aBigger;
177408daf0c2Sdrahn     if ( aSig1 < bSig1 ) goto bBigger;
177508daf0c2Sdrahn     return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
177608daf0c2Sdrahn  bExpBigger:
177708daf0c2Sdrahn     if ( bExp == 0x7FF ) {
177808daf0c2Sdrahn         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
177908daf0c2Sdrahn         return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
178008daf0c2Sdrahn     }
178108daf0c2Sdrahn     if ( aExp == 0 ) {
178208daf0c2Sdrahn         ++expDiff;
178308daf0c2Sdrahn     }
178408daf0c2Sdrahn     else {
178508daf0c2Sdrahn         aSig0 |= 0x40000000;
178608daf0c2Sdrahn     }
178708daf0c2Sdrahn     shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
178808daf0c2Sdrahn     bSig0 |= 0x40000000;
178908daf0c2Sdrahn  bBigger:
179008daf0c2Sdrahn     sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
179108daf0c2Sdrahn     zExp = bExp;
179208daf0c2Sdrahn     zSign ^= 1;
179308daf0c2Sdrahn     goto normalizeRoundAndPack;
179408daf0c2Sdrahn  aExpBigger:
179508daf0c2Sdrahn     if ( aExp == 0x7FF ) {
179608daf0c2Sdrahn         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
179708daf0c2Sdrahn         return a;
179808daf0c2Sdrahn     }
179908daf0c2Sdrahn     if ( bExp == 0 ) {
180008daf0c2Sdrahn         --expDiff;
180108daf0c2Sdrahn     }
180208daf0c2Sdrahn     else {
180308daf0c2Sdrahn         bSig0 |= 0x40000000;
180408daf0c2Sdrahn     }
180508daf0c2Sdrahn     shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
180608daf0c2Sdrahn     aSig0 |= 0x40000000;
180708daf0c2Sdrahn  aBigger:
180808daf0c2Sdrahn     sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
180908daf0c2Sdrahn     zExp = aExp;
181008daf0c2Sdrahn  normalizeRoundAndPack:
181108daf0c2Sdrahn     --zExp;
181208daf0c2Sdrahn     return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
181308daf0c2Sdrahn 
181408daf0c2Sdrahn }
181508daf0c2Sdrahn 
181608daf0c2Sdrahn /*
181708daf0c2Sdrahn -------------------------------------------------------------------------------
181808daf0c2Sdrahn Returns the result of adding the double-precision floating-point values `a'
181908daf0c2Sdrahn and `b'.  The operation is performed according to the IEC/IEEE Standard for
182008daf0c2Sdrahn Binary Floating-Point Arithmetic.
182108daf0c2Sdrahn -------------------------------------------------------------------------------
182208daf0c2Sdrahn */
float64_add(float64 a,float64 b)182308daf0c2Sdrahn float64 float64_add( float64 a, float64 b )
182408daf0c2Sdrahn {
182508daf0c2Sdrahn     flag aSign, bSign;
182608daf0c2Sdrahn 
182708daf0c2Sdrahn     aSign = extractFloat64Sign( a );
182808daf0c2Sdrahn     bSign = extractFloat64Sign( b );
182908daf0c2Sdrahn     if ( aSign == bSign ) {
183008daf0c2Sdrahn         return addFloat64Sigs( a, b, aSign );
183108daf0c2Sdrahn     }
183208daf0c2Sdrahn     else {
183308daf0c2Sdrahn         return subFloat64Sigs( a, b, aSign );
183408daf0c2Sdrahn     }
183508daf0c2Sdrahn 
183608daf0c2Sdrahn }
183708daf0c2Sdrahn 
183808daf0c2Sdrahn /*
183908daf0c2Sdrahn -------------------------------------------------------------------------------
184008daf0c2Sdrahn Returns the result of subtracting the double-precision floating-point values
184108daf0c2Sdrahn `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
184208daf0c2Sdrahn for Binary Floating-Point Arithmetic.
184308daf0c2Sdrahn -------------------------------------------------------------------------------
184408daf0c2Sdrahn */
float64_sub(float64 a,float64 b)184508daf0c2Sdrahn float64 float64_sub( float64 a, float64 b )
184608daf0c2Sdrahn {
184708daf0c2Sdrahn     flag aSign, bSign;
184808daf0c2Sdrahn 
184908daf0c2Sdrahn     aSign = extractFloat64Sign( a );
185008daf0c2Sdrahn     bSign = extractFloat64Sign( b );
185108daf0c2Sdrahn     if ( aSign == bSign ) {
185208daf0c2Sdrahn         return subFloat64Sigs( a, b, aSign );
185308daf0c2Sdrahn     }
185408daf0c2Sdrahn     else {
185508daf0c2Sdrahn         return addFloat64Sigs( a, b, aSign );
185608daf0c2Sdrahn     }
185708daf0c2Sdrahn 
185808daf0c2Sdrahn }
185908daf0c2Sdrahn 
186008daf0c2Sdrahn /*
186108daf0c2Sdrahn -------------------------------------------------------------------------------
186208daf0c2Sdrahn Returns the result of multiplying the double-precision floating-point values
186308daf0c2Sdrahn `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
186408daf0c2Sdrahn for Binary Floating-Point Arithmetic.
186508daf0c2Sdrahn -------------------------------------------------------------------------------
186608daf0c2Sdrahn */
float64_mul(float64 a,float64 b)186708daf0c2Sdrahn float64 float64_mul( float64 a, float64 b )
186808daf0c2Sdrahn {
186908daf0c2Sdrahn     flag aSign, bSign, zSign;
187008daf0c2Sdrahn     int16 aExp, bExp, zExp;
187108daf0c2Sdrahn     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
187208daf0c2Sdrahn 
187308daf0c2Sdrahn     aSig1 = extractFloat64Frac1( a );
187408daf0c2Sdrahn     aSig0 = extractFloat64Frac0( a );
187508daf0c2Sdrahn     aExp = extractFloat64Exp( a );
187608daf0c2Sdrahn     aSign = extractFloat64Sign( a );
187708daf0c2Sdrahn     bSig1 = extractFloat64Frac1( b );
187808daf0c2Sdrahn     bSig0 = extractFloat64Frac0( b );
187908daf0c2Sdrahn     bExp = extractFloat64Exp( b );
188008daf0c2Sdrahn     bSign = extractFloat64Sign( b );
188108daf0c2Sdrahn     zSign = aSign ^ bSign;
188208daf0c2Sdrahn     if ( aExp == 0x7FF ) {
188308daf0c2Sdrahn         if (    ( aSig0 | aSig1 )
188408daf0c2Sdrahn              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
188508daf0c2Sdrahn             return propagateFloat64NaN( a, b );
188608daf0c2Sdrahn         }
188708daf0c2Sdrahn         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
188808daf0c2Sdrahn         return packFloat64( zSign, 0x7FF, 0, 0 );
188908daf0c2Sdrahn     }
189008daf0c2Sdrahn     if ( bExp == 0x7FF ) {
189108daf0c2Sdrahn         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
189208daf0c2Sdrahn         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
189308daf0c2Sdrahn  invalid:
189408daf0c2Sdrahn             float_raise( float_flag_invalid );
189508daf0c2Sdrahn             return float64_default_nan;
189608daf0c2Sdrahn         }
189708daf0c2Sdrahn         return packFloat64( zSign, 0x7FF, 0, 0 );
189808daf0c2Sdrahn     }
189908daf0c2Sdrahn     if ( aExp == 0 ) {
190008daf0c2Sdrahn         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
190108daf0c2Sdrahn         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
190208daf0c2Sdrahn     }
190308daf0c2Sdrahn     if ( bExp == 0 ) {
190408daf0c2Sdrahn         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
190508daf0c2Sdrahn         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
190608daf0c2Sdrahn     }
190708daf0c2Sdrahn     zExp = aExp + bExp - 0x400;
190808daf0c2Sdrahn     aSig0 |= 0x00100000;
190908daf0c2Sdrahn     shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );
191008daf0c2Sdrahn     mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
191108daf0c2Sdrahn     add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
191208daf0c2Sdrahn     zSig2 |= ( zSig3 != 0 );
191308daf0c2Sdrahn     if ( 0x00200000 <= zSig0 ) {
191408daf0c2Sdrahn         shift64ExtraRightJamming(
191508daf0c2Sdrahn             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
191608daf0c2Sdrahn         ++zExp;
191708daf0c2Sdrahn     }
191808daf0c2Sdrahn     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
191908daf0c2Sdrahn 
192008daf0c2Sdrahn }
192108daf0c2Sdrahn 
192208daf0c2Sdrahn /*
192308daf0c2Sdrahn -------------------------------------------------------------------------------
192408daf0c2Sdrahn Returns the result of dividing the double-precision floating-point value `a'
192508daf0c2Sdrahn by the corresponding value `b'.  The operation is performed according to the
192608daf0c2Sdrahn IEC/IEEE Standard for Binary Floating-Point Arithmetic.
192708daf0c2Sdrahn -------------------------------------------------------------------------------
192808daf0c2Sdrahn */
float64_div(float64 a,float64 b)192908daf0c2Sdrahn float64 float64_div( float64 a, float64 b )
193008daf0c2Sdrahn {
193108daf0c2Sdrahn     flag aSign, bSign, zSign;
193208daf0c2Sdrahn     int16 aExp, bExp, zExp;
193308daf0c2Sdrahn     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
193408daf0c2Sdrahn     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
193508daf0c2Sdrahn 
193608daf0c2Sdrahn     aSig1 = extractFloat64Frac1( a );
193708daf0c2Sdrahn     aSig0 = extractFloat64Frac0( a );
193808daf0c2Sdrahn     aExp = extractFloat64Exp( a );
193908daf0c2Sdrahn     aSign = extractFloat64Sign( a );
194008daf0c2Sdrahn     bSig1 = extractFloat64Frac1( b );
194108daf0c2Sdrahn     bSig0 = extractFloat64Frac0( b );
194208daf0c2Sdrahn     bExp = extractFloat64Exp( b );
194308daf0c2Sdrahn     bSign = extractFloat64Sign( b );
194408daf0c2Sdrahn     zSign = aSign ^ bSign;
194508daf0c2Sdrahn     if ( aExp == 0x7FF ) {
194608daf0c2Sdrahn         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
194708daf0c2Sdrahn         if ( bExp == 0x7FF ) {
194808daf0c2Sdrahn             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
194908daf0c2Sdrahn             goto invalid;
195008daf0c2Sdrahn         }
195108daf0c2Sdrahn         return packFloat64( zSign, 0x7FF, 0, 0 );
195208daf0c2Sdrahn     }
195308daf0c2Sdrahn     if ( bExp == 0x7FF ) {
195408daf0c2Sdrahn         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
195508daf0c2Sdrahn         return packFloat64( zSign, 0, 0, 0 );
195608daf0c2Sdrahn     }
195708daf0c2Sdrahn     if ( bExp == 0 ) {
195808daf0c2Sdrahn         if ( ( bSig0 | bSig1 ) == 0 ) {
195908daf0c2Sdrahn             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
196008daf0c2Sdrahn  invalid:
196108daf0c2Sdrahn                 float_raise( float_flag_invalid );
196208daf0c2Sdrahn                 return float64_default_nan;
196308daf0c2Sdrahn             }
196408daf0c2Sdrahn             float_raise( float_flag_divbyzero );
196508daf0c2Sdrahn             return packFloat64( zSign, 0x7FF, 0, 0 );
196608daf0c2Sdrahn         }
196708daf0c2Sdrahn         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
196808daf0c2Sdrahn     }
196908daf0c2Sdrahn     if ( aExp == 0 ) {
197008daf0c2Sdrahn         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
197108daf0c2Sdrahn         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
197208daf0c2Sdrahn     }
197308daf0c2Sdrahn     zExp = aExp - bExp + 0x3FD;
197408daf0c2Sdrahn     shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );
197508daf0c2Sdrahn     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
197608daf0c2Sdrahn     if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {
197708daf0c2Sdrahn         shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
197808daf0c2Sdrahn         ++zExp;
197908daf0c2Sdrahn     }
198008daf0c2Sdrahn     zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );
198108daf0c2Sdrahn     mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
198208daf0c2Sdrahn     sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
198308daf0c2Sdrahn     while ( (sbits32) rem0 < 0 ) {
198408daf0c2Sdrahn         --zSig0;
198508daf0c2Sdrahn         add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
198608daf0c2Sdrahn     }
198708daf0c2Sdrahn     zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );
198808daf0c2Sdrahn     if ( ( zSig1 & 0x3FF ) <= 4 ) {
198908daf0c2Sdrahn         mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
199008daf0c2Sdrahn         sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
199108daf0c2Sdrahn         while ( (sbits32) rem1 < 0 ) {
199208daf0c2Sdrahn             --zSig1;
199308daf0c2Sdrahn             add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
199408daf0c2Sdrahn         }
199508daf0c2Sdrahn         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
199608daf0c2Sdrahn     }
199708daf0c2Sdrahn     shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
199808daf0c2Sdrahn     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
199908daf0c2Sdrahn 
200008daf0c2Sdrahn }
200108daf0c2Sdrahn 
200208daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
200308daf0c2Sdrahn /*
200408daf0c2Sdrahn -------------------------------------------------------------------------------
200508daf0c2Sdrahn Returns the remainder of the double-precision floating-point value `a'
200608daf0c2Sdrahn with respect to the corresponding value `b'.  The operation is performed
200708daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
200808daf0c2Sdrahn -------------------------------------------------------------------------------
200908daf0c2Sdrahn */
float64_rem(float64 a,float64 b)201008daf0c2Sdrahn float64 float64_rem( float64 a, float64 b )
201108daf0c2Sdrahn {
201208daf0c2Sdrahn     flag aSign, bSign, zSign;
201308daf0c2Sdrahn     int16 aExp, bExp, expDiff;
201408daf0c2Sdrahn     bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
201508daf0c2Sdrahn     bits32 allZero, alternateASig0, alternateASig1, sigMean1;
201608daf0c2Sdrahn     sbits32 sigMean0;
201708daf0c2Sdrahn     float64 z;
201808daf0c2Sdrahn 
201908daf0c2Sdrahn     aSig1 = extractFloat64Frac1( a );
202008daf0c2Sdrahn     aSig0 = extractFloat64Frac0( a );
202108daf0c2Sdrahn     aExp = extractFloat64Exp( a );
202208daf0c2Sdrahn     aSign = extractFloat64Sign( a );
202308daf0c2Sdrahn     bSig1 = extractFloat64Frac1( b );
202408daf0c2Sdrahn     bSig0 = extractFloat64Frac0( b );
202508daf0c2Sdrahn     bExp = extractFloat64Exp( b );
202608daf0c2Sdrahn     bSign = extractFloat64Sign( b );
202708daf0c2Sdrahn     if ( aExp == 0x7FF ) {
202808daf0c2Sdrahn         if (    ( aSig0 | aSig1 )
202908daf0c2Sdrahn              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
203008daf0c2Sdrahn             return propagateFloat64NaN( a, b );
203108daf0c2Sdrahn         }
203208daf0c2Sdrahn         goto invalid;
203308daf0c2Sdrahn     }
203408daf0c2Sdrahn     if ( bExp == 0x7FF ) {
203508daf0c2Sdrahn         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
203608daf0c2Sdrahn         return a;
203708daf0c2Sdrahn     }
203808daf0c2Sdrahn     if ( bExp == 0 ) {
203908daf0c2Sdrahn         if ( ( bSig0 | bSig1 ) == 0 ) {
204008daf0c2Sdrahn  invalid:
204108daf0c2Sdrahn             float_raise( float_flag_invalid );
204208daf0c2Sdrahn             return float64_default_nan;
204308daf0c2Sdrahn         }
204408daf0c2Sdrahn         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
204508daf0c2Sdrahn     }
204608daf0c2Sdrahn     if ( aExp == 0 ) {
204708daf0c2Sdrahn         if ( ( aSig0 | aSig1 ) == 0 ) return a;
204808daf0c2Sdrahn         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
204908daf0c2Sdrahn     }
205008daf0c2Sdrahn     expDiff = aExp - bExp;
205108daf0c2Sdrahn     if ( expDiff < -1 ) return a;
205208daf0c2Sdrahn     shortShift64Left(
205308daf0c2Sdrahn         aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );
205408daf0c2Sdrahn     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
205508daf0c2Sdrahn     q = le64( bSig0, bSig1, aSig0, aSig1 );
205608daf0c2Sdrahn     if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
205708daf0c2Sdrahn     expDiff -= 32;
205808daf0c2Sdrahn     while ( 0 < expDiff ) {
205908daf0c2Sdrahn         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
206008daf0c2Sdrahn         q = ( 4 < q ) ? q - 4 : 0;
206108daf0c2Sdrahn         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
206208daf0c2Sdrahn         shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );
206308daf0c2Sdrahn         shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );
206408daf0c2Sdrahn         sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );
206508daf0c2Sdrahn         expDiff -= 29;
206608daf0c2Sdrahn     }
206708daf0c2Sdrahn     if ( -32 < expDiff ) {
206808daf0c2Sdrahn         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
206908daf0c2Sdrahn         q = ( 4 < q ) ? q - 4 : 0;
207008daf0c2Sdrahn         q >>= - expDiff;
207108daf0c2Sdrahn         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
207208daf0c2Sdrahn         expDiff += 24;
207308daf0c2Sdrahn         if ( expDiff < 0 ) {
207408daf0c2Sdrahn             shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
207508daf0c2Sdrahn         }
207608daf0c2Sdrahn         else {
207708daf0c2Sdrahn             shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
207808daf0c2Sdrahn         }
207908daf0c2Sdrahn         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
208008daf0c2Sdrahn         sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
208108daf0c2Sdrahn     }
208208daf0c2Sdrahn     else {
208308daf0c2Sdrahn         shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );
208408daf0c2Sdrahn         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
208508daf0c2Sdrahn     }
208608daf0c2Sdrahn     do {
208708daf0c2Sdrahn         alternateASig0 = aSig0;
208808daf0c2Sdrahn         alternateASig1 = aSig1;
208908daf0c2Sdrahn         ++q;
209008daf0c2Sdrahn         sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
209108daf0c2Sdrahn     } while ( 0 <= (sbits32) aSig0 );
209208daf0c2Sdrahn     add64(
209308daf0c2Sdrahn         aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
209408daf0c2Sdrahn     if (    ( sigMean0 < 0 )
209508daf0c2Sdrahn          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
209608daf0c2Sdrahn         aSig0 = alternateASig0;
209708daf0c2Sdrahn         aSig1 = alternateASig1;
209808daf0c2Sdrahn     }
209908daf0c2Sdrahn     zSign = ( (sbits32) aSig0 < 0 );
210008daf0c2Sdrahn     if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
210108daf0c2Sdrahn     return
210208daf0c2Sdrahn         normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
210308daf0c2Sdrahn 
210408daf0c2Sdrahn }
210508daf0c2Sdrahn #endif
210608daf0c2Sdrahn 
210708daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
210808daf0c2Sdrahn /*
210908daf0c2Sdrahn -------------------------------------------------------------------------------
211008daf0c2Sdrahn Returns the square root of the double-precision floating-point value `a'.
211108daf0c2Sdrahn The operation is performed according to the IEC/IEEE Standard for Binary
211208daf0c2Sdrahn Floating-Point Arithmetic.
211308daf0c2Sdrahn -------------------------------------------------------------------------------
211408daf0c2Sdrahn */
float64_sqrt(float64 a)211508daf0c2Sdrahn float64 float64_sqrt( float64 a )
211608daf0c2Sdrahn {
211708daf0c2Sdrahn     flag aSign;
211808daf0c2Sdrahn     int16 aExp, zExp;
211908daf0c2Sdrahn     bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
212008daf0c2Sdrahn     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
212108daf0c2Sdrahn     float64 z;
212208daf0c2Sdrahn 
212308daf0c2Sdrahn     aSig1 = extractFloat64Frac1( a );
212408daf0c2Sdrahn     aSig0 = extractFloat64Frac0( a );
212508daf0c2Sdrahn     aExp = extractFloat64Exp( a );
212608daf0c2Sdrahn     aSign = extractFloat64Sign( a );
212708daf0c2Sdrahn     if ( aExp == 0x7FF ) {
212808daf0c2Sdrahn         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
212908daf0c2Sdrahn         if ( ! aSign ) return a;
213008daf0c2Sdrahn         goto invalid;
213108daf0c2Sdrahn     }
213208daf0c2Sdrahn     if ( aSign ) {
213308daf0c2Sdrahn         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
213408daf0c2Sdrahn  invalid:
213508daf0c2Sdrahn         float_raise( float_flag_invalid );
213608daf0c2Sdrahn         return float64_default_nan;
213708daf0c2Sdrahn     }
213808daf0c2Sdrahn     if ( aExp == 0 ) {
213908daf0c2Sdrahn         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
214008daf0c2Sdrahn         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
214108daf0c2Sdrahn     }
214208daf0c2Sdrahn     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
214308daf0c2Sdrahn     aSig0 |= 0x00100000;
214408daf0c2Sdrahn     shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
214508daf0c2Sdrahn     zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
214608daf0c2Sdrahn     if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;
214708daf0c2Sdrahn     doubleZSig0 = zSig0 + zSig0;
214808daf0c2Sdrahn     shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
214908daf0c2Sdrahn     mul32To64( zSig0, zSig0, &term0, &term1 );
215008daf0c2Sdrahn     sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
215108daf0c2Sdrahn     while ( (sbits32) rem0 < 0 ) {
215208daf0c2Sdrahn         --zSig0;
215308daf0c2Sdrahn         doubleZSig0 -= 2;
215408daf0c2Sdrahn         add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
215508daf0c2Sdrahn     }
215608daf0c2Sdrahn     zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
215708daf0c2Sdrahn     if ( ( zSig1 & 0x1FF ) <= 5 ) {
215808daf0c2Sdrahn         if ( zSig1 == 0 ) zSig1 = 1;
215908daf0c2Sdrahn         mul32To64( doubleZSig0, zSig1, &term1, &term2 );
216008daf0c2Sdrahn         sub64( rem1, 0, term1, term2, &rem1, &rem2 );
216108daf0c2Sdrahn         mul32To64( zSig1, zSig1, &term2, &term3 );
216208daf0c2Sdrahn         sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
216308daf0c2Sdrahn         while ( (sbits32) rem1 < 0 ) {
216408daf0c2Sdrahn             --zSig1;
216508daf0c2Sdrahn             shortShift64Left( 0, zSig1, 1, &term2, &term3 );
216608daf0c2Sdrahn             term3 |= 1;
216708daf0c2Sdrahn             term2 |= doubleZSig0;
216808daf0c2Sdrahn             add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
216908daf0c2Sdrahn         }
217008daf0c2Sdrahn         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
217108daf0c2Sdrahn     }
217208daf0c2Sdrahn     shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
217308daf0c2Sdrahn     return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
217408daf0c2Sdrahn 
217508daf0c2Sdrahn }
217608daf0c2Sdrahn #endif
217708daf0c2Sdrahn 
217808daf0c2Sdrahn /*
217908daf0c2Sdrahn -------------------------------------------------------------------------------
218008daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is equal to
218108daf0c2Sdrahn the corresponding value `b', and 0 otherwise.  The comparison is performed
218208daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
218308daf0c2Sdrahn -------------------------------------------------------------------------------
218408daf0c2Sdrahn */
float64_eq(float64 a,float64 b)218508daf0c2Sdrahn flag float64_eq( float64 a, float64 b )
218608daf0c2Sdrahn {
218708daf0c2Sdrahn 
218808daf0c2Sdrahn     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
218908daf0c2Sdrahn               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
219008daf0c2Sdrahn          || (    ( extractFloat64Exp( b ) == 0x7FF )
219108daf0c2Sdrahn               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
219208daf0c2Sdrahn        ) {
219308daf0c2Sdrahn         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
219408daf0c2Sdrahn             float_raise( float_flag_invalid );
219508daf0c2Sdrahn         }
219608daf0c2Sdrahn         return 0;
219708daf0c2Sdrahn     }
219808daf0c2Sdrahn     return ( a == b ) ||
219908daf0c2Sdrahn 	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
220008daf0c2Sdrahn 
220108daf0c2Sdrahn }
220208daf0c2Sdrahn 
220308daf0c2Sdrahn /*
220408daf0c2Sdrahn -------------------------------------------------------------------------------
220508daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is less than
220608daf0c2Sdrahn or equal to the corresponding value `b', and 0 otherwise.  The comparison
220708daf0c2Sdrahn is performed according to the IEC/IEEE Standard for Binary Floating-Point
220808daf0c2Sdrahn Arithmetic.
220908daf0c2Sdrahn -------------------------------------------------------------------------------
221008daf0c2Sdrahn */
float64_le(float64 a,float64 b)221108daf0c2Sdrahn flag float64_le( float64 a, float64 b )
221208daf0c2Sdrahn {
221308daf0c2Sdrahn     flag aSign, bSign;
221408daf0c2Sdrahn 
221508daf0c2Sdrahn     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
221608daf0c2Sdrahn               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
221708daf0c2Sdrahn          || (    ( extractFloat64Exp( b ) == 0x7FF )
221808daf0c2Sdrahn               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
221908daf0c2Sdrahn        ) {
222008daf0c2Sdrahn         float_raise( float_flag_invalid );
222108daf0c2Sdrahn         return 0;
222208daf0c2Sdrahn     }
222308daf0c2Sdrahn     aSign = extractFloat64Sign( a );
222408daf0c2Sdrahn     bSign = extractFloat64Sign( b );
222508daf0c2Sdrahn     if ( aSign != bSign )
222608daf0c2Sdrahn 	return aSign ||
222708daf0c2Sdrahn 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
222808daf0c2Sdrahn 	      0 );
222908daf0c2Sdrahn     return ( a == b ) ||
223008daf0c2Sdrahn 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
223108daf0c2Sdrahn }
223208daf0c2Sdrahn 
223308daf0c2Sdrahn /*
223408daf0c2Sdrahn -------------------------------------------------------------------------------
223508daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is less than
223608daf0c2Sdrahn the corresponding value `b', and 0 otherwise.  The comparison is performed
223708daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
223808daf0c2Sdrahn -------------------------------------------------------------------------------
223908daf0c2Sdrahn */
float64_lt(float64 a,float64 b)224008daf0c2Sdrahn flag float64_lt( float64 a, float64 b )
224108daf0c2Sdrahn {
224208daf0c2Sdrahn     flag aSign, bSign;
224308daf0c2Sdrahn 
224408daf0c2Sdrahn     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
224508daf0c2Sdrahn               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
224608daf0c2Sdrahn          || (    ( extractFloat64Exp( b ) == 0x7FF )
224708daf0c2Sdrahn               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
224808daf0c2Sdrahn        ) {
224908daf0c2Sdrahn         float_raise( float_flag_invalid );
225008daf0c2Sdrahn         return 0;
225108daf0c2Sdrahn     }
225208daf0c2Sdrahn     aSign = extractFloat64Sign( a );
225308daf0c2Sdrahn     bSign = extractFloat64Sign( b );
225408daf0c2Sdrahn     if ( aSign != bSign )
225508daf0c2Sdrahn 	return aSign &&
225608daf0c2Sdrahn 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
225708daf0c2Sdrahn 	      0 );
225808daf0c2Sdrahn     return ( a != b ) &&
225908daf0c2Sdrahn 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
226008daf0c2Sdrahn 
226108daf0c2Sdrahn }
226208daf0c2Sdrahn 
226308daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
226408daf0c2Sdrahn /*
226508daf0c2Sdrahn -------------------------------------------------------------------------------
226608daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is equal to
226708daf0c2Sdrahn the corresponding value `b', and 0 otherwise.  The invalid exception is
226808daf0c2Sdrahn raised if either operand is a NaN.  Otherwise, the comparison is performed
226908daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
227008daf0c2Sdrahn -------------------------------------------------------------------------------
227108daf0c2Sdrahn */
float64_eq_signaling(float64 a,float64 b)227208daf0c2Sdrahn flag float64_eq_signaling( float64 a, float64 b )
227308daf0c2Sdrahn {
227408daf0c2Sdrahn 
227508daf0c2Sdrahn     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
227608daf0c2Sdrahn               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
227708daf0c2Sdrahn          || (    ( extractFloat64Exp( b ) == 0x7FF )
227808daf0c2Sdrahn               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
227908daf0c2Sdrahn        ) {
228008daf0c2Sdrahn         float_raise( float_flag_invalid );
228108daf0c2Sdrahn         return 0;
228208daf0c2Sdrahn     }
228308daf0c2Sdrahn     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
228408daf0c2Sdrahn 
228508daf0c2Sdrahn }
228608daf0c2Sdrahn 
228708daf0c2Sdrahn /*
228808daf0c2Sdrahn -------------------------------------------------------------------------------
228908daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is less than or
229008daf0c2Sdrahn equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
229108daf0c2Sdrahn cause an exception.  Otherwise, the comparison is performed according to the
229208daf0c2Sdrahn IEC/IEEE Standard for Binary Floating-Point Arithmetic.
229308daf0c2Sdrahn -------------------------------------------------------------------------------
229408daf0c2Sdrahn */
float64_le_quiet(float64 a,float64 b)229508daf0c2Sdrahn flag float64_le_quiet( float64 a, float64 b )
229608daf0c2Sdrahn {
229708daf0c2Sdrahn     flag aSign, bSign;
229808daf0c2Sdrahn 
229908daf0c2Sdrahn     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
230008daf0c2Sdrahn               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
230108daf0c2Sdrahn          || (    ( extractFloat64Exp( b ) == 0x7FF )
230208daf0c2Sdrahn               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
230308daf0c2Sdrahn        ) {
230408daf0c2Sdrahn         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
230508daf0c2Sdrahn             float_raise( float_flag_invalid );
230608daf0c2Sdrahn         }
230708daf0c2Sdrahn         return 0;
230808daf0c2Sdrahn     }
230908daf0c2Sdrahn     aSign = extractFloat64Sign( a );
231008daf0c2Sdrahn     bSign = extractFloat64Sign( b );
231108daf0c2Sdrahn     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
231208daf0c2Sdrahn     return ( a == b ) || ( aSign ^ ( a < b ) );
231308daf0c2Sdrahn 
231408daf0c2Sdrahn }
231508daf0c2Sdrahn 
231608daf0c2Sdrahn /*
231708daf0c2Sdrahn -------------------------------------------------------------------------------
231808daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is less than
231908daf0c2Sdrahn the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
232008daf0c2Sdrahn exception.  Otherwise, the comparison is performed according to the IEC/IEEE
232108daf0c2Sdrahn Standard for Binary Floating-Point Arithmetic.
232208daf0c2Sdrahn -------------------------------------------------------------------------------
232308daf0c2Sdrahn */
float64_lt_quiet(float64 a,float64 b)232408daf0c2Sdrahn flag float64_lt_quiet( float64 a, float64 b )
232508daf0c2Sdrahn {
232608daf0c2Sdrahn     flag aSign, bSign;
232708daf0c2Sdrahn 
232808daf0c2Sdrahn     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
232908daf0c2Sdrahn               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
233008daf0c2Sdrahn          || (    ( extractFloat64Exp( b ) == 0x7FF )
233108daf0c2Sdrahn               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
233208daf0c2Sdrahn        ) {
233308daf0c2Sdrahn         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
233408daf0c2Sdrahn             float_raise( float_flag_invalid );
233508daf0c2Sdrahn         }
233608daf0c2Sdrahn         return 0;
233708daf0c2Sdrahn     }
233808daf0c2Sdrahn     aSign = extractFloat64Sign( a );
233908daf0c2Sdrahn     bSign = extractFloat64Sign( b );
234008daf0c2Sdrahn     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
234108daf0c2Sdrahn     return ( a != b ) && ( aSign ^ ( a < b ) );
234208daf0c2Sdrahn 
234308daf0c2Sdrahn }
234408daf0c2Sdrahn 
234508daf0c2Sdrahn #endif
2346