xref: /netbsd/lib/libc/softfloat/bits32/softfloat.c (revision 9c7bce54)
1*9c7bce54Smatt /* $NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $ */
2936b7f4cSbjh21 
3936b7f4cSbjh21 /*
4936b7f4cSbjh21  * This version hacked for use with gcc -msoft-float by bjh21.
5936b7f4cSbjh21  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
6936b7f4cSbjh21  *  itself).
7936b7f4cSbjh21  */
8936b7f4cSbjh21 
9936b7f4cSbjh21 /*
10936b7f4cSbjh21  * Things you may want to define:
11936b7f4cSbjh21  *
12936b7f4cSbjh21  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
13936b7f4cSbjh21  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
14936b7f4cSbjh21  *   properly renamed.
15936b7f4cSbjh21  */
16936b7f4cSbjh21 
17936b7f4cSbjh21 /*
18936b7f4cSbjh21  * This differs from the standard bits32/softfloat.c in that float64
19936b7f4cSbjh21  * is defined to be a 64-bit integer rather than a structure.  The
20936b7f4cSbjh21  * structure is float64s, with translation between the two going via
21936b7f4cSbjh21  * float64u.
22936b7f4cSbjh21  */
23936b7f4cSbjh21 
24936b7f4cSbjh21 /*
25936b7f4cSbjh21 ===============================================================================
26936b7f4cSbjh21 
27936b7f4cSbjh21 This C source file is part of the SoftFloat IEC/IEEE Floating-Point
28936b7f4cSbjh21 Arithmetic Package, Release 2a.
29936b7f4cSbjh21 
30936b7f4cSbjh21 Written by John R. Hauser.  This work was made possible in part by the
31936b7f4cSbjh21 International Computer Science Institute, located at Suite 600, 1947 Center
32936b7f4cSbjh21 Street, Berkeley, California 94704.  Funding was partially provided by the
33936b7f4cSbjh21 National Science Foundation under grant MIP-9311980.  The original version
34936b7f4cSbjh21 of this code was written as part of a project to build a fixed-point vector
35936b7f4cSbjh21 processor in collaboration with the University of California at Berkeley,
36936b7f4cSbjh21 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
37936b7f4cSbjh21 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
38936b7f4cSbjh21 arithmetic/SoftFloat.html'.
39936b7f4cSbjh21 
40936b7f4cSbjh21 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
41936b7f4cSbjh21 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
42936b7f4cSbjh21 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
43936b7f4cSbjh21 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
44936b7f4cSbjh21 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
45936b7f4cSbjh21 
46936b7f4cSbjh21 Derivative works are acceptable, even for commercial purposes, so long as
47936b7f4cSbjh21 (1) they include prominent notice that the work is derivative, and (2) they
48936b7f4cSbjh21 include prominent notice akin to these four paragraphs for those parts of
49936b7f4cSbjh21 this code that are retained.
50936b7f4cSbjh21 
51936b7f4cSbjh21 ===============================================================================
52936b7f4cSbjh21 */
53936b7f4cSbjh21 
54936b7f4cSbjh21 #include <sys/cdefs.h>
55936b7f4cSbjh21 #if defined(LIBC_SCCS) && !defined(lint)
56*9c7bce54Smatt __RCSID("$NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $");
57936b7f4cSbjh21 #endif /* LIBC_SCCS and not lint */
58936b7f4cSbjh21 
59936b7f4cSbjh21 #ifdef SOFTFLOAT_FOR_GCC
60936b7f4cSbjh21 #include "softfloat-for-gcc.h"
61936b7f4cSbjh21 #endif
62936b7f4cSbjh21 
63936b7f4cSbjh21 #include "milieu.h"
64936b7f4cSbjh21 #include "softfloat.h"
65936b7f4cSbjh21 
66936b7f4cSbjh21 /*
67936b7f4cSbjh21  * Conversions between floats as stored in memory and floats as
68936b7f4cSbjh21  * SoftFloat uses them
69936b7f4cSbjh21  */
70936b7f4cSbjh21 #ifndef FLOAT64_DEMANGLE
71936b7f4cSbjh21 #define FLOAT64_DEMANGLE(a)	(a)
72936b7f4cSbjh21 #endif
73936b7f4cSbjh21 #ifndef FLOAT64_MANGLE
74936b7f4cSbjh21 #define FLOAT64_MANGLE(a)	(a)
75936b7f4cSbjh21 #endif
76936b7f4cSbjh21 
77936b7f4cSbjh21 /*
78936b7f4cSbjh21 -------------------------------------------------------------------------------
79936b7f4cSbjh21 Floating-point rounding mode and exception flags.
80936b7f4cSbjh21 -------------------------------------------------------------------------------
81936b7f4cSbjh21 */
82*9c7bce54Smatt #ifndef set_float_rounding_mode
83936b7f4cSbjh21 fp_rnd float_rounding_mode = float_round_nearest_even;
84936b7f4cSbjh21 fp_except float_exception_flags = 0;
85*9c7bce54Smatt #endif
86*9c7bce54Smatt #ifndef set_float_exception_inexact_flag
87*9c7bce54Smatt #define set_float_exception_inexact_flag() \
88*9c7bce54Smatt 	((void)(float_exception_flags |= float_flag_inexact))
89*9c7bce54Smatt #endif
90936b7f4cSbjh21 
91936b7f4cSbjh21 /*
92936b7f4cSbjh21 -------------------------------------------------------------------------------
93936b7f4cSbjh21 Primitive arithmetic functions, including multi-word arithmetic, and
94936b7f4cSbjh21 division and square root approximations.  (Can be specialized to target if
95936b7f4cSbjh21 desired.)
96936b7f4cSbjh21 -------------------------------------------------------------------------------
97936b7f4cSbjh21 */
98936b7f4cSbjh21 #include "softfloat-macros"
99936b7f4cSbjh21 
100936b7f4cSbjh21 /*
101936b7f4cSbjh21 -------------------------------------------------------------------------------
102936b7f4cSbjh21 Functions and definitions to determine:  (1) whether tininess for underflow
103936b7f4cSbjh21 is detected before or after rounding by default, (2) what (if anything)
104936b7f4cSbjh21 happens when exceptions are raised, (3) how signaling NaNs are distinguished
105936b7f4cSbjh21 from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
106936b7f4cSbjh21 are propagated from function inputs to output.  These details are target-
107936b7f4cSbjh21 specific.
108936b7f4cSbjh21 -------------------------------------------------------------------------------
109936b7f4cSbjh21 */
110936b7f4cSbjh21 #include "softfloat-specialize"
111936b7f4cSbjh21 
112936b7f4cSbjh21 /*
113936b7f4cSbjh21 -------------------------------------------------------------------------------
114936b7f4cSbjh21 Returns the fraction bits of the single-precision floating-point value `a'.
115936b7f4cSbjh21 -------------------------------------------------------------------------------
116936b7f4cSbjh21 */
extractFloat32Frac(float32 a)117936b7f4cSbjh21 INLINE bits32 extractFloat32Frac( float32 a )
118936b7f4cSbjh21 {
119936b7f4cSbjh21 
120936b7f4cSbjh21     return a & 0x007FFFFF;
121936b7f4cSbjh21 
122936b7f4cSbjh21 }
123936b7f4cSbjh21 
124936b7f4cSbjh21 /*
125936b7f4cSbjh21 -------------------------------------------------------------------------------
126936b7f4cSbjh21 Returns the exponent bits of the single-precision floating-point value `a'.
127936b7f4cSbjh21 -------------------------------------------------------------------------------
128936b7f4cSbjh21 */
extractFloat32Exp(float32 a)129936b7f4cSbjh21 INLINE int16 extractFloat32Exp( float32 a )
130936b7f4cSbjh21 {
131936b7f4cSbjh21 
132936b7f4cSbjh21     return ( a>>23 ) & 0xFF;
133936b7f4cSbjh21 
134936b7f4cSbjh21 }
135936b7f4cSbjh21 
136936b7f4cSbjh21 /*
137936b7f4cSbjh21 -------------------------------------------------------------------------------
138936b7f4cSbjh21 Returns the sign bit of the single-precision floating-point value `a'.
139936b7f4cSbjh21 -------------------------------------------------------------------------------
140936b7f4cSbjh21 */
extractFloat32Sign(float32 a)141936b7f4cSbjh21 INLINE flag extractFloat32Sign( float32 a )
142936b7f4cSbjh21 {
143936b7f4cSbjh21 
144936b7f4cSbjh21     return a>>31;
145936b7f4cSbjh21 
146936b7f4cSbjh21 }
147936b7f4cSbjh21 
148936b7f4cSbjh21 /*
149936b7f4cSbjh21 -------------------------------------------------------------------------------
150936b7f4cSbjh21 Normalizes the subnormal single-precision floating-point value represented
151936b7f4cSbjh21 by the denormalized significand `aSig'.  The normalized exponent and
152936b7f4cSbjh21 significand are stored at the locations pointed to by `zExpPtr' and
153936b7f4cSbjh21 `zSigPtr', respectively.
154936b7f4cSbjh21 -------------------------------------------------------------------------------
155936b7f4cSbjh21 */
156936b7f4cSbjh21 static void
normalizeFloat32Subnormal(bits32 aSig,int16 * zExpPtr,bits32 * zSigPtr)157936b7f4cSbjh21  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
158936b7f4cSbjh21 {
159936b7f4cSbjh21     int8 shiftCount;
160936b7f4cSbjh21 
161936b7f4cSbjh21     shiftCount = countLeadingZeros32( aSig ) - 8;
162936b7f4cSbjh21     *zSigPtr = aSig<<shiftCount;
163936b7f4cSbjh21     *zExpPtr = 1 - shiftCount;
164936b7f4cSbjh21 
165936b7f4cSbjh21 }
166936b7f4cSbjh21 
167936b7f4cSbjh21 /*
168936b7f4cSbjh21 -------------------------------------------------------------------------------
169936b7f4cSbjh21 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
170936b7f4cSbjh21 single-precision floating-point value, returning the result.  After being
171936b7f4cSbjh21 shifted into the proper positions, the three fields are simply added
172936b7f4cSbjh21 together to form the result.  This means that any integer portion of `zSig'
173936b7f4cSbjh21 will be added into the exponent.  Since a properly normalized significand
174936b7f4cSbjh21 will have an integer portion equal to 1, the `zExp' input should be 1 less
175936b7f4cSbjh21 than the desired result exponent whenever `zSig' is a complete, normalized
176936b7f4cSbjh21 significand.
177936b7f4cSbjh21 -------------------------------------------------------------------------------
178936b7f4cSbjh21 */
packFloat32(flag zSign,int16 zExp,bits32 zSig)179936b7f4cSbjh21 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
180936b7f4cSbjh21 {
181936b7f4cSbjh21 
182936b7f4cSbjh21     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
183936b7f4cSbjh21 
184936b7f4cSbjh21 }
185936b7f4cSbjh21 
186936b7f4cSbjh21 /*
187936b7f4cSbjh21 -------------------------------------------------------------------------------
188936b7f4cSbjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
189936b7f4cSbjh21 and significand `zSig', and returns the proper single-precision floating-
190936b7f4cSbjh21 point value corresponding to the abstract input.  Ordinarily, the abstract
191936b7f4cSbjh21 value is simply rounded and packed into the single-precision format, with
192936b7f4cSbjh21 the inexact exception raised if the abstract input cannot be represented
193936b7f4cSbjh21 exactly.  However, if the abstract value is too large, the overflow and
194936b7f4cSbjh21 inexact exceptions are raised and an infinity or maximal finite value is
195936b7f4cSbjh21 returned.  If the abstract value is too small, the input value is rounded to
196936b7f4cSbjh21 a subnormal number, and the underflow and inexact exceptions are raised if
197936b7f4cSbjh21 the abstract input cannot be represented exactly as a subnormal single-
198936b7f4cSbjh21 precision floating-point number.
199936b7f4cSbjh21     The input significand `zSig' has its binary point between bits 30
200936b7f4cSbjh21 and 29, which is 7 bits to the left of the usual location.  This shifted
201936b7f4cSbjh21 significand must be normalized or smaller.  If `zSig' is not normalized,
202936b7f4cSbjh21 `zExp' must be 0; in that case, the result returned is a subnormal number,
203936b7f4cSbjh21 and it must not require rounding.  In the usual case that `zSig' is
204936b7f4cSbjh21 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
205936b7f4cSbjh21 The handling of underflow and overflow follows the IEC/IEEE Standard for
206936b7f4cSbjh21 Binary Floating-Point Arithmetic.
207936b7f4cSbjh21 -------------------------------------------------------------------------------
208936b7f4cSbjh21 */
roundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)209936b7f4cSbjh21 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
210936b7f4cSbjh21 {
211936b7f4cSbjh21     int8 roundingMode;
212936b7f4cSbjh21     flag roundNearestEven;
213936b7f4cSbjh21     int8 roundIncrement, roundBits;
214936b7f4cSbjh21     flag isTiny;
215936b7f4cSbjh21 
216936b7f4cSbjh21     roundingMode = float_rounding_mode;
217936b7f4cSbjh21     roundNearestEven = roundingMode == float_round_nearest_even;
218936b7f4cSbjh21     roundIncrement = 0x40;
219936b7f4cSbjh21     if ( ! roundNearestEven ) {
220936b7f4cSbjh21         if ( roundingMode == float_round_to_zero ) {
221936b7f4cSbjh21             roundIncrement = 0;
222936b7f4cSbjh21         }
223936b7f4cSbjh21         else {
224936b7f4cSbjh21             roundIncrement = 0x7F;
225936b7f4cSbjh21             if ( zSign ) {
226936b7f4cSbjh21                 if ( roundingMode == float_round_up ) roundIncrement = 0;
227936b7f4cSbjh21             }
228936b7f4cSbjh21             else {
229936b7f4cSbjh21                 if ( roundingMode == float_round_down ) roundIncrement = 0;
230936b7f4cSbjh21             }
231936b7f4cSbjh21         }
232936b7f4cSbjh21     }
233936b7f4cSbjh21     roundBits = zSig & 0x7F;
234936b7f4cSbjh21     if ( 0xFD <= (bits16) zExp ) {
235936b7f4cSbjh21         if (    ( 0xFD < zExp )
236936b7f4cSbjh21              || (    ( zExp == 0xFD )
237936b7f4cSbjh21                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
238936b7f4cSbjh21            ) {
239936b7f4cSbjh21             float_raise( float_flag_overflow | float_flag_inexact );
240936b7f4cSbjh21             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
241936b7f4cSbjh21         }
242936b7f4cSbjh21         if ( zExp < 0 ) {
243936b7f4cSbjh21             isTiny =
244936b7f4cSbjh21                    ( float_detect_tininess == float_tininess_before_rounding )
245936b7f4cSbjh21                 || ( zExp < -1 )
2462cec0187Schristos                 || ( zSig + roundIncrement < (uint32)0x80000000 );
247936b7f4cSbjh21             shift32RightJamming( zSig, - zExp, &zSig );
248936b7f4cSbjh21             zExp = 0;
249936b7f4cSbjh21             roundBits = zSig & 0x7F;
250936b7f4cSbjh21             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
251936b7f4cSbjh21         }
252936b7f4cSbjh21     }
253*9c7bce54Smatt     if ( roundBits ) set_float_exception_inexact_flag();
254936b7f4cSbjh21     zSig = ( zSig + roundIncrement )>>7;
255936b7f4cSbjh21     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
256936b7f4cSbjh21     if ( zSig == 0 ) zExp = 0;
257936b7f4cSbjh21     return packFloat32( zSign, zExp, zSig );
258936b7f4cSbjh21 
259936b7f4cSbjh21 }
260936b7f4cSbjh21 
261936b7f4cSbjh21 /*
262936b7f4cSbjh21 -------------------------------------------------------------------------------
263936b7f4cSbjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
264936b7f4cSbjh21 and significand `zSig', and returns the proper single-precision floating-
265936b7f4cSbjh21 point value corresponding to the abstract input.  This routine is just like
266936b7f4cSbjh21 `roundAndPackFloat32' except that `zSig' does not have to be normalized.
267936b7f4cSbjh21 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
268936b7f4cSbjh21 floating-point exponent.
269936b7f4cSbjh21 -------------------------------------------------------------------------------
270936b7f4cSbjh21 */
271936b7f4cSbjh21 static float32
normalizeRoundAndPackFloat32(flag zSign,int16 zExp,bits32 zSig)272936b7f4cSbjh21  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
273936b7f4cSbjh21 {
274936b7f4cSbjh21     int8 shiftCount;
275936b7f4cSbjh21 
276936b7f4cSbjh21     shiftCount = countLeadingZeros32( zSig ) - 1;
277936b7f4cSbjh21     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
278936b7f4cSbjh21 
279936b7f4cSbjh21 }
280936b7f4cSbjh21 
281936b7f4cSbjh21 /*
282936b7f4cSbjh21 -------------------------------------------------------------------------------
283936b7f4cSbjh21 Returns the least-significant 32 fraction bits of the double-precision
284936b7f4cSbjh21 floating-point value `a'.
285936b7f4cSbjh21 -------------------------------------------------------------------------------
286936b7f4cSbjh21 */
extractFloat64Frac1(float64 a)287936b7f4cSbjh21 INLINE bits32 extractFloat64Frac1( float64 a )
288936b7f4cSbjh21 {
289936b7f4cSbjh21 
2902cec0187Schristos     return (bits32)(FLOAT64_DEMANGLE(a) & LIT64(0x00000000FFFFFFFF));
291936b7f4cSbjh21 
292936b7f4cSbjh21 }
293936b7f4cSbjh21 
294936b7f4cSbjh21 /*
295936b7f4cSbjh21 -------------------------------------------------------------------------------
296936b7f4cSbjh21 Returns the most-significant 20 fraction bits of the double-precision
297936b7f4cSbjh21 floating-point value `a'.
298936b7f4cSbjh21 -------------------------------------------------------------------------------
299936b7f4cSbjh21 */
extractFloat64Frac0(float64 a)300936b7f4cSbjh21 INLINE bits32 extractFloat64Frac0( float64 a )
301936b7f4cSbjh21 {
302936b7f4cSbjh21 
3032cec0187Schristos     return (bits32)((FLOAT64_DEMANGLE(a) >> 32) & 0x000FFFFF);
304936b7f4cSbjh21 
305936b7f4cSbjh21 }
306936b7f4cSbjh21 
307936b7f4cSbjh21 /*
308936b7f4cSbjh21 -------------------------------------------------------------------------------
309936b7f4cSbjh21 Returns the exponent bits of the double-precision floating-point value `a'.
310936b7f4cSbjh21 -------------------------------------------------------------------------------
311936b7f4cSbjh21 */
extractFloat64Exp(float64 a)312936b7f4cSbjh21 INLINE int16 extractFloat64Exp( float64 a )
313936b7f4cSbjh21 {
314936b7f4cSbjh21 
3152cec0187Schristos     return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF);
316936b7f4cSbjh21 
317936b7f4cSbjh21 }
318936b7f4cSbjh21 
319936b7f4cSbjh21 /*
320936b7f4cSbjh21 -------------------------------------------------------------------------------
321936b7f4cSbjh21 Returns the sign bit of the double-precision floating-point value `a'.
322936b7f4cSbjh21 -------------------------------------------------------------------------------
323936b7f4cSbjh21 */
extractFloat64Sign(float64 a)324936b7f4cSbjh21 INLINE flag extractFloat64Sign( float64 a )
325936b7f4cSbjh21 {
326936b7f4cSbjh21 
3272cec0187Schristos     return (flag)(FLOAT64_DEMANGLE(a) >> 63);
328936b7f4cSbjh21 
329936b7f4cSbjh21 }
330936b7f4cSbjh21 
331936b7f4cSbjh21 /*
332936b7f4cSbjh21 -------------------------------------------------------------------------------
333936b7f4cSbjh21 Normalizes the subnormal double-precision floating-point value represented
334936b7f4cSbjh21 by the denormalized significand formed by the concatenation of `aSig0' and
335936b7f4cSbjh21 `aSig1'.  The normalized exponent is stored at the location pointed to by
336936b7f4cSbjh21 `zExpPtr'.  The most significant 21 bits of the normalized significand are
337936b7f4cSbjh21 stored at the location pointed to by `zSig0Ptr', and the least significant
338936b7f4cSbjh21 32 bits of the normalized significand are stored at the location pointed to
339936b7f4cSbjh21 by `zSig1Ptr'.
340936b7f4cSbjh21 -------------------------------------------------------------------------------
341936b7f4cSbjh21 */
342936b7f4cSbjh21 static void
normalizeFloat64Subnormal(bits32 aSig0,bits32 aSig1,int16 * zExpPtr,bits32 * zSig0Ptr,bits32 * zSig1Ptr)343936b7f4cSbjh21  normalizeFloat64Subnormal(
344936b7f4cSbjh21      bits32 aSig0,
345936b7f4cSbjh21      bits32 aSig1,
346936b7f4cSbjh21      int16 *zExpPtr,
347936b7f4cSbjh21      bits32 *zSig0Ptr,
348936b7f4cSbjh21      bits32 *zSig1Ptr
349936b7f4cSbjh21  )
350936b7f4cSbjh21 {
351936b7f4cSbjh21     int8 shiftCount;
352936b7f4cSbjh21 
353936b7f4cSbjh21     if ( aSig0 == 0 ) {
354936b7f4cSbjh21         shiftCount = countLeadingZeros32( aSig1 ) - 11;
355936b7f4cSbjh21         if ( shiftCount < 0 ) {
356936b7f4cSbjh21             *zSig0Ptr = aSig1>>( - shiftCount );
357936b7f4cSbjh21             *zSig1Ptr = aSig1<<( shiftCount & 31 );
358936b7f4cSbjh21         }
359936b7f4cSbjh21         else {
360936b7f4cSbjh21             *zSig0Ptr = aSig1<<shiftCount;
361936b7f4cSbjh21             *zSig1Ptr = 0;
362936b7f4cSbjh21         }
363936b7f4cSbjh21         *zExpPtr = - shiftCount - 31;
364936b7f4cSbjh21     }
365936b7f4cSbjh21     else {
366936b7f4cSbjh21         shiftCount = countLeadingZeros32( aSig0 ) - 11;
367936b7f4cSbjh21         shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
368936b7f4cSbjh21         *zExpPtr = 1 - shiftCount;
369936b7f4cSbjh21     }
370936b7f4cSbjh21 
371936b7f4cSbjh21 }
372936b7f4cSbjh21 
373936b7f4cSbjh21 /*
374936b7f4cSbjh21 -------------------------------------------------------------------------------
375936b7f4cSbjh21 Packs the sign `zSign', the exponent `zExp', and the significand formed by
376936b7f4cSbjh21 the concatenation of `zSig0' and `zSig1' into a double-precision floating-
377936b7f4cSbjh21 point value, returning the result.  After being shifted into the proper
378936b7f4cSbjh21 positions, the three fields `zSign', `zExp', and `zSig0' are simply added
379936b7f4cSbjh21 together to form the most significant 32 bits of the result.  This means
380936b7f4cSbjh21 that any integer portion of `zSig0' will be added into the exponent.  Since
381936b7f4cSbjh21 a properly normalized significand will have an integer portion equal to 1,
382936b7f4cSbjh21 the `zExp' input should be 1 less than the desired result exponent whenever
383936b7f4cSbjh21 `zSig0' and `zSig1' concatenated form a complete, normalized significand.
384936b7f4cSbjh21 -------------------------------------------------------------------------------
385936b7f4cSbjh21 */
386936b7f4cSbjh21 INLINE float64
packFloat64(flag zSign,int16 zExp,bits32 zSig0,bits32 zSig1)387936b7f4cSbjh21  packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
388936b7f4cSbjh21 {
389936b7f4cSbjh21 
390936b7f4cSbjh21     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
391936b7f4cSbjh21 			   ( ( (bits64) zExp )<<52 ) +
392936b7f4cSbjh21 			   ( ( (bits64) zSig0 )<<32 ) + zSig1 );
393936b7f4cSbjh21 
394936b7f4cSbjh21 
395936b7f4cSbjh21 }
396936b7f4cSbjh21 
397936b7f4cSbjh21 /*
398936b7f4cSbjh21 -------------------------------------------------------------------------------
399936b7f4cSbjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
400936b7f4cSbjh21 and extended significand formed by the concatenation of `zSig0', `zSig1',
401936b7f4cSbjh21 and `zSig2', and returns the proper double-precision floating-point value
402936b7f4cSbjh21 corresponding to the abstract input.  Ordinarily, the abstract value is
403936b7f4cSbjh21 simply rounded and packed into the double-precision format, with the inexact
404936b7f4cSbjh21 exception raised if the abstract input cannot be represented exactly.
405936b7f4cSbjh21 However, if the abstract value is too large, the overflow and inexact
406936b7f4cSbjh21 exceptions are raised and an infinity or maximal finite value is returned.
407936b7f4cSbjh21 If the abstract value is too small, the input value is rounded to a
408936b7f4cSbjh21 subnormal number, and the underflow and inexact exceptions are raised if the
409936b7f4cSbjh21 abstract input cannot be represented exactly as a subnormal double-precision
410936b7f4cSbjh21 floating-point number.
411936b7f4cSbjh21     The input significand must be normalized or smaller.  If the input
412936b7f4cSbjh21 significand is not normalized, `zExp' must be 0; in that case, the result
413936b7f4cSbjh21 returned is a subnormal number, and it must not require rounding.  In the
414936b7f4cSbjh21 usual case that the input significand is normalized, `zExp' must be 1 less
415936b7f4cSbjh21 than the ``true'' floating-point exponent.  The handling of underflow and
416936b7f4cSbjh21 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
417936b7f4cSbjh21 -------------------------------------------------------------------------------
418936b7f4cSbjh21 */
419936b7f4cSbjh21 static float64
roundAndPackFloat64(flag zSign,int16 zExp,bits32 zSig0,bits32 zSig1,bits32 zSig2)420936b7f4cSbjh21  roundAndPackFloat64(
421936b7f4cSbjh21      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )
422936b7f4cSbjh21 {
423936b7f4cSbjh21     int8 roundingMode;
424936b7f4cSbjh21     flag roundNearestEven, increment, isTiny;
425936b7f4cSbjh21 
426936b7f4cSbjh21     roundingMode = float_rounding_mode;
427936b7f4cSbjh21     roundNearestEven = ( roundingMode == float_round_nearest_even );
428936b7f4cSbjh21     increment = ( (sbits32) zSig2 < 0 );
429936b7f4cSbjh21     if ( ! roundNearestEven ) {
430936b7f4cSbjh21         if ( roundingMode == float_round_to_zero ) {
431936b7f4cSbjh21             increment = 0;
432936b7f4cSbjh21         }
433936b7f4cSbjh21         else {
434936b7f4cSbjh21             if ( zSign ) {
435936b7f4cSbjh21                 increment = ( roundingMode == float_round_down ) && zSig2;
436936b7f4cSbjh21             }
437936b7f4cSbjh21             else {
438936b7f4cSbjh21                 increment = ( roundingMode == float_round_up ) && zSig2;
439936b7f4cSbjh21             }
440936b7f4cSbjh21         }
441936b7f4cSbjh21     }
442936b7f4cSbjh21     if ( 0x7FD <= (bits16) zExp ) {
443936b7f4cSbjh21         if (    ( 0x7FD < zExp )
444936b7f4cSbjh21              || (    ( zExp == 0x7FD )
445936b7f4cSbjh21                   && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
446936b7f4cSbjh21                   && increment
447936b7f4cSbjh21                 )
448936b7f4cSbjh21            ) {
449936b7f4cSbjh21             float_raise( float_flag_overflow | float_flag_inexact );
450936b7f4cSbjh21             if (    ( roundingMode == float_round_to_zero )
451936b7f4cSbjh21                  || ( zSign && ( roundingMode == float_round_up ) )
452936b7f4cSbjh21                  || ( ! zSign && ( roundingMode == float_round_down ) )
453936b7f4cSbjh21                ) {
454936b7f4cSbjh21                 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );
455936b7f4cSbjh21             }
456936b7f4cSbjh21             return packFloat64( zSign, 0x7FF, 0, 0 );
457936b7f4cSbjh21         }
458936b7f4cSbjh21         if ( zExp < 0 ) {
459936b7f4cSbjh21             isTiny =
460936b7f4cSbjh21                    ( float_detect_tininess == float_tininess_before_rounding )
461936b7f4cSbjh21                 || ( zExp < -1 )
462936b7f4cSbjh21                 || ! increment
463936b7f4cSbjh21                 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
464936b7f4cSbjh21             shift64ExtraRightJamming(
465936b7f4cSbjh21                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
466936b7f4cSbjh21             zExp = 0;
467936b7f4cSbjh21             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
468936b7f4cSbjh21             if ( roundNearestEven ) {
469936b7f4cSbjh21                 increment = ( (sbits32) zSig2 < 0 );
470936b7f4cSbjh21             }
471936b7f4cSbjh21             else {
472936b7f4cSbjh21                 if ( zSign ) {
473936b7f4cSbjh21                     increment = ( roundingMode == float_round_down ) && zSig2;
474936b7f4cSbjh21                 }
475936b7f4cSbjh21                 else {
476936b7f4cSbjh21                     increment = ( roundingMode == float_round_up ) && zSig2;
477936b7f4cSbjh21                 }
478936b7f4cSbjh21             }
479936b7f4cSbjh21         }
480936b7f4cSbjh21     }
481*9c7bce54Smatt     if ( zSig2 ) set_float_exception_inexact_flag();
482936b7f4cSbjh21     if ( increment ) {
483936b7f4cSbjh21         add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
484936b7f4cSbjh21         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
485936b7f4cSbjh21     }
486936b7f4cSbjh21     else {
487936b7f4cSbjh21         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
488936b7f4cSbjh21     }
489936b7f4cSbjh21     return packFloat64( zSign, zExp, zSig0, zSig1 );
490936b7f4cSbjh21 
491936b7f4cSbjh21 }
492936b7f4cSbjh21 
493936b7f4cSbjh21 /*
494936b7f4cSbjh21 -------------------------------------------------------------------------------
495936b7f4cSbjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
496936b7f4cSbjh21 and significand formed by the concatenation of `zSig0' and `zSig1', and
497936b7f4cSbjh21 returns the proper double-precision floating-point value corresponding
498936b7f4cSbjh21 to the abstract input.  This routine is just like `roundAndPackFloat64'
499936b7f4cSbjh21 except that the input significand has fewer bits and does not have to be
500936b7f4cSbjh21 normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
501936b7f4cSbjh21 point exponent.
502936b7f4cSbjh21 -------------------------------------------------------------------------------
503936b7f4cSbjh21 */
504936b7f4cSbjh21 static float64
normalizeRoundAndPackFloat64(flag zSign,int16 zExp,bits32 zSig0,bits32 zSig1)505936b7f4cSbjh21  normalizeRoundAndPackFloat64(
506936b7f4cSbjh21      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
507936b7f4cSbjh21 {
508936b7f4cSbjh21     int8 shiftCount;
509936b7f4cSbjh21     bits32 zSig2;
510936b7f4cSbjh21 
511936b7f4cSbjh21     if ( zSig0 == 0 ) {
512936b7f4cSbjh21         zSig0 = zSig1;
513936b7f4cSbjh21         zSig1 = 0;
514936b7f4cSbjh21         zExp -= 32;
515936b7f4cSbjh21     }
516936b7f4cSbjh21     shiftCount = countLeadingZeros32( zSig0 ) - 11;
517936b7f4cSbjh21     if ( 0 <= shiftCount ) {
518936b7f4cSbjh21         zSig2 = 0;
519936b7f4cSbjh21         shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
520936b7f4cSbjh21     }
521936b7f4cSbjh21     else {
522936b7f4cSbjh21         shift64ExtraRightJamming(
523936b7f4cSbjh21             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
524936b7f4cSbjh21     }
525936b7f4cSbjh21     zExp -= shiftCount;
526936b7f4cSbjh21     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
527936b7f4cSbjh21 
528936b7f4cSbjh21 }
529936b7f4cSbjh21 
530936b7f4cSbjh21 /*
531936b7f4cSbjh21 -------------------------------------------------------------------------------
532936b7f4cSbjh21 Returns the result of converting the 32-bit two's complement integer `a' to
533936b7f4cSbjh21 the single-precision floating-point format.  The conversion is performed
534936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
535936b7f4cSbjh21 -------------------------------------------------------------------------------
536936b7f4cSbjh21 */
int32_to_float32(int32 a)537936b7f4cSbjh21 float32 int32_to_float32( int32 a )
538936b7f4cSbjh21 {
539936b7f4cSbjh21     flag zSign;
540936b7f4cSbjh21 
541936b7f4cSbjh21     if ( a == 0 ) return 0;
542936b7f4cSbjh21     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
543936b7f4cSbjh21     zSign = ( a < 0 );
5442cec0187Schristos     return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a));
545936b7f4cSbjh21 
546936b7f4cSbjh21 }
547936b7f4cSbjh21 
548936b7f4cSbjh21 /*
549936b7f4cSbjh21 -------------------------------------------------------------------------------
550936b7f4cSbjh21 Returns the result of converting the 32-bit two's complement integer `a' to
551936b7f4cSbjh21 the double-precision floating-point format.  The conversion is performed
552936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
553936b7f4cSbjh21 -------------------------------------------------------------------------------
554936b7f4cSbjh21 */
int32_to_float64(int32 a)555936b7f4cSbjh21 float64 int32_to_float64( int32 a )
556936b7f4cSbjh21 {
557936b7f4cSbjh21     flag zSign;
558936b7f4cSbjh21     bits32 absA;
559936b7f4cSbjh21     int8 shiftCount;
560936b7f4cSbjh21     bits32 zSig0, zSig1;
561936b7f4cSbjh21 
562936b7f4cSbjh21     if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );
563936b7f4cSbjh21     zSign = ( a < 0 );
564936b7f4cSbjh21     absA = zSign ? - a : a;
565936b7f4cSbjh21     shiftCount = countLeadingZeros32( absA ) - 11;
566936b7f4cSbjh21     if ( 0 <= shiftCount ) {
567936b7f4cSbjh21         zSig0 = absA<<shiftCount;
568936b7f4cSbjh21         zSig1 = 0;
569936b7f4cSbjh21     }
570936b7f4cSbjh21     else {
571936b7f4cSbjh21         shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );
572936b7f4cSbjh21     }
573936b7f4cSbjh21     return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );
574936b7f4cSbjh21 
575936b7f4cSbjh21 }
576936b7f4cSbjh21 
577936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC
578936b7f4cSbjh21 /*
579936b7f4cSbjh21 -------------------------------------------------------------------------------
580936b7f4cSbjh21 Returns the result of converting the single-precision floating-point value
581936b7f4cSbjh21 `a' to the 32-bit two's complement integer format.  The conversion is
582936b7f4cSbjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
583936b7f4cSbjh21 Arithmetic---which means in particular that the conversion is rounded
584936b7f4cSbjh21 according to the current rounding mode.  If `a' is a NaN, the largest
585936b7f4cSbjh21 positive integer is returned.  Otherwise, if the conversion overflows, the
586936b7f4cSbjh21 largest integer with the same sign as `a' is returned.
587936b7f4cSbjh21 -------------------------------------------------------------------------------
588936b7f4cSbjh21 */
float32_to_int32(float32 a)589936b7f4cSbjh21 int32 float32_to_int32( float32 a )
590936b7f4cSbjh21 {
591936b7f4cSbjh21     flag aSign;
592936b7f4cSbjh21     int16 aExp, shiftCount;
593936b7f4cSbjh21     bits32 aSig, aSigExtra;
594936b7f4cSbjh21     int32 z;
595936b7f4cSbjh21     int8 roundingMode;
596936b7f4cSbjh21 
597936b7f4cSbjh21     aSig = extractFloat32Frac( a );
598936b7f4cSbjh21     aExp = extractFloat32Exp( a );
599936b7f4cSbjh21     aSign = extractFloat32Sign( a );
600936b7f4cSbjh21     shiftCount = aExp - 0x96;
601936b7f4cSbjh21     if ( 0 <= shiftCount ) {
602936b7f4cSbjh21         if ( 0x9E <= aExp ) {
603936b7f4cSbjh21             if ( a != 0xCF000000 ) {
604936b7f4cSbjh21                 float_raise( float_flag_invalid );
605936b7f4cSbjh21                 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
606936b7f4cSbjh21                     return 0x7FFFFFFF;
607936b7f4cSbjh21                 }
608936b7f4cSbjh21             }
609936b7f4cSbjh21             return (sbits32) 0x80000000;
610936b7f4cSbjh21         }
611936b7f4cSbjh21         z = ( aSig | 0x00800000 )<<shiftCount;
612936b7f4cSbjh21         if ( aSign ) z = - z;
613936b7f4cSbjh21     }
614936b7f4cSbjh21     else {
615936b7f4cSbjh21         if ( aExp < 0x7E ) {
616936b7f4cSbjh21             aSigExtra = aExp | aSig;
617936b7f4cSbjh21             z = 0;
618936b7f4cSbjh21         }
619936b7f4cSbjh21         else {
620936b7f4cSbjh21             aSig |= 0x00800000;
621936b7f4cSbjh21             aSigExtra = aSig<<( shiftCount & 31 );
622936b7f4cSbjh21             z = aSig>>( - shiftCount );
623936b7f4cSbjh21         }
624*9c7bce54Smatt         if ( aSigExtra ) set_float_exception_inexact_flag();
625936b7f4cSbjh21         roundingMode = float_rounding_mode;
626936b7f4cSbjh21         if ( roundingMode == float_round_nearest_even ) {
627936b7f4cSbjh21             if ( (sbits32) aSigExtra < 0 ) {
628936b7f4cSbjh21                 ++z;
629936b7f4cSbjh21                 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;
630936b7f4cSbjh21             }
631936b7f4cSbjh21             if ( aSign ) z = - z;
632936b7f4cSbjh21         }
633936b7f4cSbjh21         else {
634936b7f4cSbjh21             aSigExtra = ( aSigExtra != 0 );
635936b7f4cSbjh21             if ( aSign ) {
636936b7f4cSbjh21                 z += ( roundingMode == float_round_down ) & aSigExtra;
637936b7f4cSbjh21                 z = - z;
638936b7f4cSbjh21             }
639936b7f4cSbjh21             else {
640936b7f4cSbjh21                 z += ( roundingMode == float_round_up ) & aSigExtra;
641936b7f4cSbjh21             }
642936b7f4cSbjh21         }
643936b7f4cSbjh21     }
644936b7f4cSbjh21     return z;
645936b7f4cSbjh21 
646936b7f4cSbjh21 }
647936b7f4cSbjh21 #endif
648936b7f4cSbjh21 
649936b7f4cSbjh21 /*
650936b7f4cSbjh21 -------------------------------------------------------------------------------
651936b7f4cSbjh21 Returns the result of converting the single-precision floating-point value
652936b7f4cSbjh21 `a' to the 32-bit two's complement integer format.  The conversion is
653936b7f4cSbjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
654936b7f4cSbjh21 Arithmetic, except that the conversion is always rounded toward zero.
655936b7f4cSbjh21 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
656936b7f4cSbjh21 the conversion overflows, the largest integer with the same sign as `a' is
657936b7f4cSbjh21 returned.
658936b7f4cSbjh21 -------------------------------------------------------------------------------
659936b7f4cSbjh21 */
float32_to_int32_round_to_zero(float32 a)660936b7f4cSbjh21 int32 float32_to_int32_round_to_zero( float32 a )
661936b7f4cSbjh21 {
662936b7f4cSbjh21     flag aSign;
663936b7f4cSbjh21     int16 aExp, shiftCount;
664936b7f4cSbjh21     bits32 aSig;
665936b7f4cSbjh21     int32 z;
666936b7f4cSbjh21 
667936b7f4cSbjh21     aSig = extractFloat32Frac( a );
668936b7f4cSbjh21     aExp = extractFloat32Exp( a );
669936b7f4cSbjh21     aSign = extractFloat32Sign( a );
670936b7f4cSbjh21     shiftCount = aExp - 0x9E;
671936b7f4cSbjh21     if ( 0 <= shiftCount ) {
672936b7f4cSbjh21         if ( a != 0xCF000000 ) {
673936b7f4cSbjh21             float_raise( float_flag_invalid );
674936b7f4cSbjh21             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
675936b7f4cSbjh21         }
676936b7f4cSbjh21         return (sbits32) 0x80000000;
677936b7f4cSbjh21     }
678936b7f4cSbjh21     else if ( aExp <= 0x7E ) {
679*9c7bce54Smatt         if ( aExp | aSig ) set_float_exception_inexact_flag();
680936b7f4cSbjh21         return 0;
681936b7f4cSbjh21     }
682936b7f4cSbjh21     aSig = ( aSig | 0x00800000 )<<8;
683936b7f4cSbjh21     z = aSig>>( - shiftCount );
684936b7f4cSbjh21     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
685*9c7bce54Smatt         set_float_exception_inexact_flag();
686936b7f4cSbjh21     }
687936b7f4cSbjh21     if ( aSign ) z = - z;
688936b7f4cSbjh21     return z;
689936b7f4cSbjh21 
690936b7f4cSbjh21 }
691936b7f4cSbjh21 
692936b7f4cSbjh21 /*
693936b7f4cSbjh21 -------------------------------------------------------------------------------
694936b7f4cSbjh21 Returns the result of converting the single-precision floating-point value
695936b7f4cSbjh21 `a' to the double-precision floating-point format.  The conversion is
696936b7f4cSbjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
697936b7f4cSbjh21 Arithmetic.
698936b7f4cSbjh21 -------------------------------------------------------------------------------
699936b7f4cSbjh21 */
float32_to_float64(float32 a)700936b7f4cSbjh21 float64 float32_to_float64( float32 a )
701936b7f4cSbjh21 {
702936b7f4cSbjh21     flag aSign;
703936b7f4cSbjh21     int16 aExp;
704936b7f4cSbjh21     bits32 aSig, zSig0, zSig1;
705936b7f4cSbjh21 
706936b7f4cSbjh21     aSig = extractFloat32Frac( a );
707936b7f4cSbjh21     aExp = extractFloat32Exp( a );
708936b7f4cSbjh21     aSign = extractFloat32Sign( a );
709936b7f4cSbjh21     if ( aExp == 0xFF ) {
710936b7f4cSbjh21         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
711936b7f4cSbjh21         return packFloat64( aSign, 0x7FF, 0, 0 );
712936b7f4cSbjh21     }
713936b7f4cSbjh21     if ( aExp == 0 ) {
714936b7f4cSbjh21         if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );
715936b7f4cSbjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
716936b7f4cSbjh21         --aExp;
717936b7f4cSbjh21     }
718936b7f4cSbjh21     shift64Right( aSig, 0, 3, &zSig0, &zSig1 );
719936b7f4cSbjh21     return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );
720936b7f4cSbjh21 
721936b7f4cSbjh21 }
722936b7f4cSbjh21 
723936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC
724936b7f4cSbjh21 /*
725936b7f4cSbjh21 -------------------------------------------------------------------------------
726936b7f4cSbjh21 Rounds the single-precision floating-point value `a' to an integer,
727936b7f4cSbjh21 and returns the result as a single-precision floating-point value.  The
728936b7f4cSbjh21 operation is performed according to the IEC/IEEE Standard for Binary
729936b7f4cSbjh21 Floating-Point Arithmetic.
730936b7f4cSbjh21 -------------------------------------------------------------------------------
731936b7f4cSbjh21 */
float32_round_to_int(float32 a)732936b7f4cSbjh21 float32 float32_round_to_int( float32 a )
733936b7f4cSbjh21 {
734936b7f4cSbjh21     flag aSign;
735936b7f4cSbjh21     int16 aExp;
736936b7f4cSbjh21     bits32 lastBitMask, roundBitsMask;
737936b7f4cSbjh21     int8 roundingMode;
738936b7f4cSbjh21     float32 z;
739936b7f4cSbjh21 
740936b7f4cSbjh21     aExp = extractFloat32Exp( a );
741936b7f4cSbjh21     if ( 0x96 <= aExp ) {
742936b7f4cSbjh21         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
743936b7f4cSbjh21             return propagateFloat32NaN( a, a );
744936b7f4cSbjh21         }
745936b7f4cSbjh21         return a;
746936b7f4cSbjh21     }
747936b7f4cSbjh21     if ( aExp <= 0x7E ) {
748936b7f4cSbjh21         if ( (bits32) ( a<<1 ) == 0 ) return a;
749*9c7bce54Smatt         set_float_exception_inexact_flag();
750936b7f4cSbjh21         aSign = extractFloat32Sign( a );
751936b7f4cSbjh21         switch ( float_rounding_mode ) {
752936b7f4cSbjh21          case float_round_nearest_even:
753936b7f4cSbjh21             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
754936b7f4cSbjh21                 return packFloat32( aSign, 0x7F, 0 );
755936b7f4cSbjh21             }
756936b7f4cSbjh21             break;
757936b7f4cSbjh21 	 case float_round_to_zero:
758936b7f4cSbjh21 	    break;
759936b7f4cSbjh21          case float_round_down:
760936b7f4cSbjh21             return aSign ? 0xBF800000 : 0;
761936b7f4cSbjh21          case float_round_up:
762936b7f4cSbjh21             return aSign ? 0x80000000 : 0x3F800000;
763936b7f4cSbjh21         }
764936b7f4cSbjh21         return packFloat32( aSign, 0, 0 );
765936b7f4cSbjh21     }
766936b7f4cSbjh21     lastBitMask = 1;
767936b7f4cSbjh21     lastBitMask <<= 0x96 - aExp;
768936b7f4cSbjh21     roundBitsMask = lastBitMask - 1;
769936b7f4cSbjh21     z = a;
770936b7f4cSbjh21     roundingMode = float_rounding_mode;
771936b7f4cSbjh21     if ( roundingMode == float_round_nearest_even ) {
772936b7f4cSbjh21         z += lastBitMask>>1;
773936b7f4cSbjh21         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
774936b7f4cSbjh21     }
775936b7f4cSbjh21     else if ( roundingMode != float_round_to_zero ) {
776936b7f4cSbjh21         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
777936b7f4cSbjh21             z += roundBitsMask;
778936b7f4cSbjh21         }
779936b7f4cSbjh21     }
780936b7f4cSbjh21     z &= ~ roundBitsMask;
781*9c7bce54Smatt     if ( z != a ) set_float_exception_inexact_flag();
782936b7f4cSbjh21     return z;
783936b7f4cSbjh21 
784936b7f4cSbjh21 }
785936b7f4cSbjh21 #endif
786936b7f4cSbjh21 
787936b7f4cSbjh21 /*
788936b7f4cSbjh21 -------------------------------------------------------------------------------
789936b7f4cSbjh21 Returns the result of adding the absolute values of the single-precision
790936b7f4cSbjh21 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
791936b7f4cSbjh21 before being returned.  `zSign' is ignored if the result is a NaN.
792936b7f4cSbjh21 The addition is performed according to the IEC/IEEE Standard for Binary
793936b7f4cSbjh21 Floating-Point Arithmetic.
794936b7f4cSbjh21 -------------------------------------------------------------------------------
795936b7f4cSbjh21 */
addFloat32Sigs(float32 a,float32 b,flag zSign)796936b7f4cSbjh21 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
797936b7f4cSbjh21 {
798936b7f4cSbjh21     int16 aExp, bExp, zExp;
799936b7f4cSbjh21     bits32 aSig, bSig, zSig;
800936b7f4cSbjh21     int16 expDiff;
801936b7f4cSbjh21 
802936b7f4cSbjh21     aSig = extractFloat32Frac( a );
803936b7f4cSbjh21     aExp = extractFloat32Exp( a );
804936b7f4cSbjh21     bSig = extractFloat32Frac( b );
805936b7f4cSbjh21     bExp = extractFloat32Exp( b );
806936b7f4cSbjh21     expDiff = aExp - bExp;
807936b7f4cSbjh21     aSig <<= 6;
808936b7f4cSbjh21     bSig <<= 6;
809936b7f4cSbjh21     if ( 0 < expDiff ) {
810936b7f4cSbjh21         if ( aExp == 0xFF ) {
811936b7f4cSbjh21             if ( aSig ) return propagateFloat32NaN( a, b );
812936b7f4cSbjh21             return a;
813936b7f4cSbjh21         }
814936b7f4cSbjh21         if ( bExp == 0 ) {
815936b7f4cSbjh21             --expDiff;
816936b7f4cSbjh21         }
817936b7f4cSbjh21         else {
818936b7f4cSbjh21             bSig |= 0x20000000;
819936b7f4cSbjh21         }
820936b7f4cSbjh21         shift32RightJamming( bSig, expDiff, &bSig );
821936b7f4cSbjh21         zExp = aExp;
822936b7f4cSbjh21     }
823936b7f4cSbjh21     else if ( expDiff < 0 ) {
824936b7f4cSbjh21         if ( bExp == 0xFF ) {
825936b7f4cSbjh21             if ( bSig ) return propagateFloat32NaN( a, b );
826936b7f4cSbjh21             return packFloat32( zSign, 0xFF, 0 );
827936b7f4cSbjh21         }
828936b7f4cSbjh21         if ( aExp == 0 ) {
829936b7f4cSbjh21             ++expDiff;
830936b7f4cSbjh21         }
831936b7f4cSbjh21         else {
832936b7f4cSbjh21             aSig |= 0x20000000;
833936b7f4cSbjh21         }
834936b7f4cSbjh21         shift32RightJamming( aSig, - expDiff, &aSig );
835936b7f4cSbjh21         zExp = bExp;
836936b7f4cSbjh21     }
837936b7f4cSbjh21     else {
838936b7f4cSbjh21         if ( aExp == 0xFF ) {
839936b7f4cSbjh21             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
840936b7f4cSbjh21             return a;
841936b7f4cSbjh21         }
842936b7f4cSbjh21         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
843936b7f4cSbjh21         zSig = 0x40000000 + aSig + bSig;
844936b7f4cSbjh21         zExp = aExp;
845936b7f4cSbjh21         goto roundAndPack;
846936b7f4cSbjh21     }
847936b7f4cSbjh21     aSig |= 0x20000000;
848936b7f4cSbjh21     zSig = ( aSig + bSig )<<1;
849936b7f4cSbjh21     --zExp;
850936b7f4cSbjh21     if ( (sbits32) zSig < 0 ) {
851936b7f4cSbjh21         zSig = aSig + bSig;
852936b7f4cSbjh21         ++zExp;
853936b7f4cSbjh21     }
854936b7f4cSbjh21  roundAndPack:
855936b7f4cSbjh21     return roundAndPackFloat32( zSign, zExp, zSig );
856936b7f4cSbjh21 
857936b7f4cSbjh21 }
858936b7f4cSbjh21 
859936b7f4cSbjh21 /*
860936b7f4cSbjh21 -------------------------------------------------------------------------------
861936b7f4cSbjh21 Returns the result of subtracting the absolute values of the single-
862936b7f4cSbjh21 precision floating-point values `a' and `b'.  If `zSign' is 1, the
863936b7f4cSbjh21 difference is negated before being returned.  `zSign' is ignored if the
864936b7f4cSbjh21 result is a NaN.  The subtraction is performed according to the IEC/IEEE
865936b7f4cSbjh21 Standard for Binary Floating-Point Arithmetic.
866936b7f4cSbjh21 -------------------------------------------------------------------------------
867936b7f4cSbjh21 */
subFloat32Sigs(float32 a,float32 b,flag zSign)868936b7f4cSbjh21 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
869936b7f4cSbjh21 {
870936b7f4cSbjh21     int16 aExp, bExp, zExp;
871936b7f4cSbjh21     bits32 aSig, bSig, zSig;
872936b7f4cSbjh21     int16 expDiff;
873936b7f4cSbjh21 
874936b7f4cSbjh21     aSig = extractFloat32Frac( a );
875936b7f4cSbjh21     aExp = extractFloat32Exp( a );
876936b7f4cSbjh21     bSig = extractFloat32Frac( b );
877936b7f4cSbjh21     bExp = extractFloat32Exp( b );
878936b7f4cSbjh21     expDiff = aExp - bExp;
879936b7f4cSbjh21     aSig <<= 7;
880936b7f4cSbjh21     bSig <<= 7;
881936b7f4cSbjh21     if ( 0 < expDiff ) goto aExpBigger;
882936b7f4cSbjh21     if ( expDiff < 0 ) goto bExpBigger;
883936b7f4cSbjh21     if ( aExp == 0xFF ) {
884936b7f4cSbjh21         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
885936b7f4cSbjh21         float_raise( float_flag_invalid );
886936b7f4cSbjh21         return float32_default_nan;
887936b7f4cSbjh21     }
888936b7f4cSbjh21     if ( aExp == 0 ) {
889936b7f4cSbjh21         aExp = 1;
890936b7f4cSbjh21         bExp = 1;
891936b7f4cSbjh21     }
892936b7f4cSbjh21     if ( bSig < aSig ) goto aBigger;
893936b7f4cSbjh21     if ( aSig < bSig ) goto bBigger;
894936b7f4cSbjh21     return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
895936b7f4cSbjh21  bExpBigger:
896936b7f4cSbjh21     if ( bExp == 0xFF ) {
897936b7f4cSbjh21         if ( bSig ) return propagateFloat32NaN( a, b );
898936b7f4cSbjh21         return packFloat32( zSign ^ 1, 0xFF, 0 );
899936b7f4cSbjh21     }
900936b7f4cSbjh21     if ( aExp == 0 ) {
901936b7f4cSbjh21         ++expDiff;
902936b7f4cSbjh21     }
903936b7f4cSbjh21     else {
904936b7f4cSbjh21         aSig |= 0x40000000;
905936b7f4cSbjh21     }
906936b7f4cSbjh21     shift32RightJamming( aSig, - expDiff, &aSig );
907936b7f4cSbjh21     bSig |= 0x40000000;
908936b7f4cSbjh21  bBigger:
909936b7f4cSbjh21     zSig = bSig - aSig;
910936b7f4cSbjh21     zExp = bExp;
911936b7f4cSbjh21     zSign ^= 1;
912936b7f4cSbjh21     goto normalizeRoundAndPack;
913936b7f4cSbjh21  aExpBigger:
914936b7f4cSbjh21     if ( aExp == 0xFF ) {
915936b7f4cSbjh21         if ( aSig ) return propagateFloat32NaN( a, b );
916936b7f4cSbjh21         return a;
917936b7f4cSbjh21     }
918936b7f4cSbjh21     if ( bExp == 0 ) {
919936b7f4cSbjh21         --expDiff;
920936b7f4cSbjh21     }
921936b7f4cSbjh21     else {
922936b7f4cSbjh21         bSig |= 0x40000000;
923936b7f4cSbjh21     }
924936b7f4cSbjh21     shift32RightJamming( bSig, expDiff, &bSig );
925936b7f4cSbjh21     aSig |= 0x40000000;
926936b7f4cSbjh21  aBigger:
927936b7f4cSbjh21     zSig = aSig - bSig;
928936b7f4cSbjh21     zExp = aExp;
929936b7f4cSbjh21  normalizeRoundAndPack:
930936b7f4cSbjh21     --zExp;
931936b7f4cSbjh21     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
932936b7f4cSbjh21 
933936b7f4cSbjh21 }
934936b7f4cSbjh21 
935936b7f4cSbjh21 /*
936936b7f4cSbjh21 -------------------------------------------------------------------------------
937936b7f4cSbjh21 Returns the result of adding the single-precision floating-point values `a'
938936b7f4cSbjh21 and `b'.  The operation is performed according to the IEC/IEEE Standard for
939936b7f4cSbjh21 Binary Floating-Point Arithmetic.
940936b7f4cSbjh21 -------------------------------------------------------------------------------
941936b7f4cSbjh21 */
float32_add(float32 a,float32 b)942936b7f4cSbjh21 float32 float32_add( float32 a, float32 b )
943936b7f4cSbjh21 {
944936b7f4cSbjh21     flag aSign, bSign;
945936b7f4cSbjh21 
946936b7f4cSbjh21     aSign = extractFloat32Sign( a );
947936b7f4cSbjh21     bSign = extractFloat32Sign( b );
948936b7f4cSbjh21     if ( aSign == bSign ) {
949936b7f4cSbjh21         return addFloat32Sigs( a, b, aSign );
950936b7f4cSbjh21     }
951936b7f4cSbjh21     else {
952936b7f4cSbjh21         return subFloat32Sigs( a, b, aSign );
953936b7f4cSbjh21     }
954936b7f4cSbjh21 
955936b7f4cSbjh21 }
956936b7f4cSbjh21 
957936b7f4cSbjh21 /*
958936b7f4cSbjh21 -------------------------------------------------------------------------------
959936b7f4cSbjh21 Returns the result of subtracting the single-precision floating-point values
960936b7f4cSbjh21 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
961936b7f4cSbjh21 for Binary Floating-Point Arithmetic.
962936b7f4cSbjh21 -------------------------------------------------------------------------------
963936b7f4cSbjh21 */
float32_sub(float32 a,float32 b)964936b7f4cSbjh21 float32 float32_sub( float32 a, float32 b )
965936b7f4cSbjh21 {
966936b7f4cSbjh21     flag aSign, bSign;
967936b7f4cSbjh21 
968936b7f4cSbjh21     aSign = extractFloat32Sign( a );
969936b7f4cSbjh21     bSign = extractFloat32Sign( b );
970936b7f4cSbjh21     if ( aSign == bSign ) {
971936b7f4cSbjh21         return subFloat32Sigs( a, b, aSign );
972936b7f4cSbjh21     }
973936b7f4cSbjh21     else {
974936b7f4cSbjh21         return addFloat32Sigs( a, b, aSign );
975936b7f4cSbjh21     }
976936b7f4cSbjh21 
977936b7f4cSbjh21 }
978936b7f4cSbjh21 
979936b7f4cSbjh21 /*
980936b7f4cSbjh21 -------------------------------------------------------------------------------
981936b7f4cSbjh21 Returns the result of multiplying the single-precision floating-point values
982936b7f4cSbjh21 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
983936b7f4cSbjh21 for Binary Floating-Point Arithmetic.
984936b7f4cSbjh21 -------------------------------------------------------------------------------
985936b7f4cSbjh21 */
float32_mul(float32 a,float32 b)986936b7f4cSbjh21 float32 float32_mul( float32 a, float32 b )
987936b7f4cSbjh21 {
988936b7f4cSbjh21     flag aSign, bSign, zSign;
989936b7f4cSbjh21     int16 aExp, bExp, zExp;
990936b7f4cSbjh21     bits32 aSig, bSig, zSig0, zSig1;
991936b7f4cSbjh21 
992936b7f4cSbjh21     aSig = extractFloat32Frac( a );
993936b7f4cSbjh21     aExp = extractFloat32Exp( a );
994936b7f4cSbjh21     aSign = extractFloat32Sign( a );
995936b7f4cSbjh21     bSig = extractFloat32Frac( b );
996936b7f4cSbjh21     bExp = extractFloat32Exp( b );
997936b7f4cSbjh21     bSign = extractFloat32Sign( b );
998936b7f4cSbjh21     zSign = aSign ^ bSign;
999936b7f4cSbjh21     if ( aExp == 0xFF ) {
1000936b7f4cSbjh21         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1001936b7f4cSbjh21             return propagateFloat32NaN( a, b );
1002936b7f4cSbjh21         }
1003936b7f4cSbjh21         if ( ( bExp | bSig ) == 0 ) {
1004936b7f4cSbjh21             float_raise( float_flag_invalid );
1005936b7f4cSbjh21             return float32_default_nan;
1006936b7f4cSbjh21         }
1007936b7f4cSbjh21         return packFloat32( zSign, 0xFF, 0 );
1008936b7f4cSbjh21     }
1009936b7f4cSbjh21     if ( bExp == 0xFF ) {
1010936b7f4cSbjh21         if ( bSig ) return propagateFloat32NaN( a, b );
1011936b7f4cSbjh21         if ( ( aExp | aSig ) == 0 ) {
1012936b7f4cSbjh21             float_raise( float_flag_invalid );
1013936b7f4cSbjh21             return float32_default_nan;
1014936b7f4cSbjh21         }
1015936b7f4cSbjh21         return packFloat32( zSign, 0xFF, 0 );
1016936b7f4cSbjh21     }
1017936b7f4cSbjh21     if ( aExp == 0 ) {
1018936b7f4cSbjh21         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1019936b7f4cSbjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1020936b7f4cSbjh21     }
1021936b7f4cSbjh21     if ( bExp == 0 ) {
1022936b7f4cSbjh21         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1023936b7f4cSbjh21         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1024936b7f4cSbjh21     }
1025936b7f4cSbjh21     zExp = aExp + bExp - 0x7F;
1026936b7f4cSbjh21     aSig = ( aSig | 0x00800000 )<<7;
1027936b7f4cSbjh21     bSig = ( bSig | 0x00800000 )<<8;
1028936b7f4cSbjh21     mul32To64( aSig, bSig, &zSig0, &zSig1 );
1029936b7f4cSbjh21     zSig0 |= ( zSig1 != 0 );
1030936b7f4cSbjh21     if ( 0 <= (sbits32) ( zSig0<<1 ) ) {
1031936b7f4cSbjh21         zSig0 <<= 1;
1032936b7f4cSbjh21         --zExp;
1033936b7f4cSbjh21     }
1034936b7f4cSbjh21     return roundAndPackFloat32( zSign, zExp, zSig0 );
1035936b7f4cSbjh21 
1036936b7f4cSbjh21 }
1037936b7f4cSbjh21 
1038936b7f4cSbjh21 /*
1039936b7f4cSbjh21 -------------------------------------------------------------------------------
1040936b7f4cSbjh21 Returns the result of dividing the single-precision floating-point value `a'
1041936b7f4cSbjh21 by the corresponding value `b'.  The operation is performed according to the
1042936b7f4cSbjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1043936b7f4cSbjh21 -------------------------------------------------------------------------------
1044936b7f4cSbjh21 */
float32_div(float32 a,float32 b)1045936b7f4cSbjh21 float32 float32_div( float32 a, float32 b )
1046936b7f4cSbjh21 {
1047936b7f4cSbjh21     flag aSign, bSign, zSign;
1048936b7f4cSbjh21     int16 aExp, bExp, zExp;
1049936b7f4cSbjh21     bits32 aSig, bSig, zSig, rem0, rem1, term0, term1;
1050936b7f4cSbjh21 
1051936b7f4cSbjh21     aSig = extractFloat32Frac( a );
1052936b7f4cSbjh21     aExp = extractFloat32Exp( a );
1053936b7f4cSbjh21     aSign = extractFloat32Sign( a );
1054936b7f4cSbjh21     bSig = extractFloat32Frac( b );
1055936b7f4cSbjh21     bExp = extractFloat32Exp( b );
1056936b7f4cSbjh21     bSign = extractFloat32Sign( b );
1057936b7f4cSbjh21     zSign = aSign ^ bSign;
1058936b7f4cSbjh21     if ( aExp == 0xFF ) {
1059936b7f4cSbjh21         if ( aSig ) return propagateFloat32NaN( a, b );
1060936b7f4cSbjh21         if ( bExp == 0xFF ) {
1061936b7f4cSbjh21             if ( bSig ) return propagateFloat32NaN( a, b );
1062936b7f4cSbjh21             float_raise( float_flag_invalid );
1063936b7f4cSbjh21             return float32_default_nan;
1064936b7f4cSbjh21         }
1065936b7f4cSbjh21         return packFloat32( zSign, 0xFF, 0 );
1066936b7f4cSbjh21     }
1067936b7f4cSbjh21     if ( bExp == 0xFF ) {
1068936b7f4cSbjh21         if ( bSig ) return propagateFloat32NaN( a, b );
1069936b7f4cSbjh21         return packFloat32( zSign, 0, 0 );
1070936b7f4cSbjh21     }
1071936b7f4cSbjh21     if ( bExp == 0 ) {
1072936b7f4cSbjh21         if ( bSig == 0 ) {
1073936b7f4cSbjh21             if ( ( aExp | aSig ) == 0 ) {
1074936b7f4cSbjh21                 float_raise( float_flag_invalid );
1075936b7f4cSbjh21                 return float32_default_nan;
1076936b7f4cSbjh21             }
1077936b7f4cSbjh21             float_raise( float_flag_divbyzero );
1078936b7f4cSbjh21             return packFloat32( zSign, 0xFF, 0 );
1079936b7f4cSbjh21         }
1080936b7f4cSbjh21         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1081936b7f4cSbjh21     }
1082936b7f4cSbjh21     if ( aExp == 0 ) {
1083936b7f4cSbjh21         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1084936b7f4cSbjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1085936b7f4cSbjh21     }
1086936b7f4cSbjh21     zExp = aExp - bExp + 0x7D;
1087936b7f4cSbjh21     aSig = ( aSig | 0x00800000 )<<7;
1088936b7f4cSbjh21     bSig = ( bSig | 0x00800000 )<<8;
1089936b7f4cSbjh21     if ( bSig <= ( aSig + aSig ) ) {
1090936b7f4cSbjh21         aSig >>= 1;
1091936b7f4cSbjh21         ++zExp;
1092936b7f4cSbjh21     }
1093936b7f4cSbjh21     zSig = estimateDiv64To32( aSig, 0, bSig );
1094936b7f4cSbjh21     if ( ( zSig & 0x3F ) <= 2 ) {
1095936b7f4cSbjh21         mul32To64( bSig, zSig, &term0, &term1 );
1096936b7f4cSbjh21         sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1097936b7f4cSbjh21         while ( (sbits32) rem0 < 0 ) {
1098936b7f4cSbjh21             --zSig;
1099936b7f4cSbjh21             add64( rem0, rem1, 0, bSig, &rem0, &rem1 );
1100936b7f4cSbjh21         }
1101936b7f4cSbjh21         zSig |= ( rem1 != 0 );
1102936b7f4cSbjh21     }
1103936b7f4cSbjh21     return roundAndPackFloat32( zSign, zExp, zSig );
1104936b7f4cSbjh21 
1105936b7f4cSbjh21 }
1106936b7f4cSbjh21 
1107936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC
1108936b7f4cSbjh21 /*
1109936b7f4cSbjh21 -------------------------------------------------------------------------------
1110936b7f4cSbjh21 Returns the remainder of the single-precision floating-point value `a'
1111936b7f4cSbjh21 with respect to the corresponding value `b'.  The operation is performed
1112936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1113936b7f4cSbjh21 -------------------------------------------------------------------------------
1114936b7f4cSbjh21 */
float32_rem(float32 a,float32 b)1115936b7f4cSbjh21 float32 float32_rem( float32 a, float32 b )
1116936b7f4cSbjh21 {
1117936b7f4cSbjh21     flag aSign, bSign, zSign;
1118936b7f4cSbjh21     int16 aExp, bExp, expDiff;
1119936b7f4cSbjh21     bits32 aSig, bSig, q, allZero, alternateASig;
1120936b7f4cSbjh21     sbits32 sigMean;
1121936b7f4cSbjh21 
1122936b7f4cSbjh21     aSig = extractFloat32Frac( a );
1123936b7f4cSbjh21     aExp = extractFloat32Exp( a );
1124936b7f4cSbjh21     aSign = extractFloat32Sign( a );
1125936b7f4cSbjh21     bSig = extractFloat32Frac( b );
1126936b7f4cSbjh21     bExp = extractFloat32Exp( b );
1127936b7f4cSbjh21     bSign = extractFloat32Sign( b );
1128936b7f4cSbjh21     if ( aExp == 0xFF ) {
1129936b7f4cSbjh21         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1130936b7f4cSbjh21             return propagateFloat32NaN( a, b );
1131936b7f4cSbjh21         }
1132936b7f4cSbjh21         float_raise( float_flag_invalid );
1133936b7f4cSbjh21         return float32_default_nan;
1134936b7f4cSbjh21     }
1135936b7f4cSbjh21     if ( bExp == 0xFF ) {
1136936b7f4cSbjh21         if ( bSig ) return propagateFloat32NaN( a, b );
1137936b7f4cSbjh21         return a;
1138936b7f4cSbjh21     }
1139936b7f4cSbjh21     if ( bExp == 0 ) {
1140936b7f4cSbjh21         if ( bSig == 0 ) {
1141936b7f4cSbjh21             float_raise( float_flag_invalid );
1142936b7f4cSbjh21             return float32_default_nan;
1143936b7f4cSbjh21         }
1144936b7f4cSbjh21         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1145936b7f4cSbjh21     }
1146936b7f4cSbjh21     if ( aExp == 0 ) {
1147936b7f4cSbjh21         if ( aSig == 0 ) return a;
1148936b7f4cSbjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1149936b7f4cSbjh21     }
1150936b7f4cSbjh21     expDiff = aExp - bExp;
1151936b7f4cSbjh21     aSig = ( aSig | 0x00800000 )<<8;
1152936b7f4cSbjh21     bSig = ( bSig | 0x00800000 )<<8;
1153936b7f4cSbjh21     if ( expDiff < 0 ) {
1154936b7f4cSbjh21         if ( expDiff < -1 ) return a;
1155936b7f4cSbjh21         aSig >>= 1;
1156936b7f4cSbjh21     }
1157936b7f4cSbjh21     q = ( bSig <= aSig );
1158936b7f4cSbjh21     if ( q ) aSig -= bSig;
1159936b7f4cSbjh21     expDiff -= 32;
1160936b7f4cSbjh21     while ( 0 < expDiff ) {
1161936b7f4cSbjh21         q = estimateDiv64To32( aSig, 0, bSig );
1162936b7f4cSbjh21         q = ( 2 < q ) ? q - 2 : 0;
1163936b7f4cSbjh21         aSig = - ( ( bSig>>2 ) * q );
1164936b7f4cSbjh21         expDiff -= 30;
1165936b7f4cSbjh21     }
1166936b7f4cSbjh21     expDiff += 32;
1167936b7f4cSbjh21     if ( 0 < expDiff ) {
1168936b7f4cSbjh21         q = estimateDiv64To32( aSig, 0, bSig );
1169936b7f4cSbjh21         q = ( 2 < q ) ? q - 2 : 0;
1170936b7f4cSbjh21         q >>= 32 - expDiff;
1171936b7f4cSbjh21         bSig >>= 2;
1172936b7f4cSbjh21         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1173936b7f4cSbjh21     }
1174936b7f4cSbjh21     else {
1175936b7f4cSbjh21         aSig >>= 2;
1176936b7f4cSbjh21         bSig >>= 2;
1177936b7f4cSbjh21     }
1178936b7f4cSbjh21     do {
1179936b7f4cSbjh21         alternateASig = aSig;
1180936b7f4cSbjh21         ++q;
1181936b7f4cSbjh21         aSig -= bSig;
1182936b7f4cSbjh21     } while ( 0 <= (sbits32) aSig );
1183936b7f4cSbjh21     sigMean = aSig + alternateASig;
1184936b7f4cSbjh21     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1185936b7f4cSbjh21         aSig = alternateASig;
1186936b7f4cSbjh21     }
1187936b7f4cSbjh21     zSign = ( (sbits32) aSig < 0 );
1188936b7f4cSbjh21     if ( zSign ) aSig = - aSig;
1189936b7f4cSbjh21     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
1190936b7f4cSbjh21 
1191936b7f4cSbjh21 }
1192936b7f4cSbjh21 #endif
1193936b7f4cSbjh21 
1194936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC
1195936b7f4cSbjh21 /*
1196936b7f4cSbjh21 -------------------------------------------------------------------------------
1197936b7f4cSbjh21 Returns the square root of the single-precision floating-point value `a'.
1198936b7f4cSbjh21 The operation is performed according to the IEC/IEEE Standard for Binary
1199936b7f4cSbjh21 Floating-Point Arithmetic.
1200936b7f4cSbjh21 -------------------------------------------------------------------------------
1201936b7f4cSbjh21 */
float32_sqrt(float32 a)1202936b7f4cSbjh21 float32 float32_sqrt( float32 a )
1203936b7f4cSbjh21 {
1204936b7f4cSbjh21     flag aSign;
1205936b7f4cSbjh21     int16 aExp, zExp;
1206936b7f4cSbjh21     bits32 aSig, zSig, rem0, rem1, term0, term1;
1207936b7f4cSbjh21 
1208936b7f4cSbjh21     aSig = extractFloat32Frac( a );
1209936b7f4cSbjh21     aExp = extractFloat32Exp( a );
1210936b7f4cSbjh21     aSign = extractFloat32Sign( a );
1211936b7f4cSbjh21     if ( aExp == 0xFF ) {
1212936b7f4cSbjh21         if ( aSig ) return propagateFloat32NaN( a, 0 );
1213936b7f4cSbjh21         if ( ! aSign ) return a;
1214936b7f4cSbjh21         float_raise( float_flag_invalid );
1215936b7f4cSbjh21         return float32_default_nan;
1216936b7f4cSbjh21     }
1217936b7f4cSbjh21     if ( aSign ) {
1218936b7f4cSbjh21         if ( ( aExp | aSig ) == 0 ) return a;
1219936b7f4cSbjh21         float_raise( float_flag_invalid );
1220936b7f4cSbjh21         return float32_default_nan;
1221936b7f4cSbjh21     }
1222936b7f4cSbjh21     if ( aExp == 0 ) {
1223936b7f4cSbjh21         if ( aSig == 0 ) return 0;
1224936b7f4cSbjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1225936b7f4cSbjh21     }
1226936b7f4cSbjh21     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1227936b7f4cSbjh21     aSig = ( aSig | 0x00800000 )<<8;
1228936b7f4cSbjh21     zSig = estimateSqrt32( aExp, aSig ) + 2;
1229936b7f4cSbjh21     if ( ( zSig & 0x7F ) <= 5 ) {
1230936b7f4cSbjh21         if ( zSig < 2 ) {
1231936b7f4cSbjh21             zSig = 0x7FFFFFFF;
1232936b7f4cSbjh21             goto roundAndPack;
1233936b7f4cSbjh21         }
1234936b7f4cSbjh21         else {
1235936b7f4cSbjh21             aSig >>= aExp & 1;
1236936b7f4cSbjh21             mul32To64( zSig, zSig, &term0, &term1 );
1237936b7f4cSbjh21             sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1238936b7f4cSbjh21             while ( (sbits32) rem0 < 0 ) {
1239936b7f4cSbjh21                 --zSig;
1240936b7f4cSbjh21                 shortShift64Left( 0, zSig, 1, &term0, &term1 );
1241936b7f4cSbjh21                 term1 |= 1;
1242936b7f4cSbjh21                 add64( rem0, rem1, term0, term1, &rem0, &rem1 );
1243936b7f4cSbjh21             }
1244936b7f4cSbjh21             zSig |= ( ( rem0 | rem1 ) != 0 );
1245936b7f4cSbjh21         }
1246936b7f4cSbjh21     }
1247936b7f4cSbjh21     shift32RightJamming( zSig, 1, &zSig );
1248936b7f4cSbjh21  roundAndPack:
1249936b7f4cSbjh21     return roundAndPackFloat32( 0, zExp, zSig );
1250936b7f4cSbjh21 
1251936b7f4cSbjh21 }
1252936b7f4cSbjh21 #endif
1253936b7f4cSbjh21 
1254936b7f4cSbjh21 /*
1255936b7f4cSbjh21 -------------------------------------------------------------------------------
1256936b7f4cSbjh21 Returns 1 if the single-precision floating-point value `a' is equal to
1257936b7f4cSbjh21 the corresponding value `b', and 0 otherwise.  The comparison is performed
1258936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1259936b7f4cSbjh21 -------------------------------------------------------------------------------
1260936b7f4cSbjh21 */
float32_eq(float32 a,float32 b)1261936b7f4cSbjh21 flag float32_eq( float32 a, float32 b )
1262936b7f4cSbjh21 {
1263936b7f4cSbjh21 
1264936b7f4cSbjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1265936b7f4cSbjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1266936b7f4cSbjh21        ) {
1267936b7f4cSbjh21         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1268936b7f4cSbjh21             float_raise( float_flag_invalid );
1269936b7f4cSbjh21         }
1270936b7f4cSbjh21         return 0;
1271936b7f4cSbjh21     }
1272936b7f4cSbjh21     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1273936b7f4cSbjh21 
1274936b7f4cSbjh21 }
1275936b7f4cSbjh21 
1276936b7f4cSbjh21 /*
1277936b7f4cSbjh21 -------------------------------------------------------------------------------
1278936b7f4cSbjh21 Returns 1 if the single-precision floating-point value `a' is less than
1279936b7f4cSbjh21 or equal to the corresponding value `b', and 0 otherwise.  The comparison
1280936b7f4cSbjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
1281936b7f4cSbjh21 Arithmetic.
1282936b7f4cSbjh21 -------------------------------------------------------------------------------
1283936b7f4cSbjh21 */
float32_le(float32 a,float32 b)1284936b7f4cSbjh21 flag float32_le( float32 a, float32 b )
1285936b7f4cSbjh21 {
1286936b7f4cSbjh21     flag aSign, bSign;
1287936b7f4cSbjh21 
1288936b7f4cSbjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1289936b7f4cSbjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1290936b7f4cSbjh21        ) {
1291936b7f4cSbjh21         float_raise( float_flag_invalid );
1292936b7f4cSbjh21         return 0;
1293936b7f4cSbjh21     }
1294936b7f4cSbjh21     aSign = extractFloat32Sign( a );
1295936b7f4cSbjh21     bSign = extractFloat32Sign( b );
1296936b7f4cSbjh21     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1297936b7f4cSbjh21     return ( a == b ) || ( aSign ^ ( a < b ) );
1298936b7f4cSbjh21 
1299936b7f4cSbjh21 }
1300936b7f4cSbjh21 
1301936b7f4cSbjh21 /*
1302936b7f4cSbjh21 -------------------------------------------------------------------------------
1303936b7f4cSbjh21 Returns 1 if the single-precision floating-point value `a' is less than
1304936b7f4cSbjh21 the corresponding value `b', and 0 otherwise.  The comparison is performed
1305936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1306936b7f4cSbjh21 -------------------------------------------------------------------------------
1307936b7f4cSbjh21 */
float32_lt(float32 a,float32 b)1308936b7f4cSbjh21 flag float32_lt( float32 a, float32 b )
1309936b7f4cSbjh21 {
1310936b7f4cSbjh21     flag aSign, bSign;
1311936b7f4cSbjh21 
1312936b7f4cSbjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1313936b7f4cSbjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1314936b7f4cSbjh21        ) {
1315936b7f4cSbjh21         float_raise( float_flag_invalid );
1316936b7f4cSbjh21         return 0;
1317936b7f4cSbjh21     }
1318936b7f4cSbjh21     aSign = extractFloat32Sign( a );
1319936b7f4cSbjh21     bSign = extractFloat32Sign( b );
1320936b7f4cSbjh21     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1321936b7f4cSbjh21     return ( a != b ) && ( aSign ^ ( a < b ) );
1322936b7f4cSbjh21 
1323936b7f4cSbjh21 }
1324936b7f4cSbjh21 
1325936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1326936b7f4cSbjh21 /*
1327936b7f4cSbjh21 -------------------------------------------------------------------------------
1328936b7f4cSbjh21 Returns 1 if the single-precision floating-point value `a' is equal to
1329936b7f4cSbjh21 the corresponding value `b', and 0 otherwise.  The invalid exception is
1330936b7f4cSbjh21 raised if either operand is a NaN.  Otherwise, the comparison is performed
1331936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1332936b7f4cSbjh21 -------------------------------------------------------------------------------
1333936b7f4cSbjh21 */
float32_eq_signaling(float32 a,float32 b)1334936b7f4cSbjh21 flag float32_eq_signaling( float32 a, float32 b )
1335936b7f4cSbjh21 {
1336936b7f4cSbjh21 
1337936b7f4cSbjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1338936b7f4cSbjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1339936b7f4cSbjh21        ) {
1340936b7f4cSbjh21         float_raise( float_flag_invalid );
1341936b7f4cSbjh21         return 0;
1342936b7f4cSbjh21     }
1343936b7f4cSbjh21     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1344936b7f4cSbjh21 
1345936b7f4cSbjh21 }
1346936b7f4cSbjh21 
1347936b7f4cSbjh21 /*
1348936b7f4cSbjh21 -------------------------------------------------------------------------------
1349936b7f4cSbjh21 Returns 1 if the single-precision floating-point value `a' is less than or
1350936b7f4cSbjh21 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
1351936b7f4cSbjh21 cause an exception.  Otherwise, the comparison is performed according to the
1352936b7f4cSbjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1353936b7f4cSbjh21 -------------------------------------------------------------------------------
1354936b7f4cSbjh21 */
float32_le_quiet(float32 a,float32 b)1355936b7f4cSbjh21 flag float32_le_quiet( float32 a, float32 b )
1356936b7f4cSbjh21 {
1357936b7f4cSbjh21     flag aSign, bSign;
1358936b7f4cSbjh21     int16 aExp, bExp;
1359936b7f4cSbjh21 
1360936b7f4cSbjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1361936b7f4cSbjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1362936b7f4cSbjh21        ) {
1363936b7f4cSbjh21         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1364936b7f4cSbjh21             float_raise( float_flag_invalid );
1365936b7f4cSbjh21         }
1366936b7f4cSbjh21         return 0;
1367936b7f4cSbjh21     }
1368936b7f4cSbjh21     aSign = extractFloat32Sign( a );
1369936b7f4cSbjh21     bSign = extractFloat32Sign( b );
1370936b7f4cSbjh21     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1371936b7f4cSbjh21     return ( a == b ) || ( aSign ^ ( a < b ) );
1372936b7f4cSbjh21 
1373936b7f4cSbjh21 }
1374936b7f4cSbjh21 
1375936b7f4cSbjh21 /*
1376936b7f4cSbjh21 -------------------------------------------------------------------------------
1377936b7f4cSbjh21 Returns 1 if the single-precision floating-point value `a' is less than
1378936b7f4cSbjh21 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
1379936b7f4cSbjh21 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
1380936b7f4cSbjh21 Standard for Binary Floating-Point Arithmetic.
1381936b7f4cSbjh21 -------------------------------------------------------------------------------
1382936b7f4cSbjh21 */
float32_lt_quiet(float32 a,float32 b)1383936b7f4cSbjh21 flag float32_lt_quiet( float32 a, float32 b )
1384936b7f4cSbjh21 {
1385936b7f4cSbjh21     flag aSign, bSign;
1386936b7f4cSbjh21 
1387936b7f4cSbjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1388936b7f4cSbjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1389936b7f4cSbjh21        ) {
1390936b7f4cSbjh21         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1391936b7f4cSbjh21             float_raise( float_flag_invalid );
1392936b7f4cSbjh21         }
1393936b7f4cSbjh21         return 0;
1394936b7f4cSbjh21     }
1395936b7f4cSbjh21     aSign = extractFloat32Sign( a );
1396936b7f4cSbjh21     bSign = extractFloat32Sign( b );
1397936b7f4cSbjh21     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1398936b7f4cSbjh21     return ( a != b ) && ( aSign ^ ( a < b ) );
1399936b7f4cSbjh21 
1400936b7f4cSbjh21 }
1401936b7f4cSbjh21 #endif /* !SOFTFLOAT_FOR_GCC */
1402936b7f4cSbjh21 
1403936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1404936b7f4cSbjh21 /*
1405936b7f4cSbjh21 -------------------------------------------------------------------------------
1406936b7f4cSbjh21 Returns the result of converting the double-precision floating-point value
1407936b7f4cSbjh21 `a' to the 32-bit two's complement integer format.  The conversion is
1408936b7f4cSbjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
1409936b7f4cSbjh21 Arithmetic---which means in particular that the conversion is rounded
1410936b7f4cSbjh21 according to the current rounding mode.  If `a' is a NaN, the largest
1411936b7f4cSbjh21 positive integer is returned.  Otherwise, if the conversion overflows, the
1412936b7f4cSbjh21 largest integer with the same sign as `a' is returned.
1413936b7f4cSbjh21 -------------------------------------------------------------------------------
1414936b7f4cSbjh21 */
float64_to_int32(float64 a)1415936b7f4cSbjh21 int32 float64_to_int32( float64 a )
1416936b7f4cSbjh21 {
1417936b7f4cSbjh21     flag aSign;
1418936b7f4cSbjh21     int16 aExp, shiftCount;
1419936b7f4cSbjh21     bits32 aSig0, aSig1, absZ, aSigExtra;
1420936b7f4cSbjh21     int32 z;
1421936b7f4cSbjh21     int8 roundingMode;
1422936b7f4cSbjh21 
1423936b7f4cSbjh21     aSig1 = extractFloat64Frac1( a );
1424936b7f4cSbjh21     aSig0 = extractFloat64Frac0( a );
1425936b7f4cSbjh21     aExp = extractFloat64Exp( a );
1426936b7f4cSbjh21     aSign = extractFloat64Sign( a );
1427936b7f4cSbjh21     shiftCount = aExp - 0x413;
1428936b7f4cSbjh21     if ( 0 <= shiftCount ) {
1429936b7f4cSbjh21         if ( 0x41E < aExp ) {
1430936b7f4cSbjh21             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1431936b7f4cSbjh21             goto invalid;
1432936b7f4cSbjh21         }
1433936b7f4cSbjh21         shortShift64Left(
1434936b7f4cSbjh21             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1435936b7f4cSbjh21         if ( 0x80000000 < absZ ) goto invalid;
1436936b7f4cSbjh21     }
1437936b7f4cSbjh21     else {
1438936b7f4cSbjh21         aSig1 = ( aSig1 != 0 );
1439936b7f4cSbjh21         if ( aExp < 0x3FE ) {
1440936b7f4cSbjh21             aSigExtra = aExp | aSig0 | aSig1;
1441936b7f4cSbjh21             absZ = 0;
1442936b7f4cSbjh21         }
1443936b7f4cSbjh21         else {
1444936b7f4cSbjh21             aSig0 |= 0x00100000;
1445936b7f4cSbjh21             aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1446936b7f4cSbjh21             absZ = aSig0>>( - shiftCount );
1447936b7f4cSbjh21         }
1448936b7f4cSbjh21     }
1449936b7f4cSbjh21     roundingMode = float_rounding_mode;
1450936b7f4cSbjh21     if ( roundingMode == float_round_nearest_even ) {
1451936b7f4cSbjh21         if ( (sbits32) aSigExtra < 0 ) {
1452936b7f4cSbjh21             ++absZ;
1453936b7f4cSbjh21             if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
1454936b7f4cSbjh21         }
1455936b7f4cSbjh21         z = aSign ? - absZ : absZ;
1456936b7f4cSbjh21     }
1457936b7f4cSbjh21     else {
1458936b7f4cSbjh21         aSigExtra = ( aSigExtra != 0 );
1459936b7f4cSbjh21         if ( aSign ) {
1460936b7f4cSbjh21             z = - (   absZ
1461936b7f4cSbjh21                     + ( ( roundingMode == float_round_down ) & aSigExtra ) );
1462936b7f4cSbjh21         }
1463936b7f4cSbjh21         else {
1464936b7f4cSbjh21             z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
1465936b7f4cSbjh21         }
1466936b7f4cSbjh21     }
1467936b7f4cSbjh21     if ( ( aSign ^ ( z < 0 ) ) && z ) {
1468936b7f4cSbjh21  invalid:
1469936b7f4cSbjh21         float_raise( float_flag_invalid );
1470936b7f4cSbjh21         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1471936b7f4cSbjh21     }
1472*9c7bce54Smatt     if ( aSigExtra ) set_float_exception_inexact_flag();
1473936b7f4cSbjh21     return z;
1474936b7f4cSbjh21 
1475936b7f4cSbjh21 }
1476936b7f4cSbjh21 #endif /* !SOFTFLOAT_FOR_GCC */
1477936b7f4cSbjh21 
1478936b7f4cSbjh21 /*
1479936b7f4cSbjh21 -------------------------------------------------------------------------------
1480936b7f4cSbjh21 Returns the result of converting the double-precision floating-point value
1481936b7f4cSbjh21 `a' to the 32-bit two's complement integer format.  The conversion is
1482936b7f4cSbjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
1483936b7f4cSbjh21 Arithmetic, except that the conversion is always rounded toward zero.
1484936b7f4cSbjh21 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1485936b7f4cSbjh21 the conversion overflows, the largest integer with the same sign as `a' is
1486936b7f4cSbjh21 returned.
1487936b7f4cSbjh21 -------------------------------------------------------------------------------
1488936b7f4cSbjh21 */
float64_to_int32_round_to_zero(float64 a)1489936b7f4cSbjh21 int32 float64_to_int32_round_to_zero( float64 a )
1490936b7f4cSbjh21 {
1491936b7f4cSbjh21     flag aSign;
1492936b7f4cSbjh21     int16 aExp, shiftCount;
1493936b7f4cSbjh21     bits32 aSig0, aSig1, absZ, aSigExtra;
1494936b7f4cSbjh21     int32 z;
1495936b7f4cSbjh21 
1496936b7f4cSbjh21     aSig1 = extractFloat64Frac1( a );
1497936b7f4cSbjh21     aSig0 = extractFloat64Frac0( a );
1498936b7f4cSbjh21     aExp = extractFloat64Exp( a );
1499936b7f4cSbjh21     aSign = extractFloat64Sign( a );
1500936b7f4cSbjh21     shiftCount = aExp - 0x413;
1501936b7f4cSbjh21     if ( 0 <= shiftCount ) {
1502936b7f4cSbjh21         if ( 0x41E < aExp ) {
1503936b7f4cSbjh21             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1504936b7f4cSbjh21             goto invalid;
1505936b7f4cSbjh21         }
1506936b7f4cSbjh21         shortShift64Left(
1507936b7f4cSbjh21             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1508936b7f4cSbjh21     }
1509936b7f4cSbjh21     else {
1510936b7f4cSbjh21         if ( aExp < 0x3FF ) {
1511936b7f4cSbjh21             if ( aExp | aSig0 | aSig1 ) {
1512*9c7bce54Smatt                 set_float_exception_inexact_flag();
1513936b7f4cSbjh21             }
1514936b7f4cSbjh21             return 0;
1515936b7f4cSbjh21         }
1516936b7f4cSbjh21         aSig0 |= 0x00100000;
1517936b7f4cSbjh21         aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1518936b7f4cSbjh21         absZ = aSig0>>( - shiftCount );
1519936b7f4cSbjh21     }
1520936b7f4cSbjh21     z = aSign ? - absZ : absZ;
1521936b7f4cSbjh21     if ( ( aSign ^ ( z < 0 ) ) && z ) {
1522936b7f4cSbjh21  invalid:
1523936b7f4cSbjh21         float_raise( float_flag_invalid );
1524936b7f4cSbjh21         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1525936b7f4cSbjh21     }
1526*9c7bce54Smatt     if ( aSigExtra ) set_float_exception_inexact_flag();
1527936b7f4cSbjh21     return z;
1528936b7f4cSbjh21 
1529936b7f4cSbjh21 }
1530936b7f4cSbjh21 
1531936b7f4cSbjh21 /*
1532936b7f4cSbjh21 -------------------------------------------------------------------------------
1533936b7f4cSbjh21 Returns the result of converting the double-precision floating-point value
1534936b7f4cSbjh21 `a' to the single-precision floating-point format.  The conversion is
1535936b7f4cSbjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
1536936b7f4cSbjh21 Arithmetic.
1537936b7f4cSbjh21 -------------------------------------------------------------------------------
1538936b7f4cSbjh21 */
float64_to_float32(float64 a)1539936b7f4cSbjh21 float32 float64_to_float32( float64 a )
1540936b7f4cSbjh21 {
1541936b7f4cSbjh21     flag aSign;
1542936b7f4cSbjh21     int16 aExp;
1543936b7f4cSbjh21     bits32 aSig0, aSig1, zSig;
1544936b7f4cSbjh21     bits32 allZero;
1545936b7f4cSbjh21 
1546936b7f4cSbjh21     aSig1 = extractFloat64Frac1( a );
1547936b7f4cSbjh21     aSig0 = extractFloat64Frac0( a );
1548936b7f4cSbjh21     aExp = extractFloat64Exp( a );
1549936b7f4cSbjh21     aSign = extractFloat64Sign( a );
1550936b7f4cSbjh21     if ( aExp == 0x7FF ) {
1551936b7f4cSbjh21         if ( aSig0 | aSig1 ) {
1552936b7f4cSbjh21             return commonNaNToFloat32( float64ToCommonNaN( a ) );
1553936b7f4cSbjh21         }
1554936b7f4cSbjh21         return packFloat32( aSign, 0xFF, 0 );
1555936b7f4cSbjh21     }
1556936b7f4cSbjh21     shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
1557936b7f4cSbjh21     if ( aExp ) zSig |= 0x40000000;
1558936b7f4cSbjh21     return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
1559936b7f4cSbjh21 
1560936b7f4cSbjh21 }
1561936b7f4cSbjh21 
1562936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC
1563936b7f4cSbjh21 /*
1564936b7f4cSbjh21 -------------------------------------------------------------------------------
1565936b7f4cSbjh21 Rounds the double-precision floating-point value `a' to an integer,
1566936b7f4cSbjh21 and returns the result as a double-precision floating-point value.  The
1567936b7f4cSbjh21 operation is performed according to the IEC/IEEE Standard for Binary
1568936b7f4cSbjh21 Floating-Point Arithmetic.
1569936b7f4cSbjh21 -------------------------------------------------------------------------------
1570936b7f4cSbjh21 */
float64_round_to_int(float64 a)1571936b7f4cSbjh21 float64 float64_round_to_int( float64 a )
1572936b7f4cSbjh21 {
1573936b7f4cSbjh21     flag aSign;
1574936b7f4cSbjh21     int16 aExp;
1575936b7f4cSbjh21     bits32 lastBitMask, roundBitsMask;
1576936b7f4cSbjh21     int8 roundingMode;
1577936b7f4cSbjh21     float64 z;
1578936b7f4cSbjh21 
1579936b7f4cSbjh21     aExp = extractFloat64Exp( a );
1580936b7f4cSbjh21     if ( 0x413 <= aExp ) {
1581936b7f4cSbjh21         if ( 0x433 <= aExp ) {
1582936b7f4cSbjh21             if (    ( aExp == 0x7FF )
1583936b7f4cSbjh21                  && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
1584936b7f4cSbjh21                 return propagateFloat64NaN( a, a );
1585936b7f4cSbjh21             }
1586936b7f4cSbjh21             return a;
1587936b7f4cSbjh21         }
1588936b7f4cSbjh21         lastBitMask = 1;
1589936b7f4cSbjh21         lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;
1590936b7f4cSbjh21         roundBitsMask = lastBitMask - 1;
1591936b7f4cSbjh21         z = a;
1592936b7f4cSbjh21         roundingMode = float_rounding_mode;
1593936b7f4cSbjh21         if ( roundingMode == float_round_nearest_even ) {
1594936b7f4cSbjh21             if ( lastBitMask ) {
1595936b7f4cSbjh21                 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
1596936b7f4cSbjh21                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
1597936b7f4cSbjh21             }
1598936b7f4cSbjh21             else {
1599936b7f4cSbjh21                 if ( (sbits32) z.low < 0 ) {
1600936b7f4cSbjh21                     ++z.high;
1601936b7f4cSbjh21                     if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
1602936b7f4cSbjh21                 }
1603936b7f4cSbjh21             }
1604936b7f4cSbjh21         }
1605936b7f4cSbjh21         else if ( roundingMode != float_round_to_zero ) {
1606936b7f4cSbjh21             if (   extractFloat64Sign( z )
1607936b7f4cSbjh21                  ^ ( roundingMode == float_round_up ) ) {
1608936b7f4cSbjh21                 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
1609936b7f4cSbjh21             }
1610936b7f4cSbjh21         }
1611936b7f4cSbjh21         z.low &= ~ roundBitsMask;
1612936b7f4cSbjh21     }
1613936b7f4cSbjh21     else {
1614936b7f4cSbjh21         if ( aExp <= 0x3FE ) {
1615936b7f4cSbjh21             if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
1616*9c7bce54Smatt             set_float_exception_inexact_flag();
1617936b7f4cSbjh21             aSign = extractFloat64Sign( a );
1618936b7f4cSbjh21             switch ( float_rounding_mode ) {
1619936b7f4cSbjh21              case float_round_nearest_even:
1620936b7f4cSbjh21                 if (    ( aExp == 0x3FE )
1621936b7f4cSbjh21                      && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
1622936b7f4cSbjh21                    ) {
1623936b7f4cSbjh21                     return packFloat64( aSign, 0x3FF, 0, 0 );
1624936b7f4cSbjh21                 }
1625936b7f4cSbjh21                 break;
1626936b7f4cSbjh21              case float_round_down:
1627936b7f4cSbjh21                 return
1628936b7f4cSbjh21                       aSign ? packFloat64( 1, 0x3FF, 0, 0 )
1629936b7f4cSbjh21                     : packFloat64( 0, 0, 0, 0 );
1630936b7f4cSbjh21              case float_round_up:
1631936b7f4cSbjh21                 return
1632936b7f4cSbjh21                       aSign ? packFloat64( 1, 0, 0, 0 )
1633936b7f4cSbjh21                     : packFloat64( 0, 0x3FF, 0, 0 );
1634936b7f4cSbjh21             }
1635936b7f4cSbjh21             return packFloat64( aSign, 0, 0, 0 );
1636936b7f4cSbjh21         }
1637936b7f4cSbjh21         lastBitMask = 1;
1638936b7f4cSbjh21         lastBitMask <<= 0x413 - aExp;
1639936b7f4cSbjh21         roundBitsMask = lastBitMask - 1;
1640936b7f4cSbjh21         z.low = 0;
1641936b7f4cSbjh21         z.high = a.high;
1642936b7f4cSbjh21         roundingMode = float_rounding_mode;
1643936b7f4cSbjh21         if ( roundingMode == float_round_nearest_even ) {
1644936b7f4cSbjh21             z.high += lastBitMask>>1;
1645936b7f4cSbjh21             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
1646936b7f4cSbjh21                 z.high &= ~ lastBitMask;
1647936b7f4cSbjh21             }
1648936b7f4cSbjh21         }
1649936b7f4cSbjh21         else if ( roundingMode != float_round_to_zero ) {
1650936b7f4cSbjh21             if (   extractFloat64Sign( z )
1651936b7f4cSbjh21                  ^ ( roundingMode == float_round_up ) ) {
1652936b7f4cSbjh21                 z.high |= ( a.low != 0 );
1653936b7f4cSbjh21                 z.high += roundBitsMask;
1654936b7f4cSbjh21             }
1655936b7f4cSbjh21         }
1656936b7f4cSbjh21         z.high &= ~ roundBitsMask;
1657936b7f4cSbjh21     }
1658936b7f4cSbjh21     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
1659*9c7bce54Smatt         set_float_exception_inexact_flag();
1660936b7f4cSbjh21     }
1661936b7f4cSbjh21     return z;
1662936b7f4cSbjh21 
1663936b7f4cSbjh21 }
1664936b7f4cSbjh21 #endif
1665936b7f4cSbjh21 
1666936b7f4cSbjh21 /*
1667936b7f4cSbjh21 -------------------------------------------------------------------------------
1668936b7f4cSbjh21 Returns the result of adding the absolute values of the double-precision
1669936b7f4cSbjh21 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1670936b7f4cSbjh21 before being returned.  `zSign' is ignored if the result is a NaN.
1671936b7f4cSbjh21 The addition is performed according to the IEC/IEEE Standard for Binary
1672936b7f4cSbjh21 Floating-Point Arithmetic.
1673936b7f4cSbjh21 -------------------------------------------------------------------------------
1674936b7f4cSbjh21 */
addFloat64Sigs(float64 a,float64 b,flag zSign)1675936b7f4cSbjh21 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
1676936b7f4cSbjh21 {
1677936b7f4cSbjh21     int16 aExp, bExp, zExp;
1678936b7f4cSbjh21     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1679936b7f4cSbjh21     int16 expDiff;
1680936b7f4cSbjh21 
1681936b7f4cSbjh21     aSig1 = extractFloat64Frac1( a );
1682936b7f4cSbjh21     aSig0 = extractFloat64Frac0( a );
1683936b7f4cSbjh21     aExp = extractFloat64Exp( a );
1684936b7f4cSbjh21     bSig1 = extractFloat64Frac1( b );
1685936b7f4cSbjh21     bSig0 = extractFloat64Frac0( b );
1686936b7f4cSbjh21     bExp = extractFloat64Exp( b );
1687936b7f4cSbjh21     expDiff = aExp - bExp;
1688936b7f4cSbjh21     if ( 0 < expDiff ) {
1689936b7f4cSbjh21         if ( aExp == 0x7FF ) {
1690936b7f4cSbjh21             if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1691936b7f4cSbjh21             return a;
1692936b7f4cSbjh21         }
1693936b7f4cSbjh21         if ( bExp == 0 ) {
1694936b7f4cSbjh21             --expDiff;
1695936b7f4cSbjh21         }
1696936b7f4cSbjh21         else {
1697936b7f4cSbjh21             bSig0 |= 0x00100000;
1698936b7f4cSbjh21         }
1699936b7f4cSbjh21         shift64ExtraRightJamming(
1700936b7f4cSbjh21             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
1701936b7f4cSbjh21         zExp = aExp;
1702936b7f4cSbjh21     }
1703936b7f4cSbjh21     else if ( expDiff < 0 ) {
1704936b7f4cSbjh21         if ( bExp == 0x7FF ) {
1705936b7f4cSbjh21             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1706936b7f4cSbjh21             return packFloat64( zSign, 0x7FF, 0, 0 );
1707936b7f4cSbjh21         }
1708936b7f4cSbjh21         if ( aExp == 0 ) {
1709936b7f4cSbjh21             ++expDiff;
1710936b7f4cSbjh21         }
1711936b7f4cSbjh21         else {
1712936b7f4cSbjh21             aSig0 |= 0x00100000;
1713936b7f4cSbjh21         }
1714936b7f4cSbjh21         shift64ExtraRightJamming(
1715936b7f4cSbjh21             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
1716936b7f4cSbjh21         zExp = bExp;
1717936b7f4cSbjh21     }
1718936b7f4cSbjh21     else {
1719936b7f4cSbjh21         if ( aExp == 0x7FF ) {
1720936b7f4cSbjh21             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1721936b7f4cSbjh21                 return propagateFloat64NaN( a, b );
1722936b7f4cSbjh21             }
1723936b7f4cSbjh21             return a;
1724936b7f4cSbjh21         }
1725936b7f4cSbjh21         add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1726936b7f4cSbjh21         if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
1727936b7f4cSbjh21         zSig2 = 0;
1728936b7f4cSbjh21         zSig0 |= 0x00200000;
1729936b7f4cSbjh21         zExp = aExp;
1730936b7f4cSbjh21         goto shiftRight1;
1731936b7f4cSbjh21     }
1732936b7f4cSbjh21     aSig0 |= 0x00100000;
1733936b7f4cSbjh21     add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1734936b7f4cSbjh21     --zExp;
1735936b7f4cSbjh21     if ( zSig0 < 0x00200000 ) goto roundAndPack;
1736936b7f4cSbjh21     ++zExp;
1737936b7f4cSbjh21  shiftRight1:
1738936b7f4cSbjh21     shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1739936b7f4cSbjh21  roundAndPack:
1740936b7f4cSbjh21     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1741936b7f4cSbjh21 
1742936b7f4cSbjh21 }
1743936b7f4cSbjh21 
1744936b7f4cSbjh21 /*
1745936b7f4cSbjh21 -------------------------------------------------------------------------------
1746936b7f4cSbjh21 Returns the result of subtracting the absolute values of the double-
1747936b7f4cSbjh21 precision floating-point values `a' and `b'.  If `zSign' is 1, the
1748936b7f4cSbjh21 difference is negated before being returned.  `zSign' is ignored if the
1749936b7f4cSbjh21 result is a NaN.  The subtraction is performed according to the IEC/IEEE
1750936b7f4cSbjh21 Standard for Binary Floating-Point Arithmetic.
1751936b7f4cSbjh21 -------------------------------------------------------------------------------
1752936b7f4cSbjh21 */
subFloat64Sigs(float64 a,float64 b,flag zSign)1753936b7f4cSbjh21 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
1754936b7f4cSbjh21 {
1755936b7f4cSbjh21     int16 aExp, bExp, zExp;
1756936b7f4cSbjh21     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
1757936b7f4cSbjh21     int16 expDiff;
1758936b7f4cSbjh21 
1759936b7f4cSbjh21     aSig1 = extractFloat64Frac1( a );
1760936b7f4cSbjh21     aSig0 = extractFloat64Frac0( a );
1761936b7f4cSbjh21     aExp = extractFloat64Exp( a );
1762936b7f4cSbjh21     bSig1 = extractFloat64Frac1( b );
1763936b7f4cSbjh21     bSig0 = extractFloat64Frac0( b );
1764936b7f4cSbjh21     bExp = extractFloat64Exp( b );
1765936b7f4cSbjh21     expDiff = aExp - bExp;
1766936b7f4cSbjh21     shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );
1767936b7f4cSbjh21     shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
1768936b7f4cSbjh21     if ( 0 < expDiff ) goto aExpBigger;
1769936b7f4cSbjh21     if ( expDiff < 0 ) goto bExpBigger;
1770936b7f4cSbjh21     if ( aExp == 0x7FF ) {
1771936b7f4cSbjh21         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1772936b7f4cSbjh21             return propagateFloat64NaN( a, b );
1773936b7f4cSbjh21         }
1774936b7f4cSbjh21         float_raise( float_flag_invalid );
1775936b7f4cSbjh21         return float64_default_nan;
1776936b7f4cSbjh21     }
1777936b7f4cSbjh21     if ( aExp == 0 ) {
1778936b7f4cSbjh21         aExp = 1;
1779936b7f4cSbjh21         bExp = 1;
1780936b7f4cSbjh21     }
1781936b7f4cSbjh21     if ( bSig0 < aSig0 ) goto aBigger;
1782936b7f4cSbjh21     if ( aSig0 < bSig0 ) goto bBigger;
1783936b7f4cSbjh21     if ( bSig1 < aSig1 ) goto aBigger;
1784936b7f4cSbjh21     if ( aSig1 < bSig1 ) goto bBigger;
1785936b7f4cSbjh21     return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
1786936b7f4cSbjh21  bExpBigger:
1787936b7f4cSbjh21     if ( bExp == 0x7FF ) {
1788936b7f4cSbjh21         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1789936b7f4cSbjh21         return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
1790936b7f4cSbjh21     }
1791936b7f4cSbjh21     if ( aExp == 0 ) {
1792936b7f4cSbjh21         ++expDiff;
1793936b7f4cSbjh21     }
1794936b7f4cSbjh21     else {
1795936b7f4cSbjh21         aSig0 |= 0x40000000;
1796936b7f4cSbjh21     }
1797936b7f4cSbjh21     shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
1798936b7f4cSbjh21     bSig0 |= 0x40000000;
1799936b7f4cSbjh21  bBigger:
1800936b7f4cSbjh21     sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
1801936b7f4cSbjh21     zExp = bExp;
1802936b7f4cSbjh21     zSign ^= 1;
1803936b7f4cSbjh21     goto normalizeRoundAndPack;
1804936b7f4cSbjh21  aExpBigger:
1805936b7f4cSbjh21     if ( aExp == 0x7FF ) {
1806936b7f4cSbjh21         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1807936b7f4cSbjh21         return a;
1808936b7f4cSbjh21     }
1809936b7f4cSbjh21     if ( bExp == 0 ) {
1810936b7f4cSbjh21         --expDiff;
1811936b7f4cSbjh21     }
1812936b7f4cSbjh21     else {
1813936b7f4cSbjh21         bSig0 |= 0x40000000;
1814936b7f4cSbjh21     }
1815936b7f4cSbjh21     shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
1816936b7f4cSbjh21     aSig0 |= 0x40000000;
1817936b7f4cSbjh21  aBigger:
1818936b7f4cSbjh21     sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1819936b7f4cSbjh21     zExp = aExp;
1820936b7f4cSbjh21  normalizeRoundAndPack:
1821936b7f4cSbjh21     --zExp;
1822936b7f4cSbjh21     return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
1823936b7f4cSbjh21 
1824936b7f4cSbjh21 }
1825936b7f4cSbjh21 
1826936b7f4cSbjh21 /*
1827936b7f4cSbjh21 -------------------------------------------------------------------------------
1828936b7f4cSbjh21 Returns the result of adding the double-precision floating-point values `a'
1829936b7f4cSbjh21 and `b'.  The operation is performed according to the IEC/IEEE Standard for
1830936b7f4cSbjh21 Binary Floating-Point Arithmetic.
1831936b7f4cSbjh21 -------------------------------------------------------------------------------
1832936b7f4cSbjh21 */
float64_add(float64 a,float64 b)1833936b7f4cSbjh21 float64 float64_add( float64 a, float64 b )
1834936b7f4cSbjh21 {
1835936b7f4cSbjh21     flag aSign, bSign;
1836936b7f4cSbjh21 
1837936b7f4cSbjh21     aSign = extractFloat64Sign( a );
1838936b7f4cSbjh21     bSign = extractFloat64Sign( b );
1839936b7f4cSbjh21     if ( aSign == bSign ) {
1840936b7f4cSbjh21         return addFloat64Sigs( a, b, aSign );
1841936b7f4cSbjh21     }
1842936b7f4cSbjh21     else {
1843936b7f4cSbjh21         return subFloat64Sigs( a, b, aSign );
1844936b7f4cSbjh21     }
1845936b7f4cSbjh21 
1846936b7f4cSbjh21 }
1847936b7f4cSbjh21 
1848936b7f4cSbjh21 /*
1849936b7f4cSbjh21 -------------------------------------------------------------------------------
1850936b7f4cSbjh21 Returns the result of subtracting the double-precision floating-point values
1851936b7f4cSbjh21 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1852936b7f4cSbjh21 for Binary Floating-Point Arithmetic.
1853936b7f4cSbjh21 -------------------------------------------------------------------------------
1854936b7f4cSbjh21 */
float64_sub(float64 a,float64 b)1855936b7f4cSbjh21 float64 float64_sub( float64 a, float64 b )
1856936b7f4cSbjh21 {
1857936b7f4cSbjh21     flag aSign, bSign;
1858936b7f4cSbjh21 
1859936b7f4cSbjh21     aSign = extractFloat64Sign( a );
1860936b7f4cSbjh21     bSign = extractFloat64Sign( b );
1861936b7f4cSbjh21     if ( aSign == bSign ) {
1862936b7f4cSbjh21         return subFloat64Sigs( a, b, aSign );
1863936b7f4cSbjh21     }
1864936b7f4cSbjh21     else {
1865936b7f4cSbjh21         return addFloat64Sigs( a, b, aSign );
1866936b7f4cSbjh21     }
1867936b7f4cSbjh21 
1868936b7f4cSbjh21 }
1869936b7f4cSbjh21 
1870936b7f4cSbjh21 /*
1871936b7f4cSbjh21 -------------------------------------------------------------------------------
1872936b7f4cSbjh21 Returns the result of multiplying the double-precision floating-point values
1873936b7f4cSbjh21 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1874936b7f4cSbjh21 for Binary Floating-Point Arithmetic.
1875936b7f4cSbjh21 -------------------------------------------------------------------------------
1876936b7f4cSbjh21 */
float64_mul(float64 a,float64 b)1877936b7f4cSbjh21 float64 float64_mul( float64 a, float64 b )
1878936b7f4cSbjh21 {
1879936b7f4cSbjh21     flag aSign, bSign, zSign;
1880936b7f4cSbjh21     int16 aExp, bExp, zExp;
1881936b7f4cSbjh21     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
1882936b7f4cSbjh21 
1883936b7f4cSbjh21     aSig1 = extractFloat64Frac1( a );
1884936b7f4cSbjh21     aSig0 = extractFloat64Frac0( a );
1885936b7f4cSbjh21     aExp = extractFloat64Exp( a );
1886936b7f4cSbjh21     aSign = extractFloat64Sign( a );
1887936b7f4cSbjh21     bSig1 = extractFloat64Frac1( b );
1888936b7f4cSbjh21     bSig0 = extractFloat64Frac0( b );
1889936b7f4cSbjh21     bExp = extractFloat64Exp( b );
1890936b7f4cSbjh21     bSign = extractFloat64Sign( b );
1891936b7f4cSbjh21     zSign = aSign ^ bSign;
1892936b7f4cSbjh21     if ( aExp == 0x7FF ) {
1893936b7f4cSbjh21         if (    ( aSig0 | aSig1 )
1894936b7f4cSbjh21              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
1895936b7f4cSbjh21             return propagateFloat64NaN( a, b );
1896936b7f4cSbjh21         }
1897936b7f4cSbjh21         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
1898936b7f4cSbjh21         return packFloat64( zSign, 0x7FF, 0, 0 );
1899936b7f4cSbjh21     }
1900936b7f4cSbjh21     if ( bExp == 0x7FF ) {
1901936b7f4cSbjh21         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1902936b7f4cSbjh21         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1903936b7f4cSbjh21  invalid:
1904936b7f4cSbjh21             float_raise( float_flag_invalid );
1905936b7f4cSbjh21             return float64_default_nan;
1906936b7f4cSbjh21         }
1907936b7f4cSbjh21         return packFloat64( zSign, 0x7FF, 0, 0 );
1908936b7f4cSbjh21     }
1909936b7f4cSbjh21     if ( aExp == 0 ) {
1910936b7f4cSbjh21         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1911936b7f4cSbjh21         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1912936b7f4cSbjh21     }
1913936b7f4cSbjh21     if ( bExp == 0 ) {
1914936b7f4cSbjh21         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1915936b7f4cSbjh21         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1916936b7f4cSbjh21     }
1917936b7f4cSbjh21     zExp = aExp + bExp - 0x400;
1918936b7f4cSbjh21     aSig0 |= 0x00100000;
1919936b7f4cSbjh21     shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );
1920936b7f4cSbjh21     mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
1921936b7f4cSbjh21     add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
1922936b7f4cSbjh21     zSig2 |= ( zSig3 != 0 );
1923936b7f4cSbjh21     if ( 0x00200000 <= zSig0 ) {
1924936b7f4cSbjh21         shift64ExtraRightJamming(
1925936b7f4cSbjh21             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1926936b7f4cSbjh21         ++zExp;
1927936b7f4cSbjh21     }
1928936b7f4cSbjh21     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1929936b7f4cSbjh21 
1930936b7f4cSbjh21 }
1931936b7f4cSbjh21 
1932936b7f4cSbjh21 /*
1933936b7f4cSbjh21 -------------------------------------------------------------------------------
1934936b7f4cSbjh21 Returns the result of dividing the double-precision floating-point value `a'
1935936b7f4cSbjh21 by the corresponding value `b'.  The operation is performed according to the
1936936b7f4cSbjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1937936b7f4cSbjh21 -------------------------------------------------------------------------------
1938936b7f4cSbjh21 */
float64_div(float64 a,float64 b)1939936b7f4cSbjh21 float64 float64_div( float64 a, float64 b )
1940936b7f4cSbjh21 {
1941936b7f4cSbjh21     flag aSign, bSign, zSign;
1942936b7f4cSbjh21     int16 aExp, bExp, zExp;
1943936b7f4cSbjh21     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1944936b7f4cSbjh21     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
1945936b7f4cSbjh21 
1946936b7f4cSbjh21     aSig1 = extractFloat64Frac1( a );
1947936b7f4cSbjh21     aSig0 = extractFloat64Frac0( a );
1948936b7f4cSbjh21     aExp = extractFloat64Exp( a );
1949936b7f4cSbjh21     aSign = extractFloat64Sign( a );
1950936b7f4cSbjh21     bSig1 = extractFloat64Frac1( b );
1951936b7f4cSbjh21     bSig0 = extractFloat64Frac0( b );
1952936b7f4cSbjh21     bExp = extractFloat64Exp( b );
1953936b7f4cSbjh21     bSign = extractFloat64Sign( b );
1954936b7f4cSbjh21     zSign = aSign ^ bSign;
1955936b7f4cSbjh21     if ( aExp == 0x7FF ) {
1956936b7f4cSbjh21         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1957936b7f4cSbjh21         if ( bExp == 0x7FF ) {
1958936b7f4cSbjh21             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1959936b7f4cSbjh21             goto invalid;
1960936b7f4cSbjh21         }
1961936b7f4cSbjh21         return packFloat64( zSign, 0x7FF, 0, 0 );
1962936b7f4cSbjh21     }
1963936b7f4cSbjh21     if ( bExp == 0x7FF ) {
1964936b7f4cSbjh21         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1965936b7f4cSbjh21         return packFloat64( zSign, 0, 0, 0 );
1966936b7f4cSbjh21     }
1967936b7f4cSbjh21     if ( bExp == 0 ) {
1968936b7f4cSbjh21         if ( ( bSig0 | bSig1 ) == 0 ) {
1969936b7f4cSbjh21             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1970936b7f4cSbjh21  invalid:
1971936b7f4cSbjh21                 float_raise( float_flag_invalid );
1972936b7f4cSbjh21                 return float64_default_nan;
1973936b7f4cSbjh21             }
1974936b7f4cSbjh21             float_raise( float_flag_divbyzero );
1975936b7f4cSbjh21             return packFloat64( zSign, 0x7FF, 0, 0 );
1976936b7f4cSbjh21         }
1977936b7f4cSbjh21         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1978936b7f4cSbjh21     }
1979936b7f4cSbjh21     if ( aExp == 0 ) {
1980936b7f4cSbjh21         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1981936b7f4cSbjh21         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1982936b7f4cSbjh21     }
1983936b7f4cSbjh21     zExp = aExp - bExp + 0x3FD;
1984936b7f4cSbjh21     shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );
1985936b7f4cSbjh21     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
1986936b7f4cSbjh21     if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {
1987936b7f4cSbjh21         shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
1988936b7f4cSbjh21         ++zExp;
1989936b7f4cSbjh21     }
1990936b7f4cSbjh21     zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );
1991936b7f4cSbjh21     mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
1992936b7f4cSbjh21     sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
1993936b7f4cSbjh21     while ( (sbits32) rem0 < 0 ) {
1994936b7f4cSbjh21         --zSig0;
1995936b7f4cSbjh21         add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
1996936b7f4cSbjh21     }
1997936b7f4cSbjh21     zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );
1998936b7f4cSbjh21     if ( ( zSig1 & 0x3FF ) <= 4 ) {
1999936b7f4cSbjh21         mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
2000936b7f4cSbjh21         sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
2001936b7f4cSbjh21         while ( (sbits32) rem1 < 0 ) {
2002936b7f4cSbjh21             --zSig1;
2003936b7f4cSbjh21             add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
2004936b7f4cSbjh21         }
2005936b7f4cSbjh21         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2006936b7f4cSbjh21     }
2007936b7f4cSbjh21     shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
2008936b7f4cSbjh21     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
2009936b7f4cSbjh21 
2010936b7f4cSbjh21 }
2011936b7f4cSbjh21 
2012936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC
2013936b7f4cSbjh21 /*
2014936b7f4cSbjh21 -------------------------------------------------------------------------------
2015936b7f4cSbjh21 Returns the remainder of the double-precision floating-point value `a'
2016936b7f4cSbjh21 with respect to the corresponding value `b'.  The operation is performed
2017936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2018936b7f4cSbjh21 -------------------------------------------------------------------------------
2019936b7f4cSbjh21 */
float64_rem(float64 a,float64 b)2020936b7f4cSbjh21 float64 float64_rem( float64 a, float64 b )
2021936b7f4cSbjh21 {
2022936b7f4cSbjh21     flag aSign, bSign, zSign;
2023936b7f4cSbjh21     int16 aExp, bExp, expDiff;
2024936b7f4cSbjh21     bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
2025936b7f4cSbjh21     bits32 allZero, alternateASig0, alternateASig1, sigMean1;
2026936b7f4cSbjh21     sbits32 sigMean0;
2027936b7f4cSbjh21     float64 z;
2028936b7f4cSbjh21 
2029936b7f4cSbjh21     aSig1 = extractFloat64Frac1( a );
2030936b7f4cSbjh21     aSig0 = extractFloat64Frac0( a );
2031936b7f4cSbjh21     aExp = extractFloat64Exp( a );
2032936b7f4cSbjh21     aSign = extractFloat64Sign( a );
2033936b7f4cSbjh21     bSig1 = extractFloat64Frac1( b );
2034936b7f4cSbjh21     bSig0 = extractFloat64Frac0( b );
2035936b7f4cSbjh21     bExp = extractFloat64Exp( b );
2036936b7f4cSbjh21     bSign = extractFloat64Sign( b );
2037936b7f4cSbjh21     if ( aExp == 0x7FF ) {
2038936b7f4cSbjh21         if (    ( aSig0 | aSig1 )
2039936b7f4cSbjh21              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
2040936b7f4cSbjh21             return propagateFloat64NaN( a, b );
2041936b7f4cSbjh21         }
2042936b7f4cSbjh21         goto invalid;
2043936b7f4cSbjh21     }
2044936b7f4cSbjh21     if ( bExp == 0x7FF ) {
2045936b7f4cSbjh21         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
2046936b7f4cSbjh21         return a;
2047936b7f4cSbjh21     }
2048936b7f4cSbjh21     if ( bExp == 0 ) {
2049936b7f4cSbjh21         if ( ( bSig0 | bSig1 ) == 0 ) {
2050936b7f4cSbjh21  invalid:
2051936b7f4cSbjh21             float_raise( float_flag_invalid );
2052936b7f4cSbjh21             return float64_default_nan;
2053936b7f4cSbjh21         }
2054936b7f4cSbjh21         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
2055936b7f4cSbjh21     }
2056936b7f4cSbjh21     if ( aExp == 0 ) {
2057936b7f4cSbjh21         if ( ( aSig0 | aSig1 ) == 0 ) return a;
2058936b7f4cSbjh21         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2059936b7f4cSbjh21     }
2060936b7f4cSbjh21     expDiff = aExp - bExp;
2061936b7f4cSbjh21     if ( expDiff < -1 ) return a;
2062936b7f4cSbjh21     shortShift64Left(
2063936b7f4cSbjh21         aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );
2064936b7f4cSbjh21     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
2065936b7f4cSbjh21     q = le64( bSig0, bSig1, aSig0, aSig1 );
2066936b7f4cSbjh21     if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2067936b7f4cSbjh21     expDiff -= 32;
2068936b7f4cSbjh21     while ( 0 < expDiff ) {
2069936b7f4cSbjh21         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2070936b7f4cSbjh21         q = ( 4 < q ) ? q - 4 : 0;
2071936b7f4cSbjh21         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2072936b7f4cSbjh21         shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );
2073936b7f4cSbjh21         shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );
2074936b7f4cSbjh21         sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );
2075936b7f4cSbjh21         expDiff -= 29;
2076936b7f4cSbjh21     }
2077936b7f4cSbjh21     if ( -32 < expDiff ) {
2078936b7f4cSbjh21         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2079936b7f4cSbjh21         q = ( 4 < q ) ? q - 4 : 0;
2080936b7f4cSbjh21         q >>= - expDiff;
2081936b7f4cSbjh21         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2082936b7f4cSbjh21         expDiff += 24;
2083936b7f4cSbjh21         if ( expDiff < 0 ) {
2084936b7f4cSbjh21             shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
2085936b7f4cSbjh21         }
2086936b7f4cSbjh21         else {
2087936b7f4cSbjh21             shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
2088936b7f4cSbjh21         }
2089936b7f4cSbjh21         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2090936b7f4cSbjh21         sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
2091936b7f4cSbjh21     }
2092936b7f4cSbjh21     else {
2093936b7f4cSbjh21         shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );
2094936b7f4cSbjh21         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2095936b7f4cSbjh21     }
2096936b7f4cSbjh21     do {
2097936b7f4cSbjh21         alternateASig0 = aSig0;
2098936b7f4cSbjh21         alternateASig1 = aSig1;
2099936b7f4cSbjh21         ++q;
2100936b7f4cSbjh21         sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2101936b7f4cSbjh21     } while ( 0 <= (sbits32) aSig0 );
2102936b7f4cSbjh21     add64(
2103936b7f4cSbjh21         aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
2104936b7f4cSbjh21     if (    ( sigMean0 < 0 )
2105936b7f4cSbjh21          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
2106936b7f4cSbjh21         aSig0 = alternateASig0;
2107936b7f4cSbjh21         aSig1 = alternateASig1;
2108936b7f4cSbjh21     }
2109936b7f4cSbjh21     zSign = ( (sbits32) aSig0 < 0 );
2110936b7f4cSbjh21     if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
2111936b7f4cSbjh21     return
2112936b7f4cSbjh21         normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
2113936b7f4cSbjh21 
2114936b7f4cSbjh21 }
2115936b7f4cSbjh21 #endif
2116936b7f4cSbjh21 
2117936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC
2118936b7f4cSbjh21 /*
2119936b7f4cSbjh21 -------------------------------------------------------------------------------
2120936b7f4cSbjh21 Returns the square root of the double-precision floating-point value `a'.
2121936b7f4cSbjh21 The operation is performed according to the IEC/IEEE Standard for Binary
2122936b7f4cSbjh21 Floating-Point Arithmetic.
2123936b7f4cSbjh21 -------------------------------------------------------------------------------
2124936b7f4cSbjh21 */
float64_sqrt(float64 a)2125936b7f4cSbjh21 float64 float64_sqrt( float64 a )
2126936b7f4cSbjh21 {
2127936b7f4cSbjh21     flag aSign;
2128936b7f4cSbjh21     int16 aExp, zExp;
2129936b7f4cSbjh21     bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
2130936b7f4cSbjh21     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
2131936b7f4cSbjh21     float64 z;
2132936b7f4cSbjh21 
2133936b7f4cSbjh21     aSig1 = extractFloat64Frac1( a );
2134936b7f4cSbjh21     aSig0 = extractFloat64Frac0( a );
2135936b7f4cSbjh21     aExp = extractFloat64Exp( a );
2136936b7f4cSbjh21     aSign = extractFloat64Sign( a );
2137936b7f4cSbjh21     if ( aExp == 0x7FF ) {
2138936b7f4cSbjh21         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
2139936b7f4cSbjh21         if ( ! aSign ) return a;
2140936b7f4cSbjh21         goto invalid;
2141936b7f4cSbjh21     }
2142936b7f4cSbjh21     if ( aSign ) {
2143936b7f4cSbjh21         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
2144936b7f4cSbjh21  invalid:
2145936b7f4cSbjh21         float_raise( float_flag_invalid );
2146936b7f4cSbjh21         return float64_default_nan;
2147936b7f4cSbjh21     }
2148936b7f4cSbjh21     if ( aExp == 0 ) {
2149936b7f4cSbjh21         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
2150936b7f4cSbjh21         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2151936b7f4cSbjh21     }
2152936b7f4cSbjh21     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2153936b7f4cSbjh21     aSig0 |= 0x00100000;
2154936b7f4cSbjh21     shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
2155936b7f4cSbjh21     zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
2156936b7f4cSbjh21     if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;
2157936b7f4cSbjh21     doubleZSig0 = zSig0 + zSig0;
2158936b7f4cSbjh21     shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
2159936b7f4cSbjh21     mul32To64( zSig0, zSig0, &term0, &term1 );
2160936b7f4cSbjh21     sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
2161936b7f4cSbjh21     while ( (sbits32) rem0 < 0 ) {
2162936b7f4cSbjh21         --zSig0;
2163936b7f4cSbjh21         doubleZSig0 -= 2;
2164936b7f4cSbjh21         add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
2165936b7f4cSbjh21     }
2166936b7f4cSbjh21     zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
2167936b7f4cSbjh21     if ( ( zSig1 & 0x1FF ) <= 5 ) {
2168936b7f4cSbjh21         if ( zSig1 == 0 ) zSig1 = 1;
2169936b7f4cSbjh21         mul32To64( doubleZSig0, zSig1, &term1, &term2 );
2170936b7f4cSbjh21         sub64( rem1, 0, term1, term2, &rem1, &rem2 );
2171936b7f4cSbjh21         mul32To64( zSig1, zSig1, &term2, &term3 );
2172936b7f4cSbjh21         sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
2173936b7f4cSbjh21         while ( (sbits32) rem1 < 0 ) {
2174936b7f4cSbjh21             --zSig1;
2175936b7f4cSbjh21             shortShift64Left( 0, zSig1, 1, &term2, &term3 );
2176936b7f4cSbjh21             term3 |= 1;
2177936b7f4cSbjh21             term2 |= doubleZSig0;
2178936b7f4cSbjh21             add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
2179936b7f4cSbjh21         }
2180936b7f4cSbjh21         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2181936b7f4cSbjh21     }
2182936b7f4cSbjh21     shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
2183936b7f4cSbjh21     return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
2184936b7f4cSbjh21 
2185936b7f4cSbjh21 }
2186936b7f4cSbjh21 #endif
2187936b7f4cSbjh21 
2188936b7f4cSbjh21 /*
2189936b7f4cSbjh21 -------------------------------------------------------------------------------
2190936b7f4cSbjh21 Returns 1 if the double-precision floating-point value `a' is equal to
2191936b7f4cSbjh21 the corresponding value `b', and 0 otherwise.  The comparison is performed
2192936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2193936b7f4cSbjh21 -------------------------------------------------------------------------------
2194936b7f4cSbjh21 */
float64_eq(float64 a,float64 b)2195936b7f4cSbjh21 flag float64_eq( float64 a, float64 b )
2196936b7f4cSbjh21 {
2197936b7f4cSbjh21 
2198936b7f4cSbjh21     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2199936b7f4cSbjh21               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2200936b7f4cSbjh21          || (    ( extractFloat64Exp( b ) == 0x7FF )
2201936b7f4cSbjh21               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2202936b7f4cSbjh21        ) {
2203936b7f4cSbjh21         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2204936b7f4cSbjh21             float_raise( float_flag_invalid );
2205936b7f4cSbjh21         }
2206936b7f4cSbjh21         return 0;
2207936b7f4cSbjh21     }
2208936b7f4cSbjh21     return ( a == b ) ||
2209936b7f4cSbjh21 	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
2210936b7f4cSbjh21 
2211936b7f4cSbjh21 }
2212936b7f4cSbjh21 
2213936b7f4cSbjh21 /*
2214936b7f4cSbjh21 -------------------------------------------------------------------------------
2215936b7f4cSbjh21 Returns 1 if the double-precision floating-point value `a' is less than
2216936b7f4cSbjh21 or equal to the corresponding value `b', and 0 otherwise.  The comparison
2217936b7f4cSbjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
2218936b7f4cSbjh21 Arithmetic.
2219936b7f4cSbjh21 -------------------------------------------------------------------------------
2220936b7f4cSbjh21 */
float64_le(float64 a,float64 b)2221936b7f4cSbjh21 flag float64_le( float64 a, float64 b )
2222936b7f4cSbjh21 {
2223936b7f4cSbjh21     flag aSign, bSign;
2224936b7f4cSbjh21 
2225936b7f4cSbjh21     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2226936b7f4cSbjh21               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2227936b7f4cSbjh21          || (    ( extractFloat64Exp( b ) == 0x7FF )
2228936b7f4cSbjh21               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2229936b7f4cSbjh21        ) {
2230936b7f4cSbjh21         float_raise( float_flag_invalid );
2231936b7f4cSbjh21         return 0;
2232936b7f4cSbjh21     }
2233936b7f4cSbjh21     aSign = extractFloat64Sign( a );
2234936b7f4cSbjh21     bSign = extractFloat64Sign( b );
2235936b7f4cSbjh21     if ( aSign != bSign )
2236936b7f4cSbjh21 	return aSign ||
2237936b7f4cSbjh21 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
2238936b7f4cSbjh21 	      0 );
2239936b7f4cSbjh21     return ( a == b ) ||
2240936b7f4cSbjh21 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2241936b7f4cSbjh21 }
2242936b7f4cSbjh21 
2243936b7f4cSbjh21 /*
2244936b7f4cSbjh21 -------------------------------------------------------------------------------
2245936b7f4cSbjh21 Returns 1 if the double-precision floating-point value `a' is less than
2246936b7f4cSbjh21 the corresponding value `b', and 0 otherwise.  The comparison is performed
2247936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2248936b7f4cSbjh21 -------------------------------------------------------------------------------
2249936b7f4cSbjh21 */
float64_lt(float64 a,float64 b)2250936b7f4cSbjh21 flag float64_lt( float64 a, float64 b )
2251936b7f4cSbjh21 {
2252936b7f4cSbjh21     flag aSign, bSign;
2253936b7f4cSbjh21 
2254936b7f4cSbjh21     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2255936b7f4cSbjh21               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2256936b7f4cSbjh21          || (    ( extractFloat64Exp( b ) == 0x7FF )
2257936b7f4cSbjh21               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2258936b7f4cSbjh21        ) {
2259936b7f4cSbjh21         float_raise( float_flag_invalid );
2260936b7f4cSbjh21         return 0;
2261936b7f4cSbjh21     }
2262936b7f4cSbjh21     aSign = extractFloat64Sign( a );
2263936b7f4cSbjh21     bSign = extractFloat64Sign( b );
2264936b7f4cSbjh21     if ( aSign != bSign )
2265936b7f4cSbjh21 	return aSign &&
2266936b7f4cSbjh21 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
2267936b7f4cSbjh21 	      0 );
2268936b7f4cSbjh21     return ( a != b ) &&
2269936b7f4cSbjh21 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2270936b7f4cSbjh21 
2271936b7f4cSbjh21 }
2272936b7f4cSbjh21 
2273936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC
2274936b7f4cSbjh21 /*
2275936b7f4cSbjh21 -------------------------------------------------------------------------------
2276936b7f4cSbjh21 Returns 1 if the double-precision floating-point value `a' is equal to
2277936b7f4cSbjh21 the corresponding value `b', and 0 otherwise.  The invalid exception is
2278936b7f4cSbjh21 raised if either operand is a NaN.  Otherwise, the comparison is performed
2279936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2280936b7f4cSbjh21 -------------------------------------------------------------------------------
2281936b7f4cSbjh21 */
float64_eq_signaling(float64 a,float64 b)2282936b7f4cSbjh21 flag float64_eq_signaling( float64 a, float64 b )
2283936b7f4cSbjh21 {
2284936b7f4cSbjh21 
2285936b7f4cSbjh21     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2286936b7f4cSbjh21               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2287936b7f4cSbjh21          || (    ( extractFloat64Exp( b ) == 0x7FF )
2288936b7f4cSbjh21               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2289936b7f4cSbjh21        ) {
2290936b7f4cSbjh21         float_raise( float_flag_invalid );
2291936b7f4cSbjh21         return 0;
2292936b7f4cSbjh21     }
2293936b7f4cSbjh21     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2294936b7f4cSbjh21 
2295936b7f4cSbjh21 }
2296936b7f4cSbjh21 
2297936b7f4cSbjh21 /*
2298936b7f4cSbjh21 -------------------------------------------------------------------------------
2299936b7f4cSbjh21 Returns 1 if the double-precision floating-point value `a' is less than or
2300936b7f4cSbjh21 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2301936b7f4cSbjh21 cause an exception.  Otherwise, the comparison is performed according to the
2302936b7f4cSbjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2303936b7f4cSbjh21 -------------------------------------------------------------------------------
2304936b7f4cSbjh21 */
float64_le_quiet(float64 a,float64 b)2305936b7f4cSbjh21 flag float64_le_quiet( float64 a, float64 b )
2306936b7f4cSbjh21 {
2307936b7f4cSbjh21     flag aSign, bSign;
2308936b7f4cSbjh21 
2309936b7f4cSbjh21     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2310936b7f4cSbjh21               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2311936b7f4cSbjh21          || (    ( extractFloat64Exp( b ) == 0x7FF )
2312936b7f4cSbjh21               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2313936b7f4cSbjh21        ) {
2314936b7f4cSbjh21         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2315936b7f4cSbjh21             float_raise( float_flag_invalid );
2316936b7f4cSbjh21         }
2317936b7f4cSbjh21         return 0;
2318936b7f4cSbjh21     }
2319936b7f4cSbjh21     aSign = extractFloat64Sign( a );
2320936b7f4cSbjh21     bSign = extractFloat64Sign( b );
2321936b7f4cSbjh21     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2322936b7f4cSbjh21     return ( a == b ) || ( aSign ^ ( a < b ) );
2323936b7f4cSbjh21 
2324936b7f4cSbjh21 }
2325936b7f4cSbjh21 
2326936b7f4cSbjh21 /*
2327936b7f4cSbjh21 -------------------------------------------------------------------------------
2328936b7f4cSbjh21 Returns 1 if the double-precision floating-point value `a' is less than
2329936b7f4cSbjh21 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2330936b7f4cSbjh21 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2331936b7f4cSbjh21 Standard for Binary Floating-Point Arithmetic.
2332936b7f4cSbjh21 -------------------------------------------------------------------------------
2333936b7f4cSbjh21 */
float64_lt_quiet(float64 a,float64 b)2334936b7f4cSbjh21 flag float64_lt_quiet( float64 a, float64 b )
2335936b7f4cSbjh21 {
2336936b7f4cSbjh21     flag aSign, bSign;
2337936b7f4cSbjh21 
2338936b7f4cSbjh21     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2339936b7f4cSbjh21               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2340936b7f4cSbjh21          || (    ( extractFloat64Exp( b ) == 0x7FF )
2341936b7f4cSbjh21               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2342936b7f4cSbjh21        ) {
2343936b7f4cSbjh21         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2344936b7f4cSbjh21             float_raise( float_flag_invalid );
2345936b7f4cSbjh21         }
2346936b7f4cSbjh21         return 0;
2347936b7f4cSbjh21     }
2348936b7f4cSbjh21     aSign = extractFloat64Sign( a );
2349936b7f4cSbjh21     bSign = extractFloat64Sign( b );
2350936b7f4cSbjh21     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2351936b7f4cSbjh21     return ( a != b ) && ( aSign ^ ( a < b ) );
2352936b7f4cSbjh21 
2353936b7f4cSbjh21 }
2354936b7f4cSbjh21 
2355936b7f4cSbjh21 #endif
2356