xref: /netbsd/lib/libc/softfloat/bits32/softfloat.c (revision 2cec0187)
1*2cec0187Schristos /* $NetBSD: softfloat.c,v 1.2 2012/03/21 14:17:54 christos Exp $ */
2936b7f4cSbjh21 
3936b7f4cSbjh21 /*
4936b7f4cSbjh21  * This version hacked for use with gcc -msoft-float by bjh21.
5936b7f4cSbjh21  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
6936b7f4cSbjh21  *  itself).
7936b7f4cSbjh21  */
8936b7f4cSbjh21 
9936b7f4cSbjh21 /*
10936b7f4cSbjh21  * Things you may want to define:
11936b7f4cSbjh21  *
12936b7f4cSbjh21  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
13936b7f4cSbjh21  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
14936b7f4cSbjh21  *   properly renamed.
15936b7f4cSbjh21  */
16936b7f4cSbjh21 
17936b7f4cSbjh21 /*
18936b7f4cSbjh21  * This differs from the standard bits32/softfloat.c in that float64
19936b7f4cSbjh21  * is defined to be a 64-bit integer rather than a structure.  The
20936b7f4cSbjh21  * structure is float64s, with translation between the two going via
21936b7f4cSbjh21  * float64u.
22936b7f4cSbjh21  */
23936b7f4cSbjh21 
24936b7f4cSbjh21 /*
25936b7f4cSbjh21 ===============================================================================
26936b7f4cSbjh21 
27936b7f4cSbjh21 This C source file is part of the SoftFloat IEC/IEEE Floating-Point
28936b7f4cSbjh21 Arithmetic Package, Release 2a.
29936b7f4cSbjh21 
30936b7f4cSbjh21 Written by John R. Hauser.  This work was made possible in part by the
31936b7f4cSbjh21 International Computer Science Institute, located at Suite 600, 1947 Center
32936b7f4cSbjh21 Street, Berkeley, California 94704.  Funding was partially provided by the
33936b7f4cSbjh21 National Science Foundation under grant MIP-9311980.  The original version
34936b7f4cSbjh21 of this code was written as part of a project to build a fixed-point vector
35936b7f4cSbjh21 processor in collaboration with the University of California at Berkeley,
36936b7f4cSbjh21 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
37936b7f4cSbjh21 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
38936b7f4cSbjh21 arithmetic/SoftFloat.html'.
39936b7f4cSbjh21 
40936b7f4cSbjh21 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
41936b7f4cSbjh21 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
42936b7f4cSbjh21 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
43936b7f4cSbjh21 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
44936b7f4cSbjh21 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
45936b7f4cSbjh21 
46936b7f4cSbjh21 Derivative works are acceptable, even for commercial purposes, so long as
47936b7f4cSbjh21 (1) they include prominent notice that the work is derivative, and (2) they
48936b7f4cSbjh21 include prominent notice akin to these four paragraphs for those parts of
49936b7f4cSbjh21 this code that are retained.
50936b7f4cSbjh21 
51936b7f4cSbjh21 ===============================================================================
52936b7f4cSbjh21 */
53936b7f4cSbjh21 
54936b7f4cSbjh21 #include <sys/cdefs.h>
55936b7f4cSbjh21 #if defined(LIBC_SCCS) && !defined(lint)
56*2cec0187Schristos __RCSID("$NetBSD: softfloat.c,v 1.2 2012/03/21 14:17:54 christos Exp $");
57936b7f4cSbjh21 #endif /* LIBC_SCCS and not lint */
58936b7f4cSbjh21 
59936b7f4cSbjh21 #ifdef SOFTFLOAT_FOR_GCC
60936b7f4cSbjh21 #include "softfloat-for-gcc.h"
61936b7f4cSbjh21 #endif
62936b7f4cSbjh21 
63936b7f4cSbjh21 #include "milieu.h"
64936b7f4cSbjh21 #include "softfloat.h"
65936b7f4cSbjh21 
66936b7f4cSbjh21 /*
67936b7f4cSbjh21  * Conversions between floats as stored in memory and floats as
68936b7f4cSbjh21  * SoftFloat uses them
69936b7f4cSbjh21  */
70936b7f4cSbjh21 #ifndef FLOAT64_DEMANGLE
71936b7f4cSbjh21 #define FLOAT64_DEMANGLE(a)	(a)
72936b7f4cSbjh21 #endif
73936b7f4cSbjh21 #ifndef FLOAT64_MANGLE
74936b7f4cSbjh21 #define FLOAT64_MANGLE(a)	(a)
75936b7f4cSbjh21 #endif
76936b7f4cSbjh21 
77936b7f4cSbjh21 /*
78936b7f4cSbjh21 -------------------------------------------------------------------------------
79936b7f4cSbjh21 Floating-point rounding mode and exception flags.
80936b7f4cSbjh21 -------------------------------------------------------------------------------
81936b7f4cSbjh21 */
82936b7f4cSbjh21 fp_rnd float_rounding_mode = float_round_nearest_even;
83936b7f4cSbjh21 fp_except float_exception_flags = 0;
84936b7f4cSbjh21 
85936b7f4cSbjh21 /*
86936b7f4cSbjh21 -------------------------------------------------------------------------------
87936b7f4cSbjh21 Primitive arithmetic functions, including multi-word arithmetic, and
88936b7f4cSbjh21 division and square root approximations.  (Can be specialized to target if
89936b7f4cSbjh21 desired.)
90936b7f4cSbjh21 -------------------------------------------------------------------------------
91936b7f4cSbjh21 */
92936b7f4cSbjh21 #include "softfloat-macros"
93936b7f4cSbjh21 
94936b7f4cSbjh21 /*
95936b7f4cSbjh21 -------------------------------------------------------------------------------
96936b7f4cSbjh21 Functions and definitions to determine:  (1) whether tininess for underflow
97936b7f4cSbjh21 is detected before or after rounding by default, (2) what (if anything)
98936b7f4cSbjh21 happens when exceptions are raised, (3) how signaling NaNs are distinguished
99936b7f4cSbjh21 from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
100936b7f4cSbjh21 are propagated from function inputs to output.  These details are target-
101936b7f4cSbjh21 specific.
102936b7f4cSbjh21 -------------------------------------------------------------------------------
103936b7f4cSbjh21 */
104936b7f4cSbjh21 #include "softfloat-specialize"
105936b7f4cSbjh21 
106936b7f4cSbjh21 /*
107936b7f4cSbjh21 -------------------------------------------------------------------------------
108936b7f4cSbjh21 Returns the fraction bits of the single-precision floating-point value `a'.
109936b7f4cSbjh21 -------------------------------------------------------------------------------
110936b7f4cSbjh21 */
111936b7f4cSbjh21 INLINE bits32 extractFloat32Frac( float32 a )
112936b7f4cSbjh21 {
113936b7f4cSbjh21 
114936b7f4cSbjh21     return a & 0x007FFFFF;
115936b7f4cSbjh21 
116936b7f4cSbjh21 }
117936b7f4cSbjh21 
118936b7f4cSbjh21 /*
119936b7f4cSbjh21 -------------------------------------------------------------------------------
120936b7f4cSbjh21 Returns the exponent bits of the single-precision floating-point value `a'.
121936b7f4cSbjh21 -------------------------------------------------------------------------------
122936b7f4cSbjh21 */
123936b7f4cSbjh21 INLINE int16 extractFloat32Exp( float32 a )
124936b7f4cSbjh21 {
125936b7f4cSbjh21 
126936b7f4cSbjh21     return ( a>>23 ) & 0xFF;
127936b7f4cSbjh21 
128936b7f4cSbjh21 }
129936b7f4cSbjh21 
130936b7f4cSbjh21 /*
131936b7f4cSbjh21 -------------------------------------------------------------------------------
132936b7f4cSbjh21 Returns the sign bit of the single-precision floating-point value `a'.
133936b7f4cSbjh21 -------------------------------------------------------------------------------
134936b7f4cSbjh21 */
135936b7f4cSbjh21 INLINE flag extractFloat32Sign( float32 a )
136936b7f4cSbjh21 {
137936b7f4cSbjh21 
138936b7f4cSbjh21     return a>>31;
139936b7f4cSbjh21 
140936b7f4cSbjh21 }
141936b7f4cSbjh21 
142936b7f4cSbjh21 /*
143936b7f4cSbjh21 -------------------------------------------------------------------------------
144936b7f4cSbjh21 Normalizes the subnormal single-precision floating-point value represented
145936b7f4cSbjh21 by the denormalized significand `aSig'.  The normalized exponent and
146936b7f4cSbjh21 significand are stored at the locations pointed to by `zExpPtr' and
147936b7f4cSbjh21 `zSigPtr', respectively.
148936b7f4cSbjh21 -------------------------------------------------------------------------------
149936b7f4cSbjh21 */
150936b7f4cSbjh21 static void
151936b7f4cSbjh21  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
152936b7f4cSbjh21 {
153936b7f4cSbjh21     int8 shiftCount;
154936b7f4cSbjh21 
155936b7f4cSbjh21     shiftCount = countLeadingZeros32( aSig ) - 8;
156936b7f4cSbjh21     *zSigPtr = aSig<<shiftCount;
157936b7f4cSbjh21     *zExpPtr = 1 - shiftCount;
158936b7f4cSbjh21 
159936b7f4cSbjh21 }
160936b7f4cSbjh21 
161936b7f4cSbjh21 /*
162936b7f4cSbjh21 -------------------------------------------------------------------------------
163936b7f4cSbjh21 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
164936b7f4cSbjh21 single-precision floating-point value, returning the result.  After being
165936b7f4cSbjh21 shifted into the proper positions, the three fields are simply added
166936b7f4cSbjh21 together to form the result.  This means that any integer portion of `zSig'
167936b7f4cSbjh21 will be added into the exponent.  Since a properly normalized significand
168936b7f4cSbjh21 will have an integer portion equal to 1, the `zExp' input should be 1 less
169936b7f4cSbjh21 than the desired result exponent whenever `zSig' is a complete, normalized
170936b7f4cSbjh21 significand.
171936b7f4cSbjh21 -------------------------------------------------------------------------------
172936b7f4cSbjh21 */
173936b7f4cSbjh21 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
174936b7f4cSbjh21 {
175936b7f4cSbjh21 
176936b7f4cSbjh21     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
177936b7f4cSbjh21 
178936b7f4cSbjh21 }
179936b7f4cSbjh21 
180936b7f4cSbjh21 /*
181936b7f4cSbjh21 -------------------------------------------------------------------------------
182936b7f4cSbjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
183936b7f4cSbjh21 and significand `zSig', and returns the proper single-precision floating-
184936b7f4cSbjh21 point value corresponding to the abstract input.  Ordinarily, the abstract
185936b7f4cSbjh21 value is simply rounded and packed into the single-precision format, with
186936b7f4cSbjh21 the inexact exception raised if the abstract input cannot be represented
187936b7f4cSbjh21 exactly.  However, if the abstract value is too large, the overflow and
188936b7f4cSbjh21 inexact exceptions are raised and an infinity or maximal finite value is
189936b7f4cSbjh21 returned.  If the abstract value is too small, the input value is rounded to
190936b7f4cSbjh21 a subnormal number, and the underflow and inexact exceptions are raised if
191936b7f4cSbjh21 the abstract input cannot be represented exactly as a subnormal single-
192936b7f4cSbjh21 precision floating-point number.
193936b7f4cSbjh21     The input significand `zSig' has its binary point between bits 30
194936b7f4cSbjh21 and 29, which is 7 bits to the left of the usual location.  This shifted
195936b7f4cSbjh21 significand must be normalized or smaller.  If `zSig' is not normalized,
196936b7f4cSbjh21 `zExp' must be 0; in that case, the result returned is a subnormal number,
197936b7f4cSbjh21 and it must not require rounding.  In the usual case that `zSig' is
198936b7f4cSbjh21 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
199936b7f4cSbjh21 The handling of underflow and overflow follows the IEC/IEEE Standard for
200936b7f4cSbjh21 Binary Floating-Point Arithmetic.
201936b7f4cSbjh21 -------------------------------------------------------------------------------
202936b7f4cSbjh21 */
203936b7f4cSbjh21 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
204936b7f4cSbjh21 {
205936b7f4cSbjh21     int8 roundingMode;
206936b7f4cSbjh21     flag roundNearestEven;
207936b7f4cSbjh21     int8 roundIncrement, roundBits;
208936b7f4cSbjh21     flag isTiny;
209936b7f4cSbjh21 
210936b7f4cSbjh21     roundingMode = float_rounding_mode;
211936b7f4cSbjh21     roundNearestEven = roundingMode == float_round_nearest_even;
212936b7f4cSbjh21     roundIncrement = 0x40;
213936b7f4cSbjh21     if ( ! roundNearestEven ) {
214936b7f4cSbjh21         if ( roundingMode == float_round_to_zero ) {
215936b7f4cSbjh21             roundIncrement = 0;
216936b7f4cSbjh21         }
217936b7f4cSbjh21         else {
218936b7f4cSbjh21             roundIncrement = 0x7F;
219936b7f4cSbjh21             if ( zSign ) {
220936b7f4cSbjh21                 if ( roundingMode == float_round_up ) roundIncrement = 0;
221936b7f4cSbjh21             }
222936b7f4cSbjh21             else {
223936b7f4cSbjh21                 if ( roundingMode == float_round_down ) roundIncrement = 0;
224936b7f4cSbjh21             }
225936b7f4cSbjh21         }
226936b7f4cSbjh21     }
227936b7f4cSbjh21     roundBits = zSig & 0x7F;
228936b7f4cSbjh21     if ( 0xFD <= (bits16) zExp ) {
229936b7f4cSbjh21         if (    ( 0xFD < zExp )
230936b7f4cSbjh21              || (    ( zExp == 0xFD )
231936b7f4cSbjh21                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
232936b7f4cSbjh21            ) {
233936b7f4cSbjh21             float_raise( float_flag_overflow | float_flag_inexact );
234936b7f4cSbjh21             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
235936b7f4cSbjh21         }
236936b7f4cSbjh21         if ( zExp < 0 ) {
237936b7f4cSbjh21             isTiny =
238936b7f4cSbjh21                    ( float_detect_tininess == float_tininess_before_rounding )
239936b7f4cSbjh21                 || ( zExp < -1 )
240*2cec0187Schristos                 || ( zSig + roundIncrement < (uint32)0x80000000 );
241936b7f4cSbjh21             shift32RightJamming( zSig, - zExp, &zSig );
242936b7f4cSbjh21             zExp = 0;
243936b7f4cSbjh21             roundBits = zSig & 0x7F;
244936b7f4cSbjh21             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
245936b7f4cSbjh21         }
246936b7f4cSbjh21     }
247936b7f4cSbjh21     if ( roundBits ) float_exception_flags |= float_flag_inexact;
248936b7f4cSbjh21     zSig = ( zSig + roundIncrement )>>7;
249936b7f4cSbjh21     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
250936b7f4cSbjh21     if ( zSig == 0 ) zExp = 0;
251936b7f4cSbjh21     return packFloat32( zSign, zExp, zSig );
252936b7f4cSbjh21 
253936b7f4cSbjh21 }
254936b7f4cSbjh21 
255936b7f4cSbjh21 /*
256936b7f4cSbjh21 -------------------------------------------------------------------------------
257936b7f4cSbjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
258936b7f4cSbjh21 and significand `zSig', and returns the proper single-precision floating-
259936b7f4cSbjh21 point value corresponding to the abstract input.  This routine is just like
260936b7f4cSbjh21 `roundAndPackFloat32' except that `zSig' does not have to be normalized.
261936b7f4cSbjh21 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
262936b7f4cSbjh21 floating-point exponent.
263936b7f4cSbjh21 -------------------------------------------------------------------------------
264936b7f4cSbjh21 */
265936b7f4cSbjh21 static float32
266936b7f4cSbjh21  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
267936b7f4cSbjh21 {
268936b7f4cSbjh21     int8 shiftCount;
269936b7f4cSbjh21 
270936b7f4cSbjh21     shiftCount = countLeadingZeros32( zSig ) - 1;
271936b7f4cSbjh21     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
272936b7f4cSbjh21 
273936b7f4cSbjh21 }
274936b7f4cSbjh21 
275936b7f4cSbjh21 /*
276936b7f4cSbjh21 -------------------------------------------------------------------------------
277936b7f4cSbjh21 Returns the least-significant 32 fraction bits of the double-precision
278936b7f4cSbjh21 floating-point value `a'.
279936b7f4cSbjh21 -------------------------------------------------------------------------------
280936b7f4cSbjh21 */
281936b7f4cSbjh21 INLINE bits32 extractFloat64Frac1( float64 a )
282936b7f4cSbjh21 {
283936b7f4cSbjh21 
284*2cec0187Schristos     return (bits32)(FLOAT64_DEMANGLE(a) & LIT64(0x00000000FFFFFFFF));
285936b7f4cSbjh21 
286936b7f4cSbjh21 }
287936b7f4cSbjh21 
288936b7f4cSbjh21 /*
289936b7f4cSbjh21 -------------------------------------------------------------------------------
290936b7f4cSbjh21 Returns the most-significant 20 fraction bits of the double-precision
291936b7f4cSbjh21 floating-point value `a'.
292936b7f4cSbjh21 -------------------------------------------------------------------------------
293936b7f4cSbjh21 */
294936b7f4cSbjh21 INLINE bits32 extractFloat64Frac0( float64 a )
295936b7f4cSbjh21 {
296936b7f4cSbjh21 
297*2cec0187Schristos     return (bits32)((FLOAT64_DEMANGLE(a) >> 32) & 0x000FFFFF);
298936b7f4cSbjh21 
299936b7f4cSbjh21 }
300936b7f4cSbjh21 
301936b7f4cSbjh21 /*
302936b7f4cSbjh21 -------------------------------------------------------------------------------
303936b7f4cSbjh21 Returns the exponent bits of the double-precision floating-point value `a'.
304936b7f4cSbjh21 -------------------------------------------------------------------------------
305936b7f4cSbjh21 */
306936b7f4cSbjh21 INLINE int16 extractFloat64Exp( float64 a )
307936b7f4cSbjh21 {
308936b7f4cSbjh21 
309*2cec0187Schristos     return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF);
310936b7f4cSbjh21 
311936b7f4cSbjh21 }
312936b7f4cSbjh21 
313936b7f4cSbjh21 /*
314936b7f4cSbjh21 -------------------------------------------------------------------------------
315936b7f4cSbjh21 Returns the sign bit of the double-precision floating-point value `a'.
316936b7f4cSbjh21 -------------------------------------------------------------------------------
317936b7f4cSbjh21 */
318936b7f4cSbjh21 INLINE flag extractFloat64Sign( float64 a )
319936b7f4cSbjh21 {
320936b7f4cSbjh21 
321*2cec0187Schristos     return (flag)(FLOAT64_DEMANGLE(a) >> 63);
322936b7f4cSbjh21 
323936b7f4cSbjh21 }
324936b7f4cSbjh21 
325936b7f4cSbjh21 /*
326936b7f4cSbjh21 -------------------------------------------------------------------------------
327936b7f4cSbjh21 Normalizes the subnormal double-precision floating-point value represented
328936b7f4cSbjh21 by the denormalized significand formed by the concatenation of `aSig0' and
329936b7f4cSbjh21 `aSig1'.  The normalized exponent is stored at the location pointed to by
330936b7f4cSbjh21 `zExpPtr'.  The most significant 21 bits of the normalized significand are
331936b7f4cSbjh21 stored at the location pointed to by `zSig0Ptr', and the least significant
332936b7f4cSbjh21 32 bits of the normalized significand are stored at the location pointed to
333936b7f4cSbjh21 by `zSig1Ptr'.
334936b7f4cSbjh21 -------------------------------------------------------------------------------
335936b7f4cSbjh21 */
336936b7f4cSbjh21 static void
337936b7f4cSbjh21  normalizeFloat64Subnormal(
338936b7f4cSbjh21      bits32 aSig0,
339936b7f4cSbjh21      bits32 aSig1,
340936b7f4cSbjh21      int16 *zExpPtr,
341936b7f4cSbjh21      bits32 *zSig0Ptr,
342936b7f4cSbjh21      bits32 *zSig1Ptr
343936b7f4cSbjh21  )
344936b7f4cSbjh21 {
345936b7f4cSbjh21     int8 shiftCount;
346936b7f4cSbjh21 
347936b7f4cSbjh21     if ( aSig0 == 0 ) {
348936b7f4cSbjh21         shiftCount = countLeadingZeros32( aSig1 ) - 11;
349936b7f4cSbjh21         if ( shiftCount < 0 ) {
350936b7f4cSbjh21             *zSig0Ptr = aSig1>>( - shiftCount );
351936b7f4cSbjh21             *zSig1Ptr = aSig1<<( shiftCount & 31 );
352936b7f4cSbjh21         }
353936b7f4cSbjh21         else {
354936b7f4cSbjh21             *zSig0Ptr = aSig1<<shiftCount;
355936b7f4cSbjh21             *zSig1Ptr = 0;
356936b7f4cSbjh21         }
357936b7f4cSbjh21         *zExpPtr = - shiftCount - 31;
358936b7f4cSbjh21     }
359936b7f4cSbjh21     else {
360936b7f4cSbjh21         shiftCount = countLeadingZeros32( aSig0 ) - 11;
361936b7f4cSbjh21         shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
362936b7f4cSbjh21         *zExpPtr = 1 - shiftCount;
363936b7f4cSbjh21     }
364936b7f4cSbjh21 
365936b7f4cSbjh21 }
366936b7f4cSbjh21 
367936b7f4cSbjh21 /*
368936b7f4cSbjh21 -------------------------------------------------------------------------------
369936b7f4cSbjh21 Packs the sign `zSign', the exponent `zExp', and the significand formed by
370936b7f4cSbjh21 the concatenation of `zSig0' and `zSig1' into a double-precision floating-
371936b7f4cSbjh21 point value, returning the result.  After being shifted into the proper
372936b7f4cSbjh21 positions, the three fields `zSign', `zExp', and `zSig0' are simply added
373936b7f4cSbjh21 together to form the most significant 32 bits of the result.  This means
374936b7f4cSbjh21 that any integer portion of `zSig0' will be added into the exponent.  Since
375936b7f4cSbjh21 a properly normalized significand will have an integer portion equal to 1,
376936b7f4cSbjh21 the `zExp' input should be 1 less than the desired result exponent whenever
377936b7f4cSbjh21 `zSig0' and `zSig1' concatenated form a complete, normalized significand.
378936b7f4cSbjh21 -------------------------------------------------------------------------------
379936b7f4cSbjh21 */
380936b7f4cSbjh21 INLINE float64
381936b7f4cSbjh21  packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
382936b7f4cSbjh21 {
383936b7f4cSbjh21 
384936b7f4cSbjh21     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
385936b7f4cSbjh21 			   ( ( (bits64) zExp )<<52 ) +
386936b7f4cSbjh21 			   ( ( (bits64) zSig0 )<<32 ) + zSig1 );
387936b7f4cSbjh21 
388936b7f4cSbjh21 
389936b7f4cSbjh21 }
390936b7f4cSbjh21 
391936b7f4cSbjh21 /*
392936b7f4cSbjh21 -------------------------------------------------------------------------------
393936b7f4cSbjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
394936b7f4cSbjh21 and extended significand formed by the concatenation of `zSig0', `zSig1',
395936b7f4cSbjh21 and `zSig2', and returns the proper double-precision floating-point value
396936b7f4cSbjh21 corresponding to the abstract input.  Ordinarily, the abstract value is
397936b7f4cSbjh21 simply rounded and packed into the double-precision format, with the inexact
398936b7f4cSbjh21 exception raised if the abstract input cannot be represented exactly.
399936b7f4cSbjh21 However, if the abstract value is too large, the overflow and inexact
400936b7f4cSbjh21 exceptions are raised and an infinity or maximal finite value is returned.
401936b7f4cSbjh21 If the abstract value is too small, the input value is rounded to a
402936b7f4cSbjh21 subnormal number, and the underflow and inexact exceptions are raised if the
403936b7f4cSbjh21 abstract input cannot be represented exactly as a subnormal double-precision
404936b7f4cSbjh21 floating-point number.
405936b7f4cSbjh21     The input significand must be normalized or smaller.  If the input
406936b7f4cSbjh21 significand is not normalized, `zExp' must be 0; in that case, the result
407936b7f4cSbjh21 returned is a subnormal number, and it must not require rounding.  In the
408936b7f4cSbjh21 usual case that the input significand is normalized, `zExp' must be 1 less
409936b7f4cSbjh21 than the ``true'' floating-point exponent.  The handling of underflow and
410936b7f4cSbjh21 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
411936b7f4cSbjh21 -------------------------------------------------------------------------------
412936b7f4cSbjh21 */
413936b7f4cSbjh21 static float64
414936b7f4cSbjh21  roundAndPackFloat64(
415936b7f4cSbjh21      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )
416936b7f4cSbjh21 {
417936b7f4cSbjh21     int8 roundingMode;
418936b7f4cSbjh21     flag roundNearestEven, increment, isTiny;
419936b7f4cSbjh21 
420936b7f4cSbjh21     roundingMode = float_rounding_mode;
421936b7f4cSbjh21     roundNearestEven = ( roundingMode == float_round_nearest_even );
422936b7f4cSbjh21     increment = ( (sbits32) zSig2 < 0 );
423936b7f4cSbjh21     if ( ! roundNearestEven ) {
424936b7f4cSbjh21         if ( roundingMode == float_round_to_zero ) {
425936b7f4cSbjh21             increment = 0;
426936b7f4cSbjh21         }
427936b7f4cSbjh21         else {
428936b7f4cSbjh21             if ( zSign ) {
429936b7f4cSbjh21                 increment = ( roundingMode == float_round_down ) && zSig2;
430936b7f4cSbjh21             }
431936b7f4cSbjh21             else {
432936b7f4cSbjh21                 increment = ( roundingMode == float_round_up ) && zSig2;
433936b7f4cSbjh21             }
434936b7f4cSbjh21         }
435936b7f4cSbjh21     }
436936b7f4cSbjh21     if ( 0x7FD <= (bits16) zExp ) {
437936b7f4cSbjh21         if (    ( 0x7FD < zExp )
438936b7f4cSbjh21              || (    ( zExp == 0x7FD )
439936b7f4cSbjh21                   && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
440936b7f4cSbjh21                   && increment
441936b7f4cSbjh21                 )
442936b7f4cSbjh21            ) {
443936b7f4cSbjh21             float_raise( float_flag_overflow | float_flag_inexact );
444936b7f4cSbjh21             if (    ( roundingMode == float_round_to_zero )
445936b7f4cSbjh21                  || ( zSign && ( roundingMode == float_round_up ) )
446936b7f4cSbjh21                  || ( ! zSign && ( roundingMode == float_round_down ) )
447936b7f4cSbjh21                ) {
448936b7f4cSbjh21                 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );
449936b7f4cSbjh21             }
450936b7f4cSbjh21             return packFloat64( zSign, 0x7FF, 0, 0 );
451936b7f4cSbjh21         }
452936b7f4cSbjh21         if ( zExp < 0 ) {
453936b7f4cSbjh21             isTiny =
454936b7f4cSbjh21                    ( float_detect_tininess == float_tininess_before_rounding )
455936b7f4cSbjh21                 || ( zExp < -1 )
456936b7f4cSbjh21                 || ! increment
457936b7f4cSbjh21                 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
458936b7f4cSbjh21             shift64ExtraRightJamming(
459936b7f4cSbjh21                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
460936b7f4cSbjh21             zExp = 0;
461936b7f4cSbjh21             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
462936b7f4cSbjh21             if ( roundNearestEven ) {
463936b7f4cSbjh21                 increment = ( (sbits32) zSig2 < 0 );
464936b7f4cSbjh21             }
465936b7f4cSbjh21             else {
466936b7f4cSbjh21                 if ( zSign ) {
467936b7f4cSbjh21                     increment = ( roundingMode == float_round_down ) && zSig2;
468936b7f4cSbjh21                 }
469936b7f4cSbjh21                 else {
470936b7f4cSbjh21                     increment = ( roundingMode == float_round_up ) && zSig2;
471936b7f4cSbjh21                 }
472936b7f4cSbjh21             }
473936b7f4cSbjh21         }
474936b7f4cSbjh21     }
475936b7f4cSbjh21     if ( zSig2 ) float_exception_flags |= float_flag_inexact;
476936b7f4cSbjh21     if ( increment ) {
477936b7f4cSbjh21         add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
478936b7f4cSbjh21         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
479936b7f4cSbjh21     }
480936b7f4cSbjh21     else {
481936b7f4cSbjh21         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
482936b7f4cSbjh21     }
483936b7f4cSbjh21     return packFloat64( zSign, zExp, zSig0, zSig1 );
484936b7f4cSbjh21 
485936b7f4cSbjh21 }
486936b7f4cSbjh21 
487936b7f4cSbjh21 /*
488936b7f4cSbjh21 -------------------------------------------------------------------------------
489936b7f4cSbjh21 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
490936b7f4cSbjh21 and significand formed by the concatenation of `zSig0' and `zSig1', and
491936b7f4cSbjh21 returns the proper double-precision floating-point value corresponding
492936b7f4cSbjh21 to the abstract input.  This routine is just like `roundAndPackFloat64'
493936b7f4cSbjh21 except that the input significand has fewer bits and does not have to be
494936b7f4cSbjh21 normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
495936b7f4cSbjh21 point exponent.
496936b7f4cSbjh21 -------------------------------------------------------------------------------
497936b7f4cSbjh21 */
498936b7f4cSbjh21 static float64
499936b7f4cSbjh21  normalizeRoundAndPackFloat64(
500936b7f4cSbjh21      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
501936b7f4cSbjh21 {
502936b7f4cSbjh21     int8 shiftCount;
503936b7f4cSbjh21     bits32 zSig2;
504936b7f4cSbjh21 
505936b7f4cSbjh21     if ( zSig0 == 0 ) {
506936b7f4cSbjh21         zSig0 = zSig1;
507936b7f4cSbjh21         zSig1 = 0;
508936b7f4cSbjh21         zExp -= 32;
509936b7f4cSbjh21     }
510936b7f4cSbjh21     shiftCount = countLeadingZeros32( zSig0 ) - 11;
511936b7f4cSbjh21     if ( 0 <= shiftCount ) {
512936b7f4cSbjh21         zSig2 = 0;
513936b7f4cSbjh21         shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
514936b7f4cSbjh21     }
515936b7f4cSbjh21     else {
516936b7f4cSbjh21         shift64ExtraRightJamming(
517936b7f4cSbjh21             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
518936b7f4cSbjh21     }
519936b7f4cSbjh21     zExp -= shiftCount;
520936b7f4cSbjh21     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
521936b7f4cSbjh21 
522936b7f4cSbjh21 }
523936b7f4cSbjh21 
524936b7f4cSbjh21 /*
525936b7f4cSbjh21 -------------------------------------------------------------------------------
526936b7f4cSbjh21 Returns the result of converting the 32-bit two's complement integer `a' to
527936b7f4cSbjh21 the single-precision floating-point format.  The conversion is performed
528936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
529936b7f4cSbjh21 -------------------------------------------------------------------------------
530936b7f4cSbjh21 */
531936b7f4cSbjh21 float32 int32_to_float32( int32 a )
532936b7f4cSbjh21 {
533936b7f4cSbjh21     flag zSign;
534936b7f4cSbjh21 
535936b7f4cSbjh21     if ( a == 0 ) return 0;
536936b7f4cSbjh21     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
537936b7f4cSbjh21     zSign = ( a < 0 );
538*2cec0187Schristos     return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a));
539936b7f4cSbjh21 
540936b7f4cSbjh21 }
541936b7f4cSbjh21 
542936b7f4cSbjh21 /*
543936b7f4cSbjh21 -------------------------------------------------------------------------------
544936b7f4cSbjh21 Returns the result of converting the 32-bit two's complement integer `a' to
545936b7f4cSbjh21 the double-precision floating-point format.  The conversion is performed
546936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
547936b7f4cSbjh21 -------------------------------------------------------------------------------
548936b7f4cSbjh21 */
549936b7f4cSbjh21 float64 int32_to_float64( int32 a )
550936b7f4cSbjh21 {
551936b7f4cSbjh21     flag zSign;
552936b7f4cSbjh21     bits32 absA;
553936b7f4cSbjh21     int8 shiftCount;
554936b7f4cSbjh21     bits32 zSig0, zSig1;
555936b7f4cSbjh21 
556936b7f4cSbjh21     if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );
557936b7f4cSbjh21     zSign = ( a < 0 );
558936b7f4cSbjh21     absA = zSign ? - a : a;
559936b7f4cSbjh21     shiftCount = countLeadingZeros32( absA ) - 11;
560936b7f4cSbjh21     if ( 0 <= shiftCount ) {
561936b7f4cSbjh21         zSig0 = absA<<shiftCount;
562936b7f4cSbjh21         zSig1 = 0;
563936b7f4cSbjh21     }
564936b7f4cSbjh21     else {
565936b7f4cSbjh21         shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );
566936b7f4cSbjh21     }
567936b7f4cSbjh21     return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );
568936b7f4cSbjh21 
569936b7f4cSbjh21 }
570936b7f4cSbjh21 
571936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC
572936b7f4cSbjh21 /*
573936b7f4cSbjh21 -------------------------------------------------------------------------------
574936b7f4cSbjh21 Returns the result of converting the single-precision floating-point value
575936b7f4cSbjh21 `a' to the 32-bit two's complement integer format.  The conversion is
576936b7f4cSbjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
577936b7f4cSbjh21 Arithmetic---which means in particular that the conversion is rounded
578936b7f4cSbjh21 according to the current rounding mode.  If `a' is a NaN, the largest
579936b7f4cSbjh21 positive integer is returned.  Otherwise, if the conversion overflows, the
580936b7f4cSbjh21 largest integer with the same sign as `a' is returned.
581936b7f4cSbjh21 -------------------------------------------------------------------------------
582936b7f4cSbjh21 */
583936b7f4cSbjh21 int32 float32_to_int32( float32 a )
584936b7f4cSbjh21 {
585936b7f4cSbjh21     flag aSign;
586936b7f4cSbjh21     int16 aExp, shiftCount;
587936b7f4cSbjh21     bits32 aSig, aSigExtra;
588936b7f4cSbjh21     int32 z;
589936b7f4cSbjh21     int8 roundingMode;
590936b7f4cSbjh21 
591936b7f4cSbjh21     aSig = extractFloat32Frac( a );
592936b7f4cSbjh21     aExp = extractFloat32Exp( a );
593936b7f4cSbjh21     aSign = extractFloat32Sign( a );
594936b7f4cSbjh21     shiftCount = aExp - 0x96;
595936b7f4cSbjh21     if ( 0 <= shiftCount ) {
596936b7f4cSbjh21         if ( 0x9E <= aExp ) {
597936b7f4cSbjh21             if ( a != 0xCF000000 ) {
598936b7f4cSbjh21                 float_raise( float_flag_invalid );
599936b7f4cSbjh21                 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
600936b7f4cSbjh21                     return 0x7FFFFFFF;
601936b7f4cSbjh21                 }
602936b7f4cSbjh21             }
603936b7f4cSbjh21             return (sbits32) 0x80000000;
604936b7f4cSbjh21         }
605936b7f4cSbjh21         z = ( aSig | 0x00800000 )<<shiftCount;
606936b7f4cSbjh21         if ( aSign ) z = - z;
607936b7f4cSbjh21     }
608936b7f4cSbjh21     else {
609936b7f4cSbjh21         if ( aExp < 0x7E ) {
610936b7f4cSbjh21             aSigExtra = aExp | aSig;
611936b7f4cSbjh21             z = 0;
612936b7f4cSbjh21         }
613936b7f4cSbjh21         else {
614936b7f4cSbjh21             aSig |= 0x00800000;
615936b7f4cSbjh21             aSigExtra = aSig<<( shiftCount & 31 );
616936b7f4cSbjh21             z = aSig>>( - shiftCount );
617936b7f4cSbjh21         }
618936b7f4cSbjh21         if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
619936b7f4cSbjh21         roundingMode = float_rounding_mode;
620936b7f4cSbjh21         if ( roundingMode == float_round_nearest_even ) {
621936b7f4cSbjh21             if ( (sbits32) aSigExtra < 0 ) {
622936b7f4cSbjh21                 ++z;
623936b7f4cSbjh21                 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;
624936b7f4cSbjh21             }
625936b7f4cSbjh21             if ( aSign ) z = - z;
626936b7f4cSbjh21         }
627936b7f4cSbjh21         else {
628936b7f4cSbjh21             aSigExtra = ( aSigExtra != 0 );
629936b7f4cSbjh21             if ( aSign ) {
630936b7f4cSbjh21                 z += ( roundingMode == float_round_down ) & aSigExtra;
631936b7f4cSbjh21                 z = - z;
632936b7f4cSbjh21             }
633936b7f4cSbjh21             else {
634936b7f4cSbjh21                 z += ( roundingMode == float_round_up ) & aSigExtra;
635936b7f4cSbjh21             }
636936b7f4cSbjh21         }
637936b7f4cSbjh21     }
638936b7f4cSbjh21     return z;
639936b7f4cSbjh21 
640936b7f4cSbjh21 }
641936b7f4cSbjh21 #endif
642936b7f4cSbjh21 
643936b7f4cSbjh21 /*
644936b7f4cSbjh21 -------------------------------------------------------------------------------
645936b7f4cSbjh21 Returns the result of converting the single-precision floating-point value
646936b7f4cSbjh21 `a' to the 32-bit two's complement integer format.  The conversion is
647936b7f4cSbjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
648936b7f4cSbjh21 Arithmetic, except that the conversion is always rounded toward zero.
649936b7f4cSbjh21 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
650936b7f4cSbjh21 the conversion overflows, the largest integer with the same sign as `a' is
651936b7f4cSbjh21 returned.
652936b7f4cSbjh21 -------------------------------------------------------------------------------
653936b7f4cSbjh21 */
654936b7f4cSbjh21 int32 float32_to_int32_round_to_zero( float32 a )
655936b7f4cSbjh21 {
656936b7f4cSbjh21     flag aSign;
657936b7f4cSbjh21     int16 aExp, shiftCount;
658936b7f4cSbjh21     bits32 aSig;
659936b7f4cSbjh21     int32 z;
660936b7f4cSbjh21 
661936b7f4cSbjh21     aSig = extractFloat32Frac( a );
662936b7f4cSbjh21     aExp = extractFloat32Exp( a );
663936b7f4cSbjh21     aSign = extractFloat32Sign( a );
664936b7f4cSbjh21     shiftCount = aExp - 0x9E;
665936b7f4cSbjh21     if ( 0 <= shiftCount ) {
666936b7f4cSbjh21         if ( a != 0xCF000000 ) {
667936b7f4cSbjh21             float_raise( float_flag_invalid );
668936b7f4cSbjh21             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
669936b7f4cSbjh21         }
670936b7f4cSbjh21         return (sbits32) 0x80000000;
671936b7f4cSbjh21     }
672936b7f4cSbjh21     else if ( aExp <= 0x7E ) {
673936b7f4cSbjh21         if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
674936b7f4cSbjh21         return 0;
675936b7f4cSbjh21     }
676936b7f4cSbjh21     aSig = ( aSig | 0x00800000 )<<8;
677936b7f4cSbjh21     z = aSig>>( - shiftCount );
678936b7f4cSbjh21     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
679936b7f4cSbjh21         float_exception_flags |= float_flag_inexact;
680936b7f4cSbjh21     }
681936b7f4cSbjh21     if ( aSign ) z = - z;
682936b7f4cSbjh21     return z;
683936b7f4cSbjh21 
684936b7f4cSbjh21 }
685936b7f4cSbjh21 
686936b7f4cSbjh21 /*
687936b7f4cSbjh21 -------------------------------------------------------------------------------
688936b7f4cSbjh21 Returns the result of converting the single-precision floating-point value
689936b7f4cSbjh21 `a' to the double-precision floating-point format.  The conversion is
690936b7f4cSbjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
691936b7f4cSbjh21 Arithmetic.
692936b7f4cSbjh21 -------------------------------------------------------------------------------
693936b7f4cSbjh21 */
694936b7f4cSbjh21 float64 float32_to_float64( float32 a )
695936b7f4cSbjh21 {
696936b7f4cSbjh21     flag aSign;
697936b7f4cSbjh21     int16 aExp;
698936b7f4cSbjh21     bits32 aSig, zSig0, zSig1;
699936b7f4cSbjh21 
700936b7f4cSbjh21     aSig = extractFloat32Frac( a );
701936b7f4cSbjh21     aExp = extractFloat32Exp( a );
702936b7f4cSbjh21     aSign = extractFloat32Sign( a );
703936b7f4cSbjh21     if ( aExp == 0xFF ) {
704936b7f4cSbjh21         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
705936b7f4cSbjh21         return packFloat64( aSign, 0x7FF, 0, 0 );
706936b7f4cSbjh21     }
707936b7f4cSbjh21     if ( aExp == 0 ) {
708936b7f4cSbjh21         if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );
709936b7f4cSbjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
710936b7f4cSbjh21         --aExp;
711936b7f4cSbjh21     }
712936b7f4cSbjh21     shift64Right( aSig, 0, 3, &zSig0, &zSig1 );
713936b7f4cSbjh21     return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );
714936b7f4cSbjh21 
715936b7f4cSbjh21 }
716936b7f4cSbjh21 
717936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC
718936b7f4cSbjh21 /*
719936b7f4cSbjh21 -------------------------------------------------------------------------------
720936b7f4cSbjh21 Rounds the single-precision floating-point value `a' to an integer,
721936b7f4cSbjh21 and returns the result as a single-precision floating-point value.  The
722936b7f4cSbjh21 operation is performed according to the IEC/IEEE Standard for Binary
723936b7f4cSbjh21 Floating-Point Arithmetic.
724936b7f4cSbjh21 -------------------------------------------------------------------------------
725936b7f4cSbjh21 */
726936b7f4cSbjh21 float32 float32_round_to_int( float32 a )
727936b7f4cSbjh21 {
728936b7f4cSbjh21     flag aSign;
729936b7f4cSbjh21     int16 aExp;
730936b7f4cSbjh21     bits32 lastBitMask, roundBitsMask;
731936b7f4cSbjh21     int8 roundingMode;
732936b7f4cSbjh21     float32 z;
733936b7f4cSbjh21 
734936b7f4cSbjh21     aExp = extractFloat32Exp( a );
735936b7f4cSbjh21     if ( 0x96 <= aExp ) {
736936b7f4cSbjh21         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
737936b7f4cSbjh21             return propagateFloat32NaN( a, a );
738936b7f4cSbjh21         }
739936b7f4cSbjh21         return a;
740936b7f4cSbjh21     }
741936b7f4cSbjh21     if ( aExp <= 0x7E ) {
742936b7f4cSbjh21         if ( (bits32) ( a<<1 ) == 0 ) return a;
743936b7f4cSbjh21         float_exception_flags |= float_flag_inexact;
744936b7f4cSbjh21         aSign = extractFloat32Sign( a );
745936b7f4cSbjh21         switch ( float_rounding_mode ) {
746936b7f4cSbjh21          case float_round_nearest_even:
747936b7f4cSbjh21             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
748936b7f4cSbjh21                 return packFloat32( aSign, 0x7F, 0 );
749936b7f4cSbjh21             }
750936b7f4cSbjh21             break;
751936b7f4cSbjh21 	 case float_round_to_zero:
752936b7f4cSbjh21 	    break;
753936b7f4cSbjh21          case float_round_down:
754936b7f4cSbjh21             return aSign ? 0xBF800000 : 0;
755936b7f4cSbjh21          case float_round_up:
756936b7f4cSbjh21             return aSign ? 0x80000000 : 0x3F800000;
757936b7f4cSbjh21         }
758936b7f4cSbjh21         return packFloat32( aSign, 0, 0 );
759936b7f4cSbjh21     }
760936b7f4cSbjh21     lastBitMask = 1;
761936b7f4cSbjh21     lastBitMask <<= 0x96 - aExp;
762936b7f4cSbjh21     roundBitsMask = lastBitMask - 1;
763936b7f4cSbjh21     z = a;
764936b7f4cSbjh21     roundingMode = float_rounding_mode;
765936b7f4cSbjh21     if ( roundingMode == float_round_nearest_even ) {
766936b7f4cSbjh21         z += lastBitMask>>1;
767936b7f4cSbjh21         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
768936b7f4cSbjh21     }
769936b7f4cSbjh21     else if ( roundingMode != float_round_to_zero ) {
770936b7f4cSbjh21         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
771936b7f4cSbjh21             z += roundBitsMask;
772936b7f4cSbjh21         }
773936b7f4cSbjh21     }
774936b7f4cSbjh21     z &= ~ roundBitsMask;
775936b7f4cSbjh21     if ( z != a ) float_exception_flags |= float_flag_inexact;
776936b7f4cSbjh21     return z;
777936b7f4cSbjh21 
778936b7f4cSbjh21 }
779936b7f4cSbjh21 #endif
780936b7f4cSbjh21 
781936b7f4cSbjh21 /*
782936b7f4cSbjh21 -------------------------------------------------------------------------------
783936b7f4cSbjh21 Returns the result of adding the absolute values of the single-precision
784936b7f4cSbjh21 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
785936b7f4cSbjh21 before being returned.  `zSign' is ignored if the result is a NaN.
786936b7f4cSbjh21 The addition is performed according to the IEC/IEEE Standard for Binary
787936b7f4cSbjh21 Floating-Point Arithmetic.
788936b7f4cSbjh21 -------------------------------------------------------------------------------
789936b7f4cSbjh21 */
790936b7f4cSbjh21 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
791936b7f4cSbjh21 {
792936b7f4cSbjh21     int16 aExp, bExp, zExp;
793936b7f4cSbjh21     bits32 aSig, bSig, zSig;
794936b7f4cSbjh21     int16 expDiff;
795936b7f4cSbjh21 
796936b7f4cSbjh21     aSig = extractFloat32Frac( a );
797936b7f4cSbjh21     aExp = extractFloat32Exp( a );
798936b7f4cSbjh21     bSig = extractFloat32Frac( b );
799936b7f4cSbjh21     bExp = extractFloat32Exp( b );
800936b7f4cSbjh21     expDiff = aExp - bExp;
801936b7f4cSbjh21     aSig <<= 6;
802936b7f4cSbjh21     bSig <<= 6;
803936b7f4cSbjh21     if ( 0 < expDiff ) {
804936b7f4cSbjh21         if ( aExp == 0xFF ) {
805936b7f4cSbjh21             if ( aSig ) return propagateFloat32NaN( a, b );
806936b7f4cSbjh21             return a;
807936b7f4cSbjh21         }
808936b7f4cSbjh21         if ( bExp == 0 ) {
809936b7f4cSbjh21             --expDiff;
810936b7f4cSbjh21         }
811936b7f4cSbjh21         else {
812936b7f4cSbjh21             bSig |= 0x20000000;
813936b7f4cSbjh21         }
814936b7f4cSbjh21         shift32RightJamming( bSig, expDiff, &bSig );
815936b7f4cSbjh21         zExp = aExp;
816936b7f4cSbjh21     }
817936b7f4cSbjh21     else if ( expDiff < 0 ) {
818936b7f4cSbjh21         if ( bExp == 0xFF ) {
819936b7f4cSbjh21             if ( bSig ) return propagateFloat32NaN( a, b );
820936b7f4cSbjh21             return packFloat32( zSign, 0xFF, 0 );
821936b7f4cSbjh21         }
822936b7f4cSbjh21         if ( aExp == 0 ) {
823936b7f4cSbjh21             ++expDiff;
824936b7f4cSbjh21         }
825936b7f4cSbjh21         else {
826936b7f4cSbjh21             aSig |= 0x20000000;
827936b7f4cSbjh21         }
828936b7f4cSbjh21         shift32RightJamming( aSig, - expDiff, &aSig );
829936b7f4cSbjh21         zExp = bExp;
830936b7f4cSbjh21     }
831936b7f4cSbjh21     else {
832936b7f4cSbjh21         if ( aExp == 0xFF ) {
833936b7f4cSbjh21             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
834936b7f4cSbjh21             return a;
835936b7f4cSbjh21         }
836936b7f4cSbjh21         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
837936b7f4cSbjh21         zSig = 0x40000000 + aSig + bSig;
838936b7f4cSbjh21         zExp = aExp;
839936b7f4cSbjh21         goto roundAndPack;
840936b7f4cSbjh21     }
841936b7f4cSbjh21     aSig |= 0x20000000;
842936b7f4cSbjh21     zSig = ( aSig + bSig )<<1;
843936b7f4cSbjh21     --zExp;
844936b7f4cSbjh21     if ( (sbits32) zSig < 0 ) {
845936b7f4cSbjh21         zSig = aSig + bSig;
846936b7f4cSbjh21         ++zExp;
847936b7f4cSbjh21     }
848936b7f4cSbjh21  roundAndPack:
849936b7f4cSbjh21     return roundAndPackFloat32( zSign, zExp, zSig );
850936b7f4cSbjh21 
851936b7f4cSbjh21 }
852936b7f4cSbjh21 
853936b7f4cSbjh21 /*
854936b7f4cSbjh21 -------------------------------------------------------------------------------
855936b7f4cSbjh21 Returns the result of subtracting the absolute values of the single-
856936b7f4cSbjh21 precision floating-point values `a' and `b'.  If `zSign' is 1, the
857936b7f4cSbjh21 difference is negated before being returned.  `zSign' is ignored if the
858936b7f4cSbjh21 result is a NaN.  The subtraction is performed according to the IEC/IEEE
859936b7f4cSbjh21 Standard for Binary Floating-Point Arithmetic.
860936b7f4cSbjh21 -------------------------------------------------------------------------------
861936b7f4cSbjh21 */
862936b7f4cSbjh21 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
863936b7f4cSbjh21 {
864936b7f4cSbjh21     int16 aExp, bExp, zExp;
865936b7f4cSbjh21     bits32 aSig, bSig, zSig;
866936b7f4cSbjh21     int16 expDiff;
867936b7f4cSbjh21 
868936b7f4cSbjh21     aSig = extractFloat32Frac( a );
869936b7f4cSbjh21     aExp = extractFloat32Exp( a );
870936b7f4cSbjh21     bSig = extractFloat32Frac( b );
871936b7f4cSbjh21     bExp = extractFloat32Exp( b );
872936b7f4cSbjh21     expDiff = aExp - bExp;
873936b7f4cSbjh21     aSig <<= 7;
874936b7f4cSbjh21     bSig <<= 7;
875936b7f4cSbjh21     if ( 0 < expDiff ) goto aExpBigger;
876936b7f4cSbjh21     if ( expDiff < 0 ) goto bExpBigger;
877936b7f4cSbjh21     if ( aExp == 0xFF ) {
878936b7f4cSbjh21         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
879936b7f4cSbjh21         float_raise( float_flag_invalid );
880936b7f4cSbjh21         return float32_default_nan;
881936b7f4cSbjh21     }
882936b7f4cSbjh21     if ( aExp == 0 ) {
883936b7f4cSbjh21         aExp = 1;
884936b7f4cSbjh21         bExp = 1;
885936b7f4cSbjh21     }
886936b7f4cSbjh21     if ( bSig < aSig ) goto aBigger;
887936b7f4cSbjh21     if ( aSig < bSig ) goto bBigger;
888936b7f4cSbjh21     return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
889936b7f4cSbjh21  bExpBigger:
890936b7f4cSbjh21     if ( bExp == 0xFF ) {
891936b7f4cSbjh21         if ( bSig ) return propagateFloat32NaN( a, b );
892936b7f4cSbjh21         return packFloat32( zSign ^ 1, 0xFF, 0 );
893936b7f4cSbjh21     }
894936b7f4cSbjh21     if ( aExp == 0 ) {
895936b7f4cSbjh21         ++expDiff;
896936b7f4cSbjh21     }
897936b7f4cSbjh21     else {
898936b7f4cSbjh21         aSig |= 0x40000000;
899936b7f4cSbjh21     }
900936b7f4cSbjh21     shift32RightJamming( aSig, - expDiff, &aSig );
901936b7f4cSbjh21     bSig |= 0x40000000;
902936b7f4cSbjh21  bBigger:
903936b7f4cSbjh21     zSig = bSig - aSig;
904936b7f4cSbjh21     zExp = bExp;
905936b7f4cSbjh21     zSign ^= 1;
906936b7f4cSbjh21     goto normalizeRoundAndPack;
907936b7f4cSbjh21  aExpBigger:
908936b7f4cSbjh21     if ( aExp == 0xFF ) {
909936b7f4cSbjh21         if ( aSig ) return propagateFloat32NaN( a, b );
910936b7f4cSbjh21         return a;
911936b7f4cSbjh21     }
912936b7f4cSbjh21     if ( bExp == 0 ) {
913936b7f4cSbjh21         --expDiff;
914936b7f4cSbjh21     }
915936b7f4cSbjh21     else {
916936b7f4cSbjh21         bSig |= 0x40000000;
917936b7f4cSbjh21     }
918936b7f4cSbjh21     shift32RightJamming( bSig, expDiff, &bSig );
919936b7f4cSbjh21     aSig |= 0x40000000;
920936b7f4cSbjh21  aBigger:
921936b7f4cSbjh21     zSig = aSig - bSig;
922936b7f4cSbjh21     zExp = aExp;
923936b7f4cSbjh21  normalizeRoundAndPack:
924936b7f4cSbjh21     --zExp;
925936b7f4cSbjh21     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
926936b7f4cSbjh21 
927936b7f4cSbjh21 }
928936b7f4cSbjh21 
929936b7f4cSbjh21 /*
930936b7f4cSbjh21 -------------------------------------------------------------------------------
931936b7f4cSbjh21 Returns the result of adding the single-precision floating-point values `a'
932936b7f4cSbjh21 and `b'.  The operation is performed according to the IEC/IEEE Standard for
933936b7f4cSbjh21 Binary Floating-Point Arithmetic.
934936b7f4cSbjh21 -------------------------------------------------------------------------------
935936b7f4cSbjh21 */
936936b7f4cSbjh21 float32 float32_add( float32 a, float32 b )
937936b7f4cSbjh21 {
938936b7f4cSbjh21     flag aSign, bSign;
939936b7f4cSbjh21 
940936b7f4cSbjh21     aSign = extractFloat32Sign( a );
941936b7f4cSbjh21     bSign = extractFloat32Sign( b );
942936b7f4cSbjh21     if ( aSign == bSign ) {
943936b7f4cSbjh21         return addFloat32Sigs( a, b, aSign );
944936b7f4cSbjh21     }
945936b7f4cSbjh21     else {
946936b7f4cSbjh21         return subFloat32Sigs( a, b, aSign );
947936b7f4cSbjh21     }
948936b7f4cSbjh21 
949936b7f4cSbjh21 }
950936b7f4cSbjh21 
951936b7f4cSbjh21 /*
952936b7f4cSbjh21 -------------------------------------------------------------------------------
953936b7f4cSbjh21 Returns the result of subtracting the single-precision floating-point values
954936b7f4cSbjh21 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
955936b7f4cSbjh21 for Binary Floating-Point Arithmetic.
956936b7f4cSbjh21 -------------------------------------------------------------------------------
957936b7f4cSbjh21 */
958936b7f4cSbjh21 float32 float32_sub( float32 a, float32 b )
959936b7f4cSbjh21 {
960936b7f4cSbjh21     flag aSign, bSign;
961936b7f4cSbjh21 
962936b7f4cSbjh21     aSign = extractFloat32Sign( a );
963936b7f4cSbjh21     bSign = extractFloat32Sign( b );
964936b7f4cSbjh21     if ( aSign == bSign ) {
965936b7f4cSbjh21         return subFloat32Sigs( a, b, aSign );
966936b7f4cSbjh21     }
967936b7f4cSbjh21     else {
968936b7f4cSbjh21         return addFloat32Sigs( a, b, aSign );
969936b7f4cSbjh21     }
970936b7f4cSbjh21 
971936b7f4cSbjh21 }
972936b7f4cSbjh21 
973936b7f4cSbjh21 /*
974936b7f4cSbjh21 -------------------------------------------------------------------------------
975936b7f4cSbjh21 Returns the result of multiplying the single-precision floating-point values
976936b7f4cSbjh21 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
977936b7f4cSbjh21 for Binary Floating-Point Arithmetic.
978936b7f4cSbjh21 -------------------------------------------------------------------------------
979936b7f4cSbjh21 */
980936b7f4cSbjh21 float32 float32_mul( float32 a, float32 b )
981936b7f4cSbjh21 {
982936b7f4cSbjh21     flag aSign, bSign, zSign;
983936b7f4cSbjh21     int16 aExp, bExp, zExp;
984936b7f4cSbjh21     bits32 aSig, bSig, zSig0, zSig1;
985936b7f4cSbjh21 
986936b7f4cSbjh21     aSig = extractFloat32Frac( a );
987936b7f4cSbjh21     aExp = extractFloat32Exp( a );
988936b7f4cSbjh21     aSign = extractFloat32Sign( a );
989936b7f4cSbjh21     bSig = extractFloat32Frac( b );
990936b7f4cSbjh21     bExp = extractFloat32Exp( b );
991936b7f4cSbjh21     bSign = extractFloat32Sign( b );
992936b7f4cSbjh21     zSign = aSign ^ bSign;
993936b7f4cSbjh21     if ( aExp == 0xFF ) {
994936b7f4cSbjh21         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
995936b7f4cSbjh21             return propagateFloat32NaN( a, b );
996936b7f4cSbjh21         }
997936b7f4cSbjh21         if ( ( bExp | bSig ) == 0 ) {
998936b7f4cSbjh21             float_raise( float_flag_invalid );
999936b7f4cSbjh21             return float32_default_nan;
1000936b7f4cSbjh21         }
1001936b7f4cSbjh21         return packFloat32( zSign, 0xFF, 0 );
1002936b7f4cSbjh21     }
1003936b7f4cSbjh21     if ( bExp == 0xFF ) {
1004936b7f4cSbjh21         if ( bSig ) return propagateFloat32NaN( a, b );
1005936b7f4cSbjh21         if ( ( aExp | aSig ) == 0 ) {
1006936b7f4cSbjh21             float_raise( float_flag_invalid );
1007936b7f4cSbjh21             return float32_default_nan;
1008936b7f4cSbjh21         }
1009936b7f4cSbjh21         return packFloat32( zSign, 0xFF, 0 );
1010936b7f4cSbjh21     }
1011936b7f4cSbjh21     if ( aExp == 0 ) {
1012936b7f4cSbjh21         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1013936b7f4cSbjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1014936b7f4cSbjh21     }
1015936b7f4cSbjh21     if ( bExp == 0 ) {
1016936b7f4cSbjh21         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1017936b7f4cSbjh21         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1018936b7f4cSbjh21     }
1019936b7f4cSbjh21     zExp = aExp + bExp - 0x7F;
1020936b7f4cSbjh21     aSig = ( aSig | 0x00800000 )<<7;
1021936b7f4cSbjh21     bSig = ( bSig | 0x00800000 )<<8;
1022936b7f4cSbjh21     mul32To64( aSig, bSig, &zSig0, &zSig1 );
1023936b7f4cSbjh21     zSig0 |= ( zSig1 != 0 );
1024936b7f4cSbjh21     if ( 0 <= (sbits32) ( zSig0<<1 ) ) {
1025936b7f4cSbjh21         zSig0 <<= 1;
1026936b7f4cSbjh21         --zExp;
1027936b7f4cSbjh21     }
1028936b7f4cSbjh21     return roundAndPackFloat32( zSign, zExp, zSig0 );
1029936b7f4cSbjh21 
1030936b7f4cSbjh21 }
1031936b7f4cSbjh21 
1032936b7f4cSbjh21 /*
1033936b7f4cSbjh21 -------------------------------------------------------------------------------
1034936b7f4cSbjh21 Returns the result of dividing the single-precision floating-point value `a'
1035936b7f4cSbjh21 by the corresponding value `b'.  The operation is performed according to the
1036936b7f4cSbjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1037936b7f4cSbjh21 -------------------------------------------------------------------------------
1038936b7f4cSbjh21 */
1039936b7f4cSbjh21 float32 float32_div( float32 a, float32 b )
1040936b7f4cSbjh21 {
1041936b7f4cSbjh21     flag aSign, bSign, zSign;
1042936b7f4cSbjh21     int16 aExp, bExp, zExp;
1043936b7f4cSbjh21     bits32 aSig, bSig, zSig, rem0, rem1, term0, term1;
1044936b7f4cSbjh21 
1045936b7f4cSbjh21     aSig = extractFloat32Frac( a );
1046936b7f4cSbjh21     aExp = extractFloat32Exp( a );
1047936b7f4cSbjh21     aSign = extractFloat32Sign( a );
1048936b7f4cSbjh21     bSig = extractFloat32Frac( b );
1049936b7f4cSbjh21     bExp = extractFloat32Exp( b );
1050936b7f4cSbjh21     bSign = extractFloat32Sign( b );
1051936b7f4cSbjh21     zSign = aSign ^ bSign;
1052936b7f4cSbjh21     if ( aExp == 0xFF ) {
1053936b7f4cSbjh21         if ( aSig ) return propagateFloat32NaN( a, b );
1054936b7f4cSbjh21         if ( bExp == 0xFF ) {
1055936b7f4cSbjh21             if ( bSig ) return propagateFloat32NaN( a, b );
1056936b7f4cSbjh21             float_raise( float_flag_invalid );
1057936b7f4cSbjh21             return float32_default_nan;
1058936b7f4cSbjh21         }
1059936b7f4cSbjh21         return packFloat32( zSign, 0xFF, 0 );
1060936b7f4cSbjh21     }
1061936b7f4cSbjh21     if ( bExp == 0xFF ) {
1062936b7f4cSbjh21         if ( bSig ) return propagateFloat32NaN( a, b );
1063936b7f4cSbjh21         return packFloat32( zSign, 0, 0 );
1064936b7f4cSbjh21     }
1065936b7f4cSbjh21     if ( bExp == 0 ) {
1066936b7f4cSbjh21         if ( bSig == 0 ) {
1067936b7f4cSbjh21             if ( ( aExp | aSig ) == 0 ) {
1068936b7f4cSbjh21                 float_raise( float_flag_invalid );
1069936b7f4cSbjh21                 return float32_default_nan;
1070936b7f4cSbjh21             }
1071936b7f4cSbjh21             float_raise( float_flag_divbyzero );
1072936b7f4cSbjh21             return packFloat32( zSign, 0xFF, 0 );
1073936b7f4cSbjh21         }
1074936b7f4cSbjh21         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1075936b7f4cSbjh21     }
1076936b7f4cSbjh21     if ( aExp == 0 ) {
1077936b7f4cSbjh21         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1078936b7f4cSbjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1079936b7f4cSbjh21     }
1080936b7f4cSbjh21     zExp = aExp - bExp + 0x7D;
1081936b7f4cSbjh21     aSig = ( aSig | 0x00800000 )<<7;
1082936b7f4cSbjh21     bSig = ( bSig | 0x00800000 )<<8;
1083936b7f4cSbjh21     if ( bSig <= ( aSig + aSig ) ) {
1084936b7f4cSbjh21         aSig >>= 1;
1085936b7f4cSbjh21         ++zExp;
1086936b7f4cSbjh21     }
1087936b7f4cSbjh21     zSig = estimateDiv64To32( aSig, 0, bSig );
1088936b7f4cSbjh21     if ( ( zSig & 0x3F ) <= 2 ) {
1089936b7f4cSbjh21         mul32To64( bSig, zSig, &term0, &term1 );
1090936b7f4cSbjh21         sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1091936b7f4cSbjh21         while ( (sbits32) rem0 < 0 ) {
1092936b7f4cSbjh21             --zSig;
1093936b7f4cSbjh21             add64( rem0, rem1, 0, bSig, &rem0, &rem1 );
1094936b7f4cSbjh21         }
1095936b7f4cSbjh21         zSig |= ( rem1 != 0 );
1096936b7f4cSbjh21     }
1097936b7f4cSbjh21     return roundAndPackFloat32( zSign, zExp, zSig );
1098936b7f4cSbjh21 
1099936b7f4cSbjh21 }
1100936b7f4cSbjh21 
1101936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC
1102936b7f4cSbjh21 /*
1103936b7f4cSbjh21 -------------------------------------------------------------------------------
1104936b7f4cSbjh21 Returns the remainder of the single-precision floating-point value `a'
1105936b7f4cSbjh21 with respect to the corresponding value `b'.  The operation is performed
1106936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1107936b7f4cSbjh21 -------------------------------------------------------------------------------
1108936b7f4cSbjh21 */
1109936b7f4cSbjh21 float32 float32_rem( float32 a, float32 b )
1110936b7f4cSbjh21 {
1111936b7f4cSbjh21     flag aSign, bSign, zSign;
1112936b7f4cSbjh21     int16 aExp, bExp, expDiff;
1113936b7f4cSbjh21     bits32 aSig, bSig, q, allZero, alternateASig;
1114936b7f4cSbjh21     sbits32 sigMean;
1115936b7f4cSbjh21 
1116936b7f4cSbjh21     aSig = extractFloat32Frac( a );
1117936b7f4cSbjh21     aExp = extractFloat32Exp( a );
1118936b7f4cSbjh21     aSign = extractFloat32Sign( a );
1119936b7f4cSbjh21     bSig = extractFloat32Frac( b );
1120936b7f4cSbjh21     bExp = extractFloat32Exp( b );
1121936b7f4cSbjh21     bSign = extractFloat32Sign( b );
1122936b7f4cSbjh21     if ( aExp == 0xFF ) {
1123936b7f4cSbjh21         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1124936b7f4cSbjh21             return propagateFloat32NaN( a, b );
1125936b7f4cSbjh21         }
1126936b7f4cSbjh21         float_raise( float_flag_invalid );
1127936b7f4cSbjh21         return float32_default_nan;
1128936b7f4cSbjh21     }
1129936b7f4cSbjh21     if ( bExp == 0xFF ) {
1130936b7f4cSbjh21         if ( bSig ) return propagateFloat32NaN( a, b );
1131936b7f4cSbjh21         return a;
1132936b7f4cSbjh21     }
1133936b7f4cSbjh21     if ( bExp == 0 ) {
1134936b7f4cSbjh21         if ( bSig == 0 ) {
1135936b7f4cSbjh21             float_raise( float_flag_invalid );
1136936b7f4cSbjh21             return float32_default_nan;
1137936b7f4cSbjh21         }
1138936b7f4cSbjh21         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1139936b7f4cSbjh21     }
1140936b7f4cSbjh21     if ( aExp == 0 ) {
1141936b7f4cSbjh21         if ( aSig == 0 ) return a;
1142936b7f4cSbjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1143936b7f4cSbjh21     }
1144936b7f4cSbjh21     expDiff = aExp - bExp;
1145936b7f4cSbjh21     aSig = ( aSig | 0x00800000 )<<8;
1146936b7f4cSbjh21     bSig = ( bSig | 0x00800000 )<<8;
1147936b7f4cSbjh21     if ( expDiff < 0 ) {
1148936b7f4cSbjh21         if ( expDiff < -1 ) return a;
1149936b7f4cSbjh21         aSig >>= 1;
1150936b7f4cSbjh21     }
1151936b7f4cSbjh21     q = ( bSig <= aSig );
1152936b7f4cSbjh21     if ( q ) aSig -= bSig;
1153936b7f4cSbjh21     expDiff -= 32;
1154936b7f4cSbjh21     while ( 0 < expDiff ) {
1155936b7f4cSbjh21         q = estimateDiv64To32( aSig, 0, bSig );
1156936b7f4cSbjh21         q = ( 2 < q ) ? q - 2 : 0;
1157936b7f4cSbjh21         aSig = - ( ( bSig>>2 ) * q );
1158936b7f4cSbjh21         expDiff -= 30;
1159936b7f4cSbjh21     }
1160936b7f4cSbjh21     expDiff += 32;
1161936b7f4cSbjh21     if ( 0 < expDiff ) {
1162936b7f4cSbjh21         q = estimateDiv64To32( aSig, 0, bSig );
1163936b7f4cSbjh21         q = ( 2 < q ) ? q - 2 : 0;
1164936b7f4cSbjh21         q >>= 32 - expDiff;
1165936b7f4cSbjh21         bSig >>= 2;
1166936b7f4cSbjh21         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1167936b7f4cSbjh21     }
1168936b7f4cSbjh21     else {
1169936b7f4cSbjh21         aSig >>= 2;
1170936b7f4cSbjh21         bSig >>= 2;
1171936b7f4cSbjh21     }
1172936b7f4cSbjh21     do {
1173936b7f4cSbjh21         alternateASig = aSig;
1174936b7f4cSbjh21         ++q;
1175936b7f4cSbjh21         aSig -= bSig;
1176936b7f4cSbjh21     } while ( 0 <= (sbits32) aSig );
1177936b7f4cSbjh21     sigMean = aSig + alternateASig;
1178936b7f4cSbjh21     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1179936b7f4cSbjh21         aSig = alternateASig;
1180936b7f4cSbjh21     }
1181936b7f4cSbjh21     zSign = ( (sbits32) aSig < 0 );
1182936b7f4cSbjh21     if ( zSign ) aSig = - aSig;
1183936b7f4cSbjh21     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
1184936b7f4cSbjh21 
1185936b7f4cSbjh21 }
1186936b7f4cSbjh21 #endif
1187936b7f4cSbjh21 
1188936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC
1189936b7f4cSbjh21 /*
1190936b7f4cSbjh21 -------------------------------------------------------------------------------
1191936b7f4cSbjh21 Returns the square root of the single-precision floating-point value `a'.
1192936b7f4cSbjh21 The operation is performed according to the IEC/IEEE Standard for Binary
1193936b7f4cSbjh21 Floating-Point Arithmetic.
1194936b7f4cSbjh21 -------------------------------------------------------------------------------
1195936b7f4cSbjh21 */
1196936b7f4cSbjh21 float32 float32_sqrt( float32 a )
1197936b7f4cSbjh21 {
1198936b7f4cSbjh21     flag aSign;
1199936b7f4cSbjh21     int16 aExp, zExp;
1200936b7f4cSbjh21     bits32 aSig, zSig, rem0, rem1, term0, term1;
1201936b7f4cSbjh21 
1202936b7f4cSbjh21     aSig = extractFloat32Frac( a );
1203936b7f4cSbjh21     aExp = extractFloat32Exp( a );
1204936b7f4cSbjh21     aSign = extractFloat32Sign( a );
1205936b7f4cSbjh21     if ( aExp == 0xFF ) {
1206936b7f4cSbjh21         if ( aSig ) return propagateFloat32NaN( a, 0 );
1207936b7f4cSbjh21         if ( ! aSign ) return a;
1208936b7f4cSbjh21         float_raise( float_flag_invalid );
1209936b7f4cSbjh21         return float32_default_nan;
1210936b7f4cSbjh21     }
1211936b7f4cSbjh21     if ( aSign ) {
1212936b7f4cSbjh21         if ( ( aExp | aSig ) == 0 ) return a;
1213936b7f4cSbjh21         float_raise( float_flag_invalid );
1214936b7f4cSbjh21         return float32_default_nan;
1215936b7f4cSbjh21     }
1216936b7f4cSbjh21     if ( aExp == 0 ) {
1217936b7f4cSbjh21         if ( aSig == 0 ) return 0;
1218936b7f4cSbjh21         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1219936b7f4cSbjh21     }
1220936b7f4cSbjh21     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1221936b7f4cSbjh21     aSig = ( aSig | 0x00800000 )<<8;
1222936b7f4cSbjh21     zSig = estimateSqrt32( aExp, aSig ) + 2;
1223936b7f4cSbjh21     if ( ( zSig & 0x7F ) <= 5 ) {
1224936b7f4cSbjh21         if ( zSig < 2 ) {
1225936b7f4cSbjh21             zSig = 0x7FFFFFFF;
1226936b7f4cSbjh21             goto roundAndPack;
1227936b7f4cSbjh21         }
1228936b7f4cSbjh21         else {
1229936b7f4cSbjh21             aSig >>= aExp & 1;
1230936b7f4cSbjh21             mul32To64( zSig, zSig, &term0, &term1 );
1231936b7f4cSbjh21             sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1232936b7f4cSbjh21             while ( (sbits32) rem0 < 0 ) {
1233936b7f4cSbjh21                 --zSig;
1234936b7f4cSbjh21                 shortShift64Left( 0, zSig, 1, &term0, &term1 );
1235936b7f4cSbjh21                 term1 |= 1;
1236936b7f4cSbjh21                 add64( rem0, rem1, term0, term1, &rem0, &rem1 );
1237936b7f4cSbjh21             }
1238936b7f4cSbjh21             zSig |= ( ( rem0 | rem1 ) != 0 );
1239936b7f4cSbjh21         }
1240936b7f4cSbjh21     }
1241936b7f4cSbjh21     shift32RightJamming( zSig, 1, &zSig );
1242936b7f4cSbjh21  roundAndPack:
1243936b7f4cSbjh21     return roundAndPackFloat32( 0, zExp, zSig );
1244936b7f4cSbjh21 
1245936b7f4cSbjh21 }
1246936b7f4cSbjh21 #endif
1247936b7f4cSbjh21 
1248936b7f4cSbjh21 /*
1249936b7f4cSbjh21 -------------------------------------------------------------------------------
1250936b7f4cSbjh21 Returns 1 if the single-precision floating-point value `a' is equal to
1251936b7f4cSbjh21 the corresponding value `b', and 0 otherwise.  The comparison is performed
1252936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1253936b7f4cSbjh21 -------------------------------------------------------------------------------
1254936b7f4cSbjh21 */
1255936b7f4cSbjh21 flag float32_eq( float32 a, float32 b )
1256936b7f4cSbjh21 {
1257936b7f4cSbjh21 
1258936b7f4cSbjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1259936b7f4cSbjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1260936b7f4cSbjh21        ) {
1261936b7f4cSbjh21         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1262936b7f4cSbjh21             float_raise( float_flag_invalid );
1263936b7f4cSbjh21         }
1264936b7f4cSbjh21         return 0;
1265936b7f4cSbjh21     }
1266936b7f4cSbjh21     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1267936b7f4cSbjh21 
1268936b7f4cSbjh21 }
1269936b7f4cSbjh21 
1270936b7f4cSbjh21 /*
1271936b7f4cSbjh21 -------------------------------------------------------------------------------
1272936b7f4cSbjh21 Returns 1 if the single-precision floating-point value `a' is less than
1273936b7f4cSbjh21 or equal to the corresponding value `b', and 0 otherwise.  The comparison
1274936b7f4cSbjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
1275936b7f4cSbjh21 Arithmetic.
1276936b7f4cSbjh21 -------------------------------------------------------------------------------
1277936b7f4cSbjh21 */
1278936b7f4cSbjh21 flag float32_le( float32 a, float32 b )
1279936b7f4cSbjh21 {
1280936b7f4cSbjh21     flag aSign, bSign;
1281936b7f4cSbjh21 
1282936b7f4cSbjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1283936b7f4cSbjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1284936b7f4cSbjh21        ) {
1285936b7f4cSbjh21         float_raise( float_flag_invalid );
1286936b7f4cSbjh21         return 0;
1287936b7f4cSbjh21     }
1288936b7f4cSbjh21     aSign = extractFloat32Sign( a );
1289936b7f4cSbjh21     bSign = extractFloat32Sign( b );
1290936b7f4cSbjh21     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1291936b7f4cSbjh21     return ( a == b ) || ( aSign ^ ( a < b ) );
1292936b7f4cSbjh21 
1293936b7f4cSbjh21 }
1294936b7f4cSbjh21 
1295936b7f4cSbjh21 /*
1296936b7f4cSbjh21 -------------------------------------------------------------------------------
1297936b7f4cSbjh21 Returns 1 if the single-precision floating-point value `a' is less than
1298936b7f4cSbjh21 the corresponding value `b', and 0 otherwise.  The comparison is performed
1299936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1300936b7f4cSbjh21 -------------------------------------------------------------------------------
1301936b7f4cSbjh21 */
1302936b7f4cSbjh21 flag float32_lt( float32 a, float32 b )
1303936b7f4cSbjh21 {
1304936b7f4cSbjh21     flag aSign, bSign;
1305936b7f4cSbjh21 
1306936b7f4cSbjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1307936b7f4cSbjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1308936b7f4cSbjh21        ) {
1309936b7f4cSbjh21         float_raise( float_flag_invalid );
1310936b7f4cSbjh21         return 0;
1311936b7f4cSbjh21     }
1312936b7f4cSbjh21     aSign = extractFloat32Sign( a );
1313936b7f4cSbjh21     bSign = extractFloat32Sign( b );
1314936b7f4cSbjh21     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1315936b7f4cSbjh21     return ( a != b ) && ( aSign ^ ( a < b ) );
1316936b7f4cSbjh21 
1317936b7f4cSbjh21 }
1318936b7f4cSbjh21 
1319936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1320936b7f4cSbjh21 /*
1321936b7f4cSbjh21 -------------------------------------------------------------------------------
1322936b7f4cSbjh21 Returns 1 if the single-precision floating-point value `a' is equal to
1323936b7f4cSbjh21 the corresponding value `b', and 0 otherwise.  The invalid exception is
1324936b7f4cSbjh21 raised if either operand is a NaN.  Otherwise, the comparison is performed
1325936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1326936b7f4cSbjh21 -------------------------------------------------------------------------------
1327936b7f4cSbjh21 */
1328936b7f4cSbjh21 flag float32_eq_signaling( float32 a, float32 b )
1329936b7f4cSbjh21 {
1330936b7f4cSbjh21 
1331936b7f4cSbjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1332936b7f4cSbjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1333936b7f4cSbjh21        ) {
1334936b7f4cSbjh21         float_raise( float_flag_invalid );
1335936b7f4cSbjh21         return 0;
1336936b7f4cSbjh21     }
1337936b7f4cSbjh21     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1338936b7f4cSbjh21 
1339936b7f4cSbjh21 }
1340936b7f4cSbjh21 
1341936b7f4cSbjh21 /*
1342936b7f4cSbjh21 -------------------------------------------------------------------------------
1343936b7f4cSbjh21 Returns 1 if the single-precision floating-point value `a' is less than or
1344936b7f4cSbjh21 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
1345936b7f4cSbjh21 cause an exception.  Otherwise, the comparison is performed according to the
1346936b7f4cSbjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1347936b7f4cSbjh21 -------------------------------------------------------------------------------
1348936b7f4cSbjh21 */
1349936b7f4cSbjh21 flag float32_le_quiet( float32 a, float32 b )
1350936b7f4cSbjh21 {
1351936b7f4cSbjh21     flag aSign, bSign;
1352936b7f4cSbjh21     int16 aExp, bExp;
1353936b7f4cSbjh21 
1354936b7f4cSbjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1355936b7f4cSbjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1356936b7f4cSbjh21        ) {
1357936b7f4cSbjh21         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1358936b7f4cSbjh21             float_raise( float_flag_invalid );
1359936b7f4cSbjh21         }
1360936b7f4cSbjh21         return 0;
1361936b7f4cSbjh21     }
1362936b7f4cSbjh21     aSign = extractFloat32Sign( a );
1363936b7f4cSbjh21     bSign = extractFloat32Sign( b );
1364936b7f4cSbjh21     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1365936b7f4cSbjh21     return ( a == b ) || ( aSign ^ ( a < b ) );
1366936b7f4cSbjh21 
1367936b7f4cSbjh21 }
1368936b7f4cSbjh21 
1369936b7f4cSbjh21 /*
1370936b7f4cSbjh21 -------------------------------------------------------------------------------
1371936b7f4cSbjh21 Returns 1 if the single-precision floating-point value `a' is less than
1372936b7f4cSbjh21 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
1373936b7f4cSbjh21 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
1374936b7f4cSbjh21 Standard for Binary Floating-Point Arithmetic.
1375936b7f4cSbjh21 -------------------------------------------------------------------------------
1376936b7f4cSbjh21 */
1377936b7f4cSbjh21 flag float32_lt_quiet( float32 a, float32 b )
1378936b7f4cSbjh21 {
1379936b7f4cSbjh21     flag aSign, bSign;
1380936b7f4cSbjh21 
1381936b7f4cSbjh21     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1382936b7f4cSbjh21          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1383936b7f4cSbjh21        ) {
1384936b7f4cSbjh21         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1385936b7f4cSbjh21             float_raise( float_flag_invalid );
1386936b7f4cSbjh21         }
1387936b7f4cSbjh21         return 0;
1388936b7f4cSbjh21     }
1389936b7f4cSbjh21     aSign = extractFloat32Sign( a );
1390936b7f4cSbjh21     bSign = extractFloat32Sign( b );
1391936b7f4cSbjh21     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1392936b7f4cSbjh21     return ( a != b ) && ( aSign ^ ( a < b ) );
1393936b7f4cSbjh21 
1394936b7f4cSbjh21 }
1395936b7f4cSbjh21 #endif /* !SOFTFLOAT_FOR_GCC */
1396936b7f4cSbjh21 
1397936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1398936b7f4cSbjh21 /*
1399936b7f4cSbjh21 -------------------------------------------------------------------------------
1400936b7f4cSbjh21 Returns the result of converting the double-precision floating-point value
1401936b7f4cSbjh21 `a' to the 32-bit two's complement integer format.  The conversion is
1402936b7f4cSbjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
1403936b7f4cSbjh21 Arithmetic---which means in particular that the conversion is rounded
1404936b7f4cSbjh21 according to the current rounding mode.  If `a' is a NaN, the largest
1405936b7f4cSbjh21 positive integer is returned.  Otherwise, if the conversion overflows, the
1406936b7f4cSbjh21 largest integer with the same sign as `a' is returned.
1407936b7f4cSbjh21 -------------------------------------------------------------------------------
1408936b7f4cSbjh21 */
1409936b7f4cSbjh21 int32 float64_to_int32( float64 a )
1410936b7f4cSbjh21 {
1411936b7f4cSbjh21     flag aSign;
1412936b7f4cSbjh21     int16 aExp, shiftCount;
1413936b7f4cSbjh21     bits32 aSig0, aSig1, absZ, aSigExtra;
1414936b7f4cSbjh21     int32 z;
1415936b7f4cSbjh21     int8 roundingMode;
1416936b7f4cSbjh21 
1417936b7f4cSbjh21     aSig1 = extractFloat64Frac1( a );
1418936b7f4cSbjh21     aSig0 = extractFloat64Frac0( a );
1419936b7f4cSbjh21     aExp = extractFloat64Exp( a );
1420936b7f4cSbjh21     aSign = extractFloat64Sign( a );
1421936b7f4cSbjh21     shiftCount = aExp - 0x413;
1422936b7f4cSbjh21     if ( 0 <= shiftCount ) {
1423936b7f4cSbjh21         if ( 0x41E < aExp ) {
1424936b7f4cSbjh21             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1425936b7f4cSbjh21             goto invalid;
1426936b7f4cSbjh21         }
1427936b7f4cSbjh21         shortShift64Left(
1428936b7f4cSbjh21             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1429936b7f4cSbjh21         if ( 0x80000000 < absZ ) goto invalid;
1430936b7f4cSbjh21     }
1431936b7f4cSbjh21     else {
1432936b7f4cSbjh21         aSig1 = ( aSig1 != 0 );
1433936b7f4cSbjh21         if ( aExp < 0x3FE ) {
1434936b7f4cSbjh21             aSigExtra = aExp | aSig0 | aSig1;
1435936b7f4cSbjh21             absZ = 0;
1436936b7f4cSbjh21         }
1437936b7f4cSbjh21         else {
1438936b7f4cSbjh21             aSig0 |= 0x00100000;
1439936b7f4cSbjh21             aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1440936b7f4cSbjh21             absZ = aSig0>>( - shiftCount );
1441936b7f4cSbjh21         }
1442936b7f4cSbjh21     }
1443936b7f4cSbjh21     roundingMode = float_rounding_mode;
1444936b7f4cSbjh21     if ( roundingMode == float_round_nearest_even ) {
1445936b7f4cSbjh21         if ( (sbits32) aSigExtra < 0 ) {
1446936b7f4cSbjh21             ++absZ;
1447936b7f4cSbjh21             if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
1448936b7f4cSbjh21         }
1449936b7f4cSbjh21         z = aSign ? - absZ : absZ;
1450936b7f4cSbjh21     }
1451936b7f4cSbjh21     else {
1452936b7f4cSbjh21         aSigExtra = ( aSigExtra != 0 );
1453936b7f4cSbjh21         if ( aSign ) {
1454936b7f4cSbjh21             z = - (   absZ
1455936b7f4cSbjh21                     + ( ( roundingMode == float_round_down ) & aSigExtra ) );
1456936b7f4cSbjh21         }
1457936b7f4cSbjh21         else {
1458936b7f4cSbjh21             z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
1459936b7f4cSbjh21         }
1460936b7f4cSbjh21     }
1461936b7f4cSbjh21     if ( ( aSign ^ ( z < 0 ) ) && z ) {
1462936b7f4cSbjh21  invalid:
1463936b7f4cSbjh21         float_raise( float_flag_invalid );
1464936b7f4cSbjh21         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1465936b7f4cSbjh21     }
1466936b7f4cSbjh21     if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
1467936b7f4cSbjh21     return z;
1468936b7f4cSbjh21 
1469936b7f4cSbjh21 }
1470936b7f4cSbjh21 #endif /* !SOFTFLOAT_FOR_GCC */
1471936b7f4cSbjh21 
1472936b7f4cSbjh21 /*
1473936b7f4cSbjh21 -------------------------------------------------------------------------------
1474936b7f4cSbjh21 Returns the result of converting the double-precision floating-point value
1475936b7f4cSbjh21 `a' to the 32-bit two's complement integer format.  The conversion is
1476936b7f4cSbjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
1477936b7f4cSbjh21 Arithmetic, except that the conversion is always rounded toward zero.
1478936b7f4cSbjh21 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1479936b7f4cSbjh21 the conversion overflows, the largest integer with the same sign as `a' is
1480936b7f4cSbjh21 returned.
1481936b7f4cSbjh21 -------------------------------------------------------------------------------
1482936b7f4cSbjh21 */
1483936b7f4cSbjh21 int32 float64_to_int32_round_to_zero( float64 a )
1484936b7f4cSbjh21 {
1485936b7f4cSbjh21     flag aSign;
1486936b7f4cSbjh21     int16 aExp, shiftCount;
1487936b7f4cSbjh21     bits32 aSig0, aSig1, absZ, aSigExtra;
1488936b7f4cSbjh21     int32 z;
1489936b7f4cSbjh21 
1490936b7f4cSbjh21     aSig1 = extractFloat64Frac1( a );
1491936b7f4cSbjh21     aSig0 = extractFloat64Frac0( a );
1492936b7f4cSbjh21     aExp = extractFloat64Exp( a );
1493936b7f4cSbjh21     aSign = extractFloat64Sign( a );
1494936b7f4cSbjh21     shiftCount = aExp - 0x413;
1495936b7f4cSbjh21     if ( 0 <= shiftCount ) {
1496936b7f4cSbjh21         if ( 0x41E < aExp ) {
1497936b7f4cSbjh21             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1498936b7f4cSbjh21             goto invalid;
1499936b7f4cSbjh21         }
1500936b7f4cSbjh21         shortShift64Left(
1501936b7f4cSbjh21             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1502936b7f4cSbjh21     }
1503936b7f4cSbjh21     else {
1504936b7f4cSbjh21         if ( aExp < 0x3FF ) {
1505936b7f4cSbjh21             if ( aExp | aSig0 | aSig1 ) {
1506936b7f4cSbjh21                 float_exception_flags |= float_flag_inexact;
1507936b7f4cSbjh21             }
1508936b7f4cSbjh21             return 0;
1509936b7f4cSbjh21         }
1510936b7f4cSbjh21         aSig0 |= 0x00100000;
1511936b7f4cSbjh21         aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1512936b7f4cSbjh21         absZ = aSig0>>( - shiftCount );
1513936b7f4cSbjh21     }
1514936b7f4cSbjh21     z = aSign ? - absZ : absZ;
1515936b7f4cSbjh21     if ( ( aSign ^ ( z < 0 ) ) && z ) {
1516936b7f4cSbjh21  invalid:
1517936b7f4cSbjh21         float_raise( float_flag_invalid );
1518936b7f4cSbjh21         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1519936b7f4cSbjh21     }
1520936b7f4cSbjh21     if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
1521936b7f4cSbjh21     return z;
1522936b7f4cSbjh21 
1523936b7f4cSbjh21 }
1524936b7f4cSbjh21 
1525936b7f4cSbjh21 /*
1526936b7f4cSbjh21 -------------------------------------------------------------------------------
1527936b7f4cSbjh21 Returns the result of converting the double-precision floating-point value
1528936b7f4cSbjh21 `a' to the single-precision floating-point format.  The conversion is
1529936b7f4cSbjh21 performed according to the IEC/IEEE Standard for Binary Floating-Point
1530936b7f4cSbjh21 Arithmetic.
1531936b7f4cSbjh21 -------------------------------------------------------------------------------
1532936b7f4cSbjh21 */
1533936b7f4cSbjh21 float32 float64_to_float32( float64 a )
1534936b7f4cSbjh21 {
1535936b7f4cSbjh21     flag aSign;
1536936b7f4cSbjh21     int16 aExp;
1537936b7f4cSbjh21     bits32 aSig0, aSig1, zSig;
1538936b7f4cSbjh21     bits32 allZero;
1539936b7f4cSbjh21 
1540936b7f4cSbjh21     aSig1 = extractFloat64Frac1( a );
1541936b7f4cSbjh21     aSig0 = extractFloat64Frac0( a );
1542936b7f4cSbjh21     aExp = extractFloat64Exp( a );
1543936b7f4cSbjh21     aSign = extractFloat64Sign( a );
1544936b7f4cSbjh21     if ( aExp == 0x7FF ) {
1545936b7f4cSbjh21         if ( aSig0 | aSig1 ) {
1546936b7f4cSbjh21             return commonNaNToFloat32( float64ToCommonNaN( a ) );
1547936b7f4cSbjh21         }
1548936b7f4cSbjh21         return packFloat32( aSign, 0xFF, 0 );
1549936b7f4cSbjh21     }
1550936b7f4cSbjh21     shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
1551936b7f4cSbjh21     if ( aExp ) zSig |= 0x40000000;
1552936b7f4cSbjh21     return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
1553936b7f4cSbjh21 
1554936b7f4cSbjh21 }
1555936b7f4cSbjh21 
1556936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC
1557936b7f4cSbjh21 /*
1558936b7f4cSbjh21 -------------------------------------------------------------------------------
1559936b7f4cSbjh21 Rounds the double-precision floating-point value `a' to an integer,
1560936b7f4cSbjh21 and returns the result as a double-precision floating-point value.  The
1561936b7f4cSbjh21 operation is performed according to the IEC/IEEE Standard for Binary
1562936b7f4cSbjh21 Floating-Point Arithmetic.
1563936b7f4cSbjh21 -------------------------------------------------------------------------------
1564936b7f4cSbjh21 */
1565936b7f4cSbjh21 float64 float64_round_to_int( float64 a )
1566936b7f4cSbjh21 {
1567936b7f4cSbjh21     flag aSign;
1568936b7f4cSbjh21     int16 aExp;
1569936b7f4cSbjh21     bits32 lastBitMask, roundBitsMask;
1570936b7f4cSbjh21     int8 roundingMode;
1571936b7f4cSbjh21     float64 z;
1572936b7f4cSbjh21 
1573936b7f4cSbjh21     aExp = extractFloat64Exp( a );
1574936b7f4cSbjh21     if ( 0x413 <= aExp ) {
1575936b7f4cSbjh21         if ( 0x433 <= aExp ) {
1576936b7f4cSbjh21             if (    ( aExp == 0x7FF )
1577936b7f4cSbjh21                  && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
1578936b7f4cSbjh21                 return propagateFloat64NaN( a, a );
1579936b7f4cSbjh21             }
1580936b7f4cSbjh21             return a;
1581936b7f4cSbjh21         }
1582936b7f4cSbjh21         lastBitMask = 1;
1583936b7f4cSbjh21         lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;
1584936b7f4cSbjh21         roundBitsMask = lastBitMask - 1;
1585936b7f4cSbjh21         z = a;
1586936b7f4cSbjh21         roundingMode = float_rounding_mode;
1587936b7f4cSbjh21         if ( roundingMode == float_round_nearest_even ) {
1588936b7f4cSbjh21             if ( lastBitMask ) {
1589936b7f4cSbjh21                 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
1590936b7f4cSbjh21                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
1591936b7f4cSbjh21             }
1592936b7f4cSbjh21             else {
1593936b7f4cSbjh21                 if ( (sbits32) z.low < 0 ) {
1594936b7f4cSbjh21                     ++z.high;
1595936b7f4cSbjh21                     if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
1596936b7f4cSbjh21                 }
1597936b7f4cSbjh21             }
1598936b7f4cSbjh21         }
1599936b7f4cSbjh21         else if ( roundingMode != float_round_to_zero ) {
1600936b7f4cSbjh21             if (   extractFloat64Sign( z )
1601936b7f4cSbjh21                  ^ ( roundingMode == float_round_up ) ) {
1602936b7f4cSbjh21                 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
1603936b7f4cSbjh21             }
1604936b7f4cSbjh21         }
1605936b7f4cSbjh21         z.low &= ~ roundBitsMask;
1606936b7f4cSbjh21     }
1607936b7f4cSbjh21     else {
1608936b7f4cSbjh21         if ( aExp <= 0x3FE ) {
1609936b7f4cSbjh21             if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
1610936b7f4cSbjh21             float_exception_flags |= float_flag_inexact;
1611936b7f4cSbjh21             aSign = extractFloat64Sign( a );
1612936b7f4cSbjh21             switch ( float_rounding_mode ) {
1613936b7f4cSbjh21              case float_round_nearest_even:
1614936b7f4cSbjh21                 if (    ( aExp == 0x3FE )
1615936b7f4cSbjh21                      && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
1616936b7f4cSbjh21                    ) {
1617936b7f4cSbjh21                     return packFloat64( aSign, 0x3FF, 0, 0 );
1618936b7f4cSbjh21                 }
1619936b7f4cSbjh21                 break;
1620936b7f4cSbjh21              case float_round_down:
1621936b7f4cSbjh21                 return
1622936b7f4cSbjh21                       aSign ? packFloat64( 1, 0x3FF, 0, 0 )
1623936b7f4cSbjh21                     : packFloat64( 0, 0, 0, 0 );
1624936b7f4cSbjh21              case float_round_up:
1625936b7f4cSbjh21                 return
1626936b7f4cSbjh21                       aSign ? packFloat64( 1, 0, 0, 0 )
1627936b7f4cSbjh21                     : packFloat64( 0, 0x3FF, 0, 0 );
1628936b7f4cSbjh21             }
1629936b7f4cSbjh21             return packFloat64( aSign, 0, 0, 0 );
1630936b7f4cSbjh21         }
1631936b7f4cSbjh21         lastBitMask = 1;
1632936b7f4cSbjh21         lastBitMask <<= 0x413 - aExp;
1633936b7f4cSbjh21         roundBitsMask = lastBitMask - 1;
1634936b7f4cSbjh21         z.low = 0;
1635936b7f4cSbjh21         z.high = a.high;
1636936b7f4cSbjh21         roundingMode = float_rounding_mode;
1637936b7f4cSbjh21         if ( roundingMode == float_round_nearest_even ) {
1638936b7f4cSbjh21             z.high += lastBitMask>>1;
1639936b7f4cSbjh21             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
1640936b7f4cSbjh21                 z.high &= ~ lastBitMask;
1641936b7f4cSbjh21             }
1642936b7f4cSbjh21         }
1643936b7f4cSbjh21         else if ( roundingMode != float_round_to_zero ) {
1644936b7f4cSbjh21             if (   extractFloat64Sign( z )
1645936b7f4cSbjh21                  ^ ( roundingMode == float_round_up ) ) {
1646936b7f4cSbjh21                 z.high |= ( a.low != 0 );
1647936b7f4cSbjh21                 z.high += roundBitsMask;
1648936b7f4cSbjh21             }
1649936b7f4cSbjh21         }
1650936b7f4cSbjh21         z.high &= ~ roundBitsMask;
1651936b7f4cSbjh21     }
1652936b7f4cSbjh21     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
1653936b7f4cSbjh21         float_exception_flags |= float_flag_inexact;
1654936b7f4cSbjh21     }
1655936b7f4cSbjh21     return z;
1656936b7f4cSbjh21 
1657936b7f4cSbjh21 }
1658936b7f4cSbjh21 #endif
1659936b7f4cSbjh21 
1660936b7f4cSbjh21 /*
1661936b7f4cSbjh21 -------------------------------------------------------------------------------
1662936b7f4cSbjh21 Returns the result of adding the absolute values of the double-precision
1663936b7f4cSbjh21 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1664936b7f4cSbjh21 before being returned.  `zSign' is ignored if the result is a NaN.
1665936b7f4cSbjh21 The addition is performed according to the IEC/IEEE Standard for Binary
1666936b7f4cSbjh21 Floating-Point Arithmetic.
1667936b7f4cSbjh21 -------------------------------------------------------------------------------
1668936b7f4cSbjh21 */
1669936b7f4cSbjh21 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
1670936b7f4cSbjh21 {
1671936b7f4cSbjh21     int16 aExp, bExp, zExp;
1672936b7f4cSbjh21     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1673936b7f4cSbjh21     int16 expDiff;
1674936b7f4cSbjh21 
1675936b7f4cSbjh21     aSig1 = extractFloat64Frac1( a );
1676936b7f4cSbjh21     aSig0 = extractFloat64Frac0( a );
1677936b7f4cSbjh21     aExp = extractFloat64Exp( a );
1678936b7f4cSbjh21     bSig1 = extractFloat64Frac1( b );
1679936b7f4cSbjh21     bSig0 = extractFloat64Frac0( b );
1680936b7f4cSbjh21     bExp = extractFloat64Exp( b );
1681936b7f4cSbjh21     expDiff = aExp - bExp;
1682936b7f4cSbjh21     if ( 0 < expDiff ) {
1683936b7f4cSbjh21         if ( aExp == 0x7FF ) {
1684936b7f4cSbjh21             if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1685936b7f4cSbjh21             return a;
1686936b7f4cSbjh21         }
1687936b7f4cSbjh21         if ( bExp == 0 ) {
1688936b7f4cSbjh21             --expDiff;
1689936b7f4cSbjh21         }
1690936b7f4cSbjh21         else {
1691936b7f4cSbjh21             bSig0 |= 0x00100000;
1692936b7f4cSbjh21         }
1693936b7f4cSbjh21         shift64ExtraRightJamming(
1694936b7f4cSbjh21             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
1695936b7f4cSbjh21         zExp = aExp;
1696936b7f4cSbjh21     }
1697936b7f4cSbjh21     else if ( expDiff < 0 ) {
1698936b7f4cSbjh21         if ( bExp == 0x7FF ) {
1699936b7f4cSbjh21             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1700936b7f4cSbjh21             return packFloat64( zSign, 0x7FF, 0, 0 );
1701936b7f4cSbjh21         }
1702936b7f4cSbjh21         if ( aExp == 0 ) {
1703936b7f4cSbjh21             ++expDiff;
1704936b7f4cSbjh21         }
1705936b7f4cSbjh21         else {
1706936b7f4cSbjh21             aSig0 |= 0x00100000;
1707936b7f4cSbjh21         }
1708936b7f4cSbjh21         shift64ExtraRightJamming(
1709936b7f4cSbjh21             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
1710936b7f4cSbjh21         zExp = bExp;
1711936b7f4cSbjh21     }
1712936b7f4cSbjh21     else {
1713936b7f4cSbjh21         if ( aExp == 0x7FF ) {
1714936b7f4cSbjh21             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1715936b7f4cSbjh21                 return propagateFloat64NaN( a, b );
1716936b7f4cSbjh21             }
1717936b7f4cSbjh21             return a;
1718936b7f4cSbjh21         }
1719936b7f4cSbjh21         add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1720936b7f4cSbjh21         if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
1721936b7f4cSbjh21         zSig2 = 0;
1722936b7f4cSbjh21         zSig0 |= 0x00200000;
1723936b7f4cSbjh21         zExp = aExp;
1724936b7f4cSbjh21         goto shiftRight1;
1725936b7f4cSbjh21     }
1726936b7f4cSbjh21     aSig0 |= 0x00100000;
1727936b7f4cSbjh21     add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1728936b7f4cSbjh21     --zExp;
1729936b7f4cSbjh21     if ( zSig0 < 0x00200000 ) goto roundAndPack;
1730936b7f4cSbjh21     ++zExp;
1731936b7f4cSbjh21  shiftRight1:
1732936b7f4cSbjh21     shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1733936b7f4cSbjh21  roundAndPack:
1734936b7f4cSbjh21     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1735936b7f4cSbjh21 
1736936b7f4cSbjh21 }
1737936b7f4cSbjh21 
1738936b7f4cSbjh21 /*
1739936b7f4cSbjh21 -------------------------------------------------------------------------------
1740936b7f4cSbjh21 Returns the result of subtracting the absolute values of the double-
1741936b7f4cSbjh21 precision floating-point values `a' and `b'.  If `zSign' is 1, the
1742936b7f4cSbjh21 difference is negated before being returned.  `zSign' is ignored if the
1743936b7f4cSbjh21 result is a NaN.  The subtraction is performed according to the IEC/IEEE
1744936b7f4cSbjh21 Standard for Binary Floating-Point Arithmetic.
1745936b7f4cSbjh21 -------------------------------------------------------------------------------
1746936b7f4cSbjh21 */
1747936b7f4cSbjh21 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
1748936b7f4cSbjh21 {
1749936b7f4cSbjh21     int16 aExp, bExp, zExp;
1750936b7f4cSbjh21     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
1751936b7f4cSbjh21     int16 expDiff;
1752936b7f4cSbjh21 
1753936b7f4cSbjh21     aSig1 = extractFloat64Frac1( a );
1754936b7f4cSbjh21     aSig0 = extractFloat64Frac0( a );
1755936b7f4cSbjh21     aExp = extractFloat64Exp( a );
1756936b7f4cSbjh21     bSig1 = extractFloat64Frac1( b );
1757936b7f4cSbjh21     bSig0 = extractFloat64Frac0( b );
1758936b7f4cSbjh21     bExp = extractFloat64Exp( b );
1759936b7f4cSbjh21     expDiff = aExp - bExp;
1760936b7f4cSbjh21     shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );
1761936b7f4cSbjh21     shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
1762936b7f4cSbjh21     if ( 0 < expDiff ) goto aExpBigger;
1763936b7f4cSbjh21     if ( expDiff < 0 ) goto bExpBigger;
1764936b7f4cSbjh21     if ( aExp == 0x7FF ) {
1765936b7f4cSbjh21         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1766936b7f4cSbjh21             return propagateFloat64NaN( a, b );
1767936b7f4cSbjh21         }
1768936b7f4cSbjh21         float_raise( float_flag_invalid );
1769936b7f4cSbjh21         return float64_default_nan;
1770936b7f4cSbjh21     }
1771936b7f4cSbjh21     if ( aExp == 0 ) {
1772936b7f4cSbjh21         aExp = 1;
1773936b7f4cSbjh21         bExp = 1;
1774936b7f4cSbjh21     }
1775936b7f4cSbjh21     if ( bSig0 < aSig0 ) goto aBigger;
1776936b7f4cSbjh21     if ( aSig0 < bSig0 ) goto bBigger;
1777936b7f4cSbjh21     if ( bSig1 < aSig1 ) goto aBigger;
1778936b7f4cSbjh21     if ( aSig1 < bSig1 ) goto bBigger;
1779936b7f4cSbjh21     return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
1780936b7f4cSbjh21  bExpBigger:
1781936b7f4cSbjh21     if ( bExp == 0x7FF ) {
1782936b7f4cSbjh21         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1783936b7f4cSbjh21         return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
1784936b7f4cSbjh21     }
1785936b7f4cSbjh21     if ( aExp == 0 ) {
1786936b7f4cSbjh21         ++expDiff;
1787936b7f4cSbjh21     }
1788936b7f4cSbjh21     else {
1789936b7f4cSbjh21         aSig0 |= 0x40000000;
1790936b7f4cSbjh21     }
1791936b7f4cSbjh21     shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
1792936b7f4cSbjh21     bSig0 |= 0x40000000;
1793936b7f4cSbjh21  bBigger:
1794936b7f4cSbjh21     sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
1795936b7f4cSbjh21     zExp = bExp;
1796936b7f4cSbjh21     zSign ^= 1;
1797936b7f4cSbjh21     goto normalizeRoundAndPack;
1798936b7f4cSbjh21  aExpBigger:
1799936b7f4cSbjh21     if ( aExp == 0x7FF ) {
1800936b7f4cSbjh21         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1801936b7f4cSbjh21         return a;
1802936b7f4cSbjh21     }
1803936b7f4cSbjh21     if ( bExp == 0 ) {
1804936b7f4cSbjh21         --expDiff;
1805936b7f4cSbjh21     }
1806936b7f4cSbjh21     else {
1807936b7f4cSbjh21         bSig0 |= 0x40000000;
1808936b7f4cSbjh21     }
1809936b7f4cSbjh21     shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
1810936b7f4cSbjh21     aSig0 |= 0x40000000;
1811936b7f4cSbjh21  aBigger:
1812936b7f4cSbjh21     sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1813936b7f4cSbjh21     zExp = aExp;
1814936b7f4cSbjh21  normalizeRoundAndPack:
1815936b7f4cSbjh21     --zExp;
1816936b7f4cSbjh21     return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
1817936b7f4cSbjh21 
1818936b7f4cSbjh21 }
1819936b7f4cSbjh21 
1820936b7f4cSbjh21 /*
1821936b7f4cSbjh21 -------------------------------------------------------------------------------
1822936b7f4cSbjh21 Returns the result of adding the double-precision floating-point values `a'
1823936b7f4cSbjh21 and `b'.  The operation is performed according to the IEC/IEEE Standard for
1824936b7f4cSbjh21 Binary Floating-Point Arithmetic.
1825936b7f4cSbjh21 -------------------------------------------------------------------------------
1826936b7f4cSbjh21 */
1827936b7f4cSbjh21 float64 float64_add( float64 a, float64 b )
1828936b7f4cSbjh21 {
1829936b7f4cSbjh21     flag aSign, bSign;
1830936b7f4cSbjh21 
1831936b7f4cSbjh21     aSign = extractFloat64Sign( a );
1832936b7f4cSbjh21     bSign = extractFloat64Sign( b );
1833936b7f4cSbjh21     if ( aSign == bSign ) {
1834936b7f4cSbjh21         return addFloat64Sigs( a, b, aSign );
1835936b7f4cSbjh21     }
1836936b7f4cSbjh21     else {
1837936b7f4cSbjh21         return subFloat64Sigs( a, b, aSign );
1838936b7f4cSbjh21     }
1839936b7f4cSbjh21 
1840936b7f4cSbjh21 }
1841936b7f4cSbjh21 
1842936b7f4cSbjh21 /*
1843936b7f4cSbjh21 -------------------------------------------------------------------------------
1844936b7f4cSbjh21 Returns the result of subtracting the double-precision floating-point values
1845936b7f4cSbjh21 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1846936b7f4cSbjh21 for Binary Floating-Point Arithmetic.
1847936b7f4cSbjh21 -------------------------------------------------------------------------------
1848936b7f4cSbjh21 */
1849936b7f4cSbjh21 float64 float64_sub( float64 a, float64 b )
1850936b7f4cSbjh21 {
1851936b7f4cSbjh21     flag aSign, bSign;
1852936b7f4cSbjh21 
1853936b7f4cSbjh21     aSign = extractFloat64Sign( a );
1854936b7f4cSbjh21     bSign = extractFloat64Sign( b );
1855936b7f4cSbjh21     if ( aSign == bSign ) {
1856936b7f4cSbjh21         return subFloat64Sigs( a, b, aSign );
1857936b7f4cSbjh21     }
1858936b7f4cSbjh21     else {
1859936b7f4cSbjh21         return addFloat64Sigs( a, b, aSign );
1860936b7f4cSbjh21     }
1861936b7f4cSbjh21 
1862936b7f4cSbjh21 }
1863936b7f4cSbjh21 
1864936b7f4cSbjh21 /*
1865936b7f4cSbjh21 -------------------------------------------------------------------------------
1866936b7f4cSbjh21 Returns the result of multiplying the double-precision floating-point values
1867936b7f4cSbjh21 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1868936b7f4cSbjh21 for Binary Floating-Point Arithmetic.
1869936b7f4cSbjh21 -------------------------------------------------------------------------------
1870936b7f4cSbjh21 */
1871936b7f4cSbjh21 float64 float64_mul( float64 a, float64 b )
1872936b7f4cSbjh21 {
1873936b7f4cSbjh21     flag aSign, bSign, zSign;
1874936b7f4cSbjh21     int16 aExp, bExp, zExp;
1875936b7f4cSbjh21     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
1876936b7f4cSbjh21 
1877936b7f4cSbjh21     aSig1 = extractFloat64Frac1( a );
1878936b7f4cSbjh21     aSig0 = extractFloat64Frac0( a );
1879936b7f4cSbjh21     aExp = extractFloat64Exp( a );
1880936b7f4cSbjh21     aSign = extractFloat64Sign( a );
1881936b7f4cSbjh21     bSig1 = extractFloat64Frac1( b );
1882936b7f4cSbjh21     bSig0 = extractFloat64Frac0( b );
1883936b7f4cSbjh21     bExp = extractFloat64Exp( b );
1884936b7f4cSbjh21     bSign = extractFloat64Sign( b );
1885936b7f4cSbjh21     zSign = aSign ^ bSign;
1886936b7f4cSbjh21     if ( aExp == 0x7FF ) {
1887936b7f4cSbjh21         if (    ( aSig0 | aSig1 )
1888936b7f4cSbjh21              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
1889936b7f4cSbjh21             return propagateFloat64NaN( a, b );
1890936b7f4cSbjh21         }
1891936b7f4cSbjh21         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
1892936b7f4cSbjh21         return packFloat64( zSign, 0x7FF, 0, 0 );
1893936b7f4cSbjh21     }
1894936b7f4cSbjh21     if ( bExp == 0x7FF ) {
1895936b7f4cSbjh21         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1896936b7f4cSbjh21         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1897936b7f4cSbjh21  invalid:
1898936b7f4cSbjh21             float_raise( float_flag_invalid );
1899936b7f4cSbjh21             return float64_default_nan;
1900936b7f4cSbjh21         }
1901936b7f4cSbjh21         return packFloat64( zSign, 0x7FF, 0, 0 );
1902936b7f4cSbjh21     }
1903936b7f4cSbjh21     if ( aExp == 0 ) {
1904936b7f4cSbjh21         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1905936b7f4cSbjh21         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1906936b7f4cSbjh21     }
1907936b7f4cSbjh21     if ( bExp == 0 ) {
1908936b7f4cSbjh21         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1909936b7f4cSbjh21         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1910936b7f4cSbjh21     }
1911936b7f4cSbjh21     zExp = aExp + bExp - 0x400;
1912936b7f4cSbjh21     aSig0 |= 0x00100000;
1913936b7f4cSbjh21     shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );
1914936b7f4cSbjh21     mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
1915936b7f4cSbjh21     add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
1916936b7f4cSbjh21     zSig2 |= ( zSig3 != 0 );
1917936b7f4cSbjh21     if ( 0x00200000 <= zSig0 ) {
1918936b7f4cSbjh21         shift64ExtraRightJamming(
1919936b7f4cSbjh21             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1920936b7f4cSbjh21         ++zExp;
1921936b7f4cSbjh21     }
1922936b7f4cSbjh21     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1923936b7f4cSbjh21 
1924936b7f4cSbjh21 }
1925936b7f4cSbjh21 
1926936b7f4cSbjh21 /*
1927936b7f4cSbjh21 -------------------------------------------------------------------------------
1928936b7f4cSbjh21 Returns the result of dividing the double-precision floating-point value `a'
1929936b7f4cSbjh21 by the corresponding value `b'.  The operation is performed according to the
1930936b7f4cSbjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1931936b7f4cSbjh21 -------------------------------------------------------------------------------
1932936b7f4cSbjh21 */
1933936b7f4cSbjh21 float64 float64_div( float64 a, float64 b )
1934936b7f4cSbjh21 {
1935936b7f4cSbjh21     flag aSign, bSign, zSign;
1936936b7f4cSbjh21     int16 aExp, bExp, zExp;
1937936b7f4cSbjh21     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1938936b7f4cSbjh21     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
1939936b7f4cSbjh21 
1940936b7f4cSbjh21     aSig1 = extractFloat64Frac1( a );
1941936b7f4cSbjh21     aSig0 = extractFloat64Frac0( a );
1942936b7f4cSbjh21     aExp = extractFloat64Exp( a );
1943936b7f4cSbjh21     aSign = extractFloat64Sign( a );
1944936b7f4cSbjh21     bSig1 = extractFloat64Frac1( b );
1945936b7f4cSbjh21     bSig0 = extractFloat64Frac0( b );
1946936b7f4cSbjh21     bExp = extractFloat64Exp( b );
1947936b7f4cSbjh21     bSign = extractFloat64Sign( b );
1948936b7f4cSbjh21     zSign = aSign ^ bSign;
1949936b7f4cSbjh21     if ( aExp == 0x7FF ) {
1950936b7f4cSbjh21         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1951936b7f4cSbjh21         if ( bExp == 0x7FF ) {
1952936b7f4cSbjh21             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1953936b7f4cSbjh21             goto invalid;
1954936b7f4cSbjh21         }
1955936b7f4cSbjh21         return packFloat64( zSign, 0x7FF, 0, 0 );
1956936b7f4cSbjh21     }
1957936b7f4cSbjh21     if ( bExp == 0x7FF ) {
1958936b7f4cSbjh21         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1959936b7f4cSbjh21         return packFloat64( zSign, 0, 0, 0 );
1960936b7f4cSbjh21     }
1961936b7f4cSbjh21     if ( bExp == 0 ) {
1962936b7f4cSbjh21         if ( ( bSig0 | bSig1 ) == 0 ) {
1963936b7f4cSbjh21             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1964936b7f4cSbjh21  invalid:
1965936b7f4cSbjh21                 float_raise( float_flag_invalid );
1966936b7f4cSbjh21                 return float64_default_nan;
1967936b7f4cSbjh21             }
1968936b7f4cSbjh21             float_raise( float_flag_divbyzero );
1969936b7f4cSbjh21             return packFloat64( zSign, 0x7FF, 0, 0 );
1970936b7f4cSbjh21         }
1971936b7f4cSbjh21         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1972936b7f4cSbjh21     }
1973936b7f4cSbjh21     if ( aExp == 0 ) {
1974936b7f4cSbjh21         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1975936b7f4cSbjh21         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1976936b7f4cSbjh21     }
1977936b7f4cSbjh21     zExp = aExp - bExp + 0x3FD;
1978936b7f4cSbjh21     shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );
1979936b7f4cSbjh21     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
1980936b7f4cSbjh21     if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {
1981936b7f4cSbjh21         shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
1982936b7f4cSbjh21         ++zExp;
1983936b7f4cSbjh21     }
1984936b7f4cSbjh21     zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );
1985936b7f4cSbjh21     mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
1986936b7f4cSbjh21     sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
1987936b7f4cSbjh21     while ( (sbits32) rem0 < 0 ) {
1988936b7f4cSbjh21         --zSig0;
1989936b7f4cSbjh21         add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
1990936b7f4cSbjh21     }
1991936b7f4cSbjh21     zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );
1992936b7f4cSbjh21     if ( ( zSig1 & 0x3FF ) <= 4 ) {
1993936b7f4cSbjh21         mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
1994936b7f4cSbjh21         sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
1995936b7f4cSbjh21         while ( (sbits32) rem1 < 0 ) {
1996936b7f4cSbjh21             --zSig1;
1997936b7f4cSbjh21             add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
1998936b7f4cSbjh21         }
1999936b7f4cSbjh21         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2000936b7f4cSbjh21     }
2001936b7f4cSbjh21     shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
2002936b7f4cSbjh21     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
2003936b7f4cSbjh21 
2004936b7f4cSbjh21 }
2005936b7f4cSbjh21 
2006936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC
2007936b7f4cSbjh21 /*
2008936b7f4cSbjh21 -------------------------------------------------------------------------------
2009936b7f4cSbjh21 Returns the remainder of the double-precision floating-point value `a'
2010936b7f4cSbjh21 with respect to the corresponding value `b'.  The operation is performed
2011936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2012936b7f4cSbjh21 -------------------------------------------------------------------------------
2013936b7f4cSbjh21 */
2014936b7f4cSbjh21 float64 float64_rem( float64 a, float64 b )
2015936b7f4cSbjh21 {
2016936b7f4cSbjh21     flag aSign, bSign, zSign;
2017936b7f4cSbjh21     int16 aExp, bExp, expDiff;
2018936b7f4cSbjh21     bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
2019936b7f4cSbjh21     bits32 allZero, alternateASig0, alternateASig1, sigMean1;
2020936b7f4cSbjh21     sbits32 sigMean0;
2021936b7f4cSbjh21     float64 z;
2022936b7f4cSbjh21 
2023936b7f4cSbjh21     aSig1 = extractFloat64Frac1( a );
2024936b7f4cSbjh21     aSig0 = extractFloat64Frac0( a );
2025936b7f4cSbjh21     aExp = extractFloat64Exp( a );
2026936b7f4cSbjh21     aSign = extractFloat64Sign( a );
2027936b7f4cSbjh21     bSig1 = extractFloat64Frac1( b );
2028936b7f4cSbjh21     bSig0 = extractFloat64Frac0( b );
2029936b7f4cSbjh21     bExp = extractFloat64Exp( b );
2030936b7f4cSbjh21     bSign = extractFloat64Sign( b );
2031936b7f4cSbjh21     if ( aExp == 0x7FF ) {
2032936b7f4cSbjh21         if (    ( aSig0 | aSig1 )
2033936b7f4cSbjh21              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
2034936b7f4cSbjh21             return propagateFloat64NaN( a, b );
2035936b7f4cSbjh21         }
2036936b7f4cSbjh21         goto invalid;
2037936b7f4cSbjh21     }
2038936b7f4cSbjh21     if ( bExp == 0x7FF ) {
2039936b7f4cSbjh21         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
2040936b7f4cSbjh21         return a;
2041936b7f4cSbjh21     }
2042936b7f4cSbjh21     if ( bExp == 0 ) {
2043936b7f4cSbjh21         if ( ( bSig0 | bSig1 ) == 0 ) {
2044936b7f4cSbjh21  invalid:
2045936b7f4cSbjh21             float_raise( float_flag_invalid );
2046936b7f4cSbjh21             return float64_default_nan;
2047936b7f4cSbjh21         }
2048936b7f4cSbjh21         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
2049936b7f4cSbjh21     }
2050936b7f4cSbjh21     if ( aExp == 0 ) {
2051936b7f4cSbjh21         if ( ( aSig0 | aSig1 ) == 0 ) return a;
2052936b7f4cSbjh21         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2053936b7f4cSbjh21     }
2054936b7f4cSbjh21     expDiff = aExp - bExp;
2055936b7f4cSbjh21     if ( expDiff < -1 ) return a;
2056936b7f4cSbjh21     shortShift64Left(
2057936b7f4cSbjh21         aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );
2058936b7f4cSbjh21     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
2059936b7f4cSbjh21     q = le64( bSig0, bSig1, aSig0, aSig1 );
2060936b7f4cSbjh21     if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2061936b7f4cSbjh21     expDiff -= 32;
2062936b7f4cSbjh21     while ( 0 < expDiff ) {
2063936b7f4cSbjh21         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2064936b7f4cSbjh21         q = ( 4 < q ) ? q - 4 : 0;
2065936b7f4cSbjh21         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2066936b7f4cSbjh21         shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );
2067936b7f4cSbjh21         shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );
2068936b7f4cSbjh21         sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );
2069936b7f4cSbjh21         expDiff -= 29;
2070936b7f4cSbjh21     }
2071936b7f4cSbjh21     if ( -32 < expDiff ) {
2072936b7f4cSbjh21         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2073936b7f4cSbjh21         q = ( 4 < q ) ? q - 4 : 0;
2074936b7f4cSbjh21         q >>= - expDiff;
2075936b7f4cSbjh21         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2076936b7f4cSbjh21         expDiff += 24;
2077936b7f4cSbjh21         if ( expDiff < 0 ) {
2078936b7f4cSbjh21             shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
2079936b7f4cSbjh21         }
2080936b7f4cSbjh21         else {
2081936b7f4cSbjh21             shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
2082936b7f4cSbjh21         }
2083936b7f4cSbjh21         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2084936b7f4cSbjh21         sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
2085936b7f4cSbjh21     }
2086936b7f4cSbjh21     else {
2087936b7f4cSbjh21         shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );
2088936b7f4cSbjh21         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2089936b7f4cSbjh21     }
2090936b7f4cSbjh21     do {
2091936b7f4cSbjh21         alternateASig0 = aSig0;
2092936b7f4cSbjh21         alternateASig1 = aSig1;
2093936b7f4cSbjh21         ++q;
2094936b7f4cSbjh21         sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2095936b7f4cSbjh21     } while ( 0 <= (sbits32) aSig0 );
2096936b7f4cSbjh21     add64(
2097936b7f4cSbjh21         aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
2098936b7f4cSbjh21     if (    ( sigMean0 < 0 )
2099936b7f4cSbjh21          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
2100936b7f4cSbjh21         aSig0 = alternateASig0;
2101936b7f4cSbjh21         aSig1 = alternateASig1;
2102936b7f4cSbjh21     }
2103936b7f4cSbjh21     zSign = ( (sbits32) aSig0 < 0 );
2104936b7f4cSbjh21     if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
2105936b7f4cSbjh21     return
2106936b7f4cSbjh21         normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
2107936b7f4cSbjh21 
2108936b7f4cSbjh21 }
2109936b7f4cSbjh21 #endif
2110936b7f4cSbjh21 
2111936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC
2112936b7f4cSbjh21 /*
2113936b7f4cSbjh21 -------------------------------------------------------------------------------
2114936b7f4cSbjh21 Returns the square root of the double-precision floating-point value `a'.
2115936b7f4cSbjh21 The operation is performed according to the IEC/IEEE Standard for Binary
2116936b7f4cSbjh21 Floating-Point Arithmetic.
2117936b7f4cSbjh21 -------------------------------------------------------------------------------
2118936b7f4cSbjh21 */
2119936b7f4cSbjh21 float64 float64_sqrt( float64 a )
2120936b7f4cSbjh21 {
2121936b7f4cSbjh21     flag aSign;
2122936b7f4cSbjh21     int16 aExp, zExp;
2123936b7f4cSbjh21     bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
2124936b7f4cSbjh21     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
2125936b7f4cSbjh21     float64 z;
2126936b7f4cSbjh21 
2127936b7f4cSbjh21     aSig1 = extractFloat64Frac1( a );
2128936b7f4cSbjh21     aSig0 = extractFloat64Frac0( a );
2129936b7f4cSbjh21     aExp = extractFloat64Exp( a );
2130936b7f4cSbjh21     aSign = extractFloat64Sign( a );
2131936b7f4cSbjh21     if ( aExp == 0x7FF ) {
2132936b7f4cSbjh21         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
2133936b7f4cSbjh21         if ( ! aSign ) return a;
2134936b7f4cSbjh21         goto invalid;
2135936b7f4cSbjh21     }
2136936b7f4cSbjh21     if ( aSign ) {
2137936b7f4cSbjh21         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
2138936b7f4cSbjh21  invalid:
2139936b7f4cSbjh21         float_raise( float_flag_invalid );
2140936b7f4cSbjh21         return float64_default_nan;
2141936b7f4cSbjh21     }
2142936b7f4cSbjh21     if ( aExp == 0 ) {
2143936b7f4cSbjh21         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
2144936b7f4cSbjh21         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2145936b7f4cSbjh21     }
2146936b7f4cSbjh21     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2147936b7f4cSbjh21     aSig0 |= 0x00100000;
2148936b7f4cSbjh21     shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
2149936b7f4cSbjh21     zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
2150936b7f4cSbjh21     if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;
2151936b7f4cSbjh21     doubleZSig0 = zSig0 + zSig0;
2152936b7f4cSbjh21     shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
2153936b7f4cSbjh21     mul32To64( zSig0, zSig0, &term0, &term1 );
2154936b7f4cSbjh21     sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
2155936b7f4cSbjh21     while ( (sbits32) rem0 < 0 ) {
2156936b7f4cSbjh21         --zSig0;
2157936b7f4cSbjh21         doubleZSig0 -= 2;
2158936b7f4cSbjh21         add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
2159936b7f4cSbjh21     }
2160936b7f4cSbjh21     zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
2161936b7f4cSbjh21     if ( ( zSig1 & 0x1FF ) <= 5 ) {
2162936b7f4cSbjh21         if ( zSig1 == 0 ) zSig1 = 1;
2163936b7f4cSbjh21         mul32To64( doubleZSig0, zSig1, &term1, &term2 );
2164936b7f4cSbjh21         sub64( rem1, 0, term1, term2, &rem1, &rem2 );
2165936b7f4cSbjh21         mul32To64( zSig1, zSig1, &term2, &term3 );
2166936b7f4cSbjh21         sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
2167936b7f4cSbjh21         while ( (sbits32) rem1 < 0 ) {
2168936b7f4cSbjh21             --zSig1;
2169936b7f4cSbjh21             shortShift64Left( 0, zSig1, 1, &term2, &term3 );
2170936b7f4cSbjh21             term3 |= 1;
2171936b7f4cSbjh21             term2 |= doubleZSig0;
2172936b7f4cSbjh21             add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
2173936b7f4cSbjh21         }
2174936b7f4cSbjh21         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2175936b7f4cSbjh21     }
2176936b7f4cSbjh21     shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
2177936b7f4cSbjh21     return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
2178936b7f4cSbjh21 
2179936b7f4cSbjh21 }
2180936b7f4cSbjh21 #endif
2181936b7f4cSbjh21 
2182936b7f4cSbjh21 /*
2183936b7f4cSbjh21 -------------------------------------------------------------------------------
2184936b7f4cSbjh21 Returns 1 if the double-precision floating-point value `a' is equal to
2185936b7f4cSbjh21 the corresponding value `b', and 0 otherwise.  The comparison is performed
2186936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2187936b7f4cSbjh21 -------------------------------------------------------------------------------
2188936b7f4cSbjh21 */
2189936b7f4cSbjh21 flag float64_eq( float64 a, float64 b )
2190936b7f4cSbjh21 {
2191936b7f4cSbjh21 
2192936b7f4cSbjh21     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2193936b7f4cSbjh21               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2194936b7f4cSbjh21          || (    ( extractFloat64Exp( b ) == 0x7FF )
2195936b7f4cSbjh21               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2196936b7f4cSbjh21        ) {
2197936b7f4cSbjh21         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2198936b7f4cSbjh21             float_raise( float_flag_invalid );
2199936b7f4cSbjh21         }
2200936b7f4cSbjh21         return 0;
2201936b7f4cSbjh21     }
2202936b7f4cSbjh21     return ( a == b ) ||
2203936b7f4cSbjh21 	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
2204936b7f4cSbjh21 
2205936b7f4cSbjh21 }
2206936b7f4cSbjh21 
2207936b7f4cSbjh21 /*
2208936b7f4cSbjh21 -------------------------------------------------------------------------------
2209936b7f4cSbjh21 Returns 1 if the double-precision floating-point value `a' is less than
2210936b7f4cSbjh21 or equal to the corresponding value `b', and 0 otherwise.  The comparison
2211936b7f4cSbjh21 is performed according to the IEC/IEEE Standard for Binary Floating-Point
2212936b7f4cSbjh21 Arithmetic.
2213936b7f4cSbjh21 -------------------------------------------------------------------------------
2214936b7f4cSbjh21 */
2215936b7f4cSbjh21 flag float64_le( float64 a, float64 b )
2216936b7f4cSbjh21 {
2217936b7f4cSbjh21     flag aSign, bSign;
2218936b7f4cSbjh21 
2219936b7f4cSbjh21     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2220936b7f4cSbjh21               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2221936b7f4cSbjh21          || (    ( extractFloat64Exp( b ) == 0x7FF )
2222936b7f4cSbjh21               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2223936b7f4cSbjh21        ) {
2224936b7f4cSbjh21         float_raise( float_flag_invalid );
2225936b7f4cSbjh21         return 0;
2226936b7f4cSbjh21     }
2227936b7f4cSbjh21     aSign = extractFloat64Sign( a );
2228936b7f4cSbjh21     bSign = extractFloat64Sign( b );
2229936b7f4cSbjh21     if ( aSign != bSign )
2230936b7f4cSbjh21 	return aSign ||
2231936b7f4cSbjh21 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
2232936b7f4cSbjh21 	      0 );
2233936b7f4cSbjh21     return ( a == b ) ||
2234936b7f4cSbjh21 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2235936b7f4cSbjh21 }
2236936b7f4cSbjh21 
2237936b7f4cSbjh21 /*
2238936b7f4cSbjh21 -------------------------------------------------------------------------------
2239936b7f4cSbjh21 Returns 1 if the double-precision floating-point value `a' is less than
2240936b7f4cSbjh21 the corresponding value `b', and 0 otherwise.  The comparison is performed
2241936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2242936b7f4cSbjh21 -------------------------------------------------------------------------------
2243936b7f4cSbjh21 */
2244936b7f4cSbjh21 flag float64_lt( float64 a, float64 b )
2245936b7f4cSbjh21 {
2246936b7f4cSbjh21     flag aSign, bSign;
2247936b7f4cSbjh21 
2248936b7f4cSbjh21     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2249936b7f4cSbjh21               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2250936b7f4cSbjh21          || (    ( extractFloat64Exp( b ) == 0x7FF )
2251936b7f4cSbjh21               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2252936b7f4cSbjh21        ) {
2253936b7f4cSbjh21         float_raise( float_flag_invalid );
2254936b7f4cSbjh21         return 0;
2255936b7f4cSbjh21     }
2256936b7f4cSbjh21     aSign = extractFloat64Sign( a );
2257936b7f4cSbjh21     bSign = extractFloat64Sign( b );
2258936b7f4cSbjh21     if ( aSign != bSign )
2259936b7f4cSbjh21 	return aSign &&
2260936b7f4cSbjh21 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
2261936b7f4cSbjh21 	      0 );
2262936b7f4cSbjh21     return ( a != b ) &&
2263936b7f4cSbjh21 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2264936b7f4cSbjh21 
2265936b7f4cSbjh21 }
2266936b7f4cSbjh21 
2267936b7f4cSbjh21 #ifndef SOFTFLOAT_FOR_GCC
2268936b7f4cSbjh21 /*
2269936b7f4cSbjh21 -------------------------------------------------------------------------------
2270936b7f4cSbjh21 Returns 1 if the double-precision floating-point value `a' is equal to
2271936b7f4cSbjh21 the corresponding value `b', and 0 otherwise.  The invalid exception is
2272936b7f4cSbjh21 raised if either operand is a NaN.  Otherwise, the comparison is performed
2273936b7f4cSbjh21 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2274936b7f4cSbjh21 -------------------------------------------------------------------------------
2275936b7f4cSbjh21 */
2276936b7f4cSbjh21 flag float64_eq_signaling( float64 a, float64 b )
2277936b7f4cSbjh21 {
2278936b7f4cSbjh21 
2279936b7f4cSbjh21     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2280936b7f4cSbjh21               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2281936b7f4cSbjh21          || (    ( extractFloat64Exp( b ) == 0x7FF )
2282936b7f4cSbjh21               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2283936b7f4cSbjh21        ) {
2284936b7f4cSbjh21         float_raise( float_flag_invalid );
2285936b7f4cSbjh21         return 0;
2286936b7f4cSbjh21     }
2287936b7f4cSbjh21     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2288936b7f4cSbjh21 
2289936b7f4cSbjh21 }
2290936b7f4cSbjh21 
2291936b7f4cSbjh21 /*
2292936b7f4cSbjh21 -------------------------------------------------------------------------------
2293936b7f4cSbjh21 Returns 1 if the double-precision floating-point value `a' is less than or
2294936b7f4cSbjh21 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2295936b7f4cSbjh21 cause an exception.  Otherwise, the comparison is performed according to the
2296936b7f4cSbjh21 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2297936b7f4cSbjh21 -------------------------------------------------------------------------------
2298936b7f4cSbjh21 */
2299936b7f4cSbjh21 flag float64_le_quiet( float64 a, float64 b )
2300936b7f4cSbjh21 {
2301936b7f4cSbjh21     flag aSign, bSign;
2302936b7f4cSbjh21 
2303936b7f4cSbjh21     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2304936b7f4cSbjh21               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2305936b7f4cSbjh21          || (    ( extractFloat64Exp( b ) == 0x7FF )
2306936b7f4cSbjh21               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2307936b7f4cSbjh21        ) {
2308936b7f4cSbjh21         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2309936b7f4cSbjh21             float_raise( float_flag_invalid );
2310936b7f4cSbjh21         }
2311936b7f4cSbjh21         return 0;
2312936b7f4cSbjh21     }
2313936b7f4cSbjh21     aSign = extractFloat64Sign( a );
2314936b7f4cSbjh21     bSign = extractFloat64Sign( b );
2315936b7f4cSbjh21     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2316936b7f4cSbjh21     return ( a == b ) || ( aSign ^ ( a < b ) );
2317936b7f4cSbjh21 
2318936b7f4cSbjh21 }
2319936b7f4cSbjh21 
2320936b7f4cSbjh21 /*
2321936b7f4cSbjh21 -------------------------------------------------------------------------------
2322936b7f4cSbjh21 Returns 1 if the double-precision floating-point value `a' is less than
2323936b7f4cSbjh21 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2324936b7f4cSbjh21 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2325936b7f4cSbjh21 Standard for Binary Floating-Point Arithmetic.
2326936b7f4cSbjh21 -------------------------------------------------------------------------------
2327936b7f4cSbjh21 */
2328936b7f4cSbjh21 flag float64_lt_quiet( float64 a, float64 b )
2329936b7f4cSbjh21 {
2330936b7f4cSbjh21     flag aSign, bSign;
2331936b7f4cSbjh21 
2332936b7f4cSbjh21     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2333936b7f4cSbjh21               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2334936b7f4cSbjh21          || (    ( extractFloat64Exp( b ) == 0x7FF )
2335936b7f4cSbjh21               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2336936b7f4cSbjh21        ) {
2337936b7f4cSbjh21         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2338936b7f4cSbjh21             float_raise( float_flag_invalid );
2339936b7f4cSbjh21         }
2340936b7f4cSbjh21         return 0;
2341936b7f4cSbjh21     }
2342936b7f4cSbjh21     aSign = extractFloat64Sign( a );
2343936b7f4cSbjh21     bSign = extractFloat64Sign( b );
2344936b7f4cSbjh21     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2345936b7f4cSbjh21     return ( a != b ) && ( aSign ^ ( a < b ) );
2346936b7f4cSbjh21 
2347936b7f4cSbjh21 }
2348936b7f4cSbjh21 
2349936b7f4cSbjh21 #endif
2350