xref: /openbsd/lib/libc/softfloat/softfloat.c (revision 08daf0c2)
1*08daf0c2Sdrahn /*	$OpenBSD: softfloat.c,v 1.1 2006/11/06 15:11:37 drahn Exp $	*/
2*08daf0c2Sdrahn /* $NetBSD: softfloat.c,v 1.1 2002/05/21 23:51:07 bjh21 Exp $ */
3*08daf0c2Sdrahn 
4*08daf0c2Sdrahn /*
5*08daf0c2Sdrahn  * This version hacked for use with gcc -msoft-float by bjh21.
6*08daf0c2Sdrahn  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
7*08daf0c2Sdrahn  *  itself).
8*08daf0c2Sdrahn  */
9*08daf0c2Sdrahn 
10*08daf0c2Sdrahn /*
11*08daf0c2Sdrahn  * Things you may want to define:
12*08daf0c2Sdrahn  *
13*08daf0c2Sdrahn  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
14*08daf0c2Sdrahn  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
15*08daf0c2Sdrahn  *   properly renamed.
16*08daf0c2Sdrahn  */
17*08daf0c2Sdrahn 
18*08daf0c2Sdrahn /*
19*08daf0c2Sdrahn  * This differs from the standard bits32/softfloat.c in that float64
20*08daf0c2Sdrahn  * is defined to be a 64-bit integer rather than a structure.  The
21*08daf0c2Sdrahn  * structure is float64s, with translation between the two going via
22*08daf0c2Sdrahn  * float64u.
23*08daf0c2Sdrahn  */
24*08daf0c2Sdrahn 
25*08daf0c2Sdrahn /*
26*08daf0c2Sdrahn ===============================================================================
27*08daf0c2Sdrahn 
28*08daf0c2Sdrahn This C source file is part of the SoftFloat IEC/IEEE Floating-Point
29*08daf0c2Sdrahn Arithmetic Package, Release 2a.
30*08daf0c2Sdrahn 
31*08daf0c2Sdrahn Written by John R. Hauser.  This work was made possible in part by the
32*08daf0c2Sdrahn International Computer Science Institute, located at Suite 600, 1947 Center
33*08daf0c2Sdrahn Street, Berkeley, California 94704.  Funding was partially provided by the
34*08daf0c2Sdrahn National Science Foundation under grant MIP-9311980.  The original version
35*08daf0c2Sdrahn of this code was written as part of a project to build a fixed-point vector
36*08daf0c2Sdrahn processor in collaboration with the University of California at Berkeley,
37*08daf0c2Sdrahn overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
38*08daf0c2Sdrahn is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
39*08daf0c2Sdrahn arithmetic/SoftFloat.html'.
40*08daf0c2Sdrahn 
41*08daf0c2Sdrahn THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
42*08daf0c2Sdrahn has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
43*08daf0c2Sdrahn TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
44*08daf0c2Sdrahn PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
45*08daf0c2Sdrahn AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
46*08daf0c2Sdrahn 
47*08daf0c2Sdrahn Derivative works are acceptable, even for commercial purposes, so long as
48*08daf0c2Sdrahn (1) they include prominent notice that the work is derivative, and (2) they
49*08daf0c2Sdrahn include prominent notice akin to these four paragraphs for those parts of
50*08daf0c2Sdrahn this code that are retained.
51*08daf0c2Sdrahn 
52*08daf0c2Sdrahn ===============================================================================
53*08daf0c2Sdrahn */
54*08daf0c2Sdrahn 
55*08daf0c2Sdrahn #include <sys/cdefs.h>
56*08daf0c2Sdrahn 
57*08daf0c2Sdrahn #ifdef SOFTFLOAT_FOR_GCC
58*08daf0c2Sdrahn #include "softfloat-for-gcc.h"
59*08daf0c2Sdrahn #endif
60*08daf0c2Sdrahn 
61*08daf0c2Sdrahn #include "milieu.h"
62*08daf0c2Sdrahn #include "softfloat.h"
63*08daf0c2Sdrahn 
64*08daf0c2Sdrahn /*
65*08daf0c2Sdrahn  * Conversions between floats as stored in memory and floats as
66*08daf0c2Sdrahn  * SoftFloat uses them
67*08daf0c2Sdrahn  */
68*08daf0c2Sdrahn #ifndef FLOAT64_DEMANGLE
69*08daf0c2Sdrahn #define FLOAT64_DEMANGLE(a)	(a)
70*08daf0c2Sdrahn #endif
71*08daf0c2Sdrahn #ifndef FLOAT64_MANGLE
72*08daf0c2Sdrahn #define FLOAT64_MANGLE(a)	(a)
73*08daf0c2Sdrahn #endif
74*08daf0c2Sdrahn 
75*08daf0c2Sdrahn /*
76*08daf0c2Sdrahn -------------------------------------------------------------------------------
77*08daf0c2Sdrahn Primitive arithmetic functions, including multi-word arithmetic, and
78*08daf0c2Sdrahn division and square root approximations.  (Can be specialized to target if
79*08daf0c2Sdrahn desired.)
80*08daf0c2Sdrahn -------------------------------------------------------------------------------
81*08daf0c2Sdrahn */
82*08daf0c2Sdrahn #include "softfloat-macros.h"
83*08daf0c2Sdrahn 
84*08daf0c2Sdrahn /*
85*08daf0c2Sdrahn -------------------------------------------------------------------------------
86*08daf0c2Sdrahn Functions and definitions to determine:  (1) whether tininess for underflow
87*08daf0c2Sdrahn is detected before or after rounding by default, (2) what (if anything)
88*08daf0c2Sdrahn happens when exceptions are raised, (3) how signaling NaNs are distinguished
89*08daf0c2Sdrahn from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
90*08daf0c2Sdrahn are propagated from function inputs to output.  These details are target-
91*08daf0c2Sdrahn specific.
92*08daf0c2Sdrahn -------------------------------------------------------------------------------
93*08daf0c2Sdrahn */
94*08daf0c2Sdrahn #include "softfloat-specialize.h"
95*08daf0c2Sdrahn 
96*08daf0c2Sdrahn /*
97*08daf0c2Sdrahn -------------------------------------------------------------------------------
98*08daf0c2Sdrahn Floating-point rounding mode and exception flags.
99*08daf0c2Sdrahn -------------------------------------------------------------------------------
100*08daf0c2Sdrahn */
101*08daf0c2Sdrahn fp_rnd float_rounding_mode = float_round_nearest_even;
102*08daf0c2Sdrahn fp_except float_exception_flags = 0;
103*08daf0c2Sdrahn 
104*08daf0c2Sdrahn /*
105*08daf0c2Sdrahn -------------------------------------------------------------------------------
106*08daf0c2Sdrahn Returns the fraction bits of the single-precision floating-point value `a'.
107*08daf0c2Sdrahn -------------------------------------------------------------------------------
108*08daf0c2Sdrahn */
109*08daf0c2Sdrahn INLINE bits32 extractFloat32Frac( float32 a )
110*08daf0c2Sdrahn {
111*08daf0c2Sdrahn 
112*08daf0c2Sdrahn     return a & 0x007FFFFF;
113*08daf0c2Sdrahn 
114*08daf0c2Sdrahn }
115*08daf0c2Sdrahn 
116*08daf0c2Sdrahn /*
117*08daf0c2Sdrahn -------------------------------------------------------------------------------
118*08daf0c2Sdrahn Returns the exponent bits of the single-precision floating-point value `a'.
119*08daf0c2Sdrahn -------------------------------------------------------------------------------
120*08daf0c2Sdrahn */
121*08daf0c2Sdrahn INLINE int16 extractFloat32Exp( float32 a )
122*08daf0c2Sdrahn {
123*08daf0c2Sdrahn 
124*08daf0c2Sdrahn     return ( a>>23 ) & 0xFF;
125*08daf0c2Sdrahn 
126*08daf0c2Sdrahn }
127*08daf0c2Sdrahn 
128*08daf0c2Sdrahn /*
129*08daf0c2Sdrahn -------------------------------------------------------------------------------
130*08daf0c2Sdrahn Returns the sign bit of the single-precision floating-point value `a'.
131*08daf0c2Sdrahn -------------------------------------------------------------------------------
132*08daf0c2Sdrahn */
133*08daf0c2Sdrahn INLINE flag extractFloat32Sign( float32 a )
134*08daf0c2Sdrahn {
135*08daf0c2Sdrahn 
136*08daf0c2Sdrahn     return a>>31;
137*08daf0c2Sdrahn 
138*08daf0c2Sdrahn }
139*08daf0c2Sdrahn 
140*08daf0c2Sdrahn /*
141*08daf0c2Sdrahn -------------------------------------------------------------------------------
142*08daf0c2Sdrahn Normalizes the subnormal single-precision floating-point value represented
143*08daf0c2Sdrahn by the denormalized significand `aSig'.  The normalized exponent and
144*08daf0c2Sdrahn significand are stored at the locations pointed to by `zExpPtr' and
145*08daf0c2Sdrahn `zSigPtr', respectively.
146*08daf0c2Sdrahn -------------------------------------------------------------------------------
147*08daf0c2Sdrahn */
148*08daf0c2Sdrahn static void
149*08daf0c2Sdrahn  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
150*08daf0c2Sdrahn {
151*08daf0c2Sdrahn     int8 shiftCount;
152*08daf0c2Sdrahn 
153*08daf0c2Sdrahn     shiftCount = countLeadingZeros32( aSig ) - 8;
154*08daf0c2Sdrahn     *zSigPtr = aSig<<shiftCount;
155*08daf0c2Sdrahn     *zExpPtr = 1 - shiftCount;
156*08daf0c2Sdrahn 
157*08daf0c2Sdrahn }
158*08daf0c2Sdrahn 
159*08daf0c2Sdrahn /*
160*08daf0c2Sdrahn -------------------------------------------------------------------------------
161*08daf0c2Sdrahn Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
162*08daf0c2Sdrahn single-precision floating-point value, returning the result.  After being
163*08daf0c2Sdrahn shifted into the proper positions, the three fields are simply added
164*08daf0c2Sdrahn together to form the result.  This means that any integer portion of `zSig'
165*08daf0c2Sdrahn will be added into the exponent.  Since a properly normalized significand
166*08daf0c2Sdrahn will have an integer portion equal to 1, the `zExp' input should be 1 less
167*08daf0c2Sdrahn than the desired result exponent whenever `zSig' is a complete, normalized
168*08daf0c2Sdrahn significand.
169*08daf0c2Sdrahn -------------------------------------------------------------------------------
170*08daf0c2Sdrahn */
171*08daf0c2Sdrahn INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
172*08daf0c2Sdrahn {
173*08daf0c2Sdrahn 
174*08daf0c2Sdrahn     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
175*08daf0c2Sdrahn 
176*08daf0c2Sdrahn }
177*08daf0c2Sdrahn 
178*08daf0c2Sdrahn /*
179*08daf0c2Sdrahn -------------------------------------------------------------------------------
180*08daf0c2Sdrahn Takes an abstract floating-point value having sign `zSign', exponent `zExp',
181*08daf0c2Sdrahn and significand `zSig', and returns the proper single-precision floating-
182*08daf0c2Sdrahn point value corresponding to the abstract input.  Ordinarily, the abstract
183*08daf0c2Sdrahn value is simply rounded and packed into the single-precision format, with
184*08daf0c2Sdrahn the inexact exception raised if the abstract input cannot be represented
185*08daf0c2Sdrahn exactly.  However, if the abstract value is too large, the overflow and
186*08daf0c2Sdrahn inexact exceptions are raised and an infinity or maximal finite value is
187*08daf0c2Sdrahn returned.  If the abstract value is too small, the input value is rounded to
188*08daf0c2Sdrahn a subnormal number, and the underflow and inexact exceptions are raised if
189*08daf0c2Sdrahn the abstract input cannot be represented exactly as a subnormal single-
190*08daf0c2Sdrahn precision floating-point number.
191*08daf0c2Sdrahn     The input significand `zSig' has its binary point between bits 30
192*08daf0c2Sdrahn and 29, which is 7 bits to the left of the usual location.  This shifted
193*08daf0c2Sdrahn significand must be normalized or smaller.  If `zSig' is not normalized,
194*08daf0c2Sdrahn `zExp' must be 0; in that case, the result returned is a subnormal number,
195*08daf0c2Sdrahn and it must not require rounding.  In the usual case that `zSig' is
196*08daf0c2Sdrahn normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
197*08daf0c2Sdrahn The handling of underflow and overflow follows the IEC/IEEE Standard for
198*08daf0c2Sdrahn Binary Floating-Point Arithmetic.
199*08daf0c2Sdrahn -------------------------------------------------------------------------------
200*08daf0c2Sdrahn */
201*08daf0c2Sdrahn static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
202*08daf0c2Sdrahn {
203*08daf0c2Sdrahn     int8 roundingMode;
204*08daf0c2Sdrahn     flag roundNearestEven;
205*08daf0c2Sdrahn     int8 roundIncrement, roundBits;
206*08daf0c2Sdrahn     flag isTiny;
207*08daf0c2Sdrahn 
208*08daf0c2Sdrahn     roundingMode = float_rounding_mode;
209*08daf0c2Sdrahn     roundNearestEven = roundingMode == float_round_nearest_even;
210*08daf0c2Sdrahn     roundIncrement = 0x40;
211*08daf0c2Sdrahn     if ( ! roundNearestEven ) {
212*08daf0c2Sdrahn         if ( roundingMode == float_round_to_zero ) {
213*08daf0c2Sdrahn             roundIncrement = 0;
214*08daf0c2Sdrahn         }
215*08daf0c2Sdrahn         else {
216*08daf0c2Sdrahn             roundIncrement = 0x7F;
217*08daf0c2Sdrahn             if ( zSign ) {
218*08daf0c2Sdrahn                 if ( roundingMode == float_round_up ) roundIncrement = 0;
219*08daf0c2Sdrahn             }
220*08daf0c2Sdrahn             else {
221*08daf0c2Sdrahn                 if ( roundingMode == float_round_down ) roundIncrement = 0;
222*08daf0c2Sdrahn             }
223*08daf0c2Sdrahn         }
224*08daf0c2Sdrahn     }
225*08daf0c2Sdrahn     roundBits = zSig & 0x7F;
226*08daf0c2Sdrahn     if ( 0xFD <= (bits16) zExp ) {
227*08daf0c2Sdrahn         if (    ( 0xFD < zExp )
228*08daf0c2Sdrahn              || (    ( zExp == 0xFD )
229*08daf0c2Sdrahn                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
230*08daf0c2Sdrahn            ) {
231*08daf0c2Sdrahn             float_raise( float_flag_overflow | float_flag_inexact );
232*08daf0c2Sdrahn             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
233*08daf0c2Sdrahn         }
234*08daf0c2Sdrahn         if ( zExp < 0 ) {
235*08daf0c2Sdrahn             isTiny =
236*08daf0c2Sdrahn                    ( float_detect_tininess == float_tininess_before_rounding )
237*08daf0c2Sdrahn                 || ( zExp < -1 )
238*08daf0c2Sdrahn                 || ( zSig + roundIncrement < 0x80000000 );
239*08daf0c2Sdrahn             shift32RightJamming( zSig, - zExp, &zSig );
240*08daf0c2Sdrahn             zExp = 0;
241*08daf0c2Sdrahn             roundBits = zSig & 0x7F;
242*08daf0c2Sdrahn             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
243*08daf0c2Sdrahn         }
244*08daf0c2Sdrahn     }
245*08daf0c2Sdrahn     if ( roundBits ) float_exception_flags |= float_flag_inexact;
246*08daf0c2Sdrahn     zSig = ( zSig + roundIncrement )>>7;
247*08daf0c2Sdrahn     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
248*08daf0c2Sdrahn     if ( zSig == 0 ) zExp = 0;
249*08daf0c2Sdrahn     return packFloat32( zSign, zExp, zSig );
250*08daf0c2Sdrahn 
251*08daf0c2Sdrahn }
252*08daf0c2Sdrahn 
253*08daf0c2Sdrahn /*
254*08daf0c2Sdrahn -------------------------------------------------------------------------------
255*08daf0c2Sdrahn Takes an abstract floating-point value having sign `zSign', exponent `zExp',
256*08daf0c2Sdrahn and significand `zSig', and returns the proper single-precision floating-
257*08daf0c2Sdrahn point value corresponding to the abstract input.  This routine is just like
258*08daf0c2Sdrahn `roundAndPackFloat32' except that `zSig' does not have to be normalized.
259*08daf0c2Sdrahn Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
260*08daf0c2Sdrahn floating-point exponent.
261*08daf0c2Sdrahn -------------------------------------------------------------------------------
262*08daf0c2Sdrahn */
263*08daf0c2Sdrahn static float32
264*08daf0c2Sdrahn  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
265*08daf0c2Sdrahn {
266*08daf0c2Sdrahn     int8 shiftCount;
267*08daf0c2Sdrahn 
268*08daf0c2Sdrahn     shiftCount = countLeadingZeros32( zSig ) - 1;
269*08daf0c2Sdrahn     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
270*08daf0c2Sdrahn 
271*08daf0c2Sdrahn }
272*08daf0c2Sdrahn 
273*08daf0c2Sdrahn /*
274*08daf0c2Sdrahn -------------------------------------------------------------------------------
275*08daf0c2Sdrahn Returns the least-significant 32 fraction bits of the double-precision
276*08daf0c2Sdrahn floating-point value `a'.
277*08daf0c2Sdrahn -------------------------------------------------------------------------------
278*08daf0c2Sdrahn */
279*08daf0c2Sdrahn INLINE bits32 extractFloat64Frac1( float64 a )
280*08daf0c2Sdrahn {
281*08daf0c2Sdrahn 
282*08daf0c2Sdrahn     return FLOAT64_DEMANGLE(a) & LIT64( 0x00000000FFFFFFFF );
283*08daf0c2Sdrahn 
284*08daf0c2Sdrahn }
285*08daf0c2Sdrahn 
286*08daf0c2Sdrahn /*
287*08daf0c2Sdrahn -------------------------------------------------------------------------------
288*08daf0c2Sdrahn Returns the most-significant 20 fraction bits of the double-precision
289*08daf0c2Sdrahn floating-point value `a'.
290*08daf0c2Sdrahn -------------------------------------------------------------------------------
291*08daf0c2Sdrahn */
292*08daf0c2Sdrahn INLINE bits32 extractFloat64Frac0( float64 a )
293*08daf0c2Sdrahn {
294*08daf0c2Sdrahn 
295*08daf0c2Sdrahn     return ( FLOAT64_DEMANGLE(a)>>32 ) & 0x000FFFFF;
296*08daf0c2Sdrahn 
297*08daf0c2Sdrahn }
298*08daf0c2Sdrahn 
299*08daf0c2Sdrahn /*
300*08daf0c2Sdrahn -------------------------------------------------------------------------------
301*08daf0c2Sdrahn Returns the exponent bits of the double-precision floating-point value `a'.
302*08daf0c2Sdrahn -------------------------------------------------------------------------------
303*08daf0c2Sdrahn */
304*08daf0c2Sdrahn INLINE int16 extractFloat64Exp( float64 a )
305*08daf0c2Sdrahn {
306*08daf0c2Sdrahn 
307*08daf0c2Sdrahn     return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
308*08daf0c2Sdrahn 
309*08daf0c2Sdrahn }
310*08daf0c2Sdrahn 
311*08daf0c2Sdrahn /*
312*08daf0c2Sdrahn -------------------------------------------------------------------------------
313*08daf0c2Sdrahn Returns the sign bit of the double-precision floating-point value `a'.
314*08daf0c2Sdrahn -------------------------------------------------------------------------------
315*08daf0c2Sdrahn */
316*08daf0c2Sdrahn INLINE flag extractFloat64Sign( float64 a )
317*08daf0c2Sdrahn {
318*08daf0c2Sdrahn 
319*08daf0c2Sdrahn     return FLOAT64_DEMANGLE(a)>>63;
320*08daf0c2Sdrahn 
321*08daf0c2Sdrahn }
322*08daf0c2Sdrahn 
323*08daf0c2Sdrahn /*
324*08daf0c2Sdrahn -------------------------------------------------------------------------------
325*08daf0c2Sdrahn Normalizes the subnormal double-precision floating-point value represented
326*08daf0c2Sdrahn by the denormalized significand formed by the concatenation of `aSig0' and
327*08daf0c2Sdrahn `aSig1'.  The normalized exponent is stored at the location pointed to by
328*08daf0c2Sdrahn `zExpPtr'.  The most significant 21 bits of the normalized significand are
329*08daf0c2Sdrahn stored at the location pointed to by `zSig0Ptr', and the least significant
330*08daf0c2Sdrahn 32 bits of the normalized significand are stored at the location pointed to
331*08daf0c2Sdrahn by `zSig1Ptr'.
332*08daf0c2Sdrahn -------------------------------------------------------------------------------
333*08daf0c2Sdrahn */
334*08daf0c2Sdrahn static void
335*08daf0c2Sdrahn  normalizeFloat64Subnormal(
336*08daf0c2Sdrahn      bits32 aSig0,
337*08daf0c2Sdrahn      bits32 aSig1,
338*08daf0c2Sdrahn      int16 *zExpPtr,
339*08daf0c2Sdrahn      bits32 *zSig0Ptr,
340*08daf0c2Sdrahn      bits32 *zSig1Ptr
341*08daf0c2Sdrahn  )
342*08daf0c2Sdrahn {
343*08daf0c2Sdrahn     int8 shiftCount;
344*08daf0c2Sdrahn 
345*08daf0c2Sdrahn     if ( aSig0 == 0 ) {
346*08daf0c2Sdrahn         shiftCount = countLeadingZeros32( aSig1 ) - 11;
347*08daf0c2Sdrahn         if ( shiftCount < 0 ) {
348*08daf0c2Sdrahn             *zSig0Ptr = aSig1>>( - shiftCount );
349*08daf0c2Sdrahn             *zSig1Ptr = aSig1<<( shiftCount & 31 );
350*08daf0c2Sdrahn         }
351*08daf0c2Sdrahn         else {
352*08daf0c2Sdrahn             *zSig0Ptr = aSig1<<shiftCount;
353*08daf0c2Sdrahn             *zSig1Ptr = 0;
354*08daf0c2Sdrahn         }
355*08daf0c2Sdrahn         *zExpPtr = - shiftCount - 31;
356*08daf0c2Sdrahn     }
357*08daf0c2Sdrahn     else {
358*08daf0c2Sdrahn         shiftCount = countLeadingZeros32( aSig0 ) - 11;
359*08daf0c2Sdrahn         shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
360*08daf0c2Sdrahn         *zExpPtr = 1 - shiftCount;
361*08daf0c2Sdrahn     }
362*08daf0c2Sdrahn 
363*08daf0c2Sdrahn }
364*08daf0c2Sdrahn 
365*08daf0c2Sdrahn /*
366*08daf0c2Sdrahn -------------------------------------------------------------------------------
367*08daf0c2Sdrahn Packs the sign `zSign', the exponent `zExp', and the significand formed by
368*08daf0c2Sdrahn the concatenation of `zSig0' and `zSig1' into a double-precision floating-
369*08daf0c2Sdrahn point value, returning the result.  After being shifted into the proper
370*08daf0c2Sdrahn positions, the three fields `zSign', `zExp', and `zSig0' are simply added
371*08daf0c2Sdrahn together to form the most significant 32 bits of the result.  This means
372*08daf0c2Sdrahn that any integer portion of `zSig0' will be added into the exponent.  Since
373*08daf0c2Sdrahn a properly normalized significand will have an integer portion equal to 1,
374*08daf0c2Sdrahn the `zExp' input should be 1 less than the desired result exponent whenever
375*08daf0c2Sdrahn `zSig0' and `zSig1' concatenated form a complete, normalized significand.
376*08daf0c2Sdrahn -------------------------------------------------------------------------------
377*08daf0c2Sdrahn */
378*08daf0c2Sdrahn INLINE float64
379*08daf0c2Sdrahn  packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
380*08daf0c2Sdrahn {
381*08daf0c2Sdrahn 
382*08daf0c2Sdrahn     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
383*08daf0c2Sdrahn 			   ( ( (bits64) zExp )<<52 ) +
384*08daf0c2Sdrahn 			   ( ( (bits64) zSig0 )<<32 ) + zSig1 );
385*08daf0c2Sdrahn 
386*08daf0c2Sdrahn 
387*08daf0c2Sdrahn }
388*08daf0c2Sdrahn 
389*08daf0c2Sdrahn /*
390*08daf0c2Sdrahn -------------------------------------------------------------------------------
391*08daf0c2Sdrahn Takes an abstract floating-point value having sign `zSign', exponent `zExp',
392*08daf0c2Sdrahn and extended significand formed by the concatenation of `zSig0', `zSig1',
393*08daf0c2Sdrahn and `zSig2', and returns the proper double-precision floating-point value
394*08daf0c2Sdrahn corresponding to the abstract input.  Ordinarily, the abstract value is
395*08daf0c2Sdrahn simply rounded and packed into the double-precision format, with the inexact
396*08daf0c2Sdrahn exception raised if the abstract input cannot be represented exactly.
397*08daf0c2Sdrahn However, if the abstract value is too large, the overflow and inexact
398*08daf0c2Sdrahn exceptions are raised and an infinity or maximal finite value is returned.
399*08daf0c2Sdrahn If the abstract value is too small, the input value is rounded to a
400*08daf0c2Sdrahn subnormal number, and the underflow and inexact exceptions are raised if the
401*08daf0c2Sdrahn abstract input cannot be represented exactly as a subnormal double-precision
402*08daf0c2Sdrahn floating-point number.
403*08daf0c2Sdrahn     The input significand must be normalized or smaller.  If the input
404*08daf0c2Sdrahn significand is not normalized, `zExp' must be 0; in that case, the result
405*08daf0c2Sdrahn returned is a subnormal number, and it must not require rounding.  In the
406*08daf0c2Sdrahn usual case that the input significand is normalized, `zExp' must be 1 less
407*08daf0c2Sdrahn than the ``true'' floating-point exponent.  The handling of underflow and
408*08daf0c2Sdrahn overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
409*08daf0c2Sdrahn -------------------------------------------------------------------------------
410*08daf0c2Sdrahn */
411*08daf0c2Sdrahn static float64
412*08daf0c2Sdrahn  roundAndPackFloat64(
413*08daf0c2Sdrahn      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )
414*08daf0c2Sdrahn {
415*08daf0c2Sdrahn     int8 roundingMode;
416*08daf0c2Sdrahn     flag roundNearestEven, increment, isTiny;
417*08daf0c2Sdrahn 
418*08daf0c2Sdrahn     roundingMode = float_rounding_mode;
419*08daf0c2Sdrahn     roundNearestEven = ( roundingMode == float_round_nearest_even );
420*08daf0c2Sdrahn     increment = ( (sbits32) zSig2 < 0 );
421*08daf0c2Sdrahn     if ( ! roundNearestEven ) {
422*08daf0c2Sdrahn         if ( roundingMode == float_round_to_zero ) {
423*08daf0c2Sdrahn             increment = 0;
424*08daf0c2Sdrahn         }
425*08daf0c2Sdrahn         else {
426*08daf0c2Sdrahn             if ( zSign ) {
427*08daf0c2Sdrahn                 increment = ( roundingMode == float_round_down ) && zSig2;
428*08daf0c2Sdrahn             }
429*08daf0c2Sdrahn             else {
430*08daf0c2Sdrahn                 increment = ( roundingMode == float_round_up ) && zSig2;
431*08daf0c2Sdrahn             }
432*08daf0c2Sdrahn         }
433*08daf0c2Sdrahn     }
434*08daf0c2Sdrahn     if ( 0x7FD <= (bits16) zExp ) {
435*08daf0c2Sdrahn         if (    ( 0x7FD < zExp )
436*08daf0c2Sdrahn              || (    ( zExp == 0x7FD )
437*08daf0c2Sdrahn                   && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
438*08daf0c2Sdrahn                   && increment
439*08daf0c2Sdrahn                 )
440*08daf0c2Sdrahn            ) {
441*08daf0c2Sdrahn             float_raise( float_flag_overflow | float_flag_inexact );
442*08daf0c2Sdrahn             if (    ( roundingMode == float_round_to_zero )
443*08daf0c2Sdrahn                  || ( zSign && ( roundingMode == float_round_up ) )
444*08daf0c2Sdrahn                  || ( ! zSign && ( roundingMode == float_round_down ) )
445*08daf0c2Sdrahn                ) {
446*08daf0c2Sdrahn                 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );
447*08daf0c2Sdrahn             }
448*08daf0c2Sdrahn             return packFloat64( zSign, 0x7FF, 0, 0 );
449*08daf0c2Sdrahn         }
450*08daf0c2Sdrahn         if ( zExp < 0 ) {
451*08daf0c2Sdrahn             isTiny =
452*08daf0c2Sdrahn                    ( float_detect_tininess == float_tininess_before_rounding )
453*08daf0c2Sdrahn                 || ( zExp < -1 )
454*08daf0c2Sdrahn                 || ! increment
455*08daf0c2Sdrahn                 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
456*08daf0c2Sdrahn             shift64ExtraRightJamming(
457*08daf0c2Sdrahn                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
458*08daf0c2Sdrahn             zExp = 0;
459*08daf0c2Sdrahn             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
460*08daf0c2Sdrahn             if ( roundNearestEven ) {
461*08daf0c2Sdrahn                 increment = ( (sbits32) zSig2 < 0 );
462*08daf0c2Sdrahn             }
463*08daf0c2Sdrahn             else {
464*08daf0c2Sdrahn                 if ( zSign ) {
465*08daf0c2Sdrahn                     increment = ( roundingMode == float_round_down ) && zSig2;
466*08daf0c2Sdrahn                 }
467*08daf0c2Sdrahn                 else {
468*08daf0c2Sdrahn                     increment = ( roundingMode == float_round_up ) && zSig2;
469*08daf0c2Sdrahn                 }
470*08daf0c2Sdrahn             }
471*08daf0c2Sdrahn         }
472*08daf0c2Sdrahn     }
473*08daf0c2Sdrahn     if ( zSig2 ) float_exception_flags |= float_flag_inexact;
474*08daf0c2Sdrahn     if ( increment ) {
475*08daf0c2Sdrahn         add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
476*08daf0c2Sdrahn         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
477*08daf0c2Sdrahn     }
478*08daf0c2Sdrahn     else {
479*08daf0c2Sdrahn         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
480*08daf0c2Sdrahn     }
481*08daf0c2Sdrahn     return packFloat64( zSign, zExp, zSig0, zSig1 );
482*08daf0c2Sdrahn 
483*08daf0c2Sdrahn }
484*08daf0c2Sdrahn 
485*08daf0c2Sdrahn /*
486*08daf0c2Sdrahn -------------------------------------------------------------------------------
487*08daf0c2Sdrahn Takes an abstract floating-point value having sign `zSign', exponent `zExp',
488*08daf0c2Sdrahn and significand formed by the concatenation of `zSig0' and `zSig1', and
489*08daf0c2Sdrahn returns the proper double-precision floating-point value corresponding
490*08daf0c2Sdrahn to the abstract input.  This routine is just like `roundAndPackFloat64'
491*08daf0c2Sdrahn except that the input significand has fewer bits and does not have to be
492*08daf0c2Sdrahn normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
493*08daf0c2Sdrahn point exponent.
494*08daf0c2Sdrahn -------------------------------------------------------------------------------
495*08daf0c2Sdrahn */
496*08daf0c2Sdrahn static float64
497*08daf0c2Sdrahn  normalizeRoundAndPackFloat64(
498*08daf0c2Sdrahn      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
499*08daf0c2Sdrahn {
500*08daf0c2Sdrahn     int8 shiftCount;
501*08daf0c2Sdrahn     bits32 zSig2;
502*08daf0c2Sdrahn 
503*08daf0c2Sdrahn     if ( zSig0 == 0 ) {
504*08daf0c2Sdrahn         zSig0 = zSig1;
505*08daf0c2Sdrahn         zSig1 = 0;
506*08daf0c2Sdrahn         zExp -= 32;
507*08daf0c2Sdrahn     }
508*08daf0c2Sdrahn     shiftCount = countLeadingZeros32( zSig0 ) - 11;
509*08daf0c2Sdrahn     if ( 0 <= shiftCount ) {
510*08daf0c2Sdrahn         zSig2 = 0;
511*08daf0c2Sdrahn         shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
512*08daf0c2Sdrahn     }
513*08daf0c2Sdrahn     else {
514*08daf0c2Sdrahn         shift64ExtraRightJamming(
515*08daf0c2Sdrahn             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
516*08daf0c2Sdrahn     }
517*08daf0c2Sdrahn     zExp -= shiftCount;
518*08daf0c2Sdrahn     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
519*08daf0c2Sdrahn 
520*08daf0c2Sdrahn }
521*08daf0c2Sdrahn 
522*08daf0c2Sdrahn /*
523*08daf0c2Sdrahn -------------------------------------------------------------------------------
524*08daf0c2Sdrahn Returns the result of converting the 32-bit two's complement integer `a' to
525*08daf0c2Sdrahn the single-precision floating-point format.  The conversion is performed
526*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
527*08daf0c2Sdrahn -------------------------------------------------------------------------------
528*08daf0c2Sdrahn */
529*08daf0c2Sdrahn float32 int32_to_float32( int32 a )
530*08daf0c2Sdrahn {
531*08daf0c2Sdrahn     flag zSign;
532*08daf0c2Sdrahn 
533*08daf0c2Sdrahn     if ( a == 0 ) return 0;
534*08daf0c2Sdrahn     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
535*08daf0c2Sdrahn     zSign = ( a < 0 );
536*08daf0c2Sdrahn     return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
537*08daf0c2Sdrahn 
538*08daf0c2Sdrahn }
539*08daf0c2Sdrahn 
540*08daf0c2Sdrahn /*
541*08daf0c2Sdrahn -------------------------------------------------------------------------------
542*08daf0c2Sdrahn Returns the result of converting the 32-bit two's complement integer `a' to
543*08daf0c2Sdrahn the double-precision floating-point format.  The conversion is performed
544*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
545*08daf0c2Sdrahn -------------------------------------------------------------------------------
546*08daf0c2Sdrahn */
547*08daf0c2Sdrahn float64 int32_to_float64( int32 a )
548*08daf0c2Sdrahn {
549*08daf0c2Sdrahn     flag zSign;
550*08daf0c2Sdrahn     bits32 absA;
551*08daf0c2Sdrahn     int8 shiftCount;
552*08daf0c2Sdrahn     bits32 zSig0, zSig1;
553*08daf0c2Sdrahn 
554*08daf0c2Sdrahn     if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );
555*08daf0c2Sdrahn     zSign = ( a < 0 );
556*08daf0c2Sdrahn     absA = zSign ? - a : a;
557*08daf0c2Sdrahn     shiftCount = countLeadingZeros32( absA ) - 11;
558*08daf0c2Sdrahn     if ( 0 <= shiftCount ) {
559*08daf0c2Sdrahn         zSig0 = absA<<shiftCount;
560*08daf0c2Sdrahn         zSig1 = 0;
561*08daf0c2Sdrahn     }
562*08daf0c2Sdrahn     else {
563*08daf0c2Sdrahn         shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );
564*08daf0c2Sdrahn     }
565*08daf0c2Sdrahn     return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );
566*08daf0c2Sdrahn 
567*08daf0c2Sdrahn }
568*08daf0c2Sdrahn 
569*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
570*08daf0c2Sdrahn /*
571*08daf0c2Sdrahn -------------------------------------------------------------------------------
572*08daf0c2Sdrahn Returns the result of converting the single-precision floating-point value
573*08daf0c2Sdrahn `a' to the 32-bit two's complement integer format.  The conversion is
574*08daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point
575*08daf0c2Sdrahn Arithmetic---which means in particular that the conversion is rounded
576*08daf0c2Sdrahn according to the current rounding mode.  If `a' is a NaN, the largest
577*08daf0c2Sdrahn positive integer is returned.  Otherwise, if the conversion overflows, the
578*08daf0c2Sdrahn largest integer with the same sign as `a' is returned.
579*08daf0c2Sdrahn -------------------------------------------------------------------------------
580*08daf0c2Sdrahn */
581*08daf0c2Sdrahn int32 float32_to_int32( float32 a )
582*08daf0c2Sdrahn {
583*08daf0c2Sdrahn     flag aSign;
584*08daf0c2Sdrahn     int16 aExp, shiftCount;
585*08daf0c2Sdrahn     bits32 aSig, aSigExtra;
586*08daf0c2Sdrahn     int32 z;
587*08daf0c2Sdrahn     int8 roundingMode;
588*08daf0c2Sdrahn 
589*08daf0c2Sdrahn     aSig = extractFloat32Frac( a );
590*08daf0c2Sdrahn     aExp = extractFloat32Exp( a );
591*08daf0c2Sdrahn     aSign = extractFloat32Sign( a );
592*08daf0c2Sdrahn     shiftCount = aExp - 0x96;
593*08daf0c2Sdrahn     if ( 0 <= shiftCount ) {
594*08daf0c2Sdrahn         if ( 0x9E <= aExp ) {
595*08daf0c2Sdrahn             if ( a != 0xCF000000 ) {
596*08daf0c2Sdrahn                 float_raise( float_flag_invalid );
597*08daf0c2Sdrahn                 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
598*08daf0c2Sdrahn                     return 0x7FFFFFFF;
599*08daf0c2Sdrahn                 }
600*08daf0c2Sdrahn             }
601*08daf0c2Sdrahn             return (sbits32) 0x80000000;
602*08daf0c2Sdrahn         }
603*08daf0c2Sdrahn         z = ( aSig | 0x00800000 )<<shiftCount;
604*08daf0c2Sdrahn         if ( aSign ) z = - z;
605*08daf0c2Sdrahn     }
606*08daf0c2Sdrahn     else {
607*08daf0c2Sdrahn         if ( aExp < 0x7E ) {
608*08daf0c2Sdrahn             aSigExtra = aExp | aSig;
609*08daf0c2Sdrahn             z = 0;
610*08daf0c2Sdrahn         }
611*08daf0c2Sdrahn         else {
612*08daf0c2Sdrahn             aSig |= 0x00800000;
613*08daf0c2Sdrahn             aSigExtra = aSig<<( shiftCount & 31 );
614*08daf0c2Sdrahn             z = aSig>>( - shiftCount );
615*08daf0c2Sdrahn         }
616*08daf0c2Sdrahn         if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
617*08daf0c2Sdrahn         roundingMode = float_rounding_mode;
618*08daf0c2Sdrahn         if ( roundingMode == float_round_nearest_even ) {
619*08daf0c2Sdrahn             if ( (sbits32) aSigExtra < 0 ) {
620*08daf0c2Sdrahn                 ++z;
621*08daf0c2Sdrahn                 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;
622*08daf0c2Sdrahn             }
623*08daf0c2Sdrahn             if ( aSign ) z = - z;
624*08daf0c2Sdrahn         }
625*08daf0c2Sdrahn         else {
626*08daf0c2Sdrahn             aSigExtra = ( aSigExtra != 0 );
627*08daf0c2Sdrahn             if ( aSign ) {
628*08daf0c2Sdrahn                 z += ( roundingMode == float_round_down ) & aSigExtra;
629*08daf0c2Sdrahn                 z = - z;
630*08daf0c2Sdrahn             }
631*08daf0c2Sdrahn             else {
632*08daf0c2Sdrahn                 z += ( roundingMode == float_round_up ) & aSigExtra;
633*08daf0c2Sdrahn             }
634*08daf0c2Sdrahn         }
635*08daf0c2Sdrahn     }
636*08daf0c2Sdrahn     return z;
637*08daf0c2Sdrahn 
638*08daf0c2Sdrahn }
639*08daf0c2Sdrahn #endif
640*08daf0c2Sdrahn 
641*08daf0c2Sdrahn /*
642*08daf0c2Sdrahn -------------------------------------------------------------------------------
643*08daf0c2Sdrahn Returns the result of converting the single-precision floating-point value
644*08daf0c2Sdrahn `a' to the 32-bit two's complement integer format.  The conversion is
645*08daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point
646*08daf0c2Sdrahn Arithmetic, except that the conversion is always rounded toward zero.
647*08daf0c2Sdrahn If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
648*08daf0c2Sdrahn the conversion overflows, the largest integer with the same sign as `a' is
649*08daf0c2Sdrahn returned.
650*08daf0c2Sdrahn -------------------------------------------------------------------------------
651*08daf0c2Sdrahn */
652*08daf0c2Sdrahn int32 float32_to_int32_round_to_zero( float32 a )
653*08daf0c2Sdrahn {
654*08daf0c2Sdrahn     flag aSign;
655*08daf0c2Sdrahn     int16 aExp, shiftCount;
656*08daf0c2Sdrahn     bits32 aSig;
657*08daf0c2Sdrahn     int32 z;
658*08daf0c2Sdrahn 
659*08daf0c2Sdrahn     aSig = extractFloat32Frac( a );
660*08daf0c2Sdrahn     aExp = extractFloat32Exp( a );
661*08daf0c2Sdrahn     aSign = extractFloat32Sign( a );
662*08daf0c2Sdrahn     shiftCount = aExp - 0x9E;
663*08daf0c2Sdrahn     if ( 0 <= shiftCount ) {
664*08daf0c2Sdrahn         if ( a != 0xCF000000 ) {
665*08daf0c2Sdrahn             float_raise( float_flag_invalid );
666*08daf0c2Sdrahn             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
667*08daf0c2Sdrahn         }
668*08daf0c2Sdrahn         return (sbits32) 0x80000000;
669*08daf0c2Sdrahn     }
670*08daf0c2Sdrahn     else if ( aExp <= 0x7E ) {
671*08daf0c2Sdrahn         if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
672*08daf0c2Sdrahn         return 0;
673*08daf0c2Sdrahn     }
674*08daf0c2Sdrahn     aSig = ( aSig | 0x00800000 )<<8;
675*08daf0c2Sdrahn     z = aSig>>( - shiftCount );
676*08daf0c2Sdrahn     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
677*08daf0c2Sdrahn         float_exception_flags |= float_flag_inexact;
678*08daf0c2Sdrahn     }
679*08daf0c2Sdrahn     if ( aSign ) z = - z;
680*08daf0c2Sdrahn     return z;
681*08daf0c2Sdrahn 
682*08daf0c2Sdrahn }
683*08daf0c2Sdrahn 
684*08daf0c2Sdrahn /*
685*08daf0c2Sdrahn -------------------------------------------------------------------------------
686*08daf0c2Sdrahn Returns the result of converting the single-precision floating-point value
687*08daf0c2Sdrahn `a' to the double-precision floating-point format.  The conversion is
688*08daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point
689*08daf0c2Sdrahn Arithmetic.
690*08daf0c2Sdrahn -------------------------------------------------------------------------------
691*08daf0c2Sdrahn */
692*08daf0c2Sdrahn float64 float32_to_float64( float32 a )
693*08daf0c2Sdrahn {
694*08daf0c2Sdrahn     flag aSign;
695*08daf0c2Sdrahn     int16 aExp;
696*08daf0c2Sdrahn     bits32 aSig, zSig0, zSig1;
697*08daf0c2Sdrahn 
698*08daf0c2Sdrahn     aSig = extractFloat32Frac( a );
699*08daf0c2Sdrahn     aExp = extractFloat32Exp( a );
700*08daf0c2Sdrahn     aSign = extractFloat32Sign( a );
701*08daf0c2Sdrahn     if ( aExp == 0xFF ) {
702*08daf0c2Sdrahn         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
703*08daf0c2Sdrahn         return packFloat64( aSign, 0x7FF, 0, 0 );
704*08daf0c2Sdrahn     }
705*08daf0c2Sdrahn     if ( aExp == 0 ) {
706*08daf0c2Sdrahn         if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );
707*08daf0c2Sdrahn         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
708*08daf0c2Sdrahn         --aExp;
709*08daf0c2Sdrahn     }
710*08daf0c2Sdrahn     shift64Right( aSig, 0, 3, &zSig0, &zSig1 );
711*08daf0c2Sdrahn     return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );
712*08daf0c2Sdrahn 
713*08daf0c2Sdrahn }
714*08daf0c2Sdrahn 
715*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
716*08daf0c2Sdrahn /*
717*08daf0c2Sdrahn -------------------------------------------------------------------------------
718*08daf0c2Sdrahn Rounds the single-precision floating-point value `a' to an integer,
719*08daf0c2Sdrahn and returns the result as a single-precision floating-point value.  The
720*08daf0c2Sdrahn operation is performed according to the IEC/IEEE Standard for Binary
721*08daf0c2Sdrahn Floating-Point Arithmetic.
722*08daf0c2Sdrahn -------------------------------------------------------------------------------
723*08daf0c2Sdrahn */
724*08daf0c2Sdrahn float32 float32_round_to_int( float32 a )
725*08daf0c2Sdrahn {
726*08daf0c2Sdrahn     flag aSign;
727*08daf0c2Sdrahn     int16 aExp;
728*08daf0c2Sdrahn     bits32 lastBitMask, roundBitsMask;
729*08daf0c2Sdrahn     int8 roundingMode;
730*08daf0c2Sdrahn     float32 z;
731*08daf0c2Sdrahn 
732*08daf0c2Sdrahn     aExp = extractFloat32Exp( a );
733*08daf0c2Sdrahn     if ( 0x96 <= aExp ) {
734*08daf0c2Sdrahn         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
735*08daf0c2Sdrahn             return propagateFloat32NaN( a, a );
736*08daf0c2Sdrahn         }
737*08daf0c2Sdrahn         return a;
738*08daf0c2Sdrahn     }
739*08daf0c2Sdrahn     if ( aExp <= 0x7E ) {
740*08daf0c2Sdrahn         if ( (bits32) ( a<<1 ) == 0 ) return a;
741*08daf0c2Sdrahn         float_exception_flags |= float_flag_inexact;
742*08daf0c2Sdrahn         aSign = extractFloat32Sign( a );
743*08daf0c2Sdrahn         switch ( float_rounding_mode ) {
744*08daf0c2Sdrahn          case float_round_nearest_even:
745*08daf0c2Sdrahn             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
746*08daf0c2Sdrahn                 return packFloat32( aSign, 0x7F, 0 );
747*08daf0c2Sdrahn             }
748*08daf0c2Sdrahn             break;
749*08daf0c2Sdrahn 	 case float_round_to_zero:
750*08daf0c2Sdrahn 	    break;
751*08daf0c2Sdrahn          case float_round_down:
752*08daf0c2Sdrahn             return aSign ? 0xBF800000 : 0;
753*08daf0c2Sdrahn          case float_round_up:
754*08daf0c2Sdrahn             return aSign ? 0x80000000 : 0x3F800000;
755*08daf0c2Sdrahn         }
756*08daf0c2Sdrahn         return packFloat32( aSign, 0, 0 );
757*08daf0c2Sdrahn     }
758*08daf0c2Sdrahn     lastBitMask = 1;
759*08daf0c2Sdrahn     lastBitMask <<= 0x96 - aExp;
760*08daf0c2Sdrahn     roundBitsMask = lastBitMask - 1;
761*08daf0c2Sdrahn     z = a;
762*08daf0c2Sdrahn     roundingMode = float_rounding_mode;
763*08daf0c2Sdrahn     if ( roundingMode == float_round_nearest_even ) {
764*08daf0c2Sdrahn         z += lastBitMask>>1;
765*08daf0c2Sdrahn         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
766*08daf0c2Sdrahn     }
767*08daf0c2Sdrahn     else if ( roundingMode != float_round_to_zero ) {
768*08daf0c2Sdrahn         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
769*08daf0c2Sdrahn             z += roundBitsMask;
770*08daf0c2Sdrahn         }
771*08daf0c2Sdrahn     }
772*08daf0c2Sdrahn     z &= ~ roundBitsMask;
773*08daf0c2Sdrahn     if ( z != a ) float_exception_flags |= float_flag_inexact;
774*08daf0c2Sdrahn     return z;
775*08daf0c2Sdrahn 
776*08daf0c2Sdrahn }
777*08daf0c2Sdrahn #endif
778*08daf0c2Sdrahn 
779*08daf0c2Sdrahn /*
780*08daf0c2Sdrahn -------------------------------------------------------------------------------
781*08daf0c2Sdrahn Returns the result of adding the absolute values of the single-precision
782*08daf0c2Sdrahn floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
783*08daf0c2Sdrahn before being returned.  `zSign' is ignored if the result is a NaN.
784*08daf0c2Sdrahn The addition is performed according to the IEC/IEEE Standard for Binary
785*08daf0c2Sdrahn Floating-Point Arithmetic.
786*08daf0c2Sdrahn -------------------------------------------------------------------------------
787*08daf0c2Sdrahn */
788*08daf0c2Sdrahn static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
789*08daf0c2Sdrahn {
790*08daf0c2Sdrahn     int16 aExp, bExp, zExp;
791*08daf0c2Sdrahn     bits32 aSig, bSig, zSig;
792*08daf0c2Sdrahn     int16 expDiff;
793*08daf0c2Sdrahn 
794*08daf0c2Sdrahn     aSig = extractFloat32Frac( a );
795*08daf0c2Sdrahn     aExp = extractFloat32Exp( a );
796*08daf0c2Sdrahn     bSig = extractFloat32Frac( b );
797*08daf0c2Sdrahn     bExp = extractFloat32Exp( b );
798*08daf0c2Sdrahn     expDiff = aExp - bExp;
799*08daf0c2Sdrahn     aSig <<= 6;
800*08daf0c2Sdrahn     bSig <<= 6;
801*08daf0c2Sdrahn     if ( 0 < expDiff ) {
802*08daf0c2Sdrahn         if ( aExp == 0xFF ) {
803*08daf0c2Sdrahn             if ( aSig ) return propagateFloat32NaN( a, b );
804*08daf0c2Sdrahn             return a;
805*08daf0c2Sdrahn         }
806*08daf0c2Sdrahn         if ( bExp == 0 ) {
807*08daf0c2Sdrahn             --expDiff;
808*08daf0c2Sdrahn         }
809*08daf0c2Sdrahn         else {
810*08daf0c2Sdrahn             bSig |= 0x20000000;
811*08daf0c2Sdrahn         }
812*08daf0c2Sdrahn         shift32RightJamming( bSig, expDiff, &bSig );
813*08daf0c2Sdrahn         zExp = aExp;
814*08daf0c2Sdrahn     }
815*08daf0c2Sdrahn     else if ( expDiff < 0 ) {
816*08daf0c2Sdrahn         if ( bExp == 0xFF ) {
817*08daf0c2Sdrahn             if ( bSig ) return propagateFloat32NaN( a, b );
818*08daf0c2Sdrahn             return packFloat32( zSign, 0xFF, 0 );
819*08daf0c2Sdrahn         }
820*08daf0c2Sdrahn         if ( aExp == 0 ) {
821*08daf0c2Sdrahn             ++expDiff;
822*08daf0c2Sdrahn         }
823*08daf0c2Sdrahn         else {
824*08daf0c2Sdrahn             aSig |= 0x20000000;
825*08daf0c2Sdrahn         }
826*08daf0c2Sdrahn         shift32RightJamming( aSig, - expDiff, &aSig );
827*08daf0c2Sdrahn         zExp = bExp;
828*08daf0c2Sdrahn     }
829*08daf0c2Sdrahn     else {
830*08daf0c2Sdrahn         if ( aExp == 0xFF ) {
831*08daf0c2Sdrahn             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
832*08daf0c2Sdrahn             return a;
833*08daf0c2Sdrahn         }
834*08daf0c2Sdrahn         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
835*08daf0c2Sdrahn         zSig = 0x40000000 + aSig + bSig;
836*08daf0c2Sdrahn         zExp = aExp;
837*08daf0c2Sdrahn         goto roundAndPack;
838*08daf0c2Sdrahn     }
839*08daf0c2Sdrahn     aSig |= 0x20000000;
840*08daf0c2Sdrahn     zSig = ( aSig + bSig )<<1;
841*08daf0c2Sdrahn     --zExp;
842*08daf0c2Sdrahn     if ( (sbits32) zSig < 0 ) {
843*08daf0c2Sdrahn         zSig = aSig + bSig;
844*08daf0c2Sdrahn         ++zExp;
845*08daf0c2Sdrahn     }
846*08daf0c2Sdrahn  roundAndPack:
847*08daf0c2Sdrahn     return roundAndPackFloat32( zSign, zExp, zSig );
848*08daf0c2Sdrahn 
849*08daf0c2Sdrahn }
850*08daf0c2Sdrahn 
851*08daf0c2Sdrahn /*
852*08daf0c2Sdrahn -------------------------------------------------------------------------------
853*08daf0c2Sdrahn Returns the result of subtracting the absolute values of the single-
854*08daf0c2Sdrahn precision floating-point values `a' and `b'.  If `zSign' is 1, the
855*08daf0c2Sdrahn difference is negated before being returned.  `zSign' is ignored if the
856*08daf0c2Sdrahn result is a NaN.  The subtraction is performed according to the IEC/IEEE
857*08daf0c2Sdrahn Standard for Binary Floating-Point Arithmetic.
858*08daf0c2Sdrahn -------------------------------------------------------------------------------
859*08daf0c2Sdrahn */
860*08daf0c2Sdrahn static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
861*08daf0c2Sdrahn {
862*08daf0c2Sdrahn     int16 aExp, bExp, zExp;
863*08daf0c2Sdrahn     bits32 aSig, bSig, zSig;
864*08daf0c2Sdrahn     int16 expDiff;
865*08daf0c2Sdrahn 
866*08daf0c2Sdrahn     aSig = extractFloat32Frac( a );
867*08daf0c2Sdrahn     aExp = extractFloat32Exp( a );
868*08daf0c2Sdrahn     bSig = extractFloat32Frac( b );
869*08daf0c2Sdrahn     bExp = extractFloat32Exp( b );
870*08daf0c2Sdrahn     expDiff = aExp - bExp;
871*08daf0c2Sdrahn     aSig <<= 7;
872*08daf0c2Sdrahn     bSig <<= 7;
873*08daf0c2Sdrahn     if ( 0 < expDiff ) goto aExpBigger;
874*08daf0c2Sdrahn     if ( expDiff < 0 ) goto bExpBigger;
875*08daf0c2Sdrahn     if ( aExp == 0xFF ) {
876*08daf0c2Sdrahn         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
877*08daf0c2Sdrahn         float_raise( float_flag_invalid );
878*08daf0c2Sdrahn         return float32_default_nan;
879*08daf0c2Sdrahn     }
880*08daf0c2Sdrahn     if ( aExp == 0 ) {
881*08daf0c2Sdrahn         aExp = 1;
882*08daf0c2Sdrahn         bExp = 1;
883*08daf0c2Sdrahn     }
884*08daf0c2Sdrahn     if ( bSig < aSig ) goto aBigger;
885*08daf0c2Sdrahn     if ( aSig < bSig ) goto bBigger;
886*08daf0c2Sdrahn     return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
887*08daf0c2Sdrahn  bExpBigger:
888*08daf0c2Sdrahn     if ( bExp == 0xFF ) {
889*08daf0c2Sdrahn         if ( bSig ) return propagateFloat32NaN( a, b );
890*08daf0c2Sdrahn         return packFloat32( zSign ^ 1, 0xFF, 0 );
891*08daf0c2Sdrahn     }
892*08daf0c2Sdrahn     if ( aExp == 0 ) {
893*08daf0c2Sdrahn         ++expDiff;
894*08daf0c2Sdrahn     }
895*08daf0c2Sdrahn     else {
896*08daf0c2Sdrahn         aSig |= 0x40000000;
897*08daf0c2Sdrahn     }
898*08daf0c2Sdrahn     shift32RightJamming( aSig, - expDiff, &aSig );
899*08daf0c2Sdrahn     bSig |= 0x40000000;
900*08daf0c2Sdrahn  bBigger:
901*08daf0c2Sdrahn     zSig = bSig - aSig;
902*08daf0c2Sdrahn     zExp = bExp;
903*08daf0c2Sdrahn     zSign ^= 1;
904*08daf0c2Sdrahn     goto normalizeRoundAndPack;
905*08daf0c2Sdrahn  aExpBigger:
906*08daf0c2Sdrahn     if ( aExp == 0xFF ) {
907*08daf0c2Sdrahn         if ( aSig ) return propagateFloat32NaN( a, b );
908*08daf0c2Sdrahn         return a;
909*08daf0c2Sdrahn     }
910*08daf0c2Sdrahn     if ( bExp == 0 ) {
911*08daf0c2Sdrahn         --expDiff;
912*08daf0c2Sdrahn     }
913*08daf0c2Sdrahn     else {
914*08daf0c2Sdrahn         bSig |= 0x40000000;
915*08daf0c2Sdrahn     }
916*08daf0c2Sdrahn     shift32RightJamming( bSig, expDiff, &bSig );
917*08daf0c2Sdrahn     aSig |= 0x40000000;
918*08daf0c2Sdrahn  aBigger:
919*08daf0c2Sdrahn     zSig = aSig - bSig;
920*08daf0c2Sdrahn     zExp = aExp;
921*08daf0c2Sdrahn  normalizeRoundAndPack:
922*08daf0c2Sdrahn     --zExp;
923*08daf0c2Sdrahn     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
924*08daf0c2Sdrahn 
925*08daf0c2Sdrahn }
926*08daf0c2Sdrahn 
927*08daf0c2Sdrahn /*
928*08daf0c2Sdrahn -------------------------------------------------------------------------------
929*08daf0c2Sdrahn Returns the result of adding the single-precision floating-point values `a'
930*08daf0c2Sdrahn and `b'.  The operation is performed according to the IEC/IEEE Standard for
931*08daf0c2Sdrahn Binary Floating-Point Arithmetic.
932*08daf0c2Sdrahn -------------------------------------------------------------------------------
933*08daf0c2Sdrahn */
934*08daf0c2Sdrahn float32 float32_add( float32 a, float32 b )
935*08daf0c2Sdrahn {
936*08daf0c2Sdrahn     flag aSign, bSign;
937*08daf0c2Sdrahn 
938*08daf0c2Sdrahn     aSign = extractFloat32Sign( a );
939*08daf0c2Sdrahn     bSign = extractFloat32Sign( b );
940*08daf0c2Sdrahn     if ( aSign == bSign ) {
941*08daf0c2Sdrahn         return addFloat32Sigs( a, b, aSign );
942*08daf0c2Sdrahn     }
943*08daf0c2Sdrahn     else {
944*08daf0c2Sdrahn         return subFloat32Sigs( a, b, aSign );
945*08daf0c2Sdrahn     }
946*08daf0c2Sdrahn 
947*08daf0c2Sdrahn }
948*08daf0c2Sdrahn 
949*08daf0c2Sdrahn /*
950*08daf0c2Sdrahn -------------------------------------------------------------------------------
951*08daf0c2Sdrahn Returns the result of subtracting the single-precision floating-point values
952*08daf0c2Sdrahn `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
953*08daf0c2Sdrahn for Binary Floating-Point Arithmetic.
954*08daf0c2Sdrahn -------------------------------------------------------------------------------
955*08daf0c2Sdrahn */
956*08daf0c2Sdrahn float32 float32_sub( float32 a, float32 b )
957*08daf0c2Sdrahn {
958*08daf0c2Sdrahn     flag aSign, bSign;
959*08daf0c2Sdrahn 
960*08daf0c2Sdrahn     aSign = extractFloat32Sign( a );
961*08daf0c2Sdrahn     bSign = extractFloat32Sign( b );
962*08daf0c2Sdrahn     if ( aSign == bSign ) {
963*08daf0c2Sdrahn         return subFloat32Sigs( a, b, aSign );
964*08daf0c2Sdrahn     }
965*08daf0c2Sdrahn     else {
966*08daf0c2Sdrahn         return addFloat32Sigs( a, b, aSign );
967*08daf0c2Sdrahn     }
968*08daf0c2Sdrahn 
969*08daf0c2Sdrahn }
970*08daf0c2Sdrahn 
971*08daf0c2Sdrahn /*
972*08daf0c2Sdrahn -------------------------------------------------------------------------------
973*08daf0c2Sdrahn Returns the result of multiplying the single-precision floating-point values
974*08daf0c2Sdrahn `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
975*08daf0c2Sdrahn for Binary Floating-Point Arithmetic.
976*08daf0c2Sdrahn -------------------------------------------------------------------------------
977*08daf0c2Sdrahn */
978*08daf0c2Sdrahn float32 float32_mul( float32 a, float32 b )
979*08daf0c2Sdrahn {
980*08daf0c2Sdrahn     flag aSign, bSign, zSign;
981*08daf0c2Sdrahn     int16 aExp, bExp, zExp;
982*08daf0c2Sdrahn     bits32 aSig, bSig, zSig0, zSig1;
983*08daf0c2Sdrahn 
984*08daf0c2Sdrahn     aSig = extractFloat32Frac( a );
985*08daf0c2Sdrahn     aExp = extractFloat32Exp( a );
986*08daf0c2Sdrahn     aSign = extractFloat32Sign( a );
987*08daf0c2Sdrahn     bSig = extractFloat32Frac( b );
988*08daf0c2Sdrahn     bExp = extractFloat32Exp( b );
989*08daf0c2Sdrahn     bSign = extractFloat32Sign( b );
990*08daf0c2Sdrahn     zSign = aSign ^ bSign;
991*08daf0c2Sdrahn     if ( aExp == 0xFF ) {
992*08daf0c2Sdrahn         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
993*08daf0c2Sdrahn             return propagateFloat32NaN( a, b );
994*08daf0c2Sdrahn         }
995*08daf0c2Sdrahn         if ( ( bExp | bSig ) == 0 ) {
996*08daf0c2Sdrahn             float_raise( float_flag_invalid );
997*08daf0c2Sdrahn             return float32_default_nan;
998*08daf0c2Sdrahn         }
999*08daf0c2Sdrahn         return packFloat32( zSign, 0xFF, 0 );
1000*08daf0c2Sdrahn     }
1001*08daf0c2Sdrahn     if ( bExp == 0xFF ) {
1002*08daf0c2Sdrahn         if ( bSig ) return propagateFloat32NaN( a, b );
1003*08daf0c2Sdrahn         if ( ( aExp | aSig ) == 0 ) {
1004*08daf0c2Sdrahn             float_raise( float_flag_invalid );
1005*08daf0c2Sdrahn             return float32_default_nan;
1006*08daf0c2Sdrahn         }
1007*08daf0c2Sdrahn         return packFloat32( zSign, 0xFF, 0 );
1008*08daf0c2Sdrahn     }
1009*08daf0c2Sdrahn     if ( aExp == 0 ) {
1010*08daf0c2Sdrahn         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1011*08daf0c2Sdrahn         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1012*08daf0c2Sdrahn     }
1013*08daf0c2Sdrahn     if ( bExp == 0 ) {
1014*08daf0c2Sdrahn         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1015*08daf0c2Sdrahn         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1016*08daf0c2Sdrahn     }
1017*08daf0c2Sdrahn     zExp = aExp + bExp - 0x7F;
1018*08daf0c2Sdrahn     aSig = ( aSig | 0x00800000 )<<7;
1019*08daf0c2Sdrahn     bSig = ( bSig | 0x00800000 )<<8;
1020*08daf0c2Sdrahn     mul32To64( aSig, bSig, &zSig0, &zSig1 );
1021*08daf0c2Sdrahn     zSig0 |= ( zSig1 != 0 );
1022*08daf0c2Sdrahn     if ( 0 <= (sbits32) ( zSig0<<1 ) ) {
1023*08daf0c2Sdrahn         zSig0 <<= 1;
1024*08daf0c2Sdrahn         --zExp;
1025*08daf0c2Sdrahn     }
1026*08daf0c2Sdrahn     return roundAndPackFloat32( zSign, zExp, zSig0 );
1027*08daf0c2Sdrahn 
1028*08daf0c2Sdrahn }
1029*08daf0c2Sdrahn 
1030*08daf0c2Sdrahn /*
1031*08daf0c2Sdrahn -------------------------------------------------------------------------------
1032*08daf0c2Sdrahn Returns the result of dividing the single-precision floating-point value `a'
1033*08daf0c2Sdrahn by the corresponding value `b'.  The operation is performed according to the
1034*08daf0c2Sdrahn IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1035*08daf0c2Sdrahn -------------------------------------------------------------------------------
1036*08daf0c2Sdrahn */
1037*08daf0c2Sdrahn float32 float32_div( float32 a, float32 b )
1038*08daf0c2Sdrahn {
1039*08daf0c2Sdrahn     flag aSign, bSign, zSign;
1040*08daf0c2Sdrahn     int16 aExp, bExp, zExp;
1041*08daf0c2Sdrahn     bits32 aSig, bSig, zSig, rem0, rem1, term0, term1;
1042*08daf0c2Sdrahn 
1043*08daf0c2Sdrahn     aSig = extractFloat32Frac( a );
1044*08daf0c2Sdrahn     aExp = extractFloat32Exp( a );
1045*08daf0c2Sdrahn     aSign = extractFloat32Sign( a );
1046*08daf0c2Sdrahn     bSig = extractFloat32Frac( b );
1047*08daf0c2Sdrahn     bExp = extractFloat32Exp( b );
1048*08daf0c2Sdrahn     bSign = extractFloat32Sign( b );
1049*08daf0c2Sdrahn     zSign = aSign ^ bSign;
1050*08daf0c2Sdrahn     if ( aExp == 0xFF ) {
1051*08daf0c2Sdrahn         if ( aSig ) return propagateFloat32NaN( a, b );
1052*08daf0c2Sdrahn         if ( bExp == 0xFF ) {
1053*08daf0c2Sdrahn             if ( bSig ) return propagateFloat32NaN( a, b );
1054*08daf0c2Sdrahn             float_raise( float_flag_invalid );
1055*08daf0c2Sdrahn             return float32_default_nan;
1056*08daf0c2Sdrahn         }
1057*08daf0c2Sdrahn         return packFloat32( zSign, 0xFF, 0 );
1058*08daf0c2Sdrahn     }
1059*08daf0c2Sdrahn     if ( bExp == 0xFF ) {
1060*08daf0c2Sdrahn         if ( bSig ) return propagateFloat32NaN( a, b );
1061*08daf0c2Sdrahn         return packFloat32( zSign, 0, 0 );
1062*08daf0c2Sdrahn     }
1063*08daf0c2Sdrahn     if ( bExp == 0 ) {
1064*08daf0c2Sdrahn         if ( bSig == 0 ) {
1065*08daf0c2Sdrahn             if ( ( aExp | aSig ) == 0 ) {
1066*08daf0c2Sdrahn                 float_raise( float_flag_invalid );
1067*08daf0c2Sdrahn                 return float32_default_nan;
1068*08daf0c2Sdrahn             }
1069*08daf0c2Sdrahn             float_raise( float_flag_divbyzero );
1070*08daf0c2Sdrahn             return packFloat32( zSign, 0xFF, 0 );
1071*08daf0c2Sdrahn         }
1072*08daf0c2Sdrahn         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1073*08daf0c2Sdrahn     }
1074*08daf0c2Sdrahn     if ( aExp == 0 ) {
1075*08daf0c2Sdrahn         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1076*08daf0c2Sdrahn         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1077*08daf0c2Sdrahn     }
1078*08daf0c2Sdrahn     zExp = aExp - bExp + 0x7D;
1079*08daf0c2Sdrahn     aSig = ( aSig | 0x00800000 )<<7;
1080*08daf0c2Sdrahn     bSig = ( bSig | 0x00800000 )<<8;
1081*08daf0c2Sdrahn     if ( bSig <= ( aSig + aSig ) ) {
1082*08daf0c2Sdrahn         aSig >>= 1;
1083*08daf0c2Sdrahn         ++zExp;
1084*08daf0c2Sdrahn     }
1085*08daf0c2Sdrahn     zSig = estimateDiv64To32( aSig, 0, bSig );
1086*08daf0c2Sdrahn     if ( ( zSig & 0x3F ) <= 2 ) {
1087*08daf0c2Sdrahn         mul32To64( bSig, zSig, &term0, &term1 );
1088*08daf0c2Sdrahn         sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1089*08daf0c2Sdrahn         while ( (sbits32) rem0 < 0 ) {
1090*08daf0c2Sdrahn             --zSig;
1091*08daf0c2Sdrahn             add64( rem0, rem1, 0, bSig, &rem0, &rem1 );
1092*08daf0c2Sdrahn         }
1093*08daf0c2Sdrahn         zSig |= ( rem1 != 0 );
1094*08daf0c2Sdrahn     }
1095*08daf0c2Sdrahn     return roundAndPackFloat32( zSign, zExp, zSig );
1096*08daf0c2Sdrahn 
1097*08daf0c2Sdrahn }
1098*08daf0c2Sdrahn 
1099*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
1100*08daf0c2Sdrahn /*
1101*08daf0c2Sdrahn -------------------------------------------------------------------------------
1102*08daf0c2Sdrahn Returns the remainder of the single-precision floating-point value `a'
1103*08daf0c2Sdrahn with respect to the corresponding value `b'.  The operation is performed
1104*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1105*08daf0c2Sdrahn -------------------------------------------------------------------------------
1106*08daf0c2Sdrahn */
1107*08daf0c2Sdrahn float32 float32_rem( float32 a, float32 b )
1108*08daf0c2Sdrahn {
1109*08daf0c2Sdrahn     flag aSign, bSign, zSign;
1110*08daf0c2Sdrahn     int16 aExp, bExp, expDiff;
1111*08daf0c2Sdrahn     bits32 aSig, bSig, q, allZero, alternateASig;
1112*08daf0c2Sdrahn     sbits32 sigMean;
1113*08daf0c2Sdrahn 
1114*08daf0c2Sdrahn     aSig = extractFloat32Frac( a );
1115*08daf0c2Sdrahn     aExp = extractFloat32Exp( a );
1116*08daf0c2Sdrahn     aSign = extractFloat32Sign( a );
1117*08daf0c2Sdrahn     bSig = extractFloat32Frac( b );
1118*08daf0c2Sdrahn     bExp = extractFloat32Exp( b );
1119*08daf0c2Sdrahn     bSign = extractFloat32Sign( b );
1120*08daf0c2Sdrahn     if ( aExp == 0xFF ) {
1121*08daf0c2Sdrahn         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1122*08daf0c2Sdrahn             return propagateFloat32NaN( a, b );
1123*08daf0c2Sdrahn         }
1124*08daf0c2Sdrahn         float_raise( float_flag_invalid );
1125*08daf0c2Sdrahn         return float32_default_nan;
1126*08daf0c2Sdrahn     }
1127*08daf0c2Sdrahn     if ( bExp == 0xFF ) {
1128*08daf0c2Sdrahn         if ( bSig ) return propagateFloat32NaN( a, b );
1129*08daf0c2Sdrahn         return a;
1130*08daf0c2Sdrahn     }
1131*08daf0c2Sdrahn     if ( bExp == 0 ) {
1132*08daf0c2Sdrahn         if ( bSig == 0 ) {
1133*08daf0c2Sdrahn             float_raise( float_flag_invalid );
1134*08daf0c2Sdrahn             return float32_default_nan;
1135*08daf0c2Sdrahn         }
1136*08daf0c2Sdrahn         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1137*08daf0c2Sdrahn     }
1138*08daf0c2Sdrahn     if ( aExp == 0 ) {
1139*08daf0c2Sdrahn         if ( aSig == 0 ) return a;
1140*08daf0c2Sdrahn         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1141*08daf0c2Sdrahn     }
1142*08daf0c2Sdrahn     expDiff = aExp - bExp;
1143*08daf0c2Sdrahn     aSig = ( aSig | 0x00800000 )<<8;
1144*08daf0c2Sdrahn     bSig = ( bSig | 0x00800000 )<<8;
1145*08daf0c2Sdrahn     if ( expDiff < 0 ) {
1146*08daf0c2Sdrahn         if ( expDiff < -1 ) return a;
1147*08daf0c2Sdrahn         aSig >>= 1;
1148*08daf0c2Sdrahn     }
1149*08daf0c2Sdrahn     q = ( bSig <= aSig );
1150*08daf0c2Sdrahn     if ( q ) aSig -= bSig;
1151*08daf0c2Sdrahn     expDiff -= 32;
1152*08daf0c2Sdrahn     while ( 0 < expDiff ) {
1153*08daf0c2Sdrahn         q = estimateDiv64To32( aSig, 0, bSig );
1154*08daf0c2Sdrahn         q = ( 2 < q ) ? q - 2 : 0;
1155*08daf0c2Sdrahn         aSig = - ( ( bSig>>2 ) * q );
1156*08daf0c2Sdrahn         expDiff -= 30;
1157*08daf0c2Sdrahn     }
1158*08daf0c2Sdrahn     expDiff += 32;
1159*08daf0c2Sdrahn     if ( 0 < expDiff ) {
1160*08daf0c2Sdrahn         q = estimateDiv64To32( aSig, 0, bSig );
1161*08daf0c2Sdrahn         q = ( 2 < q ) ? q - 2 : 0;
1162*08daf0c2Sdrahn         q >>= 32 - expDiff;
1163*08daf0c2Sdrahn         bSig >>= 2;
1164*08daf0c2Sdrahn         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1165*08daf0c2Sdrahn     }
1166*08daf0c2Sdrahn     else {
1167*08daf0c2Sdrahn         aSig >>= 2;
1168*08daf0c2Sdrahn         bSig >>= 2;
1169*08daf0c2Sdrahn     }
1170*08daf0c2Sdrahn     do {
1171*08daf0c2Sdrahn         alternateASig = aSig;
1172*08daf0c2Sdrahn         ++q;
1173*08daf0c2Sdrahn         aSig -= bSig;
1174*08daf0c2Sdrahn     } while ( 0 <= (sbits32) aSig );
1175*08daf0c2Sdrahn     sigMean = aSig + alternateASig;
1176*08daf0c2Sdrahn     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1177*08daf0c2Sdrahn         aSig = alternateASig;
1178*08daf0c2Sdrahn     }
1179*08daf0c2Sdrahn     zSign = ( (sbits32) aSig < 0 );
1180*08daf0c2Sdrahn     if ( zSign ) aSig = - aSig;
1181*08daf0c2Sdrahn     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
1182*08daf0c2Sdrahn 
1183*08daf0c2Sdrahn }
1184*08daf0c2Sdrahn #endif
1185*08daf0c2Sdrahn 
1186*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
1187*08daf0c2Sdrahn /*
1188*08daf0c2Sdrahn -------------------------------------------------------------------------------
1189*08daf0c2Sdrahn Returns the square root of the single-precision floating-point value `a'.
1190*08daf0c2Sdrahn The operation is performed according to the IEC/IEEE Standard for Binary
1191*08daf0c2Sdrahn Floating-Point Arithmetic.
1192*08daf0c2Sdrahn -------------------------------------------------------------------------------
1193*08daf0c2Sdrahn */
1194*08daf0c2Sdrahn float32 float32_sqrt( float32 a )
1195*08daf0c2Sdrahn {
1196*08daf0c2Sdrahn     flag aSign;
1197*08daf0c2Sdrahn     int16 aExp, zExp;
1198*08daf0c2Sdrahn     bits32 aSig, zSig, rem0, rem1, term0, term1;
1199*08daf0c2Sdrahn 
1200*08daf0c2Sdrahn     aSig = extractFloat32Frac( a );
1201*08daf0c2Sdrahn     aExp = extractFloat32Exp( a );
1202*08daf0c2Sdrahn     aSign = extractFloat32Sign( a );
1203*08daf0c2Sdrahn     if ( aExp == 0xFF ) {
1204*08daf0c2Sdrahn         if ( aSig ) return propagateFloat32NaN( a, 0 );
1205*08daf0c2Sdrahn         if ( ! aSign ) return a;
1206*08daf0c2Sdrahn         float_raise( float_flag_invalid );
1207*08daf0c2Sdrahn         return float32_default_nan;
1208*08daf0c2Sdrahn     }
1209*08daf0c2Sdrahn     if ( aSign ) {
1210*08daf0c2Sdrahn         if ( ( aExp | aSig ) == 0 ) return a;
1211*08daf0c2Sdrahn         float_raise( float_flag_invalid );
1212*08daf0c2Sdrahn         return float32_default_nan;
1213*08daf0c2Sdrahn     }
1214*08daf0c2Sdrahn     if ( aExp == 0 ) {
1215*08daf0c2Sdrahn         if ( aSig == 0 ) return 0;
1216*08daf0c2Sdrahn         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1217*08daf0c2Sdrahn     }
1218*08daf0c2Sdrahn     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1219*08daf0c2Sdrahn     aSig = ( aSig | 0x00800000 )<<8;
1220*08daf0c2Sdrahn     zSig = estimateSqrt32( aExp, aSig ) + 2;
1221*08daf0c2Sdrahn     if ( ( zSig & 0x7F ) <= 5 ) {
1222*08daf0c2Sdrahn         if ( zSig < 2 ) {
1223*08daf0c2Sdrahn             zSig = 0x7FFFFFFF;
1224*08daf0c2Sdrahn             goto roundAndPack;
1225*08daf0c2Sdrahn         }
1226*08daf0c2Sdrahn         else {
1227*08daf0c2Sdrahn             aSig >>= aExp & 1;
1228*08daf0c2Sdrahn             mul32To64( zSig, zSig, &term0, &term1 );
1229*08daf0c2Sdrahn             sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1230*08daf0c2Sdrahn             while ( (sbits32) rem0 < 0 ) {
1231*08daf0c2Sdrahn                 --zSig;
1232*08daf0c2Sdrahn                 shortShift64Left( 0, zSig, 1, &term0, &term1 );
1233*08daf0c2Sdrahn                 term1 |= 1;
1234*08daf0c2Sdrahn                 add64( rem0, rem1, term0, term1, &rem0, &rem1 );
1235*08daf0c2Sdrahn             }
1236*08daf0c2Sdrahn             zSig |= ( ( rem0 | rem1 ) != 0 );
1237*08daf0c2Sdrahn         }
1238*08daf0c2Sdrahn     }
1239*08daf0c2Sdrahn     shift32RightJamming( zSig, 1, &zSig );
1240*08daf0c2Sdrahn  roundAndPack:
1241*08daf0c2Sdrahn     return roundAndPackFloat32( 0, zExp, zSig );
1242*08daf0c2Sdrahn 
1243*08daf0c2Sdrahn }
1244*08daf0c2Sdrahn #endif
1245*08daf0c2Sdrahn 
1246*08daf0c2Sdrahn /*
1247*08daf0c2Sdrahn -------------------------------------------------------------------------------
1248*08daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is equal to
1249*08daf0c2Sdrahn the corresponding value `b', and 0 otherwise.  The comparison is performed
1250*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1251*08daf0c2Sdrahn -------------------------------------------------------------------------------
1252*08daf0c2Sdrahn */
1253*08daf0c2Sdrahn flag float32_eq( float32 a, float32 b )
1254*08daf0c2Sdrahn {
1255*08daf0c2Sdrahn 
1256*08daf0c2Sdrahn     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1257*08daf0c2Sdrahn          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1258*08daf0c2Sdrahn        ) {
1259*08daf0c2Sdrahn         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1260*08daf0c2Sdrahn             float_raise( float_flag_invalid );
1261*08daf0c2Sdrahn         }
1262*08daf0c2Sdrahn         return 0;
1263*08daf0c2Sdrahn     }
1264*08daf0c2Sdrahn     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1265*08daf0c2Sdrahn 
1266*08daf0c2Sdrahn }
1267*08daf0c2Sdrahn 
1268*08daf0c2Sdrahn /*
1269*08daf0c2Sdrahn -------------------------------------------------------------------------------
1270*08daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is less than
1271*08daf0c2Sdrahn or equal to the corresponding value `b', and 0 otherwise.  The comparison
1272*08daf0c2Sdrahn is performed according to the IEC/IEEE Standard for Binary Floating-Point
1273*08daf0c2Sdrahn Arithmetic.
1274*08daf0c2Sdrahn -------------------------------------------------------------------------------
1275*08daf0c2Sdrahn */
1276*08daf0c2Sdrahn flag float32_le( float32 a, float32 b )
1277*08daf0c2Sdrahn {
1278*08daf0c2Sdrahn     flag aSign, bSign;
1279*08daf0c2Sdrahn 
1280*08daf0c2Sdrahn     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1281*08daf0c2Sdrahn          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1282*08daf0c2Sdrahn        ) {
1283*08daf0c2Sdrahn         float_raise( float_flag_invalid );
1284*08daf0c2Sdrahn         return 0;
1285*08daf0c2Sdrahn     }
1286*08daf0c2Sdrahn     aSign = extractFloat32Sign( a );
1287*08daf0c2Sdrahn     bSign = extractFloat32Sign( b );
1288*08daf0c2Sdrahn     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1289*08daf0c2Sdrahn     return ( a == b ) || ( aSign ^ ( a < b ) );
1290*08daf0c2Sdrahn 
1291*08daf0c2Sdrahn }
1292*08daf0c2Sdrahn 
1293*08daf0c2Sdrahn /*
1294*08daf0c2Sdrahn -------------------------------------------------------------------------------
1295*08daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is less than
1296*08daf0c2Sdrahn the corresponding value `b', and 0 otherwise.  The comparison is performed
1297*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1298*08daf0c2Sdrahn -------------------------------------------------------------------------------
1299*08daf0c2Sdrahn */
1300*08daf0c2Sdrahn flag float32_lt( float32 a, float32 b )
1301*08daf0c2Sdrahn {
1302*08daf0c2Sdrahn     flag aSign, bSign;
1303*08daf0c2Sdrahn 
1304*08daf0c2Sdrahn     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1305*08daf0c2Sdrahn          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1306*08daf0c2Sdrahn        ) {
1307*08daf0c2Sdrahn         float_raise( float_flag_invalid );
1308*08daf0c2Sdrahn         return 0;
1309*08daf0c2Sdrahn     }
1310*08daf0c2Sdrahn     aSign = extractFloat32Sign( a );
1311*08daf0c2Sdrahn     bSign = extractFloat32Sign( b );
1312*08daf0c2Sdrahn     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1313*08daf0c2Sdrahn     return ( a != b ) && ( aSign ^ ( a < b ) );
1314*08daf0c2Sdrahn 
1315*08daf0c2Sdrahn }
1316*08daf0c2Sdrahn 
1317*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1318*08daf0c2Sdrahn /*
1319*08daf0c2Sdrahn -------------------------------------------------------------------------------
1320*08daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is equal to
1321*08daf0c2Sdrahn the corresponding value `b', and 0 otherwise.  The invalid exception is
1322*08daf0c2Sdrahn raised if either operand is a NaN.  Otherwise, the comparison is performed
1323*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1324*08daf0c2Sdrahn -------------------------------------------------------------------------------
1325*08daf0c2Sdrahn */
1326*08daf0c2Sdrahn flag float32_eq_signaling( float32 a, float32 b )
1327*08daf0c2Sdrahn {
1328*08daf0c2Sdrahn 
1329*08daf0c2Sdrahn     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1330*08daf0c2Sdrahn          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1331*08daf0c2Sdrahn        ) {
1332*08daf0c2Sdrahn         float_raise( float_flag_invalid );
1333*08daf0c2Sdrahn         return 0;
1334*08daf0c2Sdrahn     }
1335*08daf0c2Sdrahn     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1336*08daf0c2Sdrahn 
1337*08daf0c2Sdrahn }
1338*08daf0c2Sdrahn 
1339*08daf0c2Sdrahn /*
1340*08daf0c2Sdrahn -------------------------------------------------------------------------------
1341*08daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is less than or
1342*08daf0c2Sdrahn equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
1343*08daf0c2Sdrahn cause an exception.  Otherwise, the comparison is performed according to the
1344*08daf0c2Sdrahn IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1345*08daf0c2Sdrahn -------------------------------------------------------------------------------
1346*08daf0c2Sdrahn */
1347*08daf0c2Sdrahn flag float32_le_quiet( float32 a, float32 b )
1348*08daf0c2Sdrahn {
1349*08daf0c2Sdrahn     flag aSign, bSign;
1350*08daf0c2Sdrahn     int16 aExp, bExp;
1351*08daf0c2Sdrahn 
1352*08daf0c2Sdrahn     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1353*08daf0c2Sdrahn          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1354*08daf0c2Sdrahn        ) {
1355*08daf0c2Sdrahn         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1356*08daf0c2Sdrahn             float_raise( float_flag_invalid );
1357*08daf0c2Sdrahn         }
1358*08daf0c2Sdrahn         return 0;
1359*08daf0c2Sdrahn     }
1360*08daf0c2Sdrahn     aSign = extractFloat32Sign( a );
1361*08daf0c2Sdrahn     bSign = extractFloat32Sign( b );
1362*08daf0c2Sdrahn     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1363*08daf0c2Sdrahn     return ( a == b ) || ( aSign ^ ( a < b ) );
1364*08daf0c2Sdrahn 
1365*08daf0c2Sdrahn }
1366*08daf0c2Sdrahn 
1367*08daf0c2Sdrahn /*
1368*08daf0c2Sdrahn -------------------------------------------------------------------------------
1369*08daf0c2Sdrahn Returns 1 if the single-precision floating-point value `a' is less than
1370*08daf0c2Sdrahn the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
1371*08daf0c2Sdrahn exception.  Otherwise, the comparison is performed according to the IEC/IEEE
1372*08daf0c2Sdrahn Standard for Binary Floating-Point Arithmetic.
1373*08daf0c2Sdrahn -------------------------------------------------------------------------------
1374*08daf0c2Sdrahn */
1375*08daf0c2Sdrahn flag float32_lt_quiet( float32 a, float32 b )
1376*08daf0c2Sdrahn {
1377*08daf0c2Sdrahn     flag aSign, bSign;
1378*08daf0c2Sdrahn 
1379*08daf0c2Sdrahn     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1380*08daf0c2Sdrahn          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1381*08daf0c2Sdrahn        ) {
1382*08daf0c2Sdrahn         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1383*08daf0c2Sdrahn             float_raise( float_flag_invalid );
1384*08daf0c2Sdrahn         }
1385*08daf0c2Sdrahn         return 0;
1386*08daf0c2Sdrahn     }
1387*08daf0c2Sdrahn     aSign = extractFloat32Sign( a );
1388*08daf0c2Sdrahn     bSign = extractFloat32Sign( b );
1389*08daf0c2Sdrahn     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1390*08daf0c2Sdrahn     return ( a != b ) && ( aSign ^ ( a < b ) );
1391*08daf0c2Sdrahn 
1392*08daf0c2Sdrahn }
1393*08daf0c2Sdrahn #endif /* !SOFTFLOAT_FOR_GCC */
1394*08daf0c2Sdrahn 
1395*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1396*08daf0c2Sdrahn /*
1397*08daf0c2Sdrahn -------------------------------------------------------------------------------
1398*08daf0c2Sdrahn Returns the result of converting the double-precision floating-point value
1399*08daf0c2Sdrahn `a' to the 32-bit two's complement integer format.  The conversion is
1400*08daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point
1401*08daf0c2Sdrahn Arithmetic---which means in particular that the conversion is rounded
1402*08daf0c2Sdrahn according to the current rounding mode.  If `a' is a NaN, the largest
1403*08daf0c2Sdrahn positive integer is returned.  Otherwise, if the conversion overflows, the
1404*08daf0c2Sdrahn largest integer with the same sign as `a' is returned.
1405*08daf0c2Sdrahn -------------------------------------------------------------------------------
1406*08daf0c2Sdrahn */
1407*08daf0c2Sdrahn int32 float64_to_int32( float64 a )
1408*08daf0c2Sdrahn {
1409*08daf0c2Sdrahn     flag aSign;
1410*08daf0c2Sdrahn     int16 aExp, shiftCount;
1411*08daf0c2Sdrahn     bits32 aSig0, aSig1, absZ, aSigExtra;
1412*08daf0c2Sdrahn     int32 z;
1413*08daf0c2Sdrahn     int8 roundingMode;
1414*08daf0c2Sdrahn 
1415*08daf0c2Sdrahn     aSig1 = extractFloat64Frac1( a );
1416*08daf0c2Sdrahn     aSig0 = extractFloat64Frac0( a );
1417*08daf0c2Sdrahn     aExp = extractFloat64Exp( a );
1418*08daf0c2Sdrahn     aSign = extractFloat64Sign( a );
1419*08daf0c2Sdrahn     shiftCount = aExp - 0x413;
1420*08daf0c2Sdrahn     if ( 0 <= shiftCount ) {
1421*08daf0c2Sdrahn         if ( 0x41E < aExp ) {
1422*08daf0c2Sdrahn             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1423*08daf0c2Sdrahn             goto invalid;
1424*08daf0c2Sdrahn         }
1425*08daf0c2Sdrahn         shortShift64Left(
1426*08daf0c2Sdrahn             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1427*08daf0c2Sdrahn         if ( 0x80000000 < absZ ) goto invalid;
1428*08daf0c2Sdrahn     }
1429*08daf0c2Sdrahn     else {
1430*08daf0c2Sdrahn         aSig1 = ( aSig1 != 0 );
1431*08daf0c2Sdrahn         if ( aExp < 0x3FE ) {
1432*08daf0c2Sdrahn             aSigExtra = aExp | aSig0 | aSig1;
1433*08daf0c2Sdrahn             absZ = 0;
1434*08daf0c2Sdrahn         }
1435*08daf0c2Sdrahn         else {
1436*08daf0c2Sdrahn             aSig0 |= 0x00100000;
1437*08daf0c2Sdrahn             aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1438*08daf0c2Sdrahn             absZ = aSig0>>( - shiftCount );
1439*08daf0c2Sdrahn         }
1440*08daf0c2Sdrahn     }
1441*08daf0c2Sdrahn     roundingMode = float_rounding_mode;
1442*08daf0c2Sdrahn     if ( roundingMode == float_round_nearest_even ) {
1443*08daf0c2Sdrahn         if ( (sbits32) aSigExtra < 0 ) {
1444*08daf0c2Sdrahn             ++absZ;
1445*08daf0c2Sdrahn             if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
1446*08daf0c2Sdrahn         }
1447*08daf0c2Sdrahn         z = aSign ? - absZ : absZ;
1448*08daf0c2Sdrahn     }
1449*08daf0c2Sdrahn     else {
1450*08daf0c2Sdrahn         aSigExtra = ( aSigExtra != 0 );
1451*08daf0c2Sdrahn         if ( aSign ) {
1452*08daf0c2Sdrahn             z = - (   absZ
1453*08daf0c2Sdrahn                     + ( ( roundingMode == float_round_down ) & aSigExtra ) );
1454*08daf0c2Sdrahn         }
1455*08daf0c2Sdrahn         else {
1456*08daf0c2Sdrahn             z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
1457*08daf0c2Sdrahn         }
1458*08daf0c2Sdrahn     }
1459*08daf0c2Sdrahn     if ( ( aSign ^ ( z < 0 ) ) && z ) {
1460*08daf0c2Sdrahn  invalid:
1461*08daf0c2Sdrahn         float_raise( float_flag_invalid );
1462*08daf0c2Sdrahn         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1463*08daf0c2Sdrahn     }
1464*08daf0c2Sdrahn     if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
1465*08daf0c2Sdrahn     return z;
1466*08daf0c2Sdrahn 
1467*08daf0c2Sdrahn }
1468*08daf0c2Sdrahn #endif /* !SOFTFLOAT_FOR_GCC */
1469*08daf0c2Sdrahn 
1470*08daf0c2Sdrahn /*
1471*08daf0c2Sdrahn -------------------------------------------------------------------------------
1472*08daf0c2Sdrahn Returns the result of converting the double-precision floating-point value
1473*08daf0c2Sdrahn `a' to the 32-bit two's complement integer format.  The conversion is
1474*08daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point
1475*08daf0c2Sdrahn Arithmetic, except that the conversion is always rounded toward zero.
1476*08daf0c2Sdrahn If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1477*08daf0c2Sdrahn the conversion overflows, the largest integer with the same sign as `a' is
1478*08daf0c2Sdrahn returned.
1479*08daf0c2Sdrahn -------------------------------------------------------------------------------
1480*08daf0c2Sdrahn */
1481*08daf0c2Sdrahn int32 float64_to_int32_round_to_zero( float64 a )
1482*08daf0c2Sdrahn {
1483*08daf0c2Sdrahn     flag aSign;
1484*08daf0c2Sdrahn     int16 aExp, shiftCount;
1485*08daf0c2Sdrahn     bits32 aSig0, aSig1, absZ, aSigExtra;
1486*08daf0c2Sdrahn     int32 z;
1487*08daf0c2Sdrahn 
1488*08daf0c2Sdrahn     aSig1 = extractFloat64Frac1( a );
1489*08daf0c2Sdrahn     aSig0 = extractFloat64Frac0( a );
1490*08daf0c2Sdrahn     aExp = extractFloat64Exp( a );
1491*08daf0c2Sdrahn     aSign = extractFloat64Sign( a );
1492*08daf0c2Sdrahn     shiftCount = aExp - 0x413;
1493*08daf0c2Sdrahn     if ( 0 <= shiftCount ) {
1494*08daf0c2Sdrahn         if ( 0x41E < aExp ) {
1495*08daf0c2Sdrahn             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1496*08daf0c2Sdrahn             goto invalid;
1497*08daf0c2Sdrahn         }
1498*08daf0c2Sdrahn         shortShift64Left(
1499*08daf0c2Sdrahn             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1500*08daf0c2Sdrahn     }
1501*08daf0c2Sdrahn     else {
1502*08daf0c2Sdrahn         if ( aExp < 0x3FF ) {
1503*08daf0c2Sdrahn             if ( aExp | aSig0 | aSig1 ) {
1504*08daf0c2Sdrahn                 float_exception_flags |= float_flag_inexact;
1505*08daf0c2Sdrahn             }
1506*08daf0c2Sdrahn             return 0;
1507*08daf0c2Sdrahn         }
1508*08daf0c2Sdrahn         aSig0 |= 0x00100000;
1509*08daf0c2Sdrahn         aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1510*08daf0c2Sdrahn         absZ = aSig0>>( - shiftCount );
1511*08daf0c2Sdrahn     }
1512*08daf0c2Sdrahn     z = aSign ? - absZ : absZ;
1513*08daf0c2Sdrahn     if ( ( aSign ^ ( z < 0 ) ) && z ) {
1514*08daf0c2Sdrahn  invalid:
1515*08daf0c2Sdrahn         float_raise( float_flag_invalid );
1516*08daf0c2Sdrahn         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1517*08daf0c2Sdrahn     }
1518*08daf0c2Sdrahn     if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
1519*08daf0c2Sdrahn     return z;
1520*08daf0c2Sdrahn 
1521*08daf0c2Sdrahn }
1522*08daf0c2Sdrahn 
1523*08daf0c2Sdrahn /*
1524*08daf0c2Sdrahn -------------------------------------------------------------------------------
1525*08daf0c2Sdrahn Returns the result of converting the double-precision floating-point value
1526*08daf0c2Sdrahn `a' to the single-precision floating-point format.  The conversion is
1527*08daf0c2Sdrahn performed according to the IEC/IEEE Standard for Binary Floating-Point
1528*08daf0c2Sdrahn Arithmetic.
1529*08daf0c2Sdrahn -------------------------------------------------------------------------------
1530*08daf0c2Sdrahn */
1531*08daf0c2Sdrahn float32 float64_to_float32( float64 a )
1532*08daf0c2Sdrahn {
1533*08daf0c2Sdrahn     flag aSign;
1534*08daf0c2Sdrahn     int16 aExp;
1535*08daf0c2Sdrahn     bits32 aSig0, aSig1, zSig;
1536*08daf0c2Sdrahn     bits32 allZero;
1537*08daf0c2Sdrahn 
1538*08daf0c2Sdrahn     aSig1 = extractFloat64Frac1( a );
1539*08daf0c2Sdrahn     aSig0 = extractFloat64Frac0( a );
1540*08daf0c2Sdrahn     aExp = extractFloat64Exp( a );
1541*08daf0c2Sdrahn     aSign = extractFloat64Sign( a );
1542*08daf0c2Sdrahn     if ( aExp == 0x7FF ) {
1543*08daf0c2Sdrahn         if ( aSig0 | aSig1 ) {
1544*08daf0c2Sdrahn             return commonNaNToFloat32( float64ToCommonNaN( a ) );
1545*08daf0c2Sdrahn         }
1546*08daf0c2Sdrahn         return packFloat32( aSign, 0xFF, 0 );
1547*08daf0c2Sdrahn     }
1548*08daf0c2Sdrahn     shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
1549*08daf0c2Sdrahn     if ( aExp ) zSig |= 0x40000000;
1550*08daf0c2Sdrahn     return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
1551*08daf0c2Sdrahn 
1552*08daf0c2Sdrahn }
1553*08daf0c2Sdrahn 
1554*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
1555*08daf0c2Sdrahn /*
1556*08daf0c2Sdrahn -------------------------------------------------------------------------------
1557*08daf0c2Sdrahn Rounds the double-precision floating-point value `a' to an integer,
1558*08daf0c2Sdrahn and returns the result as a double-precision floating-point value.  The
1559*08daf0c2Sdrahn operation is performed according to the IEC/IEEE Standard for Binary
1560*08daf0c2Sdrahn Floating-Point Arithmetic.
1561*08daf0c2Sdrahn -------------------------------------------------------------------------------
1562*08daf0c2Sdrahn */
1563*08daf0c2Sdrahn float64 float64_round_to_int( float64 a )
1564*08daf0c2Sdrahn {
1565*08daf0c2Sdrahn     flag aSign;
1566*08daf0c2Sdrahn     int16 aExp;
1567*08daf0c2Sdrahn     bits32 lastBitMask, roundBitsMask;
1568*08daf0c2Sdrahn     int8 roundingMode;
1569*08daf0c2Sdrahn     float64 z;
1570*08daf0c2Sdrahn 
1571*08daf0c2Sdrahn     aExp = extractFloat64Exp( a );
1572*08daf0c2Sdrahn     if ( 0x413 <= aExp ) {
1573*08daf0c2Sdrahn         if ( 0x433 <= aExp ) {
1574*08daf0c2Sdrahn             if (    ( aExp == 0x7FF )
1575*08daf0c2Sdrahn                  && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
1576*08daf0c2Sdrahn                 return propagateFloat64NaN( a, a );
1577*08daf0c2Sdrahn             }
1578*08daf0c2Sdrahn             return a;
1579*08daf0c2Sdrahn         }
1580*08daf0c2Sdrahn         lastBitMask = 1;
1581*08daf0c2Sdrahn         lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;
1582*08daf0c2Sdrahn         roundBitsMask = lastBitMask - 1;
1583*08daf0c2Sdrahn         z = a;
1584*08daf0c2Sdrahn         roundingMode = float_rounding_mode;
1585*08daf0c2Sdrahn         if ( roundingMode == float_round_nearest_even ) {
1586*08daf0c2Sdrahn             if ( lastBitMask ) {
1587*08daf0c2Sdrahn                 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
1588*08daf0c2Sdrahn                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
1589*08daf0c2Sdrahn             }
1590*08daf0c2Sdrahn             else {
1591*08daf0c2Sdrahn                 if ( (sbits32) z.low < 0 ) {
1592*08daf0c2Sdrahn                     ++z.high;
1593*08daf0c2Sdrahn                     if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
1594*08daf0c2Sdrahn                 }
1595*08daf0c2Sdrahn             }
1596*08daf0c2Sdrahn         }
1597*08daf0c2Sdrahn         else if ( roundingMode != float_round_to_zero ) {
1598*08daf0c2Sdrahn             if (   extractFloat64Sign( z )
1599*08daf0c2Sdrahn                  ^ ( roundingMode == float_round_up ) ) {
1600*08daf0c2Sdrahn                 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
1601*08daf0c2Sdrahn             }
1602*08daf0c2Sdrahn         }
1603*08daf0c2Sdrahn         z.low &= ~ roundBitsMask;
1604*08daf0c2Sdrahn     }
1605*08daf0c2Sdrahn     else {
1606*08daf0c2Sdrahn         if ( aExp <= 0x3FE ) {
1607*08daf0c2Sdrahn             if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
1608*08daf0c2Sdrahn             float_exception_flags |= float_flag_inexact;
1609*08daf0c2Sdrahn             aSign = extractFloat64Sign( a );
1610*08daf0c2Sdrahn             switch ( float_rounding_mode ) {
1611*08daf0c2Sdrahn              case float_round_nearest_even:
1612*08daf0c2Sdrahn                 if (    ( aExp == 0x3FE )
1613*08daf0c2Sdrahn                      && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
1614*08daf0c2Sdrahn                    ) {
1615*08daf0c2Sdrahn                     return packFloat64( aSign, 0x3FF, 0, 0 );
1616*08daf0c2Sdrahn                 }
1617*08daf0c2Sdrahn                 break;
1618*08daf0c2Sdrahn              case float_round_down:
1619*08daf0c2Sdrahn                 return
1620*08daf0c2Sdrahn                       aSign ? packFloat64( 1, 0x3FF, 0, 0 )
1621*08daf0c2Sdrahn                     : packFloat64( 0, 0, 0, 0 );
1622*08daf0c2Sdrahn              case float_round_up:
1623*08daf0c2Sdrahn                 return
1624*08daf0c2Sdrahn                       aSign ? packFloat64( 1, 0, 0, 0 )
1625*08daf0c2Sdrahn                     : packFloat64( 0, 0x3FF, 0, 0 );
1626*08daf0c2Sdrahn             }
1627*08daf0c2Sdrahn             return packFloat64( aSign, 0, 0, 0 );
1628*08daf0c2Sdrahn         }
1629*08daf0c2Sdrahn         lastBitMask = 1;
1630*08daf0c2Sdrahn         lastBitMask <<= 0x413 - aExp;
1631*08daf0c2Sdrahn         roundBitsMask = lastBitMask - 1;
1632*08daf0c2Sdrahn         z.low = 0;
1633*08daf0c2Sdrahn         z.high = a.high;
1634*08daf0c2Sdrahn         roundingMode = float_rounding_mode;
1635*08daf0c2Sdrahn         if ( roundingMode == float_round_nearest_even ) {
1636*08daf0c2Sdrahn             z.high += lastBitMask>>1;
1637*08daf0c2Sdrahn             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
1638*08daf0c2Sdrahn                 z.high &= ~ lastBitMask;
1639*08daf0c2Sdrahn             }
1640*08daf0c2Sdrahn         }
1641*08daf0c2Sdrahn         else if ( roundingMode != float_round_to_zero ) {
1642*08daf0c2Sdrahn             if (   extractFloat64Sign( z )
1643*08daf0c2Sdrahn                  ^ ( roundingMode == float_round_up ) ) {
1644*08daf0c2Sdrahn                 z.high |= ( a.low != 0 );
1645*08daf0c2Sdrahn                 z.high += roundBitsMask;
1646*08daf0c2Sdrahn             }
1647*08daf0c2Sdrahn         }
1648*08daf0c2Sdrahn         z.high &= ~ roundBitsMask;
1649*08daf0c2Sdrahn     }
1650*08daf0c2Sdrahn     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
1651*08daf0c2Sdrahn         float_exception_flags |= float_flag_inexact;
1652*08daf0c2Sdrahn     }
1653*08daf0c2Sdrahn     return z;
1654*08daf0c2Sdrahn 
1655*08daf0c2Sdrahn }
1656*08daf0c2Sdrahn #endif
1657*08daf0c2Sdrahn 
1658*08daf0c2Sdrahn /*
1659*08daf0c2Sdrahn -------------------------------------------------------------------------------
1660*08daf0c2Sdrahn Returns the result of adding the absolute values of the double-precision
1661*08daf0c2Sdrahn floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1662*08daf0c2Sdrahn before being returned.  `zSign' is ignored if the result is a NaN.
1663*08daf0c2Sdrahn The addition is performed according to the IEC/IEEE Standard for Binary
1664*08daf0c2Sdrahn Floating-Point Arithmetic.
1665*08daf0c2Sdrahn -------------------------------------------------------------------------------
1666*08daf0c2Sdrahn */
1667*08daf0c2Sdrahn static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
1668*08daf0c2Sdrahn {
1669*08daf0c2Sdrahn     int16 aExp, bExp, zExp;
1670*08daf0c2Sdrahn     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1671*08daf0c2Sdrahn     int16 expDiff;
1672*08daf0c2Sdrahn 
1673*08daf0c2Sdrahn     aSig1 = extractFloat64Frac1( a );
1674*08daf0c2Sdrahn     aSig0 = extractFloat64Frac0( a );
1675*08daf0c2Sdrahn     aExp = extractFloat64Exp( a );
1676*08daf0c2Sdrahn     bSig1 = extractFloat64Frac1( b );
1677*08daf0c2Sdrahn     bSig0 = extractFloat64Frac0( b );
1678*08daf0c2Sdrahn     bExp = extractFloat64Exp( b );
1679*08daf0c2Sdrahn     expDiff = aExp - bExp;
1680*08daf0c2Sdrahn     if ( 0 < expDiff ) {
1681*08daf0c2Sdrahn         if ( aExp == 0x7FF ) {
1682*08daf0c2Sdrahn             if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1683*08daf0c2Sdrahn             return a;
1684*08daf0c2Sdrahn         }
1685*08daf0c2Sdrahn         if ( bExp == 0 ) {
1686*08daf0c2Sdrahn             --expDiff;
1687*08daf0c2Sdrahn         }
1688*08daf0c2Sdrahn         else {
1689*08daf0c2Sdrahn             bSig0 |= 0x00100000;
1690*08daf0c2Sdrahn         }
1691*08daf0c2Sdrahn         shift64ExtraRightJamming(
1692*08daf0c2Sdrahn             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
1693*08daf0c2Sdrahn         zExp = aExp;
1694*08daf0c2Sdrahn     }
1695*08daf0c2Sdrahn     else if ( expDiff < 0 ) {
1696*08daf0c2Sdrahn         if ( bExp == 0x7FF ) {
1697*08daf0c2Sdrahn             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1698*08daf0c2Sdrahn             return packFloat64( zSign, 0x7FF, 0, 0 );
1699*08daf0c2Sdrahn         }
1700*08daf0c2Sdrahn         if ( aExp == 0 ) {
1701*08daf0c2Sdrahn             ++expDiff;
1702*08daf0c2Sdrahn         }
1703*08daf0c2Sdrahn         else {
1704*08daf0c2Sdrahn             aSig0 |= 0x00100000;
1705*08daf0c2Sdrahn         }
1706*08daf0c2Sdrahn         shift64ExtraRightJamming(
1707*08daf0c2Sdrahn             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
1708*08daf0c2Sdrahn         zExp = bExp;
1709*08daf0c2Sdrahn     }
1710*08daf0c2Sdrahn     else {
1711*08daf0c2Sdrahn         if ( aExp == 0x7FF ) {
1712*08daf0c2Sdrahn             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1713*08daf0c2Sdrahn                 return propagateFloat64NaN( a, b );
1714*08daf0c2Sdrahn             }
1715*08daf0c2Sdrahn             return a;
1716*08daf0c2Sdrahn         }
1717*08daf0c2Sdrahn         add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1718*08daf0c2Sdrahn         if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
1719*08daf0c2Sdrahn         zSig2 = 0;
1720*08daf0c2Sdrahn         zSig0 |= 0x00200000;
1721*08daf0c2Sdrahn         zExp = aExp;
1722*08daf0c2Sdrahn         goto shiftRight1;
1723*08daf0c2Sdrahn     }
1724*08daf0c2Sdrahn     aSig0 |= 0x00100000;
1725*08daf0c2Sdrahn     add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1726*08daf0c2Sdrahn     --zExp;
1727*08daf0c2Sdrahn     if ( zSig0 < 0x00200000 ) goto roundAndPack;
1728*08daf0c2Sdrahn     ++zExp;
1729*08daf0c2Sdrahn  shiftRight1:
1730*08daf0c2Sdrahn     shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1731*08daf0c2Sdrahn  roundAndPack:
1732*08daf0c2Sdrahn     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1733*08daf0c2Sdrahn 
1734*08daf0c2Sdrahn }
1735*08daf0c2Sdrahn 
1736*08daf0c2Sdrahn /*
1737*08daf0c2Sdrahn -------------------------------------------------------------------------------
1738*08daf0c2Sdrahn Returns the result of subtracting the absolute values of the double-
1739*08daf0c2Sdrahn precision floating-point values `a' and `b'.  If `zSign' is 1, the
1740*08daf0c2Sdrahn difference is negated before being returned.  `zSign' is ignored if the
1741*08daf0c2Sdrahn result is a NaN.  The subtraction is performed according to the IEC/IEEE
1742*08daf0c2Sdrahn Standard for Binary Floating-Point Arithmetic.
1743*08daf0c2Sdrahn -------------------------------------------------------------------------------
1744*08daf0c2Sdrahn */
1745*08daf0c2Sdrahn static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
1746*08daf0c2Sdrahn {
1747*08daf0c2Sdrahn     int16 aExp, bExp, zExp;
1748*08daf0c2Sdrahn     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
1749*08daf0c2Sdrahn     int16 expDiff;
1750*08daf0c2Sdrahn 
1751*08daf0c2Sdrahn     aSig1 = extractFloat64Frac1( a );
1752*08daf0c2Sdrahn     aSig0 = extractFloat64Frac0( a );
1753*08daf0c2Sdrahn     aExp = extractFloat64Exp( a );
1754*08daf0c2Sdrahn     bSig1 = extractFloat64Frac1( b );
1755*08daf0c2Sdrahn     bSig0 = extractFloat64Frac0( b );
1756*08daf0c2Sdrahn     bExp = extractFloat64Exp( b );
1757*08daf0c2Sdrahn     expDiff = aExp - bExp;
1758*08daf0c2Sdrahn     shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );
1759*08daf0c2Sdrahn     shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
1760*08daf0c2Sdrahn     if ( 0 < expDiff ) goto aExpBigger;
1761*08daf0c2Sdrahn     if ( expDiff < 0 ) goto bExpBigger;
1762*08daf0c2Sdrahn     if ( aExp == 0x7FF ) {
1763*08daf0c2Sdrahn         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1764*08daf0c2Sdrahn             return propagateFloat64NaN( a, b );
1765*08daf0c2Sdrahn         }
1766*08daf0c2Sdrahn         float_raise( float_flag_invalid );
1767*08daf0c2Sdrahn         return float64_default_nan;
1768*08daf0c2Sdrahn     }
1769*08daf0c2Sdrahn     if ( aExp == 0 ) {
1770*08daf0c2Sdrahn         aExp = 1;
1771*08daf0c2Sdrahn         bExp = 1;
1772*08daf0c2Sdrahn     }
1773*08daf0c2Sdrahn     if ( bSig0 < aSig0 ) goto aBigger;
1774*08daf0c2Sdrahn     if ( aSig0 < bSig0 ) goto bBigger;
1775*08daf0c2Sdrahn     if ( bSig1 < aSig1 ) goto aBigger;
1776*08daf0c2Sdrahn     if ( aSig1 < bSig1 ) goto bBigger;
1777*08daf0c2Sdrahn     return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
1778*08daf0c2Sdrahn  bExpBigger:
1779*08daf0c2Sdrahn     if ( bExp == 0x7FF ) {
1780*08daf0c2Sdrahn         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1781*08daf0c2Sdrahn         return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
1782*08daf0c2Sdrahn     }
1783*08daf0c2Sdrahn     if ( aExp == 0 ) {
1784*08daf0c2Sdrahn         ++expDiff;
1785*08daf0c2Sdrahn     }
1786*08daf0c2Sdrahn     else {
1787*08daf0c2Sdrahn         aSig0 |= 0x40000000;
1788*08daf0c2Sdrahn     }
1789*08daf0c2Sdrahn     shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
1790*08daf0c2Sdrahn     bSig0 |= 0x40000000;
1791*08daf0c2Sdrahn  bBigger:
1792*08daf0c2Sdrahn     sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
1793*08daf0c2Sdrahn     zExp = bExp;
1794*08daf0c2Sdrahn     zSign ^= 1;
1795*08daf0c2Sdrahn     goto normalizeRoundAndPack;
1796*08daf0c2Sdrahn  aExpBigger:
1797*08daf0c2Sdrahn     if ( aExp == 0x7FF ) {
1798*08daf0c2Sdrahn         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1799*08daf0c2Sdrahn         return a;
1800*08daf0c2Sdrahn     }
1801*08daf0c2Sdrahn     if ( bExp == 0 ) {
1802*08daf0c2Sdrahn         --expDiff;
1803*08daf0c2Sdrahn     }
1804*08daf0c2Sdrahn     else {
1805*08daf0c2Sdrahn         bSig0 |= 0x40000000;
1806*08daf0c2Sdrahn     }
1807*08daf0c2Sdrahn     shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
1808*08daf0c2Sdrahn     aSig0 |= 0x40000000;
1809*08daf0c2Sdrahn  aBigger:
1810*08daf0c2Sdrahn     sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1811*08daf0c2Sdrahn     zExp = aExp;
1812*08daf0c2Sdrahn  normalizeRoundAndPack:
1813*08daf0c2Sdrahn     --zExp;
1814*08daf0c2Sdrahn     return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
1815*08daf0c2Sdrahn 
1816*08daf0c2Sdrahn }
1817*08daf0c2Sdrahn 
1818*08daf0c2Sdrahn /*
1819*08daf0c2Sdrahn -------------------------------------------------------------------------------
1820*08daf0c2Sdrahn Returns the result of adding the double-precision floating-point values `a'
1821*08daf0c2Sdrahn and `b'.  The operation is performed according to the IEC/IEEE Standard for
1822*08daf0c2Sdrahn Binary Floating-Point Arithmetic.
1823*08daf0c2Sdrahn -------------------------------------------------------------------------------
1824*08daf0c2Sdrahn */
1825*08daf0c2Sdrahn float64 float64_add( float64 a, float64 b )
1826*08daf0c2Sdrahn {
1827*08daf0c2Sdrahn     flag aSign, bSign;
1828*08daf0c2Sdrahn 
1829*08daf0c2Sdrahn     aSign = extractFloat64Sign( a );
1830*08daf0c2Sdrahn     bSign = extractFloat64Sign( b );
1831*08daf0c2Sdrahn     if ( aSign == bSign ) {
1832*08daf0c2Sdrahn         return addFloat64Sigs( a, b, aSign );
1833*08daf0c2Sdrahn     }
1834*08daf0c2Sdrahn     else {
1835*08daf0c2Sdrahn         return subFloat64Sigs( a, b, aSign );
1836*08daf0c2Sdrahn     }
1837*08daf0c2Sdrahn 
1838*08daf0c2Sdrahn }
1839*08daf0c2Sdrahn 
1840*08daf0c2Sdrahn /*
1841*08daf0c2Sdrahn -------------------------------------------------------------------------------
1842*08daf0c2Sdrahn Returns the result of subtracting the double-precision floating-point values
1843*08daf0c2Sdrahn `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1844*08daf0c2Sdrahn for Binary Floating-Point Arithmetic.
1845*08daf0c2Sdrahn -------------------------------------------------------------------------------
1846*08daf0c2Sdrahn */
1847*08daf0c2Sdrahn float64 float64_sub( float64 a, float64 b )
1848*08daf0c2Sdrahn {
1849*08daf0c2Sdrahn     flag aSign, bSign;
1850*08daf0c2Sdrahn 
1851*08daf0c2Sdrahn     aSign = extractFloat64Sign( a );
1852*08daf0c2Sdrahn     bSign = extractFloat64Sign( b );
1853*08daf0c2Sdrahn     if ( aSign == bSign ) {
1854*08daf0c2Sdrahn         return subFloat64Sigs( a, b, aSign );
1855*08daf0c2Sdrahn     }
1856*08daf0c2Sdrahn     else {
1857*08daf0c2Sdrahn         return addFloat64Sigs( a, b, aSign );
1858*08daf0c2Sdrahn     }
1859*08daf0c2Sdrahn 
1860*08daf0c2Sdrahn }
1861*08daf0c2Sdrahn 
1862*08daf0c2Sdrahn /*
1863*08daf0c2Sdrahn -------------------------------------------------------------------------------
1864*08daf0c2Sdrahn Returns the result of multiplying the double-precision floating-point values
1865*08daf0c2Sdrahn `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1866*08daf0c2Sdrahn for Binary Floating-Point Arithmetic.
1867*08daf0c2Sdrahn -------------------------------------------------------------------------------
1868*08daf0c2Sdrahn */
1869*08daf0c2Sdrahn float64 float64_mul( float64 a, float64 b )
1870*08daf0c2Sdrahn {
1871*08daf0c2Sdrahn     flag aSign, bSign, zSign;
1872*08daf0c2Sdrahn     int16 aExp, bExp, zExp;
1873*08daf0c2Sdrahn     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
1874*08daf0c2Sdrahn 
1875*08daf0c2Sdrahn     aSig1 = extractFloat64Frac1( a );
1876*08daf0c2Sdrahn     aSig0 = extractFloat64Frac0( a );
1877*08daf0c2Sdrahn     aExp = extractFloat64Exp( a );
1878*08daf0c2Sdrahn     aSign = extractFloat64Sign( a );
1879*08daf0c2Sdrahn     bSig1 = extractFloat64Frac1( b );
1880*08daf0c2Sdrahn     bSig0 = extractFloat64Frac0( b );
1881*08daf0c2Sdrahn     bExp = extractFloat64Exp( b );
1882*08daf0c2Sdrahn     bSign = extractFloat64Sign( b );
1883*08daf0c2Sdrahn     zSign = aSign ^ bSign;
1884*08daf0c2Sdrahn     if ( aExp == 0x7FF ) {
1885*08daf0c2Sdrahn         if (    ( aSig0 | aSig1 )
1886*08daf0c2Sdrahn              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
1887*08daf0c2Sdrahn             return propagateFloat64NaN( a, b );
1888*08daf0c2Sdrahn         }
1889*08daf0c2Sdrahn         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
1890*08daf0c2Sdrahn         return packFloat64( zSign, 0x7FF, 0, 0 );
1891*08daf0c2Sdrahn     }
1892*08daf0c2Sdrahn     if ( bExp == 0x7FF ) {
1893*08daf0c2Sdrahn         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1894*08daf0c2Sdrahn         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1895*08daf0c2Sdrahn  invalid:
1896*08daf0c2Sdrahn             float_raise( float_flag_invalid );
1897*08daf0c2Sdrahn             return float64_default_nan;
1898*08daf0c2Sdrahn         }
1899*08daf0c2Sdrahn         return packFloat64( zSign, 0x7FF, 0, 0 );
1900*08daf0c2Sdrahn     }
1901*08daf0c2Sdrahn     if ( aExp == 0 ) {
1902*08daf0c2Sdrahn         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1903*08daf0c2Sdrahn         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1904*08daf0c2Sdrahn     }
1905*08daf0c2Sdrahn     if ( bExp == 0 ) {
1906*08daf0c2Sdrahn         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1907*08daf0c2Sdrahn         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1908*08daf0c2Sdrahn     }
1909*08daf0c2Sdrahn     zExp = aExp + bExp - 0x400;
1910*08daf0c2Sdrahn     aSig0 |= 0x00100000;
1911*08daf0c2Sdrahn     shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );
1912*08daf0c2Sdrahn     mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
1913*08daf0c2Sdrahn     add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
1914*08daf0c2Sdrahn     zSig2 |= ( zSig3 != 0 );
1915*08daf0c2Sdrahn     if ( 0x00200000 <= zSig0 ) {
1916*08daf0c2Sdrahn         shift64ExtraRightJamming(
1917*08daf0c2Sdrahn             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1918*08daf0c2Sdrahn         ++zExp;
1919*08daf0c2Sdrahn     }
1920*08daf0c2Sdrahn     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1921*08daf0c2Sdrahn 
1922*08daf0c2Sdrahn }
1923*08daf0c2Sdrahn 
1924*08daf0c2Sdrahn /*
1925*08daf0c2Sdrahn -------------------------------------------------------------------------------
1926*08daf0c2Sdrahn Returns the result of dividing the double-precision floating-point value `a'
1927*08daf0c2Sdrahn by the corresponding value `b'.  The operation is performed according to the
1928*08daf0c2Sdrahn IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1929*08daf0c2Sdrahn -------------------------------------------------------------------------------
1930*08daf0c2Sdrahn */
1931*08daf0c2Sdrahn float64 float64_div( float64 a, float64 b )
1932*08daf0c2Sdrahn {
1933*08daf0c2Sdrahn     flag aSign, bSign, zSign;
1934*08daf0c2Sdrahn     int16 aExp, bExp, zExp;
1935*08daf0c2Sdrahn     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1936*08daf0c2Sdrahn     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
1937*08daf0c2Sdrahn 
1938*08daf0c2Sdrahn     aSig1 = extractFloat64Frac1( a );
1939*08daf0c2Sdrahn     aSig0 = extractFloat64Frac0( a );
1940*08daf0c2Sdrahn     aExp = extractFloat64Exp( a );
1941*08daf0c2Sdrahn     aSign = extractFloat64Sign( a );
1942*08daf0c2Sdrahn     bSig1 = extractFloat64Frac1( b );
1943*08daf0c2Sdrahn     bSig0 = extractFloat64Frac0( b );
1944*08daf0c2Sdrahn     bExp = extractFloat64Exp( b );
1945*08daf0c2Sdrahn     bSign = extractFloat64Sign( b );
1946*08daf0c2Sdrahn     zSign = aSign ^ bSign;
1947*08daf0c2Sdrahn     if ( aExp == 0x7FF ) {
1948*08daf0c2Sdrahn         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1949*08daf0c2Sdrahn         if ( bExp == 0x7FF ) {
1950*08daf0c2Sdrahn             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1951*08daf0c2Sdrahn             goto invalid;
1952*08daf0c2Sdrahn         }
1953*08daf0c2Sdrahn         return packFloat64( zSign, 0x7FF, 0, 0 );
1954*08daf0c2Sdrahn     }
1955*08daf0c2Sdrahn     if ( bExp == 0x7FF ) {
1956*08daf0c2Sdrahn         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1957*08daf0c2Sdrahn         return packFloat64( zSign, 0, 0, 0 );
1958*08daf0c2Sdrahn     }
1959*08daf0c2Sdrahn     if ( bExp == 0 ) {
1960*08daf0c2Sdrahn         if ( ( bSig0 | bSig1 ) == 0 ) {
1961*08daf0c2Sdrahn             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1962*08daf0c2Sdrahn  invalid:
1963*08daf0c2Sdrahn                 float_raise( float_flag_invalid );
1964*08daf0c2Sdrahn                 return float64_default_nan;
1965*08daf0c2Sdrahn             }
1966*08daf0c2Sdrahn             float_raise( float_flag_divbyzero );
1967*08daf0c2Sdrahn             return packFloat64( zSign, 0x7FF, 0, 0 );
1968*08daf0c2Sdrahn         }
1969*08daf0c2Sdrahn         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1970*08daf0c2Sdrahn     }
1971*08daf0c2Sdrahn     if ( aExp == 0 ) {
1972*08daf0c2Sdrahn         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1973*08daf0c2Sdrahn         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1974*08daf0c2Sdrahn     }
1975*08daf0c2Sdrahn     zExp = aExp - bExp + 0x3FD;
1976*08daf0c2Sdrahn     shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );
1977*08daf0c2Sdrahn     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
1978*08daf0c2Sdrahn     if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {
1979*08daf0c2Sdrahn         shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
1980*08daf0c2Sdrahn         ++zExp;
1981*08daf0c2Sdrahn     }
1982*08daf0c2Sdrahn     zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );
1983*08daf0c2Sdrahn     mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
1984*08daf0c2Sdrahn     sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
1985*08daf0c2Sdrahn     while ( (sbits32) rem0 < 0 ) {
1986*08daf0c2Sdrahn         --zSig0;
1987*08daf0c2Sdrahn         add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
1988*08daf0c2Sdrahn     }
1989*08daf0c2Sdrahn     zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );
1990*08daf0c2Sdrahn     if ( ( zSig1 & 0x3FF ) <= 4 ) {
1991*08daf0c2Sdrahn         mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
1992*08daf0c2Sdrahn         sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
1993*08daf0c2Sdrahn         while ( (sbits32) rem1 < 0 ) {
1994*08daf0c2Sdrahn             --zSig1;
1995*08daf0c2Sdrahn             add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
1996*08daf0c2Sdrahn         }
1997*08daf0c2Sdrahn         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
1998*08daf0c2Sdrahn     }
1999*08daf0c2Sdrahn     shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
2000*08daf0c2Sdrahn     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
2001*08daf0c2Sdrahn 
2002*08daf0c2Sdrahn }
2003*08daf0c2Sdrahn 
2004*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
2005*08daf0c2Sdrahn /*
2006*08daf0c2Sdrahn -------------------------------------------------------------------------------
2007*08daf0c2Sdrahn Returns the remainder of the double-precision floating-point value `a'
2008*08daf0c2Sdrahn with respect to the corresponding value `b'.  The operation is performed
2009*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2010*08daf0c2Sdrahn -------------------------------------------------------------------------------
2011*08daf0c2Sdrahn */
2012*08daf0c2Sdrahn float64 float64_rem( float64 a, float64 b )
2013*08daf0c2Sdrahn {
2014*08daf0c2Sdrahn     flag aSign, bSign, zSign;
2015*08daf0c2Sdrahn     int16 aExp, bExp, expDiff;
2016*08daf0c2Sdrahn     bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
2017*08daf0c2Sdrahn     bits32 allZero, alternateASig0, alternateASig1, sigMean1;
2018*08daf0c2Sdrahn     sbits32 sigMean0;
2019*08daf0c2Sdrahn     float64 z;
2020*08daf0c2Sdrahn 
2021*08daf0c2Sdrahn     aSig1 = extractFloat64Frac1( a );
2022*08daf0c2Sdrahn     aSig0 = extractFloat64Frac0( a );
2023*08daf0c2Sdrahn     aExp = extractFloat64Exp( a );
2024*08daf0c2Sdrahn     aSign = extractFloat64Sign( a );
2025*08daf0c2Sdrahn     bSig1 = extractFloat64Frac1( b );
2026*08daf0c2Sdrahn     bSig0 = extractFloat64Frac0( b );
2027*08daf0c2Sdrahn     bExp = extractFloat64Exp( b );
2028*08daf0c2Sdrahn     bSign = extractFloat64Sign( b );
2029*08daf0c2Sdrahn     if ( aExp == 0x7FF ) {
2030*08daf0c2Sdrahn         if (    ( aSig0 | aSig1 )
2031*08daf0c2Sdrahn              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
2032*08daf0c2Sdrahn             return propagateFloat64NaN( a, b );
2033*08daf0c2Sdrahn         }
2034*08daf0c2Sdrahn         goto invalid;
2035*08daf0c2Sdrahn     }
2036*08daf0c2Sdrahn     if ( bExp == 0x7FF ) {
2037*08daf0c2Sdrahn         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
2038*08daf0c2Sdrahn         return a;
2039*08daf0c2Sdrahn     }
2040*08daf0c2Sdrahn     if ( bExp == 0 ) {
2041*08daf0c2Sdrahn         if ( ( bSig0 | bSig1 ) == 0 ) {
2042*08daf0c2Sdrahn  invalid:
2043*08daf0c2Sdrahn             float_raise( float_flag_invalid );
2044*08daf0c2Sdrahn             return float64_default_nan;
2045*08daf0c2Sdrahn         }
2046*08daf0c2Sdrahn         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
2047*08daf0c2Sdrahn     }
2048*08daf0c2Sdrahn     if ( aExp == 0 ) {
2049*08daf0c2Sdrahn         if ( ( aSig0 | aSig1 ) == 0 ) return a;
2050*08daf0c2Sdrahn         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2051*08daf0c2Sdrahn     }
2052*08daf0c2Sdrahn     expDiff = aExp - bExp;
2053*08daf0c2Sdrahn     if ( expDiff < -1 ) return a;
2054*08daf0c2Sdrahn     shortShift64Left(
2055*08daf0c2Sdrahn         aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );
2056*08daf0c2Sdrahn     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
2057*08daf0c2Sdrahn     q = le64( bSig0, bSig1, aSig0, aSig1 );
2058*08daf0c2Sdrahn     if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2059*08daf0c2Sdrahn     expDiff -= 32;
2060*08daf0c2Sdrahn     while ( 0 < expDiff ) {
2061*08daf0c2Sdrahn         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2062*08daf0c2Sdrahn         q = ( 4 < q ) ? q - 4 : 0;
2063*08daf0c2Sdrahn         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2064*08daf0c2Sdrahn         shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );
2065*08daf0c2Sdrahn         shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );
2066*08daf0c2Sdrahn         sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );
2067*08daf0c2Sdrahn         expDiff -= 29;
2068*08daf0c2Sdrahn     }
2069*08daf0c2Sdrahn     if ( -32 < expDiff ) {
2070*08daf0c2Sdrahn         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2071*08daf0c2Sdrahn         q = ( 4 < q ) ? q - 4 : 0;
2072*08daf0c2Sdrahn         q >>= - expDiff;
2073*08daf0c2Sdrahn         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2074*08daf0c2Sdrahn         expDiff += 24;
2075*08daf0c2Sdrahn         if ( expDiff < 0 ) {
2076*08daf0c2Sdrahn             shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
2077*08daf0c2Sdrahn         }
2078*08daf0c2Sdrahn         else {
2079*08daf0c2Sdrahn             shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
2080*08daf0c2Sdrahn         }
2081*08daf0c2Sdrahn         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2082*08daf0c2Sdrahn         sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
2083*08daf0c2Sdrahn     }
2084*08daf0c2Sdrahn     else {
2085*08daf0c2Sdrahn         shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );
2086*08daf0c2Sdrahn         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2087*08daf0c2Sdrahn     }
2088*08daf0c2Sdrahn     do {
2089*08daf0c2Sdrahn         alternateASig0 = aSig0;
2090*08daf0c2Sdrahn         alternateASig1 = aSig1;
2091*08daf0c2Sdrahn         ++q;
2092*08daf0c2Sdrahn         sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2093*08daf0c2Sdrahn     } while ( 0 <= (sbits32) aSig0 );
2094*08daf0c2Sdrahn     add64(
2095*08daf0c2Sdrahn         aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
2096*08daf0c2Sdrahn     if (    ( sigMean0 < 0 )
2097*08daf0c2Sdrahn          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
2098*08daf0c2Sdrahn         aSig0 = alternateASig0;
2099*08daf0c2Sdrahn         aSig1 = alternateASig1;
2100*08daf0c2Sdrahn     }
2101*08daf0c2Sdrahn     zSign = ( (sbits32) aSig0 < 0 );
2102*08daf0c2Sdrahn     if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
2103*08daf0c2Sdrahn     return
2104*08daf0c2Sdrahn         normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
2105*08daf0c2Sdrahn 
2106*08daf0c2Sdrahn }
2107*08daf0c2Sdrahn #endif
2108*08daf0c2Sdrahn 
2109*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
2110*08daf0c2Sdrahn /*
2111*08daf0c2Sdrahn -------------------------------------------------------------------------------
2112*08daf0c2Sdrahn Returns the square root of the double-precision floating-point value `a'.
2113*08daf0c2Sdrahn The operation is performed according to the IEC/IEEE Standard for Binary
2114*08daf0c2Sdrahn Floating-Point Arithmetic.
2115*08daf0c2Sdrahn -------------------------------------------------------------------------------
2116*08daf0c2Sdrahn */
2117*08daf0c2Sdrahn float64 float64_sqrt( float64 a )
2118*08daf0c2Sdrahn {
2119*08daf0c2Sdrahn     flag aSign;
2120*08daf0c2Sdrahn     int16 aExp, zExp;
2121*08daf0c2Sdrahn     bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
2122*08daf0c2Sdrahn     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
2123*08daf0c2Sdrahn     float64 z;
2124*08daf0c2Sdrahn 
2125*08daf0c2Sdrahn     aSig1 = extractFloat64Frac1( a );
2126*08daf0c2Sdrahn     aSig0 = extractFloat64Frac0( a );
2127*08daf0c2Sdrahn     aExp = extractFloat64Exp( a );
2128*08daf0c2Sdrahn     aSign = extractFloat64Sign( a );
2129*08daf0c2Sdrahn     if ( aExp == 0x7FF ) {
2130*08daf0c2Sdrahn         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
2131*08daf0c2Sdrahn         if ( ! aSign ) return a;
2132*08daf0c2Sdrahn         goto invalid;
2133*08daf0c2Sdrahn     }
2134*08daf0c2Sdrahn     if ( aSign ) {
2135*08daf0c2Sdrahn         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
2136*08daf0c2Sdrahn  invalid:
2137*08daf0c2Sdrahn         float_raise( float_flag_invalid );
2138*08daf0c2Sdrahn         return float64_default_nan;
2139*08daf0c2Sdrahn     }
2140*08daf0c2Sdrahn     if ( aExp == 0 ) {
2141*08daf0c2Sdrahn         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
2142*08daf0c2Sdrahn         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2143*08daf0c2Sdrahn     }
2144*08daf0c2Sdrahn     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2145*08daf0c2Sdrahn     aSig0 |= 0x00100000;
2146*08daf0c2Sdrahn     shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
2147*08daf0c2Sdrahn     zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
2148*08daf0c2Sdrahn     if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;
2149*08daf0c2Sdrahn     doubleZSig0 = zSig0 + zSig0;
2150*08daf0c2Sdrahn     shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
2151*08daf0c2Sdrahn     mul32To64( zSig0, zSig0, &term0, &term1 );
2152*08daf0c2Sdrahn     sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
2153*08daf0c2Sdrahn     while ( (sbits32) rem0 < 0 ) {
2154*08daf0c2Sdrahn         --zSig0;
2155*08daf0c2Sdrahn         doubleZSig0 -= 2;
2156*08daf0c2Sdrahn         add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
2157*08daf0c2Sdrahn     }
2158*08daf0c2Sdrahn     zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
2159*08daf0c2Sdrahn     if ( ( zSig1 & 0x1FF ) <= 5 ) {
2160*08daf0c2Sdrahn         if ( zSig1 == 0 ) zSig1 = 1;
2161*08daf0c2Sdrahn         mul32To64( doubleZSig0, zSig1, &term1, &term2 );
2162*08daf0c2Sdrahn         sub64( rem1, 0, term1, term2, &rem1, &rem2 );
2163*08daf0c2Sdrahn         mul32To64( zSig1, zSig1, &term2, &term3 );
2164*08daf0c2Sdrahn         sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
2165*08daf0c2Sdrahn         while ( (sbits32) rem1 < 0 ) {
2166*08daf0c2Sdrahn             --zSig1;
2167*08daf0c2Sdrahn             shortShift64Left( 0, zSig1, 1, &term2, &term3 );
2168*08daf0c2Sdrahn             term3 |= 1;
2169*08daf0c2Sdrahn             term2 |= doubleZSig0;
2170*08daf0c2Sdrahn             add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
2171*08daf0c2Sdrahn         }
2172*08daf0c2Sdrahn         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2173*08daf0c2Sdrahn     }
2174*08daf0c2Sdrahn     shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
2175*08daf0c2Sdrahn     return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
2176*08daf0c2Sdrahn 
2177*08daf0c2Sdrahn }
2178*08daf0c2Sdrahn #endif
2179*08daf0c2Sdrahn 
2180*08daf0c2Sdrahn /*
2181*08daf0c2Sdrahn -------------------------------------------------------------------------------
2182*08daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is equal to
2183*08daf0c2Sdrahn the corresponding value `b', and 0 otherwise.  The comparison is performed
2184*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2185*08daf0c2Sdrahn -------------------------------------------------------------------------------
2186*08daf0c2Sdrahn */
2187*08daf0c2Sdrahn flag float64_eq( float64 a, float64 b )
2188*08daf0c2Sdrahn {
2189*08daf0c2Sdrahn 
2190*08daf0c2Sdrahn     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2191*08daf0c2Sdrahn               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2192*08daf0c2Sdrahn          || (    ( extractFloat64Exp( b ) == 0x7FF )
2193*08daf0c2Sdrahn               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2194*08daf0c2Sdrahn        ) {
2195*08daf0c2Sdrahn         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2196*08daf0c2Sdrahn             float_raise( float_flag_invalid );
2197*08daf0c2Sdrahn         }
2198*08daf0c2Sdrahn         return 0;
2199*08daf0c2Sdrahn     }
2200*08daf0c2Sdrahn     return ( a == b ) ||
2201*08daf0c2Sdrahn 	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
2202*08daf0c2Sdrahn 
2203*08daf0c2Sdrahn }
2204*08daf0c2Sdrahn 
2205*08daf0c2Sdrahn /*
2206*08daf0c2Sdrahn -------------------------------------------------------------------------------
2207*08daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is less than
2208*08daf0c2Sdrahn or equal to the corresponding value `b', and 0 otherwise.  The comparison
2209*08daf0c2Sdrahn is performed according to the IEC/IEEE Standard for Binary Floating-Point
2210*08daf0c2Sdrahn Arithmetic.
2211*08daf0c2Sdrahn -------------------------------------------------------------------------------
2212*08daf0c2Sdrahn */
2213*08daf0c2Sdrahn flag float64_le( float64 a, float64 b )
2214*08daf0c2Sdrahn {
2215*08daf0c2Sdrahn     flag aSign, bSign;
2216*08daf0c2Sdrahn 
2217*08daf0c2Sdrahn     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2218*08daf0c2Sdrahn               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2219*08daf0c2Sdrahn          || (    ( extractFloat64Exp( b ) == 0x7FF )
2220*08daf0c2Sdrahn               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2221*08daf0c2Sdrahn        ) {
2222*08daf0c2Sdrahn         float_raise( float_flag_invalid );
2223*08daf0c2Sdrahn         return 0;
2224*08daf0c2Sdrahn     }
2225*08daf0c2Sdrahn     aSign = extractFloat64Sign( a );
2226*08daf0c2Sdrahn     bSign = extractFloat64Sign( b );
2227*08daf0c2Sdrahn     if ( aSign != bSign )
2228*08daf0c2Sdrahn 	return aSign ||
2229*08daf0c2Sdrahn 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
2230*08daf0c2Sdrahn 	      0 );
2231*08daf0c2Sdrahn     return ( a == b ) ||
2232*08daf0c2Sdrahn 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2233*08daf0c2Sdrahn }
2234*08daf0c2Sdrahn 
2235*08daf0c2Sdrahn /*
2236*08daf0c2Sdrahn -------------------------------------------------------------------------------
2237*08daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is less than
2238*08daf0c2Sdrahn the corresponding value `b', and 0 otherwise.  The comparison is performed
2239*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2240*08daf0c2Sdrahn -------------------------------------------------------------------------------
2241*08daf0c2Sdrahn */
2242*08daf0c2Sdrahn flag float64_lt( float64 a, float64 b )
2243*08daf0c2Sdrahn {
2244*08daf0c2Sdrahn     flag aSign, bSign;
2245*08daf0c2Sdrahn 
2246*08daf0c2Sdrahn     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2247*08daf0c2Sdrahn               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2248*08daf0c2Sdrahn          || (    ( extractFloat64Exp( b ) == 0x7FF )
2249*08daf0c2Sdrahn               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2250*08daf0c2Sdrahn        ) {
2251*08daf0c2Sdrahn         float_raise( float_flag_invalid );
2252*08daf0c2Sdrahn         return 0;
2253*08daf0c2Sdrahn     }
2254*08daf0c2Sdrahn     aSign = extractFloat64Sign( a );
2255*08daf0c2Sdrahn     bSign = extractFloat64Sign( b );
2256*08daf0c2Sdrahn     if ( aSign != bSign )
2257*08daf0c2Sdrahn 	return aSign &&
2258*08daf0c2Sdrahn 	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
2259*08daf0c2Sdrahn 	      0 );
2260*08daf0c2Sdrahn     return ( a != b ) &&
2261*08daf0c2Sdrahn 	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2262*08daf0c2Sdrahn 
2263*08daf0c2Sdrahn }
2264*08daf0c2Sdrahn 
2265*08daf0c2Sdrahn #ifndef SOFTFLOAT_FOR_GCC
2266*08daf0c2Sdrahn /*
2267*08daf0c2Sdrahn -------------------------------------------------------------------------------
2268*08daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is equal to
2269*08daf0c2Sdrahn the corresponding value `b', and 0 otherwise.  The invalid exception is
2270*08daf0c2Sdrahn raised if either operand is a NaN.  Otherwise, the comparison is performed
2271*08daf0c2Sdrahn according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2272*08daf0c2Sdrahn -------------------------------------------------------------------------------
2273*08daf0c2Sdrahn */
2274*08daf0c2Sdrahn flag float64_eq_signaling( float64 a, float64 b )
2275*08daf0c2Sdrahn {
2276*08daf0c2Sdrahn 
2277*08daf0c2Sdrahn     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2278*08daf0c2Sdrahn               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2279*08daf0c2Sdrahn          || (    ( extractFloat64Exp( b ) == 0x7FF )
2280*08daf0c2Sdrahn               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2281*08daf0c2Sdrahn        ) {
2282*08daf0c2Sdrahn         float_raise( float_flag_invalid );
2283*08daf0c2Sdrahn         return 0;
2284*08daf0c2Sdrahn     }
2285*08daf0c2Sdrahn     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2286*08daf0c2Sdrahn 
2287*08daf0c2Sdrahn }
2288*08daf0c2Sdrahn 
2289*08daf0c2Sdrahn /*
2290*08daf0c2Sdrahn -------------------------------------------------------------------------------
2291*08daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is less than or
2292*08daf0c2Sdrahn equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2293*08daf0c2Sdrahn cause an exception.  Otherwise, the comparison is performed according to the
2294*08daf0c2Sdrahn IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2295*08daf0c2Sdrahn -------------------------------------------------------------------------------
2296*08daf0c2Sdrahn */
2297*08daf0c2Sdrahn flag float64_le_quiet( float64 a, float64 b )
2298*08daf0c2Sdrahn {
2299*08daf0c2Sdrahn     flag aSign, bSign;
2300*08daf0c2Sdrahn 
2301*08daf0c2Sdrahn     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2302*08daf0c2Sdrahn               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2303*08daf0c2Sdrahn          || (    ( extractFloat64Exp( b ) == 0x7FF )
2304*08daf0c2Sdrahn               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2305*08daf0c2Sdrahn        ) {
2306*08daf0c2Sdrahn         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2307*08daf0c2Sdrahn             float_raise( float_flag_invalid );
2308*08daf0c2Sdrahn         }
2309*08daf0c2Sdrahn         return 0;
2310*08daf0c2Sdrahn     }
2311*08daf0c2Sdrahn     aSign = extractFloat64Sign( a );
2312*08daf0c2Sdrahn     bSign = extractFloat64Sign( b );
2313*08daf0c2Sdrahn     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2314*08daf0c2Sdrahn     return ( a == b ) || ( aSign ^ ( a < b ) );
2315*08daf0c2Sdrahn 
2316*08daf0c2Sdrahn }
2317*08daf0c2Sdrahn 
2318*08daf0c2Sdrahn /*
2319*08daf0c2Sdrahn -------------------------------------------------------------------------------
2320*08daf0c2Sdrahn Returns 1 if the double-precision floating-point value `a' is less than
2321*08daf0c2Sdrahn the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2322*08daf0c2Sdrahn exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2323*08daf0c2Sdrahn Standard for Binary Floating-Point Arithmetic.
2324*08daf0c2Sdrahn -------------------------------------------------------------------------------
2325*08daf0c2Sdrahn */
2326*08daf0c2Sdrahn flag float64_lt_quiet( float64 a, float64 b )
2327*08daf0c2Sdrahn {
2328*08daf0c2Sdrahn     flag aSign, bSign;
2329*08daf0c2Sdrahn 
2330*08daf0c2Sdrahn     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2331*08daf0c2Sdrahn               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2332*08daf0c2Sdrahn          || (    ( extractFloat64Exp( b ) == 0x7FF )
2333*08daf0c2Sdrahn               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2334*08daf0c2Sdrahn        ) {
2335*08daf0c2Sdrahn         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2336*08daf0c2Sdrahn             float_raise( float_flag_invalid );
2337*08daf0c2Sdrahn         }
2338*08daf0c2Sdrahn         return 0;
2339*08daf0c2Sdrahn     }
2340*08daf0c2Sdrahn     aSign = extractFloat64Sign( a );
2341*08daf0c2Sdrahn     bSign = extractFloat64Sign( b );
2342*08daf0c2Sdrahn     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2343*08daf0c2Sdrahn     return ( a != b ) && ( aSign ^ ( a < b ) );
2344*08daf0c2Sdrahn 
2345*08daf0c2Sdrahn }
2346*08daf0c2Sdrahn 
2347*08daf0c2Sdrahn #endif
2348