1 /*============================================================================
2 This source file is an extension to the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
4 floating point emulation.
5 
6 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
7 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
8 RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
9 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
10 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
11 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
12 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
13 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
14 
15 Derivative works are acceptable, even for commercial purposes, so long as
16 (1) the source code for the derivative work includes prominent notice that
17 the work is derivative, and (2) the source code includes prominent notice with
18 these four paragraphs for those parts of this code that are retained.
19 =============================================================================*/
20 
21 /*============================================================================
 * Written for Bochs (x86 architecture simulator) by
23  *            Stanislav Shwartsman [sshwarts at sourceforge net]
24  * ==========================================================================*/
25 
26 #include "softfloatx80.h"
27 #include "softfloat-round-pack.h"
28 #include "softfloat-macros.h"
29 
30 /*----------------------------------------------------------------------------
31 | Returns the result of converting the extended double-precision floating-
32 | point value `a' to the 16-bit two's complement integer format.  The
33 | conversion is performed according to the IEC/IEEE Standard for Binary
34 | Floating-Point Arithmetic - which means in particular that the conversion
35 | is rounded according to the current rounding mode. If `a' is a NaN or the
36 | conversion overflows, the integer indefinite value is returned.
37 *----------------------------------------------------------------------------*/
38 
floatx80_to_int16(floatx80 a,float_status_t & status)39 Bit16s floatx80_to_int16(floatx80 a, float_status_t &status)
40 {
41    if (floatx80_is_unsupported(a)) {
42         float_raise(status, float_flag_invalid);
43         return int16_indefinite;
44    }
45 
46    Bit32s v32 = floatx80_to_int32(a, status);
47 
48    if ((v32 > 32767) || (v32 < -32768)) {
49         status.float_exception_flags = float_flag_invalid; // throw away other flags
50         return int16_indefinite;
51    }
52 
53    return (Bit16s) v32;
54 }
55 
56 /*----------------------------------------------------------------------------
57 | Returns the result of converting the extended double-precision floating-
58 | point value `a' to the 16-bit two's complement integer format.  The
59 | conversion is performed according to the IEC/IEEE Standard for Binary
60 | Floating-Point Arithmetic, except that the conversion is always rounded
61 | toward zero.  If `a' is a NaN or the conversion overflows, the integer
62 | indefinite value is returned.
63 *----------------------------------------------------------------------------*/
64 
floatx80_to_int16_round_to_zero(floatx80 a,float_status_t & status)65 Bit16s floatx80_to_int16_round_to_zero(floatx80 a, float_status_t &status)
66 {
67    if (floatx80_is_unsupported(a)) {
68         float_raise(status, float_flag_invalid);
69         return int16_indefinite;
70    }
71 
72    Bit32s v32 = floatx80_to_int32_round_to_zero(a, status);
73 
74    if ((v32 > 32767) || (v32 < -32768)) {
75         status.float_exception_flags = float_flag_invalid; // throw away other flags
76         return int16_indefinite;
77    }
78 
79    return (Bit16s) v32;
80 }
81 
82 /*----------------------------------------------------------------------------
83 | Separate the source extended double-precision floating point value `a'
84 | into its exponent and significand, store the significant back to the
85 | 'a' and return the exponent. The operation performed is a superset of
86 | the IEC/IEEE recommended logb(x) function.
87 *----------------------------------------------------------------------------*/
88 
floatx80_extract(floatx80 & a,float_status_t & status)89 floatx80 floatx80_extract(floatx80 &a, float_status_t &status)
90 {
91     Bit64u aSig = extractFloatx80Frac(a);
92     Bit32s aExp = extractFloatx80Exp(a);
93     int   aSign = extractFloatx80Sign(a);
94 
95     if (floatx80_is_unsupported(a))
96     {
97         float_raise(status, float_flag_invalid);
98         a = floatx80_default_nan;
99         return a;
100     }
101 
102     if (aExp == 0x7FFF) {
103         if ((Bit64u) (aSig<<1))
104         {
105             a = propagateFloatx80NaN(a, status);
106             return a;
107         }
108         return packFloatx80(0, 0x7FFF, BX_CONST64(0x8000000000000000));
109     }
110     if (aExp == 0)
111     {
112         if (aSig == 0) {
113             float_raise(status, float_flag_divbyzero);
114             a = packFloatx80(aSign, 0, 0);
115             return packFloatx80(1, 0x7FFF, BX_CONST64(0x8000000000000000));
116         }
117         float_raise(status, float_flag_denormal);
118         normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
119     }
120 
121     a.exp = (aSign << 15) + 0x3FFF;
122     a.fraction = aSig;
123     return int32_to_floatx80(aExp - 0x3FFF);
124 }
125 
126 /*----------------------------------------------------------------------------
127 | Scales extended double-precision floating-point value in operand `a' by
128 | value `b'. The function truncates the value in the second operand 'b' to
129 | an integral value and adds that value to the exponent of the operand 'a'.
130 | The operation performed according to the IEC/IEEE Standard for Binary
131 | Floating-Point Arithmetic.
132 *----------------------------------------------------------------------------*/
133 
floatx80_scale(floatx80 a,floatx80 b,float_status_t & status)134 floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status_t &status)
135 {
136     Bit32s aExp, bExp;
137     Bit64u aSig, bSig;
138 
139     // handle unsupported extended double-precision floating encodings
140     if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
141     {
142         float_raise(status, float_flag_invalid);
143         return floatx80_default_nan;
144     }
145 
146     aSig = extractFloatx80Frac(a);
147     aExp = extractFloatx80Exp(a);
148     int aSign = extractFloatx80Sign(a);
149     bSig = extractFloatx80Frac(b);
150     bExp = extractFloatx80Exp(b);
151     int bSign = extractFloatx80Sign(b);
152 
153     if (aExp == 0x7FFF) {
154         if ((Bit64u) (aSig<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
155         {
156             return propagateFloatx80NaN(a, b, status);
157         }
158         if ((bExp == 0x7FFF) && bSign) {
159             float_raise(status, float_flag_invalid);
160             return floatx80_default_nan;
161         }
162         if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
163         return a;
164     }
165     if (bExp == 0x7FFF) {
166         if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
167         if ((aExp | aSig) == 0) {
168             if (! bSign) {
169                 float_raise(status, float_flag_invalid);
170                 return floatx80_default_nan;
171             }
172             return a;
173         }
174         if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
175         if (bSign) return packFloatx80(aSign, 0, 0);
176         return packFloatx80(aSign, 0x7FFF, BX_CONST64(0x8000000000000000));
177     }
178     if (aExp == 0) {
179         if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
180         if (aSig == 0) return a;
181         float_raise(status, float_flag_denormal);
182         normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
183         if (bExp < 0x3FFF)
184             return normalizeRoundAndPackFloatx80(80, aSign, aExp, aSig, 0, status);
185     }
186     if (bExp == 0) {
187         if (bSig == 0) return a;
188         float_raise(status, float_flag_denormal);
189         normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
190     }
191 
192     if (bExp > 0x400E) {
193         /* generate appropriate overflow/underflow */
194         return roundAndPackFloatx80(80, aSign,
195                           bSign ? -0x3FFF : 0x7FFF, aSig, 0, status);
196     }
197 
198     if (bExp < 0x3FFF) return a;
199 
200     int shiftCount = 0x403E - bExp;
201     bSig >>= shiftCount;
202     Bit32s scale = (Bit32s) bSig;
203     if (bSign) scale = -scale; /* -32768..32767 */
204     return
205         roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0, status);
206 }
207 
208 /*----------------------------------------------------------------------------
209 | Determine extended-precision floating-point number class.
210 *----------------------------------------------------------------------------*/
211 
floatx80_class(floatx80 a)212 float_class_t floatx80_class(floatx80 a)
213 {
214    Bit32s aExp = extractFloatx80Exp(a);
215    Bit64u aSig = extractFloatx80Frac(a);
216 
217    if(aExp == 0) {
218        if (aSig == 0)
219            return float_zero;
220 
221        /* denormal or pseudo-denormal */
222        return float_denormal;
223    }
224 
225    /* valid numbers have the MS bit set */
226    if (!(aSig & BX_CONST64(0x8000000000000000)))
227        return float_SNaN; /* report unsupported as SNaNs */
228 
229    if(aExp == 0x7fff) {
230        int aSign = extractFloatx80Sign(a);
231 
232        if (((Bit64u) (aSig<< 1)) == 0)
233            return (aSign) ? float_negative_inf : float_positive_inf;
234 
235        return (aSig & BX_CONST64(0x4000000000000000)) ? float_QNaN : float_SNaN;
236    }
237 
238    return float_normalized;
239 }
240 
241 /*----------------------------------------------------------------------------
242 | Compare  between  two extended precision  floating  point  numbers. Returns
243 | 'float_relation_equal'  if the operands are equal, 'float_relation_less' if
244 | the    value    'a'   is   less   than   the   corresponding   value   `b',
245 | 'float_relation_greater' if the value 'a' is greater than the corresponding
246 | value `b', or 'float_relation_unordered' otherwise.
247 *----------------------------------------------------------------------------*/
248 
floatx80_compare(floatx80 a,floatx80 b,int quiet,float_status_t & status)249 int floatx80_compare(floatx80 a, floatx80 b, int quiet, float_status_t &status)
250 {
251     float_class_t aClass = floatx80_class(a);
252     float_class_t bClass = floatx80_class(b);
253 
254     if (aClass == float_SNaN || bClass == float_SNaN)
255     {
256         /* unsupported reported as SNaN */
257         float_raise(status, float_flag_invalid);
258         return float_relation_unordered;
259     }
260 
261     if (aClass == float_QNaN || bClass == float_QNaN) {
262         if (! quiet) float_raise(status, float_flag_invalid);
263         return float_relation_unordered;
264     }
265 
266     if (aClass == float_denormal || bClass == float_denormal) {
267         float_raise(status, float_flag_denormal);
268     }
269 
270     int aSign = extractFloatx80Sign(a);
271     int bSign = extractFloatx80Sign(b);
272 
273     if (aClass == float_zero) {
274         if (bClass == float_zero) return float_relation_equal;
275         return bSign ? float_relation_greater : float_relation_less;
276     }
277 
278     if (bClass == float_zero || aSign != bSign) {
279         return aSign ? float_relation_less : float_relation_greater;
280     }
281 
282     Bit64u aSig = extractFloatx80Frac(a);
283     Bit32s aExp = extractFloatx80Exp(a);
284     Bit64u bSig = extractFloatx80Frac(b);
285     Bit32s bExp = extractFloatx80Exp(b);
286 
287     if (aClass == float_denormal)
288         normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
289 
290     if (bClass == float_denormal)
291         normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
292 
293     if (aExp == bExp && aSig == bSig)
294         return float_relation_equal;
295 
296     int less_than =
297         aSign ? ((bExp < aExp) || ((bExp == aExp) && (bSig < aSig)))
298               : ((aExp < bExp) || ((aExp == bExp) && (aSig < bSig)));
299 
300     if (less_than) return float_relation_less;
301     return float_relation_greater;
302 }
303