1 /*============================================================================
2 This source file is an extension to the SoftFloat IEC/IEEE Floating-point
3 Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
4 floating point emulation.
5
6 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
7 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
8 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
9 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
10 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
11 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
12 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
13 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
14
15 Derivative works are acceptable, even for commercial purposes, so long as
16 (1) the source code for the derivative work includes prominent notice that
17 the work is derivative, and (2) the source code includes prominent notice with
18 these four paragraphs for those parts of this code that are retained.
19 =============================================================================*/
20
21 /*============================================================================
22 * Written for Bochs (x86 architecture simulator) by
23 * Stanislav Shwartsman [sshwarts at sourceforge net]
24 * ==========================================================================*/
25
26 #include "softfloatx80.h"
27 #include "softfloat-round-pack.h"
28 #include "softfloat-macros.h"
29
30 /*----------------------------------------------------------------------------
31 | Returns the result of converting the extended double-precision floating-
32 | point value `a' to the 16-bit two's complement integer format. The
33 | conversion is performed according to the IEC/IEEE Standard for Binary
34 | Floating-Point Arithmetic - which means in particular that the conversion
35 | is rounded according to the current rounding mode. If `a' is a NaN or the
36 | conversion overflows, the integer indefinite value is returned.
37 *----------------------------------------------------------------------------*/
38
floatx80_to_int16(floatx80 a,float_status_t & status)39 Bit16s floatx80_to_int16(floatx80 a, float_status_t &status)
40 {
41 if (floatx80_is_unsupported(a)) {
42 float_raise(status, float_flag_invalid);
43 return int16_indefinite;
44 }
45
46 Bit32s v32 = floatx80_to_int32(a, status);
47
48 if ((v32 > 32767) || (v32 < -32768)) {
49 status.float_exception_flags = float_flag_invalid; // throw away other flags
50 return int16_indefinite;
51 }
52
53 return (Bit16s) v32;
54 }
55
56 /*----------------------------------------------------------------------------
57 | Returns the result of converting the extended double-precision floating-
58 | point value `a' to the 16-bit two's complement integer format. The
59 | conversion is performed according to the IEC/IEEE Standard for Binary
60 | Floating-Point Arithmetic, except that the conversion is always rounded
61 | toward zero. If `a' is a NaN or the conversion overflows, the integer
62 | indefinite value is returned.
63 *----------------------------------------------------------------------------*/
64
floatx80_to_int16_round_to_zero(floatx80 a,float_status_t & status)65 Bit16s floatx80_to_int16_round_to_zero(floatx80 a, float_status_t &status)
66 {
67 if (floatx80_is_unsupported(a)) {
68 float_raise(status, float_flag_invalid);
69 return int16_indefinite;
70 }
71
72 Bit32s v32 = floatx80_to_int32_round_to_zero(a, status);
73
74 if ((v32 > 32767) || (v32 < -32768)) {
75 status.float_exception_flags = float_flag_invalid; // throw away other flags
76 return int16_indefinite;
77 }
78
79 return (Bit16s) v32;
80 }
81
82 /*----------------------------------------------------------------------------
83 | Separates the source extended double-precision floating-point value `a'
84 | into its exponent and significand, stores the significand back into
85 | `a', and returns the exponent. The operation performed is a superset of
86 | the IEC/IEEE recommended logb(x) function.
87 *----------------------------------------------------------------------------*/
88
floatx80_extract(floatx80 & a,float_status_t & status)89 floatx80 floatx80_extract(floatx80 &a, float_status_t &status)
90 {
91 Bit64u aSig = extractFloatx80Frac(a);
92 Bit32s aExp = extractFloatx80Exp(a);
93 int aSign = extractFloatx80Sign(a);
94
95 if (floatx80_is_unsupported(a))
96 {
97 float_raise(status, float_flag_invalid);
98 a = floatx80_default_nan;
99 return a;
100 }
101
102 if (aExp == 0x7FFF) {
103 if ((Bit64u) (aSig<<1))
104 {
105 a = propagateFloatx80NaN(a, status);
106 return a;
107 }
108 return packFloatx80(0, 0x7FFF, BX_CONST64(0x8000000000000000));
109 }
110 if (aExp == 0)
111 {
112 if (aSig == 0) {
113 float_raise(status, float_flag_divbyzero);
114 a = packFloatx80(aSign, 0, 0);
115 return packFloatx80(1, 0x7FFF, BX_CONST64(0x8000000000000000));
116 }
117 float_raise(status, float_flag_denormal);
118 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
119 }
120
121 a.exp = (aSign << 15) + 0x3FFF;
122 a.fraction = aSig;
123 return int32_to_floatx80(aExp - 0x3FFF);
124 }
125
126 /*----------------------------------------------------------------------------
127 | Scales extended double-precision floating-point value in operand `a' by
128 | value `b'. The function truncates the value in the second operand 'b' to
129 | an integral value and adds that value to the exponent of the operand 'a'.
130 | The operation is performed according to the IEC/IEEE Standard for Binary
131 | Floating-Point Arithmetic.
132 *----------------------------------------------------------------------------*/
133
floatx80_scale(floatx80 a,floatx80 b,float_status_t & status)134 floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status_t &status)
135 {
136 Bit32s aExp, bExp;
137 Bit64u aSig, bSig;
138
139 // handle unsupported extended double-precision floating encodings
140 if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
141 {
142 float_raise(status, float_flag_invalid);
143 return floatx80_default_nan;
144 }
145
146 aSig = extractFloatx80Frac(a);
147 aExp = extractFloatx80Exp(a);
148 int aSign = extractFloatx80Sign(a);
149 bSig = extractFloatx80Frac(b);
150 bExp = extractFloatx80Exp(b);
151 int bSign = extractFloatx80Sign(b);
152
153 if (aExp == 0x7FFF) {
154 if ((Bit64u) (aSig<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
155 {
156 return propagateFloatx80NaN(a, b, status);
157 }
158 if ((bExp == 0x7FFF) && bSign) {
159 float_raise(status, float_flag_invalid);
160 return floatx80_default_nan;
161 }
162 if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
163 return a;
164 }
165 if (bExp == 0x7FFF) {
166 if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
167 if ((aExp | aSig) == 0) {
168 if (! bSign) {
169 float_raise(status, float_flag_invalid);
170 return floatx80_default_nan;
171 }
172 return a;
173 }
174 if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
175 if (bSign) return packFloatx80(aSign, 0, 0);
176 return packFloatx80(aSign, 0x7FFF, BX_CONST64(0x8000000000000000));
177 }
178 if (aExp == 0) {
179 if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
180 if (aSig == 0) return a;
181 float_raise(status, float_flag_denormal);
182 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
183 if (bExp < 0x3FFF)
184 return normalizeRoundAndPackFloatx80(80, aSign, aExp, aSig, 0, status);
185 }
186 if (bExp == 0) {
187 if (bSig == 0) return a;
188 float_raise(status, float_flag_denormal);
189 normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
190 }
191
192 if (bExp > 0x400E) {
193 /* generate appropriate overflow/underflow */
194 return roundAndPackFloatx80(80, aSign,
195 bSign ? -0x3FFF : 0x7FFF, aSig, 0, status);
196 }
197
198 if (bExp < 0x3FFF) return a;
199
200 int shiftCount = 0x403E - bExp;
201 bSig >>= shiftCount;
202 Bit32s scale = (Bit32s) bSig;
203 if (bSign) scale = -scale; /* -32768..32767 */
204 return
205 roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0, status);
206 }
207
208 /*----------------------------------------------------------------------------
209 | Determine extended-precision floating-point number class.
210 *----------------------------------------------------------------------------*/
211
floatx80_class(floatx80 a)212 float_class_t floatx80_class(floatx80 a)
213 {
214 Bit32s aExp = extractFloatx80Exp(a);
215 Bit64u aSig = extractFloatx80Frac(a);
216
217 if(aExp == 0) {
218 if (aSig == 0)
219 return float_zero;
220
221 /* denormal or pseudo-denormal */
222 return float_denormal;
223 }
224
225 /* valid numbers have the MS bit set */
226 if (!(aSig & BX_CONST64(0x8000000000000000)))
227 return float_SNaN; /* report unsupported as SNaNs */
228
229 if(aExp == 0x7fff) {
230 int aSign = extractFloatx80Sign(a);
231
232 if (((Bit64u) (aSig<< 1)) == 0)
233 return (aSign) ? float_negative_inf : float_positive_inf;
234
235 return (aSig & BX_CONST64(0x4000000000000000)) ? float_QNaN : float_SNaN;
236 }
237
238 return float_normalized;
239 }
240
241 /*----------------------------------------------------------------------------
242 | Compare between two extended precision floating point numbers. Returns
243 | 'float_relation_equal' if the operands are equal, 'float_relation_less' if
244 | the value 'a' is less than the corresponding value `b',
245 | 'float_relation_greater' if the value 'a' is greater than the corresponding
246 | value `b', or 'float_relation_unordered' otherwise.
247 *----------------------------------------------------------------------------*/
248
floatx80_compare(floatx80 a,floatx80 b,int quiet,float_status_t & status)249 int floatx80_compare(floatx80 a, floatx80 b, int quiet, float_status_t &status)
250 {
251 float_class_t aClass = floatx80_class(a);
252 float_class_t bClass = floatx80_class(b);
253
254 if (aClass == float_SNaN || bClass == float_SNaN)
255 {
256 /* unsupported reported as SNaN */
257 float_raise(status, float_flag_invalid);
258 return float_relation_unordered;
259 }
260
261 if (aClass == float_QNaN || bClass == float_QNaN) {
262 if (! quiet) float_raise(status, float_flag_invalid);
263 return float_relation_unordered;
264 }
265
266 if (aClass == float_denormal || bClass == float_denormal) {
267 float_raise(status, float_flag_denormal);
268 }
269
270 int aSign = extractFloatx80Sign(a);
271 int bSign = extractFloatx80Sign(b);
272
273 if (aClass == float_zero) {
274 if (bClass == float_zero) return float_relation_equal;
275 return bSign ? float_relation_greater : float_relation_less;
276 }
277
278 if (bClass == float_zero || aSign != bSign) {
279 return aSign ? float_relation_less : float_relation_greater;
280 }
281
282 Bit64u aSig = extractFloatx80Frac(a);
283 Bit32s aExp = extractFloatx80Exp(a);
284 Bit64u bSig = extractFloatx80Frac(b);
285 Bit32s bExp = extractFloatx80Exp(b);
286
287 if (aClass == float_denormal)
288 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
289
290 if (bClass == float_denormal)
291 normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
292
293 if (aExp == bExp && aSig == bSig)
294 return float_relation_equal;
295
296 int less_than =
297 aSign ? ((bExp < aExp) || ((bExp == aExp) && (bSig < aSig)))
298 : ((aExp < bExp) || ((aExp == bExp) && (aSig < bSig)));
299
300 if (less_than) return float_relation_less;
301 return float_relation_greater;
302 }
303