1 /*============================================================================
2 This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
3 Package, Release 2b.
4
5 Written by John R. Hauser. This work was made possible in part by the
6 International Computer Science Institute, located at Suite 600, 1947 Center
7 Street, Berkeley, California 94704. Funding was partially provided by the
8 National Science Foundation under grant MIP-9311980. The original version
9 of this code was written as part of a project to build a fixed-point vector
10 processor in collaboration with the University of California at Berkeley,
11 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
12 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
13 arithmetic/SoftFloat.html'.
14
15 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
16 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
17 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
18 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
19 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
20 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
21 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
22 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
23
24 Derivative works are acceptable, even for commercial purposes, so long as
25 (1) the source code for the derivative work includes prominent notice that
26 the work is derivative, and (2) the source code includes prominent notice with
27 these four paragraphs for those parts of this code that are retained.
28 =============================================================================*/
29
30 #define FLOAT128
31
32 /*============================================================================
33 * Adapted for Bochs (x86 achitecture simulator) by
34 * Stanislav Shwartsman [sshwarts at sourceforge net]
35 * ==========================================================================*/
36
37 #include "softfloat.h"
38 #include "softfloat-round-pack.h"
39
40 /*----------------------------------------------------------------------------
41 | Primitive arithmetic functions, including multi-word arithmetic, and
42 | division and square root approximations. (Can be specialized to target
43 | if desired).
44 *----------------------------------------------------------------------------*/
45 #include "softfloat-macros.h"
46
47 /*----------------------------------------------------------------------------
48 | Functions and definitions to determine: (1) whether tininess for underflow
49 | is detected before or after rounding by default, (2) what (if anything)
50 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
51 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
52 | are propagated from function inputs to output. These details are target-
53 | specific.
54 *----------------------------------------------------------------------------*/
55 #include "softfloat-specialize.h"
56
57 /*----------------------------------------------------------------------------
58 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
59 | and 7, and returns the properly rounded 32-bit integer corresponding to the
60 | input. If `zSign' is 1, the input is negated before being converted to an
61 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
62 | is simply rounded to an integer, with the inexact exception raised if the
63 | input cannot be represented exactly as an integer. However, if the fixed-
64 | point input is too large, the invalid exception is raised and the integer
65 | indefinite value is returned.
66 *----------------------------------------------------------------------------*/
67
roundAndPackInt32(int zSign,Bit64u exactAbsZ,float_status_t & status)68 Bit32s roundAndPackInt32(int zSign, Bit64u exactAbsZ, float_status_t &status)
69 {
70 int roundingMode = get_float_rounding_mode(status);
71 int roundNearestEven = (roundingMode == float_round_nearest_even);
72 int roundIncrement = 0x40;
73 if (! roundNearestEven) {
74 if (roundingMode == float_round_to_zero) roundIncrement = 0;
75 else {
76 roundIncrement = 0x7F;
77 if (zSign) {
78 if (roundingMode == float_round_up) roundIncrement = 0;
79 }
80 else {
81 if (roundingMode == float_round_down) roundIncrement = 0;
82 }
83 }
84 }
85 int roundBits = (int)(exactAbsZ & 0x7F);
86 Bit64u absZ = (exactAbsZ + roundIncrement)>>7;
87 absZ &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
88 Bit32s z = (Bit32s) absZ;
89 if (zSign) z = -z;
90 if ((absZ>>32) || (z && ((z < 0) ^ zSign))) {
91 float_raise(status, float_flag_invalid);
92 return (Bit32s)(int32_indefinite);
93 }
94 if (roundBits) {
95 float_raise(status, float_flag_inexact);
96 if ((absZ << 7) > exactAbsZ)
97 set_float_rounding_up(status);
98 }
99 return z;
100 }
101
102 /*----------------------------------------------------------------------------
103 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
104 | `absZ1', with binary point between bits 63 and 64 (between the input words),
105 | and returns the properly rounded 64-bit integer corresponding to the input.
106 | If `zSign' is 1, the input is negated before being converted to an integer.
107 | Ordinarily, the fixed-point input is simply rounded to an integer, with
108 | the inexact exception raised if the input cannot be represented exactly as
109 | an integer. However, if the fixed-point input is too large, the invalid
110 | exception is raised and the integer indefinite value is returned.
111 *----------------------------------------------------------------------------*/
112
roundAndPackInt64(int zSign,Bit64u absZ0,Bit64u absZ1,float_status_t & status)113 Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, float_status_t &status)
114 {
115 Bit64s z;
116 int roundingMode = get_float_rounding_mode(status);
117 int roundNearestEven = (roundingMode == float_round_nearest_even);
118 int increment = ((Bit64s) absZ1 < 0);
119 if (! roundNearestEven) {
120 if (roundingMode == float_round_to_zero) increment = 0;
121 else {
122 if (zSign) {
123 increment = (roundingMode == float_round_down) && absZ1;
124 }
125 else {
126 increment = (roundingMode == float_round_up) && absZ1;
127 }
128 }
129 }
130 Bit64u exactAbsZ0 = absZ0;
131 if (increment) {
132 ++absZ0;
133 if (absZ0 == 0) goto overflow;
134 absZ0 &= ~(((Bit64u) (absZ1<<1) == 0) & roundNearestEven);
135 }
136 z = absZ0;
137 if (zSign) z = -z;
138 if (z && ((z < 0) ^ zSign)) {
139 overflow:
140 float_raise(status, float_flag_invalid);
141 return (Bit64s)(int64_indefinite);
142 }
143 if (absZ1) {
144 float_raise(status, float_flag_inexact);
145 if (absZ0 > exactAbsZ0)
146 set_float_rounding_up(status);
147 }
148 return z;
149 }
150
151 /*----------------------------------------------------------------------------
152 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
153 | `absZ1', with binary point between bits 63 and 64 (between the input words),
154 | and returns the properly rounded 64-bit unsigned integer corresponding to the
155 | input. Ordinarily, the fixed-point input is simply rounded to an integer,
156 | with the inexact exception raised if the input cannot be represented exactly
157 | as an integer. However, if the fixed-point input is too large, the invalid
158 | exception is raised and the largest unsigned integer is returned.
159 *----------------------------------------------------------------------------*/
160
roundAndPackUint64(int zSign,Bit64u absZ0,Bit64u absZ1,float_status_t & status)161 Bit64u roundAndPackUint64(int zSign, Bit64u absZ0, Bit64u absZ1, float_status_t &status)
162 {
163 int roundingMode = get_float_rounding_mode(status);
164 int roundNearestEven = (roundingMode == float_round_nearest_even);
165 int increment = ((Bit64s) absZ1 < 0);
166 if (!roundNearestEven) {
167 if (roundingMode == float_round_to_zero) {
168 increment = 0;
169 } else if (absZ1) {
170 if (zSign) {
171 increment = (roundingMode == float_round_down) && absZ1;
172 } else {
173 increment = (roundingMode == float_round_up) && absZ1;
174 }
175 }
176 }
177 if (increment) {
178 ++absZ0;
179 if (absZ0 == 0) {
180 float_raise(status, float_flag_invalid);
181 return uint64_indefinite;
182 }
183 absZ0 &= ~(((Bit64u) (absZ1<<1) == 0) & roundNearestEven);
184 }
185
186 if (zSign && absZ0) {
187 float_raise(status, float_flag_invalid);
188 return uint64_indefinite;
189 }
190
191 if (absZ1) {
192 float_raise(status, float_flag_inexact);
193 }
194 return absZ0;
195 }
196
197 #ifdef FLOAT16
198
199 /*----------------------------------------------------------------------------
200 | Normalizes the subnormal half-precision floating-point value represented
201 | by the denormalized significand `aSig'. The normalized exponent and
202 | significand are stored at the locations pointed to by `zExpPtr' and
203 | `zSigPtr', respectively.
204 *----------------------------------------------------------------------------*/
205
normalizeFloat16Subnormal(Bit16u aSig,Bit16s * zExpPtr,Bit16u * zSigPtr)206 void normalizeFloat16Subnormal(Bit16u aSig, Bit16s *zExpPtr, Bit16u *zSigPtr)
207 {
208 int shiftCount = countLeadingZeros16(aSig) - 5;
209 *zSigPtr = aSig<<shiftCount;
210 *zExpPtr = 1 - shiftCount;
211 }
212
213 /*----------------------------------------------------------------------------
214 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
215 | and significand `zSig', and returns the proper half-precision floating-
216 | point value corresponding to the abstract input. Ordinarily, the abstract
217 | value is simply rounded and packed into the half-precision format, with
218 | the inexact exception raised if the abstract input cannot be represented
219 | exactly. However, if the abstract value is too large, the overflow and
220 | inexact exceptions are raised and an infinity or maximal finite value is
221 | returned. If the abstract value is too small, the input value is rounded to
222 | a subnormal number, and the underflow and inexact exceptions are raised if
223 | the abstract input cannot be represented exactly as a subnormal single-
224 | precision floating-point number.
225 | The input significand `zSig' has its binary point between bits 14
226 | and 13, which is 4 bits to the left of the usual location. This shifted
227 | significand must be normalized or smaller. If `zSig' is not normalized,
228 | `zExp' must be 0; in that case, the result returned is a subnormal number,
229 | and it must not require rounding. In the usual case that `zSig' is
230 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
231 | The handling of underflow and overflow follows the IEC/IEEE Standard for
232 | Binary Floating-Point Arithmetic.
233 *----------------------------------------------------------------------------*/
234
roundAndPackFloat16(int zSign,Bit16s zExp,Bit16u zSig,float_status_t & status)235 float16 roundAndPackFloat16(int zSign, Bit16s zExp, Bit16u zSig, float_status_t &status)
236 {
237 Bit16s roundIncrement, roundBits, roundMask;
238
239 int roundingMode = get_float_rounding_mode(status);
240 int roundNearestEven = (roundingMode == float_round_nearest_even);
241 roundIncrement = 8;
242 roundMask = 0xF;
243
244 if (! roundNearestEven) {
245 if (roundingMode == float_round_to_zero) roundIncrement = 0;
246 else {
247 roundIncrement = roundMask;
248 if (zSign) {
249 if (roundingMode == float_round_up) roundIncrement = 0;
250 }
251 else {
252 if (roundingMode == float_round_down) roundIncrement = 0;
253 }
254 }
255 }
256 roundBits = zSig & roundMask;
257 if (0x1D <= (Bit16u) zExp) {
258 if ((0x1D < zExp)
259 || ((zExp == 0x1D) && ((Bit16s) (zSig + roundIncrement) < 0)))
260 {
261 float_raise(status, float_flag_overflow);
262 if (roundBits || float_exception_masked(status, float_flag_overflow)) {
263 float_raise(status, float_flag_inexact);
264 }
265 return packFloat16(zSign, 0x1F, 0) - (roundIncrement == 0);
266 }
267 if (zExp < 0) {
268 int isTiny = (zExp < -1) || (zSig + roundIncrement < 0x8000);
269 zSig = shift16RightJamming(zSig, -zExp);
270 zExp = 0;
271 roundBits = zSig & roundMask;
272 if (isTiny) {
273 if(get_flush_underflow_to_zero(status)) {
274 float_raise(status, float_flag_underflow | float_flag_inexact);
275 return packFloat16(zSign, 0, 0);
276 }
277 // signal the #P according to roundBits calculated AFTER denormalization
278 if (roundBits || !float_exception_masked(status, float_flag_underflow)) {
279 float_raise(status, float_flag_underflow);
280 }
281 }
282 }
283 }
284 if (roundBits) float_raise(status, float_flag_inexact);
285 Bit16u zSigRound = ((zSig + roundIncrement) & ~roundMask) >> 4;
286 zSigRound &= ~(((roundBits ^ 0x10) == 0) & roundNearestEven);
287 if (zSigRound == 0) zExp = 0;
288 return packFloat16(zSign, zExp, zSigRound);
289 }
290
291 #endif
292
293 /*----------------------------------------------------------------------------
294 | Normalizes the subnormal single-precision floating-point value represented
295 | by the denormalized significand `aSig'. The normalized exponent and
296 | significand are stored at the locations pointed to by `zExpPtr' and
297 | `zSigPtr', respectively.
298 *----------------------------------------------------------------------------*/
299
normalizeFloat32Subnormal(Bit32u aSig,Bit16s * zExpPtr,Bit32u * zSigPtr)300 void normalizeFloat32Subnormal(Bit32u aSig, Bit16s *zExpPtr, Bit32u *zSigPtr)
301 {
302 int shiftCount = countLeadingZeros32(aSig) - 8;
303 *zSigPtr = aSig<<shiftCount;
304 *zExpPtr = 1 - shiftCount;
305 }
306
307 /*----------------------------------------------------------------------------
308 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
309 | and significand `zSig', and returns the proper single-precision floating-
310 | point value corresponding to the abstract input. Ordinarily, the abstract
311 | value is simply rounded and packed into the single-precision format, with
312 | the inexact exception raised if the abstract input cannot be represented
313 | exactly. However, if the abstract value is too large, the overflow and
314 | inexact exceptions are raised and an infinity or maximal finite value is
315 | returned. If the abstract value is too small, the input value is rounded to
316 | a subnormal number, and the underflow and inexact exceptions are raised if
317 | the abstract input cannot be represented exactly as a subnormal single-
318 | precision floating-point number.
319 | The input significand `zSig' has its binary point between bits 30
320 | and 29, which is 7 bits to the left of the usual location. This shifted
321 | significand must be normalized or smaller. If `zSig' is not normalized,
322 | `zExp' must be 0; in that case, the result returned is a subnormal number,
323 | and it must not require rounding. In the usual case that `zSig' is
324 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
325 | The handling of underflow and overflow follows the IEC/IEEE Standard for
326 | Binary Floating-Point Arithmetic.
327 *----------------------------------------------------------------------------*/
328
roundAndPackFloat32(int zSign,Bit16s zExp,Bit32u zSig,float_status_t & status)329 float32 roundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status)
330 {
331 Bit32s roundIncrement, roundBits;
332 const Bit32s roundMask = 0x7F;
333
334 int roundingMode = get_float_rounding_mode(status);
335 int roundNearestEven = (roundingMode == float_round_nearest_even);
336 roundIncrement = 0x40;
337
338 if (! roundNearestEven) {
339 if (roundingMode == float_round_to_zero) roundIncrement = 0;
340 else {
341 roundIncrement = roundMask;
342 if (zSign) {
343 if (roundingMode == float_round_up) roundIncrement = 0;
344 }
345 else {
346 if (roundingMode == float_round_down) roundIncrement = 0;
347 }
348 }
349 }
350 roundBits = zSig & roundMask;
351 if (0xFD <= (Bit16u) zExp) {
352 if ((0xFD < zExp)
353 || ((zExp == 0xFD) && ((Bit32s) (zSig + roundIncrement) < 0)))
354 {
355 float_raise(status, float_flag_overflow);
356 if (roundBits || float_exception_masked(status, float_flag_overflow)) {
357 float_raise(status, float_flag_inexact);
358 if (roundIncrement != 0) set_float_rounding_up(status);
359 }
360 return packFloat32(zSign, 0xFF, 0) - (roundIncrement == 0);
361 }
362 if (zExp < 0) {
363 int isTiny = (zExp < -1) || (zSig + roundIncrement < 0x80000000);
364 if (isTiny) {
365 if (!float_exception_masked(status, float_flag_underflow)) {
366 float_raise(status, float_flag_underflow);
367 zExp += 192; // bias unmasked underflow
368 }
369 }
370 if (zExp < 0) {
371 zSig = shift32RightJamming(zSig, -zExp);
372 zExp = 0;
373 roundBits = zSig & roundMask;
374 if (isTiny) {
375 // masked underflow
376 if(get_flush_underflow_to_zero(status)) {
377 float_raise(status, float_flag_underflow | float_flag_inexact);
378 return packFloat32(zSign, 0, 0);
379 }
380 if (roundBits) float_raise(status, float_flag_underflow);
381 }
382 }
383 }
384 }
385 Bit32u zSigRound = ((zSig + roundIncrement) & ~roundMask) >> 7;
386 zSigRound &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
387 if (zSigRound == 0) zExp = 0;
388 if (roundBits) {
389 float_raise(status, float_flag_inexact);
390 if ((zSigRound << 7) > zSig) set_float_rounding_up(status);
391 }
392 return packFloat32(zSign, zExp, zSigRound);
393 }
394
395 /*----------------------------------------------------------------------------
396 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
397 | and significand `zSig', and returns the proper single-precision floating-
398 | point value corresponding to the abstract input. This routine is just like
399 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
400 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
401 | floating-point exponent.
402 *----------------------------------------------------------------------------*/
403
normalizeRoundAndPackFloat32(int zSign,Bit16s zExp,Bit32u zSig,float_status_t & status)404 float32 normalizeRoundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status)
405 {
406 int shiftCount = countLeadingZeros32(zSig) - 1;
407 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount, status);
408 }
409
410 /*----------------------------------------------------------------------------
411 | Normalizes the subnormal double-precision floating-point value represented
412 | by the denormalized significand `aSig'. The normalized exponent and
413 | significand are stored at the locations pointed to by `zExpPtr' and
414 | `zSigPtr', respectively.
415 *----------------------------------------------------------------------------*/
416
normalizeFloat64Subnormal(Bit64u aSig,Bit16s * zExpPtr,Bit64u * zSigPtr)417 void normalizeFloat64Subnormal(Bit64u aSig, Bit16s *zExpPtr, Bit64u *zSigPtr)
418 {
419 int shiftCount = countLeadingZeros64(aSig) - 11;
420 *zSigPtr = aSig<<shiftCount;
421 *zExpPtr = 1 - shiftCount;
422 }
423
424 /*----------------------------------------------------------------------------
425 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
426 | and significand `zSig', and returns the proper double-precision floating-
427 | point value corresponding to the abstract input. Ordinarily, the abstract
428 | value is simply rounded and packed into the double-precision format, with
429 | the inexact exception raised if the abstract input cannot be represented
430 | exactly. However, if the abstract value is too large, the overflow and
431 | inexact exceptions are raised and an infinity or maximal finite value is
432 | returned. If the abstract value is too small, the input value is rounded
433 | to a subnormal number, and the underflow and inexact exceptions are raised
434 | if the abstract input cannot be represented exactly as a subnormal double-
435 | precision floating-point number.
436 | The input significand `zSig' has its binary point between bits 62
437 | and 61, which is 10 bits to the left of the usual location. This shifted
438 | significand must be normalized or smaller. If `zSig' is not normalized,
439 | `zExp' must be 0; in that case, the result returned is a subnormal number,
440 | and it must not require rounding. In the usual case that `zSig' is
441 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
442 | The handling of underflow and overflow follows the IEC/IEEE Standard for
443 | Binary Floating-Point Arithmetic.
444 *----------------------------------------------------------------------------*/
445
roundAndPackFloat64(int zSign,Bit16s zExp,Bit64u zSig,float_status_t & status)446 float64 roundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status)
447 {
448 Bit16s roundIncrement, roundBits;
449 const Bit16s roundMask = 0x3FF;
450 int roundingMode = get_float_rounding_mode(status);
451 int roundNearestEven = (roundingMode == float_round_nearest_even);
452 roundIncrement = 0x200;
453 if (! roundNearestEven) {
454 if (roundingMode == float_round_to_zero) roundIncrement = 0;
455 else {
456 roundIncrement = roundMask;
457 if (zSign) {
458 if (roundingMode == float_round_up) roundIncrement = 0;
459 }
460 else {
461 if (roundingMode == float_round_down) roundIncrement = 0;
462 }
463 }
464 }
465 roundBits = (Bit16s)(zSig & roundMask);
466 if (0x7FD <= (Bit16u) zExp) {
467 if ((0x7FD < zExp)
468 || ((zExp == 0x7FD)
469 && ((Bit64s) (zSig + roundIncrement) < 0)))
470 {
471 float_raise(status, float_flag_overflow);
472 if (roundBits || float_exception_masked(status, float_flag_overflow)) {
473 float_raise(status, float_flag_inexact);
474 if (roundIncrement != 0) set_float_rounding_up(status);
475 }
476 return packFloat64(zSign, 0x7FF, 0) - (roundIncrement == 0);
477 }
478 if (zExp < 0) {
479 int isTiny = (zExp < -1) || (zSig + roundIncrement < BX_CONST64(0x8000000000000000));
480 if (isTiny) {
481 if (!float_exception_masked(status, float_flag_underflow)) {
482 float_raise(status, float_flag_underflow);
483 zExp += 1536; // bias unmasked underflow
484 }
485 }
486 if (zExp < 0) {
487 zSig = shift64RightJamming(zSig, -zExp);
488 zExp = 0;
489 roundBits = (Bit16s)(zSig & roundMask);
490 if (isTiny) {
491 // masked underflow
492 if(get_flush_underflow_to_zero(status)) {
493 float_raise(status, float_flag_underflow | float_flag_inexact);
494 return packFloat64(zSign, 0, 0);
495 }
496 if (roundBits) float_raise(status, float_flag_underflow);
497 }
498 }
499 }
500 }
501 Bit64u zSigRound = (zSig + roundIncrement)>>10;
502 zSigRound &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
503 if (zSigRound == 0) zExp = 0;
504 if (roundBits) {
505 float_raise(status, float_flag_inexact);
506 if ((zSigRound << 10) > zSig) set_float_rounding_up(status);
507 }
508 return packFloat64(zSign, zExp, zSigRound);
509 }
510
511 /*----------------------------------------------------------------------------
512 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
513 | and significand `zSig', and returns the proper double-precision floating-
514 | point value corresponding to the abstract input. This routine is just like
515 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
516 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
517 | floating-point exponent.
518 *----------------------------------------------------------------------------*/
519
normalizeRoundAndPackFloat64(int zSign,Bit16s zExp,Bit64u zSig,float_status_t & status)520 float64 normalizeRoundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status)
521 {
522 int shiftCount = countLeadingZeros64(zSig) - 1;
523 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount, status);
524 }
525
526 #ifdef FLOATX80
527
528 /*----------------------------------------------------------------------------
529 | Normalizes the subnormal extended double-precision floating-point value
530 | represented by the denormalized significand `aSig'. The normalized exponent
531 | and significand are stored at the locations pointed to by `zExpPtr' and
532 | `zSigPtr', respectively.
533 *----------------------------------------------------------------------------*/
534
normalizeFloatx80Subnormal(Bit64u aSig,Bit32s * zExpPtr,Bit64u * zSigPtr)535 void normalizeFloatx80Subnormal(Bit64u aSig, Bit32s *zExpPtr, Bit64u *zSigPtr)
536 {
537 int shiftCount = countLeadingZeros64(aSig);
538 *zSigPtr = aSig<<shiftCount;
539 *zExpPtr = 1 - shiftCount;
540 }
541
542 /*----------------------------------------------------------------------------
543 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
544 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
545 | and returns the proper extended double-precision floating-point value
546 | corresponding to the abstract input. Ordinarily, the abstract value is
547 | rounded and packed into the extended double-precision format, with the
548 | inexact exception raised if the abstract input cannot be represented
549 | exactly. However, if the abstract value is too large, the overflow and
550 | inexact exceptions are raised and an infinity or maximal finite value is
551 | returned. If the abstract value is too small, the input value is rounded to
552 | a subnormal number, and the underflow and inexact exceptions are raised if
553 | the abstract input cannot be represented exactly as a subnormal extended
554 | double-precision floating-point number.
555 | If `roundingPrecision' is 32 or 64, the result is rounded to the same
556 | number of bits as single or double precision, respectively. Otherwise, the
557 | result is rounded to the full precision of the extended double-precision
558 | format.
559 | The input significand must be normalized or smaller. If the input
560 | significand is not normalized, `zExp' must be 0; in that case, the result
561 | returned is a subnormal number, and it must not require rounding. The
562 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
563 | Floating-Point Arithmetic.
564 *----------------------------------------------------------------------------*/
565
SoftFloatRoundAndPackFloatx80(int roundingPrecision,int zSign,Bit32s zExp,Bit64u zSig0,Bit64u zSig1,float_status_t & status)566 floatx80 SoftFloatRoundAndPackFloatx80(int roundingPrecision,
567 int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status)
568 {
569 Bit64u roundIncrement, roundMask, roundBits;
570 int increment;
571 Bit64u zSigExact; /* support rounding-up response */
572
573 Bit8u roundingMode = get_float_rounding_mode(status);
574 int roundNearestEven = (roundingMode == float_round_nearest_even);
575 if (roundingPrecision == 64) {
576 roundIncrement = BX_CONST64(0x0000000000000400);
577 roundMask = BX_CONST64(0x00000000000007FF);
578 }
579 else if (roundingPrecision == 32) {
580 roundIncrement = BX_CONST64(0x0000008000000000);
581 roundMask = BX_CONST64(0x000000FFFFFFFFFF);
582 }
583 else goto precision80;
584
585 zSig0 |= (zSig1 != 0);
586 if (! roundNearestEven) {
587 if (roundingMode == float_round_to_zero) roundIncrement = 0;
588 else {
589 roundIncrement = roundMask;
590 if (zSign) {
591 if (roundingMode == float_round_up) roundIncrement = 0;
592 }
593 else {
594 if (roundingMode == float_round_down) roundIncrement = 0;
595 }
596 }
597 }
598 roundBits = zSig0 & roundMask;
599 if (0x7FFD <= (Bit32u) (zExp - 1)) {
600 if ((0x7FFE < zExp)
601 || ((zExp == 0x7FFE) && (zSig0 + roundIncrement < zSig0)))
602 {
603 goto overflow;
604 }
605 if (zExp <= 0) {
606 int isTiny = (zExp < 0) || (zSig0 <= zSig0 + roundIncrement);
607 zSig0 = shift64RightJamming(zSig0, 1 - zExp);
608 zSigExact = zSig0;
609 zExp = 0;
610 roundBits = zSig0 & roundMask;
611 if (isTiny) {
612 if (roundBits || (zSig0 && !float_exception_masked(status, float_flag_underflow)))
613 float_raise(status, float_flag_underflow);
614 }
615 zSig0 += roundIncrement;
616 if ((Bit64s) zSig0 < 0) zExp = 1;
617 roundIncrement = roundMask + 1;
618 if (roundNearestEven && (roundBits<<1 == roundIncrement))
619 roundMask |= roundIncrement;
620 zSig0 &= ~roundMask;
621 if (roundBits) {
622 float_raise(status, float_flag_inexact);
623 if (zSig0 > zSigExact) set_float_rounding_up(status);
624 }
625 return packFloatx80(zSign, zExp, zSig0);
626 }
627 }
628 if (roundBits) float_raise(status, float_flag_inexact);
629 zSigExact = zSig0;
630 zSig0 += roundIncrement;
631 if (zSig0 < roundIncrement) {
632 // Basically scale by shifting right and keep overflow
633 ++zExp;
634 zSig0 = BX_CONST64(0x8000000000000000);
635 zSigExact >>= 1; // must scale also, or else later tests will fail
636 }
637 roundIncrement = roundMask + 1;
638 if (roundNearestEven && (roundBits<<1 == roundIncrement))
639 roundMask |= roundIncrement;
640 zSig0 &= ~roundMask;
641 if (zSig0 > zSigExact) set_float_rounding_up(status);
642 if (zSig0 == 0) zExp = 0;
643 return packFloatx80(zSign, zExp, zSig0);
644 precision80:
645 increment = ((Bit64s) zSig1 < 0);
646 if (! roundNearestEven) {
647 if (roundingMode == float_round_to_zero) increment = 0;
648 else {
649 if (zSign) {
650 increment = (roundingMode == float_round_down) && zSig1;
651 }
652 else {
653 increment = (roundingMode == float_round_up) && zSig1;
654 }
655 }
656 }
657 if (0x7FFD <= (Bit32u) (zExp - 1)) {
658 if ((0x7FFE < zExp)
659 || ((zExp == 0x7FFE)
660 && (zSig0 == BX_CONST64(0xFFFFFFFFFFFFFFFF))
661 && increment))
662 {
663 roundMask = 0;
664 overflow:
665 float_raise(status, float_flag_overflow | float_flag_inexact);
666 if ((roundingMode == float_round_to_zero)
667 || (zSign && (roundingMode == float_round_up))
668 || (! zSign && (roundingMode == float_round_down)))
669 {
670 return packFloatx80(zSign, 0x7FFE, ~roundMask);
671 }
672 set_float_rounding_up(status);
673 return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
674 }
675 if (zExp <= 0) {
676 int isTiny = (zExp < 0) || (! increment)
677 || (zSig0 < BX_CONST64(0xFFFFFFFFFFFFFFFF));
678 shift64ExtraRightJamming(zSig0, zSig1, 1 - zExp, &zSig0, &zSig1);
679 zExp = 0;
680 if (isTiny) {
681 if (zSig1 || (zSig0 && !float_exception_masked(status, float_flag_underflow)))
682 float_raise(status, float_flag_underflow);
683 }
684 if (zSig1) float_raise(status, float_flag_inexact);
685 if (roundNearestEven) increment = ((Bit64s) zSig1 < 0);
686 else {
687 if (zSign) {
688 increment = (roundingMode == float_round_down) && zSig1;
689 } else {
690 increment = (roundingMode == float_round_up) && zSig1;
691 }
692 }
693 if (increment) {
694 zSigExact = zSig0++;
695 zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven);
696 if (zSig0 > zSigExact) set_float_rounding_up(status);
697 if ((Bit64s) zSig0 < 0) zExp = 1;
698 }
699 return packFloatx80(zSign, zExp, zSig0);
700 }
701 }
702 if (zSig1) float_raise(status, float_flag_inexact);
703 if (increment) {
704 zSigExact = zSig0++;
705 if (zSig0 == 0) {
706 zExp++;
707 zSig0 = BX_CONST64(0x8000000000000000);
708 zSigExact >>= 1; // must scale also, or else later tests will fail
709 }
710 else {
711 zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven);
712 }
713 if (zSig0 > zSigExact) set_float_rounding_up(status);
714 }
715 else {
716 if (zSig0 == 0) zExp = 0;
717 }
718 return packFloatx80(zSign, zExp, zSig0);
719 }
720
roundAndPackFloatx80(int roundingPrecision,int zSign,Bit32s zExp,Bit64u zSig0,Bit64u zSig1,float_status_t & status)721 floatx80 roundAndPackFloatx80(int roundingPrecision,
722 int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status)
723 {
724 float_status_t round_status = status;
725 floatx80 result = SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp, zSig0, zSig1, status);
726
727 // bias unmasked undeflow
728 if (status.float_exception_flags & ~status.float_exception_masks & float_flag_underflow) {
729 float_raise(round_status, float_flag_underflow);
730 return SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp + 0x6000, zSig0, zSig1, status = round_status);
731 }
732
733 // bias unmasked overflow
734 if (status.float_exception_flags & ~status.float_exception_masks & float_flag_overflow) {
735 float_raise(round_status, float_flag_overflow);
736 return SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp - 0x6000, zSig0, zSig1, status = round_status);
737 }
738
739 return result;
740 }
741
742 /*----------------------------------------------------------------------------
743 | Takes an abstract floating-point value having sign `zSign', exponent
744 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
745 | and returns the proper extended double-precision floating-point value
746 | corresponding to the abstract input. This routine is just like
747 | `roundAndPackFloatx80' except that the input significand does not have to be
748 | normalized.
749 *----------------------------------------------------------------------------*/
750
normalizeRoundAndPackFloatx80(int roundingPrecision,int zSign,Bit32s zExp,Bit64u zSig0,Bit64u zSig1,float_status_t & status)751 floatx80 normalizeRoundAndPackFloatx80(int roundingPrecision,
752 int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status)
753 {
754 if (zSig0 == 0) {
755 zSig0 = zSig1;
756 zSig1 = 0;
757 zExp -= 64;
758 }
759 int shiftCount = countLeadingZeros64(zSig0);
760 shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1);
761 zExp -= shiftCount;
762 return
763 roundAndPackFloatx80(roundingPrecision, zSign, zExp, zSig0, zSig1, status);
764 }
765
766 #endif
767
768 #ifdef FLOAT128
769
770 /*----------------------------------------------------------------------------
771 | Normalizes the subnormal quadruple-precision floating-point value
772 | represented by the denormalized significand formed by the concatenation of
773 | `aSig0' and `aSig1'. The normalized exponent is stored at the location
774 | pointed to by `zExpPtr'. The most significant 49 bits of the normalized
775 | significand are stored at the location pointed to by `zSig0Ptr', and the
776 | least significant 64 bits of the normalized significand are stored at the
777 | location pointed to by `zSig1Ptr'.
778 *----------------------------------------------------------------------------*/
779
normalizeFloat128Subnormal(Bit64u aSig0,Bit64u aSig1,Bit32s * zExpPtr,Bit64u * zSig0Ptr,Bit64u * zSig1Ptr)780 void normalizeFloat128Subnormal(
781 Bit64u aSig0, Bit64u aSig1, Bit32s *zExpPtr, Bit64u *zSig0Ptr, Bit64u *zSig1Ptr)
782 {
783 int shiftCount;
784
785 if (aSig0 == 0) {
786 shiftCount = countLeadingZeros64(aSig1) - 15;
787 if (shiftCount < 0) {
788 *zSig0Ptr = aSig1 >>(-shiftCount);
789 *zSig1Ptr = aSig1 << (shiftCount & 63);
790 }
791 else {
792 *zSig0Ptr = aSig1 << shiftCount;
793 *zSig1Ptr = 0;
794 }
795 *zExpPtr = - shiftCount - 63;
796 }
797 else {
798 shiftCount = countLeadingZeros64(aSig0) - 15;
799 shortShift128Left(aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr);
800 *zExpPtr = 1 - shiftCount;
801 }
802 }
803
804 /*----------------------------------------------------------------------------
805 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
806 | and extended significand formed by the concatenation of `zSig0', `zSig1',
807 | and `zSig2', and returns the proper quadruple-precision floating-point value
808 | corresponding to the abstract input. Ordinarily, the abstract value is
809 | simply rounded and packed into the quadruple-precision format, with the
810 | inexact exception raised if the abstract input cannot be represented
811 | exactly. However, if the abstract value is too large, the overflow and
812 | inexact exceptions are raised and an infinity or maximal finite value is
813 | returned. If the abstract value is too small, the input value is rounded to
814 | a subnormal number, and the underflow and inexact exceptions are raised if
815 | the abstract input cannot be represented exactly as a subnormal quadruple-
816 | precision floating-point number.
817 | The input significand must be normalized or smaller. If the input
818 | significand is not normalized, `zExp' must be 0; in that case, the result
819 | returned is a subnormal number, and it must not require rounding. In the
820 | usual case that the input significand is normalized, `zExp' must be 1 less
821 | than the ``true'' floating-point exponent. The handling of underflow and
822 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
823 *----------------------------------------------------------------------------*/
824
roundAndPackFloat128(int zSign,Bit32s zExp,Bit64u zSig0,Bit64u zSig1,Bit64u zSig2,float_status_t & status)825 float128 roundAndPackFloat128(
826 int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, Bit64u zSig2, float_status_t &status)
827 {
828 int increment = ((Bit64s) zSig2 < 0);
829 if (0x7FFD <= (Bit32u) zExp) {
830 if ((0x7FFD < zExp)
831 || ((zExp == 0x7FFD)
832 && eq128(BX_CONST64(0x0001FFFFFFFFFFFF),
833 BX_CONST64(0xFFFFFFFFFFFFFFFF), zSig0, zSig1)
834 && increment))
835 {
836 float_raise(status, float_flag_overflow | float_flag_inexact);
837 return packFloat128(zSign, 0x7FFF, 0, 0);
838 }
839 if (zExp < 0) {
840 int isTiny = (zExp < -1)
841 || ! increment
842 || lt128(zSig0, zSig1,
843 BX_CONST64(0x0001FFFFFFFFFFFF),
844 BX_CONST64(0xFFFFFFFFFFFFFFFF));
845 shift128ExtraRightJamming(
846 zSig0, zSig1, zSig2, -zExp, &zSig0, &zSig1, &zSig2);
847 zExp = 0;
848 if (isTiny && zSig2) float_raise(status, float_flag_underflow);
849 increment = ((Bit64s) zSig2 < 0);
850 }
851 }
852 if (zSig2) float_raise(status, float_flag_inexact);
853 if (increment) {
854 add128(zSig0, zSig1, 0, 1, &zSig0, &zSig1);
855 zSig1 &= ~((zSig2 + zSig2 == 0) & 1);
856 }
857 else {
858 if ((zSig0 | zSig1) == 0) zExp = 0;
859 }
860 return packFloat128(zSign, zExp, zSig0, zSig1);
861 }
862
863 /*----------------------------------------------------------------------------
864 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
865 | and significand formed by the concatenation of `zSig0' and `zSig1', and
866 | returns the proper quadruple-precision floating-point value corresponding
867 | to the abstract input. This routine is just like `roundAndPackFloat128'
868 | except that the input significand has fewer bits and does not have to be
869 | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
870 | point exponent.
871 *----------------------------------------------------------------------------*/
872
normalizeRoundAndPackFloat128(int zSign,Bit32s zExp,Bit64u zSig0,Bit64u zSig1,float_status_t & status)873 float128 normalizeRoundAndPackFloat128(
874 int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status)
875 {
876 Bit64u zSig2;
877
878 if (zSig0 == 0) {
879 zSig0 = zSig1;
880 zSig1 = 0;
881 zExp -= 64;
882 }
883 int shiftCount = countLeadingZeros64(zSig0) - 15;
884 if (0 <= shiftCount) {
885 zSig2 = 0;
886 shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1);
887 }
888 else {
889 shift128ExtraRightJamming(
890 zSig0, zSig1, 0, -shiftCount, &zSig0, &zSig1, &zSig2);
891 }
892 zExp -= shiftCount;
893 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
894 }
895
896 #endif
897