1 /*============================================================================
2 This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
3 Package, Release 2b.
4 
5 Written by John R. Hauser.  This work was made possible in part by the
6 International Computer Science Institute, located at Suite 600, 1947 Center
7 Street, Berkeley, California 94704.  Funding was partially provided by the
8 National Science Foundation under grant MIP-9311980.  The original version
9 of this code was written as part of a project to build a fixed-point vector
10 processor in collaboration with the University of California at Berkeley,
11 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
12 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
13 arithmetic/SoftFloat.html'.
14 
15 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
16 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
17 RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
18 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
19 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
20 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
21 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
22 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
23 
24 Derivative works are acceptable, even for commercial purposes, so long as
25 (1) the source code for the derivative work includes prominent notice that
26 the work is derivative, and (2) the source code includes prominent notice with
27 these four paragraphs for those parts of this code that are retained.
28 =============================================================================*/
29 
30 #define FLOAT128
31 
32 /*============================================================================
33  * Adapted for Bochs (x86 achitecture simulator) by
34  *            Stanislav Shwartsman [sshwarts at sourceforge net]
35  * ==========================================================================*/
36 
37 #include "softfloat.h"
38 #include "softfloat-round-pack.h"
39 
40 /*----------------------------------------------------------------------------
41 | Primitive arithmetic functions, including multi-word arithmetic, and
42 | division and square root approximations. (Can be specialized to target
43 | if desired).
44 *----------------------------------------------------------------------------*/
45 #include "softfloat-macros.h"
46 
47 /*----------------------------------------------------------------------------
48 | Functions and definitions to determine:  (1) whether tininess for underflow
49 | is detected before or after rounding by default, (2) what (if anything)
50 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
51 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
52 | are propagated from function inputs to output.  These details are target-
53 | specific.
54 *----------------------------------------------------------------------------*/
55 #include "softfloat-specialize.h"
56 
57 /*----------------------------------------------------------------------------
58 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
59 | and 7, and returns the properly rounded 32-bit integer corresponding to the
60 | input.  If `zSign' is 1, the input is negated before being converted to an
61 | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
62 | is simply rounded to an integer, with the inexact exception raised if the
63 | input cannot be represented exactly as an integer.  However, if the fixed-
64 | point input is too large, the invalid exception is raised and the integer
65 | indefinite value is returned.
66 *----------------------------------------------------------------------------*/
67 
roundAndPackInt32(int zSign,Bit64u exactAbsZ,float_status_t & status)68 Bit32s roundAndPackInt32(int zSign, Bit64u exactAbsZ, float_status_t &status)
69 {
70     int roundingMode = get_float_rounding_mode(status);
71     int roundNearestEven = (roundingMode == float_round_nearest_even);
72     int roundIncrement = 0x40;
73     if (! roundNearestEven) {
74         if (roundingMode == float_round_to_zero) roundIncrement = 0;
75         else {
76             roundIncrement = 0x7F;
77             if (zSign) {
78                 if (roundingMode == float_round_up) roundIncrement = 0;
79             }
80             else {
81                 if (roundingMode == float_round_down) roundIncrement = 0;
82             }
83         }
84     }
85     int roundBits = (int)(exactAbsZ & 0x7F);
86     Bit64u absZ = (exactAbsZ + roundIncrement)>>7;
87     absZ &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
88     Bit32s z = (Bit32s) absZ;
89     if (zSign) z = -z;
90     if ((absZ>>32) || (z && ((z < 0) ^ zSign))) {
91         float_raise(status, float_flag_invalid);
92         return (Bit32s)(int32_indefinite);
93     }
94     if (roundBits) {
95         float_raise(status, float_flag_inexact);
96         if ((absZ << 7) > exactAbsZ)
97             set_float_rounding_up(status);
98     }
99     return z;
100 }
101 
102 /*----------------------------------------------------------------------------
103 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
104 | `absZ1', with binary point between bits 63 and 64 (between the input words),
105 | and returns the properly rounded 64-bit integer corresponding to the input.
106 | If `zSign' is 1, the input is negated before being converted to an integer.
107 | Ordinarily, the fixed-point input is simply rounded to an integer, with
108 | the inexact exception raised if the input cannot be represented exactly as
109 | an integer.  However, if the fixed-point input is too large, the invalid
110 | exception is raised and the integer indefinite value is returned.
111 *----------------------------------------------------------------------------*/
112 
roundAndPackInt64(int zSign,Bit64u absZ0,Bit64u absZ1,float_status_t & status)113 Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, float_status_t &status)
114 {
115     Bit64s z;
116     int roundingMode = get_float_rounding_mode(status);
117     int roundNearestEven = (roundingMode == float_round_nearest_even);
118     int increment = ((Bit64s) absZ1 < 0);
119     if (! roundNearestEven) {
120         if (roundingMode == float_round_to_zero) increment = 0;
121         else {
122             if (zSign) {
123                 increment = (roundingMode == float_round_down) && absZ1;
124             }
125             else {
126                 increment = (roundingMode == float_round_up) && absZ1;
127             }
128         }
129     }
130     Bit64u exactAbsZ0 = absZ0;
131     if (increment) {
132         ++absZ0;
133         if (absZ0 == 0) goto overflow;
134         absZ0 &= ~(((Bit64u) (absZ1<<1) == 0) & roundNearestEven);
135     }
136     z = absZ0;
137     if (zSign) z = -z;
138     if (z && ((z < 0) ^ zSign)) {
139  overflow:
140         float_raise(status, float_flag_invalid);
141         return (Bit64s)(int64_indefinite);
142     }
143     if (absZ1) {
144         float_raise(status, float_flag_inexact);
145         if (absZ0 > exactAbsZ0)
146             set_float_rounding_up(status);
147     }
148     return z;
149 }
150 
151 /*----------------------------------------------------------------------------
152 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
153 | `absZ1', with binary point between bits 63 and 64 (between the input words),
154 | and returns the properly rounded 64-bit unsigned integer corresponding to the
155 | input.  Ordinarily, the fixed-point input is simply rounded to an integer,
156 | with the inexact exception raised if the input cannot be represented exactly
157 | as an integer. However, if the fixed-point input is too large, the invalid
158 | exception is raised and the largest unsigned integer is returned.
159 *----------------------------------------------------------------------------*/
160 
roundAndPackUint64(int zSign,Bit64u absZ0,Bit64u absZ1,float_status_t & status)161 Bit64u roundAndPackUint64(int zSign, Bit64u absZ0, Bit64u absZ1, float_status_t &status)
162 {
163     int roundingMode = get_float_rounding_mode(status);
164     int roundNearestEven = (roundingMode == float_round_nearest_even);
165     int increment = ((Bit64s) absZ1 < 0);
166     if (!roundNearestEven) {
167         if (roundingMode == float_round_to_zero) {
168             increment = 0;
169         } else if (absZ1) {
170             if (zSign) {
171                 increment = (roundingMode == float_round_down) && absZ1;
172             } else {
173                 increment = (roundingMode == float_round_up) && absZ1;
174             }
175         }
176     }
177     if (increment) {
178         ++absZ0;
179         if (absZ0 == 0) {
180             float_raise(status, float_flag_invalid);
181             return uint64_indefinite;
182         }
183         absZ0 &= ~(((Bit64u) (absZ1<<1) == 0) & roundNearestEven);
184     }
185 
186     if (zSign && absZ0) {
187         float_raise(status, float_flag_invalid);
188         return uint64_indefinite;
189     }
190 
191     if (absZ1) {
192         float_raise(status, float_flag_inexact);
193     }
194     return absZ0;
195 }
196 
197 #ifdef FLOAT16
198 
199 /*----------------------------------------------------------------------------
200 | Normalizes the subnormal half-precision floating-point value represented
201 | by the denormalized significand `aSig'.  The normalized exponent and
202 | significand are stored at the locations pointed to by `zExpPtr' and
203 | `zSigPtr', respectively.
204 *----------------------------------------------------------------------------*/
205 
normalizeFloat16Subnormal(Bit16u aSig,Bit16s * zExpPtr,Bit16u * zSigPtr)206 void normalizeFloat16Subnormal(Bit16u aSig, Bit16s *zExpPtr, Bit16u *zSigPtr)
207 {
208     int shiftCount = countLeadingZeros16(aSig) - 5;
209     *zSigPtr = aSig<<shiftCount;
210     *zExpPtr = 1 - shiftCount;
211 }
212 
213 /*----------------------------------------------------------------------------
214 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
215 | and significand `zSig', and returns the proper half-precision floating-
216 | point value corresponding to the abstract input.  Ordinarily, the abstract
217 | value is simply rounded and packed into the half-precision format, with
218 | the inexact exception raised if the abstract input cannot be represented
219 | exactly.  However, if the abstract value is too large, the overflow and
220 | inexact exceptions are raised and an infinity or maximal finite value is
221 | returned.  If the abstract value is too small, the input value is rounded to
222 | a subnormal number, and the underflow and inexact exceptions are raised if
223 | the abstract input cannot be represented exactly as a subnormal single-
224 | precision floating-point number.
225 |     The input significand `zSig' has its binary point between bits 14
226 | and 13, which is 4 bits to the left of the usual location.  This shifted
227 | significand must be normalized or smaller.  If `zSig' is not normalized,
228 | `zExp' must be 0; in that case, the result returned is a subnormal number,
229 | and it must not require rounding.  In the usual case that `zSig' is
230 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
231 | The handling of underflow and overflow follows the IEC/IEEE Standard for
232 | Binary Floating-Point Arithmetic.
233 *----------------------------------------------------------------------------*/
234 
roundAndPackFloat16(int zSign,Bit16s zExp,Bit16u zSig,float_status_t & status)235 float16 roundAndPackFloat16(int zSign, Bit16s zExp, Bit16u zSig, float_status_t &status)
236 {
237     Bit16s roundIncrement, roundBits, roundMask;
238 
239     int roundingMode = get_float_rounding_mode(status);
240     int roundNearestEven = (roundingMode == float_round_nearest_even);
241     roundIncrement = 8;
242     roundMask = 0xF;
243 
244     if (! roundNearestEven) {
245         if (roundingMode == float_round_to_zero) roundIncrement = 0;
246         else {
247             roundIncrement = roundMask;
248             if (zSign) {
249                 if (roundingMode == float_round_up) roundIncrement = 0;
250             }
251             else {
252                 if (roundingMode == float_round_down) roundIncrement = 0;
253             }
254         }
255     }
256     roundBits = zSig & roundMask;
257     if (0x1D <= (Bit16u) zExp) {
258         if ((0x1D < zExp)
259              || ((zExp == 0x1D) && ((Bit16s) (zSig + roundIncrement) < 0)))
260         {
261             float_raise(status, float_flag_overflow);
262             if (roundBits || float_exception_masked(status, float_flag_overflow)) {
263                 float_raise(status, float_flag_inexact);
264             }
265             return packFloat16(zSign, 0x1F, 0) - (roundIncrement == 0);
266         }
267         if (zExp < 0) {
268             int isTiny = (zExp < -1) || (zSig + roundIncrement < 0x8000);
269             zSig = shift16RightJamming(zSig, -zExp);
270             zExp = 0;
271             roundBits = zSig & roundMask;
272             if (isTiny) {
273                 if(get_flush_underflow_to_zero(status)) {
274                     float_raise(status, float_flag_underflow | float_flag_inexact);
275                     return packFloat16(zSign, 0, 0);
276                 }
277                 // signal the #P according to roundBits calculated AFTER denormalization
278                 if (roundBits || !float_exception_masked(status, float_flag_underflow)) {
279                     float_raise(status, float_flag_underflow);
280                 }
281             }
282         }
283     }
284     if (roundBits) float_raise(status, float_flag_inexact);
285     Bit16u zSigRound = ((zSig + roundIncrement) & ~roundMask) >> 4;
286     zSigRound &= ~(((roundBits ^ 0x10) == 0) & roundNearestEven);
287     if (zSigRound == 0) zExp = 0;
288     return packFloat16(zSign, zExp, zSigRound);
289 }
290 
291 #endif
292 
293 /*----------------------------------------------------------------------------
294 | Normalizes the subnormal single-precision floating-point value represented
295 | by the denormalized significand `aSig'.  The normalized exponent and
296 | significand are stored at the locations pointed to by `zExpPtr' and
297 | `zSigPtr', respectively.
298 *----------------------------------------------------------------------------*/
299 
normalizeFloat32Subnormal(Bit32u aSig,Bit16s * zExpPtr,Bit32u * zSigPtr)300 void normalizeFloat32Subnormal(Bit32u aSig, Bit16s *zExpPtr, Bit32u *zSigPtr)
301 {
302     int shiftCount = countLeadingZeros32(aSig) - 8;
303     *zSigPtr = aSig<<shiftCount;
304     *zExpPtr = 1 - shiftCount;
305 }
306 
307 /*----------------------------------------------------------------------------
308 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
309 | and significand `zSig', and returns the proper single-precision floating-
310 | point value corresponding to the abstract input.  Ordinarily, the abstract
311 | value is simply rounded and packed into the single-precision format, with
312 | the inexact exception raised if the abstract input cannot be represented
313 | exactly.  However, if the abstract value is too large, the overflow and
314 | inexact exceptions are raised and an infinity or maximal finite value is
315 | returned.  If the abstract value is too small, the input value is rounded to
316 | a subnormal number, and the underflow and inexact exceptions are raised if
317 | the abstract input cannot be represented exactly as a subnormal single-
318 | precision floating-point number.
319 |     The input significand `zSig' has its binary point between bits 30
320 | and 29, which is 7 bits to the left of the usual location.  This shifted
321 | significand must be normalized or smaller.  If `zSig' is not normalized,
322 | `zExp' must be 0; in that case, the result returned is a subnormal number,
323 | and it must not require rounding.  In the usual case that `zSig' is
324 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
325 | The handling of underflow and overflow follows the IEC/IEEE Standard for
326 | Binary Floating-Point Arithmetic.
327 *----------------------------------------------------------------------------*/
328 
roundAndPackFloat32(int zSign,Bit16s zExp,Bit32u zSig,float_status_t & status)329 float32 roundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status)
330 {
331     Bit32s roundIncrement, roundBits;
332     const Bit32s roundMask = 0x7F;
333 
334     int roundingMode = get_float_rounding_mode(status);
335     int roundNearestEven = (roundingMode == float_round_nearest_even);
336     roundIncrement = 0x40;
337 
338     if (! roundNearestEven) {
339         if (roundingMode == float_round_to_zero) roundIncrement = 0;
340         else {
341             roundIncrement = roundMask;
342             if (zSign) {
343                 if (roundingMode == float_round_up) roundIncrement = 0;
344             }
345             else {
346                 if (roundingMode == float_round_down) roundIncrement = 0;
347             }
348         }
349     }
350     roundBits = zSig & roundMask;
351     if (0xFD <= (Bit16u) zExp) {
352         if ((0xFD < zExp)
353              || ((zExp == 0xFD) && ((Bit32s) (zSig + roundIncrement) < 0)))
354         {
355             float_raise(status, float_flag_overflow);
356             if (roundBits || float_exception_masked(status, float_flag_overflow)) {
357                 float_raise(status, float_flag_inexact);
358                 if (roundIncrement != 0) set_float_rounding_up(status);
359             }
360             return packFloat32(zSign, 0xFF, 0) - (roundIncrement == 0);
361         }
362         if (zExp < 0) {
363             int isTiny = (zExp < -1) || (zSig + roundIncrement < 0x80000000);
364             if (isTiny) {
365                 if (!float_exception_masked(status, float_flag_underflow)) {
366                     float_raise(status, float_flag_underflow);
367                     zExp += 192; // bias unmasked underflow
368                 }
369             }
370             if (zExp < 0) {
371                 zSig = shift32RightJamming(zSig, -zExp);
372                 zExp = 0;
373                 roundBits = zSig & roundMask;
374                 if (isTiny) {
375                     // masked underflow
376                     if(get_flush_underflow_to_zero(status)) {
377                         float_raise(status, float_flag_underflow | float_flag_inexact);
378                         return packFloat32(zSign, 0, 0);
379                     }
380                     if (roundBits) float_raise(status, float_flag_underflow);
381                 }
382             }
383         }
384     }
385     Bit32u zSigRound = ((zSig + roundIncrement) & ~roundMask) >> 7;
386     zSigRound &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
387     if (zSigRound == 0) zExp = 0;
388     if (roundBits) {
389         float_raise(status, float_flag_inexact);
390         if ((zSigRound << 7) > zSig) set_float_rounding_up(status);
391     }
392     return packFloat32(zSign, zExp, zSigRound);
393 }
394 
395 /*----------------------------------------------------------------------------
396 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
397 | and significand `zSig', and returns the proper single-precision floating-
398 | point value corresponding to the abstract input.  This routine is just like
399 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
400 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
401 | floating-point exponent.
402 *----------------------------------------------------------------------------*/
403 
normalizeRoundAndPackFloat32(int zSign,Bit16s zExp,Bit32u zSig,float_status_t & status)404 float32 normalizeRoundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status)
405 {
406     int shiftCount = countLeadingZeros32(zSig) - 1;
407     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount, status);
408 }
409 
410 /*----------------------------------------------------------------------------
411 | Normalizes the subnormal double-precision floating-point value represented
412 | by the denormalized significand `aSig'.  The normalized exponent and
413 | significand are stored at the locations pointed to by `zExpPtr' and
414 | `zSigPtr', respectively.
415 *----------------------------------------------------------------------------*/
416 
normalizeFloat64Subnormal(Bit64u aSig,Bit16s * zExpPtr,Bit64u * zSigPtr)417 void normalizeFloat64Subnormal(Bit64u aSig, Bit16s *zExpPtr, Bit64u *zSigPtr)
418 {
419     int shiftCount = countLeadingZeros64(aSig) - 11;
420     *zSigPtr = aSig<<shiftCount;
421     *zExpPtr = 1 - shiftCount;
422 }
423 
424 /*----------------------------------------------------------------------------
425 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
426 | and significand `zSig', and returns the proper double-precision floating-
427 | point value corresponding to the abstract input.  Ordinarily, the abstract
428 | value is simply rounded and packed into the double-precision format, with
429 | the inexact exception raised if the abstract input cannot be represented
430 | exactly.  However, if the abstract value is too large, the overflow and
431 | inexact exceptions are raised and an infinity or maximal finite value is
432 | returned.  If the abstract value is too small, the input value is rounded
433 | to a subnormal number, and the underflow and inexact exceptions are raised
434 | if the abstract input cannot be represented exactly as a subnormal double-
435 | precision floating-point number.
436 |     The input significand `zSig' has its binary point between bits 62
437 | and 61, which is 10 bits to the left of the usual location.  This shifted
438 | significand must be normalized or smaller.  If `zSig' is not normalized,
439 | `zExp' must be 0; in that case, the result returned is a subnormal number,
440 | and it must not require rounding.  In the usual case that `zSig' is
441 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
442 | The handling of underflow and overflow follows the IEC/IEEE Standard for
443 | Binary Floating-Point Arithmetic.
444 *----------------------------------------------------------------------------*/
445 
roundAndPackFloat64(int zSign,Bit16s zExp,Bit64u zSig,float_status_t & status)446 float64 roundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status)
447 {
448     Bit16s roundIncrement, roundBits;
449     const Bit16s roundMask = 0x3FF;
450     int roundingMode = get_float_rounding_mode(status);
451     int roundNearestEven = (roundingMode == float_round_nearest_even);
452     roundIncrement = 0x200;
453     if (! roundNearestEven) {
454         if (roundingMode == float_round_to_zero) roundIncrement = 0;
455         else {
456             roundIncrement = roundMask;
457             if (zSign) {
458                 if (roundingMode == float_round_up) roundIncrement = 0;
459             }
460             else {
461                 if (roundingMode == float_round_down) roundIncrement = 0;
462             }
463         }
464     }
465     roundBits = (Bit16s)(zSig & roundMask);
466     if (0x7FD <= (Bit16u) zExp) {
467         if ((0x7FD < zExp)
468              || ((zExp == 0x7FD)
469                   && ((Bit64s) (zSig + roundIncrement) < 0)))
470         {
471             float_raise(status, float_flag_overflow);
472             if (roundBits || float_exception_masked(status, float_flag_overflow)) {
473                 float_raise(status, float_flag_inexact);
474                 if (roundIncrement != 0) set_float_rounding_up(status);
475             }
476             return packFloat64(zSign, 0x7FF, 0) - (roundIncrement == 0);
477         }
478         if (zExp < 0) {
479             int isTiny = (zExp < -1) || (zSig + roundIncrement < BX_CONST64(0x8000000000000000));
480             if (isTiny) {
481                 if (!float_exception_masked(status, float_flag_underflow)) {
482                     float_raise(status, float_flag_underflow);
483                     zExp += 1536; // bias unmasked underflow
484                 }
485             }
486             if (zExp < 0) {
487                 zSig = shift64RightJamming(zSig, -zExp);
488                 zExp = 0;
489                 roundBits = (Bit16s)(zSig & roundMask);
490                 if (isTiny) {
491                     // masked underflow
492                     if(get_flush_underflow_to_zero(status)) {
493                         float_raise(status, float_flag_underflow | float_flag_inexact);
494                         return packFloat64(zSign, 0, 0);
495                     }
496                     if (roundBits) float_raise(status, float_flag_underflow);
497                 }
498             }
499         }
500     }
501     Bit64u zSigRound = (zSig + roundIncrement)>>10;
502     zSigRound &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
503     if (zSigRound == 0) zExp = 0;
504     if (roundBits) {
505         float_raise(status, float_flag_inexact);
506         if ((zSigRound << 10) > zSig) set_float_rounding_up(status);
507     }
508     return packFloat64(zSign, zExp, zSigRound);
509 }
510 
511 /*----------------------------------------------------------------------------
512 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
513 | and significand `zSig', and returns the proper double-precision floating-
514 | point value corresponding to the abstract input.  This routine is just like
515 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
516 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
517 | floating-point exponent.
518 *----------------------------------------------------------------------------*/
519 
normalizeRoundAndPackFloat64(int zSign,Bit16s zExp,Bit64u zSig,float_status_t & status)520 float64 normalizeRoundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status)
521 {
522     int shiftCount = countLeadingZeros64(zSig) - 1;
523     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount, status);
524 }
525 
526 #ifdef FLOATX80
527 
528 /*----------------------------------------------------------------------------
529 | Normalizes the subnormal extended double-precision floating-point value
530 | represented by the denormalized significand `aSig'.  The normalized exponent
531 | and significand are stored at the locations pointed to by `zExpPtr' and
532 | `zSigPtr', respectively.
533 *----------------------------------------------------------------------------*/
534 
normalizeFloatx80Subnormal(Bit64u aSig,Bit32s * zExpPtr,Bit64u * zSigPtr)535 void normalizeFloatx80Subnormal(Bit64u aSig, Bit32s *zExpPtr, Bit64u *zSigPtr)
536 {
537     int shiftCount = countLeadingZeros64(aSig);
538     *zSigPtr = aSig<<shiftCount;
539     *zExpPtr = 1 - shiftCount;
540 }
541 
542 /*----------------------------------------------------------------------------
543 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
544 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
545 | and returns the proper extended double-precision floating-point value
546 | corresponding to the abstract input.  Ordinarily, the abstract value is
547 | rounded and packed into the extended double-precision format, with the
548 | inexact exception raised if the abstract input cannot be represented
549 | exactly.  However, if the abstract value is too large, the overflow and
550 | inexact exceptions are raised and an infinity or maximal finite value is
551 | returned.  If the abstract value is too small, the input value is rounded to
552 | a subnormal number, and the underflow and inexact exceptions are raised if
553 | the abstract input cannot be represented exactly as a subnormal extended
554 | double-precision floating-point number.
555 |     If `roundingPrecision' is 32 or 64, the result is rounded to the same
556 | number of bits as single or double precision, respectively.  Otherwise, the
557 | result is rounded to the full precision of the extended double-precision
558 | format.
559 |     The input significand must be normalized or smaller.  If the input
560 | significand is not normalized, `zExp' must be 0; in that case, the result
561 | returned is a subnormal number, and it must not require rounding.  The
562 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
563 | Floating-Point Arithmetic.
564 *----------------------------------------------------------------------------*/
565 
SoftFloatRoundAndPackFloatx80(int roundingPrecision,int zSign,Bit32s zExp,Bit64u zSig0,Bit64u zSig1,float_status_t & status)566 floatx80 SoftFloatRoundAndPackFloatx80(int roundingPrecision,
567         int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status)
568 {
569     Bit64u roundIncrement, roundMask, roundBits;
570     int increment;
571     Bit64u zSigExact; /* support rounding-up response */
572 
573     Bit8u roundingMode = get_float_rounding_mode(status);
574     int roundNearestEven = (roundingMode == float_round_nearest_even);
575     if (roundingPrecision == 64) {
576         roundIncrement = BX_CONST64(0x0000000000000400);
577         roundMask = BX_CONST64(0x00000000000007FF);
578     }
579     else if (roundingPrecision == 32) {
580         roundIncrement = BX_CONST64(0x0000008000000000);
581         roundMask = BX_CONST64(0x000000FFFFFFFFFF);
582     }
583     else goto precision80;
584 
585     zSig0 |= (zSig1 != 0);
586     if (! roundNearestEven) {
587         if (roundingMode == float_round_to_zero) roundIncrement = 0;
588         else {
589             roundIncrement = roundMask;
590             if (zSign) {
591                 if (roundingMode == float_round_up) roundIncrement = 0;
592             }
593             else {
594                 if (roundingMode == float_round_down) roundIncrement = 0;
595             }
596         }
597     }
598     roundBits = zSig0 & roundMask;
599     if (0x7FFD <= (Bit32u) (zExp - 1)) {
600         if ((0x7FFE < zExp)
601              || ((zExp == 0x7FFE) && (zSig0 + roundIncrement < zSig0)))
602         {
603             goto overflow;
604         }
605         if (zExp <= 0) {
606             int isTiny = (zExp < 0) || (zSig0 <= zSig0 + roundIncrement);
607             zSig0 = shift64RightJamming(zSig0, 1 - zExp);
608             zSigExact = zSig0;
609             zExp = 0;
610             roundBits = zSig0 & roundMask;
611             if (isTiny) {
612                 if (roundBits || (zSig0 && !float_exception_masked(status, float_flag_underflow)))
613                     float_raise(status, float_flag_underflow);
614             }
615             zSig0 += roundIncrement;
616             if ((Bit64s) zSig0 < 0) zExp = 1;
617             roundIncrement = roundMask + 1;
618             if (roundNearestEven && (roundBits<<1 == roundIncrement))
619                 roundMask |= roundIncrement;
620             zSig0 &= ~roundMask;
621             if (roundBits) {
622                 float_raise(status, float_flag_inexact);
623                 if (zSig0 > zSigExact) set_float_rounding_up(status);
624             }
625             return packFloatx80(zSign, zExp, zSig0);
626         }
627     }
628     if (roundBits) float_raise(status, float_flag_inexact);
629     zSigExact = zSig0;
630     zSig0 += roundIncrement;
631     if (zSig0 < roundIncrement) {
632         // Basically scale by shifting right and keep overflow
633         ++zExp;
634         zSig0 = BX_CONST64(0x8000000000000000);
635         zSigExact >>= 1; // must scale also, or else later tests will fail
636     }
637     roundIncrement = roundMask + 1;
638     if (roundNearestEven && (roundBits<<1 == roundIncrement))
639         roundMask |= roundIncrement;
640     zSig0 &= ~roundMask;
641     if (zSig0 > zSigExact) set_float_rounding_up(status);
642     if (zSig0 == 0) zExp = 0;
643     return packFloatx80(zSign, zExp, zSig0);
644  precision80:
645     increment = ((Bit64s) zSig1 < 0);
646     if (! roundNearestEven) {
647         if (roundingMode == float_round_to_zero) increment = 0;
648         else {
649             if (zSign) {
650                 increment = (roundingMode == float_round_down) && zSig1;
651             }
652             else {
653                 increment = (roundingMode == float_round_up) && zSig1;
654             }
655         }
656     }
657     if (0x7FFD <= (Bit32u) (zExp - 1)) {
658         if ((0x7FFE < zExp)
659              || ((zExp == 0x7FFE)
660                   && (zSig0 == BX_CONST64(0xFFFFFFFFFFFFFFFF))
661                   && increment))
662         {
663             roundMask = 0;
664  overflow:
665             float_raise(status, float_flag_overflow | float_flag_inexact);
666             if ((roundingMode == float_round_to_zero)
667                  || (zSign && (roundingMode == float_round_up))
668                  || (! zSign && (roundingMode == float_round_down)))
669             {
670                 return packFloatx80(zSign, 0x7FFE, ~roundMask);
671             }
672             set_float_rounding_up(status);
673             return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
674         }
675         if (zExp <= 0) {
676             int isTiny = (zExp < 0) || (! increment)
677                 || (zSig0 < BX_CONST64(0xFFFFFFFFFFFFFFFF));
678             shift64ExtraRightJamming(zSig0, zSig1, 1 - zExp, &zSig0, &zSig1);
679             zExp = 0;
680             if (isTiny) {
681                 if (zSig1 || (zSig0 && !float_exception_masked(status, float_flag_underflow)))
682                     float_raise(status, float_flag_underflow);
683             }
684             if (zSig1) float_raise(status, float_flag_inexact);
685             if (roundNearestEven) increment = ((Bit64s) zSig1 < 0);
686             else {
687                 if (zSign) {
688                     increment = (roundingMode == float_round_down) && zSig1;
689                 } else {
690                     increment = (roundingMode == float_round_up) && zSig1;
691                 }
692             }
693             if (increment) {
694                 zSigExact = zSig0++;
695                 zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven);
696                 if (zSig0 > zSigExact) set_float_rounding_up(status);
697                 if ((Bit64s) zSig0 < 0) zExp = 1;
698             }
699             return packFloatx80(zSign, zExp, zSig0);
700         }
701     }
702     if (zSig1) float_raise(status, float_flag_inexact);
703     if (increment) {
704         zSigExact = zSig0++;
705         if (zSig0 == 0) {
706             zExp++;
707             zSig0 = BX_CONST64(0x8000000000000000);
708             zSigExact >>= 1;  // must scale also, or else later tests will fail
709         }
710         else {
711             zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven);
712         }
713         if (zSig0 > zSigExact) set_float_rounding_up(status);
714     }
715     else {
716         if (zSig0 == 0) zExp = 0;
717     }
718     return packFloatx80(zSign, zExp, zSig0);
719 }
720 
roundAndPackFloatx80(int roundingPrecision,int zSign,Bit32s zExp,Bit64u zSig0,Bit64u zSig1,float_status_t & status)721 floatx80 roundAndPackFloatx80(int roundingPrecision,
722         int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status)
723 {
724     float_status_t round_status = status;
725     floatx80 result = SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp, zSig0, zSig1, status);
726 
727     // bias unmasked undeflow
728     if (status.float_exception_flags & ~status.float_exception_masks & float_flag_underflow) {
729        float_raise(round_status, float_flag_underflow);
730        return SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp + 0x6000, zSig0, zSig1, status = round_status);
731     }
732 
733     // bias unmasked overflow
734     if (status.float_exception_flags & ~status.float_exception_masks & float_flag_overflow) {
735        float_raise(round_status, float_flag_overflow);
736        return SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp - 0x6000, zSig0, zSig1, status = round_status);
737     }
738 
739     return result;
740 }
741 
742 /*----------------------------------------------------------------------------
743 | Takes an abstract floating-point value having sign `zSign', exponent
744 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
745 | and returns the proper extended double-precision floating-point value
746 | corresponding to the abstract input.  This routine is just like
747 | `roundAndPackFloatx80' except that the input significand does not have to be
748 | normalized.
749 *----------------------------------------------------------------------------*/
750 
normalizeRoundAndPackFloatx80(int roundingPrecision,int zSign,Bit32s zExp,Bit64u zSig0,Bit64u zSig1,float_status_t & status)751 floatx80 normalizeRoundAndPackFloatx80(int roundingPrecision,
752         int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status)
753 {
754     if (zSig0 == 0) {
755         zSig0 = zSig1;
756         zSig1 = 0;
757         zExp -= 64;
758     }
759     int shiftCount = countLeadingZeros64(zSig0);
760     shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1);
761     zExp -= shiftCount;
762     return
763         roundAndPackFloatx80(roundingPrecision, zSign, zExp, zSig0, zSig1, status);
764 }
765 
766 #endif
767 
768 #ifdef FLOAT128
769 
770 /*----------------------------------------------------------------------------
771 | Normalizes the subnormal quadruple-precision floating-point value
772 | represented by the denormalized significand formed by the concatenation of
773 | `aSig0' and `aSig1'.  The normalized exponent is stored at the location
774 | pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
775 | significand are stored at the location pointed to by `zSig0Ptr', and the
776 | least significant 64 bits of the normalized significand are stored at the
777 | location pointed to by `zSig1Ptr'.
778 *----------------------------------------------------------------------------*/
779 
normalizeFloat128Subnormal(Bit64u aSig0,Bit64u aSig1,Bit32s * zExpPtr,Bit64u * zSig0Ptr,Bit64u * zSig1Ptr)780 void normalizeFloat128Subnormal(
781      Bit64u aSig0, Bit64u aSig1, Bit32s *zExpPtr, Bit64u *zSig0Ptr, Bit64u *zSig1Ptr)
782 {
783     int shiftCount;
784 
785     if (aSig0 == 0) {
786         shiftCount = countLeadingZeros64(aSig1) - 15;
787         if (shiftCount < 0) {
788             *zSig0Ptr = aSig1 >>(-shiftCount);
789             *zSig1Ptr = aSig1 << (shiftCount & 63);
790         }
791         else {
792             *zSig0Ptr = aSig1 << shiftCount;
793             *zSig1Ptr = 0;
794         }
795         *zExpPtr = - shiftCount - 63;
796     }
797     else {
798         shiftCount = countLeadingZeros64(aSig0) - 15;
799         shortShift128Left(aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr);
800         *zExpPtr = 1 - shiftCount;
801     }
802 }
803 
804 /*----------------------------------------------------------------------------
805 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
806 | and extended significand formed by the concatenation of `zSig0', `zSig1',
807 | and `zSig2', and returns the proper quadruple-precision floating-point value
808 | corresponding to the abstract input.  Ordinarily, the abstract value is
809 | simply rounded and packed into the quadruple-precision format, with the
810 | inexact exception raised if the abstract input cannot be represented
811 | exactly.  However, if the abstract value is too large, the overflow and
812 | inexact exceptions are raised and an infinity or maximal finite value is
813 | returned.  If the abstract value is too small, the input value is rounded to
814 | a subnormal number, and the underflow and inexact exceptions are raised if
815 | the abstract input cannot be represented exactly as a subnormal quadruple-
816 | precision floating-point number.
817 |     The input significand must be normalized or smaller.  If the input
818 | significand is not normalized, `zExp' must be 0; in that case, the result
819 | returned is a subnormal number, and it must not require rounding.  In the
820 | usual case that the input significand is normalized, `zExp' must be 1 less
821 | than the ``true'' floating-point exponent.  The handling of underflow and
822 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
823 *----------------------------------------------------------------------------*/
824 
roundAndPackFloat128(int zSign,Bit32s zExp,Bit64u zSig0,Bit64u zSig1,Bit64u zSig2,float_status_t & status)825 float128 roundAndPackFloat128(
826      int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, Bit64u zSig2, float_status_t &status)
827 {
828     int increment = ((Bit64s) zSig2 < 0);
829     if (0x7FFD <= (Bit32u) zExp) {
830         if ((0x7FFD < zExp)
831              || ((zExp == 0x7FFD)
832                   && eq128(BX_CONST64(0x0001FFFFFFFFFFFF),
833                          BX_CONST64(0xFFFFFFFFFFFFFFFF), zSig0, zSig1)
834                   && increment))
835         {
836             float_raise(status, float_flag_overflow | float_flag_inexact);
837             return packFloat128(zSign, 0x7FFF, 0, 0);
838         }
839         if (zExp < 0) {
840             int isTiny = (zExp < -1)
841                 || ! increment
842                 || lt128(zSig0, zSig1,
843                        BX_CONST64(0x0001FFFFFFFFFFFF),
844                        BX_CONST64(0xFFFFFFFFFFFFFFFF));
845             shift128ExtraRightJamming(
846                 zSig0, zSig1, zSig2, -zExp, &zSig0, &zSig1, &zSig2);
847             zExp = 0;
848             if (isTiny && zSig2) float_raise(status, float_flag_underflow);
849             increment = ((Bit64s) zSig2 < 0);
850         }
851     }
852     if (zSig2) float_raise(status, float_flag_inexact);
853     if (increment) {
854         add128(zSig0, zSig1, 0, 1, &zSig0, &zSig1);
855         zSig1 &= ~((zSig2 + zSig2 == 0) & 1);
856     }
857     else {
858         if ((zSig0 | zSig1) == 0) zExp = 0;
859     }
860     return packFloat128(zSign, zExp, zSig0, zSig1);
861 }
862 
863 /*----------------------------------------------------------------------------
864 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
865 | and significand formed by the concatenation of `zSig0' and `zSig1', and
866 | returns the proper quadruple-precision floating-point value corresponding
867 | to the abstract input.  This routine is just like `roundAndPackFloat128'
868 | except that the input significand has fewer bits and does not have to be
869 | normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
870 | point exponent.
871 *----------------------------------------------------------------------------*/
872 
normalizeRoundAndPackFloat128(int zSign,Bit32s zExp,Bit64u zSig0,Bit64u zSig1,float_status_t & status)873 float128 normalizeRoundAndPackFloat128(
874      int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status)
875 {
876     Bit64u zSig2;
877 
878     if (zSig0 == 0) {
879         zSig0 = zSig1;
880         zSig1 = 0;
881         zExp -= 64;
882     }
883     int shiftCount = countLeadingZeros64(zSig0) - 15;
884     if (0 <= shiftCount) {
885         zSig2 = 0;
886         shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1);
887     }
888     else {
889         shift128ExtraRightJamming(
890             zSig0, zSig1, 0, -shiftCount, &zSig0, &zSig1, &zSig2);
891     }
892     zExp -= shiftCount;
893     return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
894 }
895 
896 #endif
897