1
2 #define SOFTFLOAT_68K
3
4 #include <stdint.h>
5 #include <stdlib.h>
6 #include "softfloat/softfloat.h"
7
8
9 /*
10 * QEMU float support
11 *
12 * The code in this source file is derived from release 2a of the SoftFloat
13 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
14 * some later contributions) are provided under that license, as detailed below.
15 * It has subsequently been modified by contributors to the QEMU Project,
16 * so some portions are provided under:
17 * the SoftFloat-2a license
18 * the BSD license
19 * GPL-v2-or-later
20 *
21 * Any future contributions to this file after December 1st 2014 will be
22 * taken to be licensed under the Softfloat-2a license unless specifically
23 * indicated otherwise.
24 */
25
26 /*
27 ===============================================================================
28 This C source file is part of the SoftFloat IEC/IEEE Floating-point
29 Arithmetic Package, Release 2a.
30
31 Written by John R. Hauser. This work was made possible in part by the
32 International Computer Science Institute, located at Suite 600, 1947 Center
33 Street, Berkeley, California 94704. Funding was partially provided by the
34 National Science Foundation under grant MIP-9311980. The original version
35 of this code was written as part of a project to build a fixed-point vector
36 processor in collaboration with the University of California at Berkeley,
37 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
38 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
39 arithmetic/SoftFloat.html'.
40
41 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
42 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
43 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
44 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
45 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
46
47 Derivative works are acceptable, even for commercial purposes, so long as
48 (1) they include prominent notice that the work is derivative, and (2) they
49 include prominent notice akin to these four paragraphs for those parts of
50 this code that are retained.
51
52 ===============================================================================
53 */
54
55 /* BSD licensing:
56 * Copyright (c) 2006, Fabrice Bellard
57 * All rights reserved.
58 *
59 * Redistribution and use in source and binary forms, with or without
60 * modification, are permitted provided that the following conditions are met:
61 *
62 * 1. Redistributions of source code must retain the above copyright notice,
63 * this list of conditions and the following disclaimer.
64 *
65 * 2. Redistributions in binary form must reproduce the above copyright notice,
66 * this list of conditions and the following disclaimer in the documentation
67 * and/or other materials provided with the distribution.
68 *
69 * 3. Neither the name of the copyright holder nor the names of its contributors
70 * may be used to endorse or promote products derived from this software without
71 * specific prior written permission.
72 *
73 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
74 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
76 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
77 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
78 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
79 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
80 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
81 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
82 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
83 * THE POSSIBILITY OF SUCH DAMAGE.
84 */
85
86 /* Portions of this work are licensed under the terms of the GNU GPL,
87 * version 2 or later. See the COPYING file in the top-level directory.
88 */
89
90 /* We only need stdlib for abort() */
91
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "softfloat-macros.h"
98
99 /*----------------------------------------------------------------------------
100 | Variables for storing sign, exponent and significand of internal extended
101 | double-precision floating-point value for external use.
102 *----------------------------------------------------------------------------*/
103 flag floatx80_internal_sign = 0;
104 int32_t floatx80_internal_exp = 0;
105 uint64_t floatx80_internal_sig = 0;
106 int32_t floatx80_internal_exp0 = 0;
107 uint64_t floatx80_internal_sig0 = 0;
108 uint64_t floatx80_internal_sig1 = 0;
109 int8_t floatx80_internal_precision = 80;
110 int8_t floatx80_internal_mode = float_round_nearest_even;
111
112 /*----------------------------------------------------------------------------
113 | Functions for storing sign, exponent and significand of extended
114 | double-precision floating-point intermediate result for external use.
115 *----------------------------------------------------------------------------*/
roundSaveFloatx80Internal(int8_t roundingPrecision,flag zSign,int32_t zExp,uint64_t zSig0,uint64_t zSig1,float_status * status)116 floatx80 roundSaveFloatx80Internal( int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status )
117 {
118 int64_t roundIncrement, roundMask, roundBits;
119 flag increment;
120
121 if ( roundingPrecision == 80 ) {
122 goto precision80;
123 } else if ( roundingPrecision == 64 ) {
124 roundIncrement = LIT64( 0x0000000000000400 );
125 roundMask = LIT64( 0x00000000000007FF );
126 } else if ( roundingPrecision == 32 ) {
127 roundIncrement = LIT64( 0x0000008000000000 );
128 roundMask = LIT64( 0x000000FFFFFFFFFF );
129 } else {
130 goto precision80;
131 }
132
133 zSig0 |= ( zSig1 != 0 );
134 if ( status->float_rounding_mode != float_round_nearest_even ) {
135 if ( status->float_rounding_mode == float_round_to_zero ) {
136 roundIncrement = 0;
137 } else {
138 roundIncrement = roundMask;
139 if ( zSign ) {
140 if ( status->float_rounding_mode == float_round_up ) roundIncrement = 0;
141 } else {
142 if ( status->float_rounding_mode == float_round_down ) roundIncrement = 0;
143 }
144 }
145 }
146
147 roundBits = zSig0 & roundMask;
148
149 zSig0 += roundIncrement;
150 if ( zSig0 < roundIncrement ) {
151 ++zExp;
152 zSig0 = LIT64( 0x8000000000000000 );
153 }
154 roundIncrement = roundMask + 1;
155 if ( status->float_rounding_mode == float_round_nearest_even && ( roundBits<<1 == roundIncrement ) ) {
156 roundMask |= roundIncrement;
157 }
158 zSig0 &= ~ roundMask;
159 if ( zSig0 == 0 ) zExp = 0;
160 return packFloatx80( zSign, zExp, zSig0 );
161
162 precision80:
163 increment = ( (int64_t) zSig1 < 0 );
164 if ( status->float_rounding_mode != float_round_nearest_even ) {
165 if ( status->float_rounding_mode == float_round_to_zero ) {
166 increment = 0;
167 } else {
168 if ( zSign ) {
169 increment = ( status->float_rounding_mode == float_round_down ) && zSig1;
170 } else {
171 increment = ( status->float_rounding_mode == float_round_up ) && zSig1;
172 }
173 }
174 }
175 if ( increment ) {
176 ++zSig0;
177 if ( zSig0 == 0 ) {
178 ++zExp;
179 zSig0 = LIT64( 0x8000000000000000 );
180 } else {
181 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & ( status->float_rounding_mode == float_round_nearest_even ) );
182 }
183 } else {
184 if ( zSig0 == 0 ) zExp = 0;
185 }
186 return packFloatx80( zSign, zExp, zSig0 );
187 }
188
saveFloatx80Internal(int8_t prec,flag zSign,int32_t zExp,uint64_t zSig0,uint64_t zSig1,float_status * status)189 static void saveFloatx80Internal( int8_t prec, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status )
190 {
191 floatx80_internal_sign = zSign;
192 floatx80_internal_exp = zExp;
193 floatx80_internal_sig0 = zSig0;
194 floatx80_internal_sig1 = zSig1;
195 floatx80_internal_precision = prec;
196 floatx80_internal_mode = status->float_rounding_mode;
197 }
198
saveFloat64Internal(flag zSign,int16_t zExp,uint64_t zSig,float_status * status)199 static void saveFloat64Internal( flag zSign, int16_t zExp, uint64_t zSig, float_status *status )
200 {
201 floatx80_internal_sign = zSign;
202 floatx80_internal_exp = zExp + 0x3C01;
203 floatx80_internal_sig0 = zSig<<1;
204 floatx80_internal_sig1 = 0;
205 floatx80_internal_precision = 64;
206 floatx80_internal_mode = status->float_rounding_mode;
207 }
208
saveFloat32Internal(flag zSign,int16_t zExp,uint32_t zSig,float_status * status)209 static void saveFloat32Internal( flag zSign, int16_t zExp, uint32_t zSig, float_status *status )
210 {
211 floatx80 z = roundSaveFloatx80Internal( 32, zSign, zExp + 0x3F81, ( (uint64_t) zSig )<<33, 0, status );
212
213 floatx80_internal_sign = zSign;
214 floatx80_internal_exp = extractFloatx80Exp( z );
215 floatx80_internal_sig = extractFloatx80Frac( z );
216 floatx80_internal_exp0 = zExp + 0x3F81;
217 floatx80_internal_sig0 = ( (uint64_t) zSig )<<33;
218 floatx80_internal_sig1 = 0;
219 }
220
221 /*----------------------------------------------------------------------------
222 | Functions for returning sign, exponent and significand of extended
223 | double-precision floating-point intermediate result for external use.
224 *----------------------------------------------------------------------------*/
225
getRoundedFloatInternal(int8_t roundingPrecision,flag * pzSign,int32_t * pzExp,uint64_t * pzSig)226 void getRoundedFloatInternal( int8_t roundingPrecision, flag *pzSign, int32_t *pzExp, uint64_t *pzSig )
227 {
228 int64_t roundIncrement, roundMask, roundBits;
229 flag increment;
230
231 flag zSign = floatx80_internal_sign;
232 int32_t zExp = floatx80_internal_exp;
233 uint64_t zSig0 = floatx80_internal_sig0;
234 uint64_t zSig1 = floatx80_internal_sig1;
235
236 if ( roundingPrecision == 80 ) {
237 goto precision80;
238 } else if ( roundingPrecision == 64 ) {
239 roundIncrement = LIT64( 0x0000000000000400 );
240 roundMask = LIT64( 0x00000000000007FF );
241 } else if ( roundingPrecision == 32 ) {
242 roundIncrement = LIT64( 0x0000008000000000 );
243 roundMask = LIT64( 0x000000FFFFFFFFFF );
244 } else {
245 goto precision80;
246 }
247
248 zSig0 |= ( zSig1 != 0 );
249 if ( floatx80_internal_mode != float_round_nearest_even ) {
250 if ( floatx80_internal_mode == float_round_to_zero ) {
251 roundIncrement = 0;
252 } else {
253 roundIncrement = roundMask;
254 if ( zSign ) {
255 if ( floatx80_internal_mode == float_round_up ) roundIncrement = 0;
256 } else {
257 if ( floatx80_internal_mode == float_round_down ) roundIncrement = 0;
258 }
259 }
260 }
261
262 roundBits = zSig0 & roundMask;
263
264 zSig0 += roundIncrement;
265 if ( zSig0 < roundIncrement ) {
266 ++zExp;
267 zSig0 = LIT64( 0x8000000000000000 );
268 }
269 roundIncrement = roundMask + 1;
270 if ( floatx80_internal_mode == float_round_nearest_even && ( roundBits<<1 == roundIncrement ) ) {
271 roundMask |= roundIncrement;
272 }
273 zSig0 &= ~ roundMask;
274 if ( zSig0 == 0 ) zExp = 0;
275
276 *pzSign = zSign;
277 *pzExp = zExp;
278 *pzSig = zSig0;
279 return;
280
281 precision80:
282 increment = ( (int64_t) zSig1 < 0 );
283 if ( floatx80_internal_mode != float_round_nearest_even ) {
284 if ( floatx80_internal_mode == float_round_to_zero ) {
285 increment = 0;
286 } else {
287 if ( zSign ) {
288 increment = ( floatx80_internal_mode == float_round_down ) && zSig1;
289 } else {
290 increment = ( floatx80_internal_mode == float_round_up ) && zSig1;
291 }
292 }
293 }
294 if ( increment ) {
295 ++zSig0;
296 if ( zSig0 == 0 ) {
297 ++zExp;
298 zSig0 = LIT64( 0x8000000000000000 );
299 } else {
300 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & ( floatx80_internal_mode == float_round_nearest_even ) );
301 }
302 } else {
303 if ( zSig0 == 0 ) zExp = 0;
304 }
305
306 *pzSign = zSign;
307 *pzExp = zExp;
308 *pzSig = zSig0;
309 }
310
getFloatInternalOverflow(void)311 floatx80 getFloatInternalOverflow( void )
312 {
313 flag zSign;
314 int32_t zExp;
315 uint64_t zSig;
316
317 getRoundedFloatInternal( floatx80_internal_precision, &zSign, &zExp, &zSig );
318
319 if (zExp > (0x7fff + 0x6000)) { // catastrophic
320 zExp = 0;
321 } else {
322 zExp -= 0x6000;
323 }
324
325 return packFloatx80( zSign, zExp, zSig );
326
327 }
328
getFloatInternalUnderflow(void)329 floatx80 getFloatInternalUnderflow( void )
330 {
331 flag zSign;
332 int32_t zExp;
333 uint64_t zSig;
334
335 getRoundedFloatInternal( floatx80_internal_precision, &zSign, &zExp, &zSig );
336
337 if (zExp < (0x0000 - 0x6000)) { // catastrophic
338 zExp = 0;
339 } else {
340 zExp += 0x6000;
341 }
342
343 return packFloatx80( zSign, zExp, zSig );
344
345 }
346
getFloatInternalRoundedAll(void)347 floatx80 getFloatInternalRoundedAll( void )
348 {
349 flag zSign;
350 int32_t zExp;
351 uint64_t zSig, zSig32, zSig64, zSig80;
352
353 if (floatx80_internal_precision == 80) {
354 getRoundedFloatInternal( 80, &zSign, &zExp, &zSig80 );
355 zSig = zSig80;
356 } else if (floatx80_internal_precision == 64) {
357 getRoundedFloatInternal( 80, &zSign, &zExp, &zSig80 );
358 getRoundedFloatInternal( 64, &zSign, &zExp, &zSig64 );
359 zSig = zSig64;
360 zSig |= zSig80 & LIT64( 0x00000000000007FF );
361 } else {
362 getRoundedFloatInternal( 80, &zSign, &zExp, &zSig80 );
363 getRoundedFloatInternal( 64, &zSign, &zExp, &zSig64 );
364 getRoundedFloatInternal( 32, &zSign, &zExp, &zSig32 );
365 zSig = zSig32;
366 zSig |= zSig64 & LIT64( 0x000000FFFFFFFFFF );
367 zSig |= zSig80 & LIT64( 0x00000000000007FF );
368 }
369
370 return packFloatx80( zSign, zExp & 0x7FFF, zSig );
371
372 }
373
getFloatInternalRoundedSome(void)374 floatx80 getFloatInternalRoundedSome( void )
375 {
376 flag zSign;
377 int32_t zExp;
378 uint64_t zSig, zSig32, zSig64, zSig80;
379
380 if (floatx80_internal_precision == 80) {
381 getRoundedFloatInternal( 80, &zSign, &zExp, &zSig80 );
382 zSig = zSig80;
383 } else if (floatx80_internal_precision == 64) {
384 getRoundedFloatInternal( 64, &zSign, &zExp, &zSig64 );
385 zSig80 = floatx80_internal_sig0;
386 if (zSig64 != (zSig80 & LIT64( 0xFFFFFFFFFFFFF800 ))) {
387 zSig80++;
388 }
389 zSig = zSig64;
390 zSig |= zSig80 & LIT64( 0x00000000000007FF );
391 } else {
392 getRoundedFloatInternal( 32, &zSign, &zExp, &zSig32 );
393 zSig80 = floatx80_internal_sig0;
394 if (zSig32 != (zSig80 & LIT64( 0xFFFFFF0000000000 ))) {
395 zSig80++;
396 }
397 zSig = zSig32;
398 zSig |= zSig80 & LIT64( 0x000000FFFFFFFFFF );
399 }
400
401 return packFloatx80( zSign, zExp & 0x7FFF, zSig );
402
403 }
404
getFloatInternalFloatx80(void)405 floatx80 getFloatInternalFloatx80( void )
406 {
407 flag zSign;
408 int32_t zExp;
409 uint64_t zSig;
410
411 getRoundedFloatInternal( 80, &zSign, &zExp, &zSig );
412
413 return packFloatx80( zSign, zExp & 0x7FFF, zSig );
414
415 }
416
getFloatInternalUnrounded(void)417 floatx80 getFloatInternalUnrounded( void )
418 {
419 flag zSign = floatx80_internal_sign;
420 int32_t zExp = floatx80_internal_exp;
421 uint64_t zSig = floatx80_internal_sig0;
422
423 return packFloatx80( zSign, zExp & 0x7FFF, zSig );
424
425 }
426
getFloatInternalGRS(void)427 uint64_t getFloatInternalGRS( void )
428 {
429 #if 1
430 if (floatx80_internal_sig1)
431 return 5;
432
433 if (floatx80_internal_precision == 64 &&
434 floatx80_internal_sig0 & LIT64( 0x00000000000007FF )) {
435 return 1;
436 }
437 if (floatx80_internal_precision == 32 &&
438 floatx80_internal_sig0 & LIT64( 0x000000FFFFFFFFFF )) {
439 return 1;
440 }
441
442 return 0;
443 #else
444 uint64_t roundbits;
445 shift64RightJamming(floatx80_internal_sig1, 61, &roundbits);
446
447 return roundbits;
448 #endif
449 }
450
451 /*----------------------------------------------------------------------------
452 | Functions and definitions to determine: (1) whether tininess for underflow
453 | is detected before or after rounding by default, (2) what (if anything)
454 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
455 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
456 | are propagated from function inputs to output. These details are target-
457 | specific.
458 *----------------------------------------------------------------------------*/
459 #include "softfloat-specialize.h"
460
461 /*----------------------------------------------------------------------------
462 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
463 | and 7, and returns the properly rounded 32-bit integer corresponding to the
464 | input. If `zSign' is 1, the input is negated before being converted to an
465 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
466 | is simply rounded to an integer, with the inexact exception raised if the
467 | input cannot be represented exactly as an integer. However, if the fixed-
468 | point input is too large, the invalid exception is raised and the largest
469 | positive or negative integer is returned.
470 *----------------------------------------------------------------------------*/
471
roundAndPackInt32(flag zSign,uint64_t absZ,float_status * status)472 static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
473 {
474 int8_t roundingMode;
475 flag roundNearestEven;
476 int8_t roundIncrement, roundBits;
477 int32_t z;
478
479 roundingMode = status->float_rounding_mode;
480 roundNearestEven = ( roundingMode == float_round_nearest_even );
481 switch (roundingMode) {
482 case float_round_nearest_even:
483 case float_round_ties_away:
484 roundIncrement = 0x40;
485 break;
486 case float_round_to_zero:
487 roundIncrement = 0;
488 break;
489 case float_round_up:
490 roundIncrement = zSign ? 0 : 0x7f;
491 break;
492 case float_round_down:
493 roundIncrement = zSign ? 0x7f : 0;
494 break;
495 default:
496 abort();
497 }
498 roundBits = absZ & 0x7F;
499 absZ = ( absZ + roundIncrement )>>7;
500 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
501 z = absZ;
502 if ( zSign ) z = - z;
503 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
504 float_raise(float_flag_invalid, status);
505 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
506 }
507 if (roundBits) {
508 status->float_exception_flags |= float_flag_inexact;
509 }
510 return z;
511
512 }
513
514
515 #ifdef SOFTFLOAT_68K // 30-01-2017: Added for Previous
roundAndPackInt16(flag zSign,uint64_t absZ,float_status * status)516 static int16_t roundAndPackInt16( flag zSign, uint64_t absZ, float_status *status )
517 {
518 int8_t roundingMode;
519 flag roundNearestEven;
520 int8_t roundIncrement, roundBits;
521 int16_t z;
522
523 roundingMode = status->float_rounding_mode;
524 roundNearestEven = ( roundingMode == float_round_nearest_even );
525 roundIncrement = 0x40;
526 if ( ! roundNearestEven ) {
527 if ( roundingMode == float_round_to_zero ) {
528 roundIncrement = 0;
529 }
530 else {
531 roundIncrement = 0x7F;
532 if ( zSign ) {
533 if ( roundingMode == float_round_up ) roundIncrement = 0;
534 }
535 else {
536 if ( roundingMode == float_round_down ) roundIncrement = 0;
537 }
538 }
539 }
540 roundBits = absZ & 0x7F;
541 absZ = ( absZ + roundIncrement )>>7;
542 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
543 z = absZ;
544 if ( zSign ) z = - z;
545 z = (int16_t) z;
546 if ( ( absZ>>16 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
547 float_raise( float_flag_invalid, status );
548 return zSign ? (int16_t) 0x8000 : 0x7FFF;
549 }
550 if ( roundBits ) status->float_exception_flags |= float_flag_inexact;
551 return z;
552
553 }
554
roundAndPackInt8(flag zSign,uint64_t absZ,float_status * status)555 static int8_t roundAndPackInt8( flag zSign, uint64_t absZ, float_status *status )
556 {
557 int8_t roundingMode;
558 flag roundNearestEven;
559 int8_t roundIncrement, roundBits;
560 int8_t z;
561
562 roundingMode = status->float_rounding_mode;
563 roundNearestEven = ( roundingMode == float_round_nearest_even );
564 roundIncrement = 0x40;
565 if ( ! roundNearestEven ) {
566 if ( roundingMode == float_round_to_zero ) {
567 roundIncrement = 0;
568 }
569 else {
570 roundIncrement = 0x7F;
571 if ( zSign ) {
572 if ( roundingMode == float_round_up ) roundIncrement = 0;
573 }
574 else {
575 if ( roundingMode == float_round_down ) roundIncrement = 0;
576 }
577 }
578 }
579 roundBits = absZ & 0x7F;
580 absZ = ( absZ + roundIncrement )>>7;
581 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
582 z = absZ;
583 if ( zSign ) z = - z;
584 z = (int8_t) z;
585 if ( ( absZ>>8 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
586 float_raise( float_flag_invalid, status );
587 return zSign ? (int8_t) 0x80 : 0x7F;
588 }
589 if ( roundBits ) status->float_exception_flags |= float_flag_inexact;
590 return z;
591
592 }
593 #endif // End of addition for Previous
594
595 /*----------------------------------------------------------------------------
596 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
597 | `absZ1', with binary point between bits 63 and 64 (between the input words),
598 | and returns the properly rounded 64-bit integer corresponding to the input.
599 | If `zSign' is 1, the input is negated before being converted to an integer.
600 | Ordinarily, the fixed-point input is simply rounded to an integer, with
601 | the inexact exception raised if the input cannot be represented exactly as
602 | an integer. However, if the fixed-point input is too large, the invalid
603 | exception is raised and the largest positive or negative integer is
604 | returned.
605 *----------------------------------------------------------------------------*/
606
roundAndPackInt64(flag zSign,uint64_t absZ0,uint64_t absZ1,float_status * status)607 static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
608 float_status *status)
609 {
610 int8_t roundingMode;
611 flag roundNearestEven, increment;
612 int64_t z;
613
614 roundingMode = status->float_rounding_mode;
615 roundNearestEven = ( roundingMode == float_round_nearest_even );
616 switch (roundingMode) {
617 case float_round_nearest_even:
618 case float_round_ties_away:
619 increment = ((int64_t) absZ1 < 0);
620 break;
621 case float_round_to_zero:
622 increment = 0;
623 break;
624 case float_round_up:
625 increment = !zSign && absZ1;
626 break;
627 case float_round_down:
628 increment = zSign && absZ1;
629 break;
630 default:
631 abort();
632 }
633 if ( increment ) {
634 ++absZ0;
635 if ( absZ0 == 0 ) goto overflow;
636 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
637 }
638 z = absZ0;
639 if ( zSign ) z = - z;
640 if ( z && ( ( z < 0 ) ^ zSign ) ) {
641 overflow:
642 float_raise(float_flag_invalid, status);
643 return
644 zSign ? (int64_t) LIT64( 0x8000000000000000 )
645 : LIT64( 0x7FFFFFFFFFFFFFFF );
646 }
647 if (absZ1) {
648 status->float_exception_flags |= float_flag_inexact;
649 }
650 return z;
651
652 }
653
654 /*----------------------------------------------------------------------------
655 | Returns the fraction bits of the single-precision floating-point value `a'.
656 *----------------------------------------------------------------------------*/
657
extractFloat32Frac(float32 a)658 static inline uint32_t extractFloat32Frac( float32 a )
659 {
660
661 return float32_val(a) & 0x007FFFFF;
662
663 }
664
665 /*----------------------------------------------------------------------------
666 | Returns the exponent bits of the single-precision floating-point value `a'.
667 *----------------------------------------------------------------------------*/
668
extractFloat32Exp(float32 a)669 static inline int extractFloat32Exp(float32 a)
670 {
671
672 return ( float32_val(a)>>23 ) & 0xFF;
673
674 }
675
676 /*----------------------------------------------------------------------------
677 | Returns the sign bit of the single-precision floating-point value `a'.
678 *----------------------------------------------------------------------------*/
679
extractFloat32Sign(float32 a)680 static inline flag extractFloat32Sign( float32 a )
681 {
682
683 return float32_val(a)>>31;
684
685 }
686
687 /*----------------------------------------------------------------------------
688 | Normalizes the subnormal single-precision floating-point value represented
689 | by the denormalized significand `aSig'. The normalized exponent and
690 | significand are stored at the locations pointed to by `zExpPtr' and
691 | `zSigPtr', respectively.
692 *----------------------------------------------------------------------------*/
693
694 static void
normalizeFloat32Subnormal(uint32_t aSig,int * zExpPtr,uint32_t * zSigPtr)695 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
696 {
697 int8_t shiftCount;
698
699 shiftCount = countLeadingZeros32( aSig ) - 8;
700 *zSigPtr = aSig<<shiftCount;
701 *zExpPtr = 1 - shiftCount;
702
703 }
704
705 /*----------------------------------------------------------------------------
706 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
707 | single-precision floating-point value, returning the result. After being
708 | shifted into the proper positions, the three fields are simply added
709 | together to form the result. This means that any integer portion of `zSig'
710 | will be added into the exponent. Since a properly normalized significand
711 | will have an integer portion equal to 1, the `zExp' input should be 1 less
712 | than the desired result exponent whenever `zSig' is a complete, normalized
713 | significand.
714 *----------------------------------------------------------------------------*/
715
packFloat32(flag zSign,int zExp,uint32_t zSig)716 static inline float32 packFloat32(flag zSign, int zExp, uint32_t zSig)
717 {
718
719 return make_float32(
720 ( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
721
722 }
723
724 /*----------------------------------------------------------------------------
725 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
726 | and significand `zSig', and returns the proper single-precision floating-
727 | point value corresponding to the abstract input. Ordinarily, the abstract
728 | value is simply rounded and packed into the single-precision format, with
729 | the inexact exception raised if the abstract input cannot be represented
730 | exactly. However, if the abstract value is too large, the overflow and
731 | inexact exceptions are raised and an infinity or maximal finite value is
732 | returned. If the abstract value is too small, the input value is rounded to
733 | a subnormal number, and the underflow and inexact exceptions are raised if
734 | the abstract input cannot be represented exactly as a subnormal single-
735 | precision floating-point number.
736 | The input significand `zSig' has its binary point between bits 30
737 | and 29, which is 7 bits to the left of the usual location. This shifted
738 | significand must be normalized or smaller. If `zSig' is not normalized,
739 | `zExp' must be 0; in that case, the result returned is a subnormal number,
740 | and it must not require rounding. In the usual case that `zSig' is
741 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
742 | The handling of underflow and overflow follows the IEC/IEEE Standard for
743 | Binary Floating-Point Arithmetic.
744 *----------------------------------------------------------------------------*/
745
roundAndPackFloat32(flag zSign,int zExp,uint32_t zSig,float_status * status)746 static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
747 float_status *status)
748 {
749 int8_t roundingMode;
750 flag roundNearestEven;
751 int8_t roundIncrement, roundBits;
752 flag isTiny;
753
754 roundingMode = status->float_rounding_mode;
755 roundNearestEven = ( roundingMode == float_round_nearest_even );
756 switch (roundingMode) {
757 case float_round_nearest_even:
758 case float_round_ties_away:
759 roundIncrement = 0x40;
760 break;
761 case float_round_to_zero:
762 roundIncrement = 0;
763 break;
764 case float_round_up:
765 roundIncrement = zSign ? 0 : 0x7f;
766 break;
767 case float_round_down:
768 roundIncrement = zSign ? 0x7f : 0;
769 break;
770 default:
771 abort();
772 break;
773 }
774 roundBits = zSig & 0x7F;
775 if ( 0xFD <= (uint16_t) zExp ) {
776 if ( ( 0xFD < zExp )
777 || ( ( zExp == 0xFD )
778 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
779 ) {
780 #ifdef SOFTFLOAT_68K
781 float_raise( float_flag_overflow, status );
782 saveFloat32Internal( zSign, zExp, zSig, status );
783 if ( roundBits ) float_raise( float_flag_inexact, status );
784 #else
785 float_raise(float_flag_overflow | float_flag_inexact, status);
786 #endif
787 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
788 }
789 if ( zExp < 0 ) {
790 if (status->flush_to_zero) {
791 //float_raise(float_flag_output_denormal, status);
792 return packFloat32(zSign, 0, 0);
793 }
794 isTiny =
795 (status->float_detect_tininess
796 == float_tininess_before_rounding)
797 || ( zExp < -1 )
798 || ( zSig + roundIncrement < 0x80000000 );
799 #ifdef SOFTFLOAT_68K
800 if ( isTiny ) {
801 float_raise( float_flag_underflow, status );
802 saveFloat32Internal( zSign, zExp, zSig, status );
803 }
804 #endif
805 shift32RightJamming( zSig, - zExp, &zSig );
806 zExp = 0;
807 roundBits = zSig & 0x7F;
808 #ifndef SOFTFLOAT_68K
809 if (isTiny && roundBits)
810 float_raise(float_flag_underflow, status);
811 #endif
812 }
813 }
814 if (roundBits) {
815 status->float_exception_flags |= float_flag_inexact;
816 }
817 zSig = ( zSig + roundIncrement )>>7;
818 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
819 if ( zSig == 0 ) zExp = 0;
820 return packFloat32( zSign, zExp, zSig );
821
822 }
823
824 /*----------------------------------------------------------------------------
825 | Returns the fraction bits of the double-precision floating-point value `a'.
826 *----------------------------------------------------------------------------*/
827
extractFloat64Frac(float64 a)828 static inline uint64_t extractFloat64Frac( float64 a )
829 {
830
831 return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
832
833 }
834
835 /*----------------------------------------------------------------------------
836 | Returns the exponent bits of the double-precision floating-point value `a'.
837 *----------------------------------------------------------------------------*/
838
extractFloat64Exp(float64 a)839 static inline int extractFloat64Exp(float64 a)
840 {
841
842 return ( float64_val(a)>>52 ) & 0x7FF;
843
844 }
845
846 /*----------------------------------------------------------------------------
847 | Returns the sign bit of the double-precision floating-point value `a'.
848 *----------------------------------------------------------------------------*/
849
extractFloat64Sign(float64 a)850 static inline flag extractFloat64Sign( float64 a )
851 {
852
853 return float64_val(a)>>63;
854
855 }
856
857 /*----------------------------------------------------------------------------
858 | If `a' is denormal and we are in flush-to-zero mode then set the
859 | input-denormal exception and return zero. Otherwise just return the value.
860 *----------------------------------------------------------------------------*/
float64_squash_input_denormal(float64 a,float_status * status)861 float64 float64_squash_input_denormal(float64 a, float_status *status)
862 {
863 if (status->flush_inputs_to_zero) {
864 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
865 //float_raise(float_flag_input_denormal, status);
866 return make_float64(float64_val(a) & (1ULL << 63));
867 }
868 }
869 return a;
870 }
871
872 /*----------------------------------------------------------------------------
873 | Normalizes the subnormal double-precision floating-point value represented
874 | by the denormalized significand `aSig'. The normalized exponent and
875 | significand are stored at the locations pointed to by `zExpPtr' and
876 | `zSigPtr', respectively.
877 *----------------------------------------------------------------------------*/
878
879 static void
normalizeFloat64Subnormal(uint64_t aSig,int * zExpPtr,uint64_t * zSigPtr)880 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
881 {
882 int8_t shiftCount;
883
884 shiftCount = countLeadingZeros64( aSig ) - 11;
885 *zSigPtr = aSig<<shiftCount;
886 *zExpPtr = 1 - shiftCount;
887
888 }
889
890 /*----------------------------------------------------------------------------
891 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
892 | double-precision floating-point value, returning the result. After being
893 | shifted into the proper positions, the three fields are simply added
894 | together to form the result. This means that any integer portion of `zSig'
895 | will be added into the exponent. Since a properly normalized significand
896 | will have an integer portion equal to 1, the `zExp' input should be 1 less
897 | than the desired result exponent whenever `zSig' is a complete, normalized
898 | significand.
899 *----------------------------------------------------------------------------*/
900
packFloat64(flag zSign,int zExp,uint64_t zSig)901 static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
902 {
903
904 return make_float64(
905 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
906
907 }
908
909 /*----------------------------------------------------------------------------
910 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
911 | and significand `zSig', and returns the proper double-precision floating-
912 | point value corresponding to the abstract input. Ordinarily, the abstract
913 | value is simply rounded and packed into the double-precision format, with
914 | the inexact exception raised if the abstract input cannot be represented
915 | exactly. However, if the abstract value is too large, the overflow and
916 | inexact exceptions are raised and an infinity or maximal finite value is
917 | returned. If the abstract value is too small, the input value is rounded to
918 | a subnormal number, and the underflow and inexact exceptions are raised if
919 | the abstract input cannot be represented exactly as a subnormal double-
920 | precision floating-point number.
921 | The input significand `zSig' has its binary point between bits 62
922 | and 61, which is 10 bits to the left of the usual location. This shifted
923 | significand must be normalized or smaller. If `zSig' is not normalized,
924 | `zExp' must be 0; in that case, the result returned is a subnormal number,
925 | and it must not require rounding. In the usual case that `zSig' is
926 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
927 | The handling of underflow and overflow follows the IEC/IEEE Standard for
928 | Binary Floating-Point Arithmetic.
929 *----------------------------------------------------------------------------*/
930
roundAndPackFloat64(flag zSign,int zExp,uint64_t zSig,float_status * status)931 static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
932 float_status *status)
933 {
934 int8_t roundingMode;
935 flag roundNearestEven;
936 int roundIncrement, roundBits;
937 flag isTiny;
938
939 roundingMode = status->float_rounding_mode;
940 roundNearestEven = ( roundingMode == float_round_nearest_even );
941 switch (roundingMode) {
942 case float_round_nearest_even:
943 case float_round_ties_away:
944 roundIncrement = 0x200;
945 break;
946 case float_round_to_zero:
947 roundIncrement = 0;
948 break;
949 case float_round_up:
950 roundIncrement = zSign ? 0 : 0x3ff;
951 break;
952 case float_round_down:
953 roundIncrement = zSign ? 0x3ff : 0;
954 break;
955 default:
956 abort();
957 }
958 roundBits = zSig & 0x3FF;
959 if ( 0x7FD <= (uint16_t) zExp ) {
960 if ( ( 0x7FD < zExp )
961 || ( ( zExp == 0x7FD )
962 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
963 ) {
964 #ifdef SOFTFLOAT_68K
965 float_raise( float_flag_overflow, status );
966 saveFloat64Internal( zSign, zExp, zSig, status );
967 if ( roundBits ) float_raise( float_flag_inexact, status );
968 #else
969 float_raise(float_flag_overflow | float_flag_inexact, status);
970 #endif
971 return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
972 }
973 if ( zExp < 0 ) {
974 if (status->flush_to_zero) {
975 //float_raise(float_flag_output_denormal, status);
976 return packFloat64(zSign, 0, 0);
977 }
978 isTiny =
979 (status->float_detect_tininess
980 == float_tininess_before_rounding)
981 || ( zExp < -1 )
982 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
983 #ifdef SOFTFLOAT_68K
984 if ( isTiny ) {
985 float_raise( float_flag_underflow, status );
986 saveFloat64Internal( zSign, zExp, zSig, status );
987 }
988 #endif
989 shift64RightJamming( zSig, - zExp, &zSig );
990 zExp = 0;
991 roundBits = zSig & 0x3FF;
992 #ifndef SOFTFLOAT_68K
993 if (isTiny && roundBits)
994 float_raise(float_flag_underflow, status);
995 #endif
996 }
997 }
998 if (roundBits) {
999 status->float_exception_flags |= float_flag_inexact;
1000 }
1001 zSig = ( zSig + roundIncrement )>>10;
1002 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
1003 if ( zSig == 0 ) zExp = 0;
1004 return packFloat64( zSign, zExp, zSig );
1005
1006 }
1007
1008 /*----------------------------------------------------------------------------
1009 | Returns the fraction bits of the extended double-precision floating-point
1010 | value `a'.
1011 *----------------------------------------------------------------------------*/
1012
extractFloatx80Frac(floatx80 a)1013 uint64_t extractFloatx80Frac( floatx80 a )
1014 {
1015
1016 return a.low;
1017
1018 }
1019
1020 /*----------------------------------------------------------------------------
1021 | Returns the exponent bits of the extended double-precision floating-point
1022 | value `a'.
1023 *----------------------------------------------------------------------------*/
1024
extractFloatx80Exp(floatx80 a)1025 int32_t extractFloatx80Exp( floatx80 a )
1026 {
1027
1028 return a.high & 0x7FFF;
1029
1030 }
1031
1032 /*----------------------------------------------------------------------------
1033 | Returns the sign bit of the extended double-precision floating-point value
1034 | `a'.
1035 *----------------------------------------------------------------------------*/
1036
extractFloatx80Sign(floatx80 a)1037 flag extractFloatx80Sign( floatx80 a )
1038 {
1039
1040 return a.high>>15;
1041
1042 }
1043
1044 /*----------------------------------------------------------------------------
1045 | Normalizes the subnormal extended double-precision floating-point value
1046 | represented by the denormalized significand `aSig'. The normalized exponent
1047 | and significand are stored at the locations pointed to by `zExpPtr' and
1048 | `zSigPtr', respectively.
1049 *----------------------------------------------------------------------------*/
1050
normalizeFloatx80Subnormal(uint64_t aSig,int32_t * zExpPtr,uint64_t * zSigPtr)1051 void normalizeFloatx80Subnormal( uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr )
1052 {
1053 int8_t shiftCount;
1054
1055 shiftCount = countLeadingZeros64( aSig );
1056 *zSigPtr = aSig<<shiftCount;
1057 #ifdef SOFTFLOAT_68K
1058 *zExpPtr = -shiftCount;
1059 #else
1060 *zExpPtr = 1 - shiftCount;
1061 #endif
1062 }
1063
1064 /*----------------------------------------------------------------------------
1065 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
1066 | extended double-precision floating-point value, returning the result.
1067 *----------------------------------------------------------------------------*/
1068
packFloatx80(flag zSign,int32_t zExp,uint64_t zSig)1069 floatx80 packFloatx80( flag zSign, int32_t zExp, uint64_t zSig )
1070 {
1071 floatx80 z;
1072
1073 z.low = zSig;
1074 z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
1075 return z;
1076
1077 }
1078
1079 /*----------------------------------------------------------------------------
1080 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1081 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
1082 | and returns the proper extended double-precision floating-point value
1083 | corresponding to the abstract input. Ordinarily, the abstract value is
1084 | rounded and packed into the extended double-precision format, with the
1085 | inexact exception raised if the abstract input cannot be represented
1086 | exactly. However, if the abstract value is too large, the overflow and
1087 | inexact exceptions are raised and an infinity or maximal finite value is
1088 | returned. If the abstract value is too small, the input value is rounded to
1089 | a subnormal number, and the underflow and inexact exceptions are raised if
1090 | the abstract input cannot be represented exactly as a subnormal extended
1091 | double-precision floating-point number.
1092 | If `roundingPrecision' is 32 or 64, the result is rounded to the same
1093 | number of bits as single or double precision, respectively. Otherwise, the
1094 | result is rounded to the full precision of the extended double-precision
1095 | format.
1096 | The input significand must be normalized or smaller. If the input
1097 | significand is not normalized, `zExp' must be 0; in that case, the result
1098 | returned is a subnormal number, and it must not require rounding. The
1099 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
1100 | Floating-Point Arithmetic.
1101 *----------------------------------------------------------------------------*/
1102
1103 #ifndef SOFTFLOAT_68K
roundAndPackFloatx80(int8_t roundingPrecision,flag zSign,int32_t zExp,uint64_t zSig0,uint64_t zSig1,float_status * status)1104 floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
1105 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
1106 float_status *status)
1107 {
1108 int8_t roundingMode;
1109 flag roundNearestEven, increment, isTiny;
1110 int64_t roundIncrement, roundMask, roundBits;
1111
1112 roundingMode = status->float_rounding_mode;
1113 roundNearestEven = ( roundingMode == float_round_nearest_even );
1114 if ( roundingPrecision == 80 ) goto precision80;
1115 if ( roundingPrecision == 64 ) {
1116 roundIncrement = LIT64( 0x0000000000000400 );
1117 roundMask = LIT64( 0x00000000000007FF );
1118 }
1119 else if ( roundingPrecision == 32 ) {
1120 roundIncrement = LIT64( 0x0000008000000000 );
1121 roundMask = LIT64( 0x000000FFFFFFFFFF );
1122 }
1123 else {
1124 goto precision80;
1125 }
1126 zSig0 |= ( zSig1 != 0 );
1127 switch (roundingMode) {
1128 case float_round_nearest_even:
1129 case float_round_ties_away:
1130 break;
1131 case float_round_to_zero:
1132 roundIncrement = 0;
1133 break;
1134 case float_round_up:
1135 roundIncrement = zSign ? 0 : roundMask;
1136 break;
1137 case float_round_down:
1138 roundIncrement = zSign ? roundMask : 0;
1139 break;
1140 default:
1141 abort();
1142 }
1143 roundBits = zSig0 & roundMask;
1144 #ifdef SOFTFLOAT_68K
1145 if ( 0x7FFE <= (uint32_t) zExp ) {
1146 #else
1147 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
1148 #endif
1149 if ( ( 0x7FFE < zExp )
1150 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
1151 ) {
1152 goto overflow;
1153 }
1154 #ifdef SOFTFLOAT_68K
1155 if ( zExp < 0 ) {
1156 #else
1157 if ( zExp <= 0 ) {
1158 #endif
1159 if (status->flush_to_zero) {
1160 //float_raise(float_flag_output_denormal, status);
1161 return packFloatx80(zSign, 0, 0);
1162 }
1163 isTiny =
1164 (status->float_detect_tininess
1165 == float_tininess_before_rounding)
1166 #ifdef SOFTFLOAT_68K
1167 || ( zExp < -1 )
1168 #else
1169 || ( zExp < 0 )
1170 #endif
1171 || ( zSig0 <= zSig0 + roundIncrement );
1172 #ifdef SOFTFLOAT_68K
1173 if ( isTiny ) {
1174 float_raise( float_flag_underflow, status );
1175 saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status );
1176 }
1177 shift64RightJamming( zSig0, -zExp, &zSig0 );
1178 #else
1179 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
1180 #endif
1181 zExp = 0;
1182 roundBits = zSig0 & roundMask;
1183 #ifdef SOFTFLOAT_68K
1184 if ( isTiny ) float_raise( float_flag_underflow, status );
1185 #else
1186 if (isTiny && roundBits) {
1187 float_raise(float_flag_underflow, status);
1188 }
1189 #endif
1190 if (roundBits) {
1191 status->float_exception_flags |= float_flag_inexact;
1192 }
1193 zSig0 += roundIncrement;
1194 #ifndef SOFTFLOAT_68K
1195 if ( (int64_t) zSig0 < 0 ) zExp = 1;
1196 #endif
1197 roundIncrement = roundMask + 1;
1198 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
1199 roundMask |= roundIncrement;
1200 }
1201 zSig0 &= ~ roundMask;
1202 return packFloatx80( zSign, zExp, zSig0 );
1203 }
1204 }
1205 if (roundBits) {
1206 status->float_exception_flags |= float_flag_inexact;
1207 }
1208 zSig0 += roundIncrement;
1209 if ( zSig0 < roundIncrement ) {
1210 ++zExp;
1211 zSig0 = LIT64( 0x8000000000000000 );
1212 }
1213 roundIncrement = roundMask + 1;
1214 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
1215 roundMask |= roundIncrement;
1216 }
1217 zSig0 &= ~ roundMask;
1218 if ( zSig0 == 0 ) zExp = 0;
1219 return packFloatx80( zSign, zExp, zSig0 );
1220 precision80:
1221 switch (roundingMode) {
1222 case float_round_nearest_even:
1223 case float_round_ties_away:
1224 increment = ((int64_t)zSig1 < 0);
1225 break;
1226 case float_round_to_zero:
1227 increment = 0;
1228 break;
1229 case float_round_up:
1230 increment = !zSign && zSig1;
1231 break;
1232 case float_round_down:
1233 increment = zSign && zSig1;
1234 break;
1235 default:
1236 abort();
1237 }
1238 #ifdef SOFTFLOAT_68K
1239 if ( 0x7FFE <= (uint32_t) zExp ) {
1240 #else
1241 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
1242 #endif
1243 if ( ( 0x7FFE < zExp )
1244 || ( ( zExp == 0x7FFE )
1245 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
1246 && increment
1247 )
1248 ) {
1249 roundMask = 0;
1250 overflow:
1251 #ifndef SOFTFLOAT_68K
1252 float_raise(float_flag_overflow | float_flag_inexact, status);
1253 #else
1254 float_raise( float_flag_overflow, status );
1255 saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status );
1256 if ( ( zSig0 & roundMask ) || zSig1 ) float_raise( float_flag_inexact, status );
1257 #endif
1258 if ( ( roundingMode == float_round_to_zero )
1259 || ( zSign && ( roundingMode == float_round_up ) )
1260 || ( ! zSign && ( roundingMode == float_round_down ) )
1261 ) {
1262 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
1263 }
1264 return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low );
1265 }
1266 #ifdef SOFTFLOAT_68K
1267 if ( zExp < 0 ) {
1268 #else
1269 if ( zExp <= 0 ) {
1270 #endif
1271 isTiny =
1272 (status->float_detect_tininess
1273 == float_tininess_before_rounding)
1274 #ifdef SOFTFLOAT_68K
1275 || ( zExp < -1 )
1276 #else
1277 || ( zExp < 0 )
1278 #endif
1279 || ! increment
1280 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
1281 #ifdef SOFTFLOAT_68K
1282 if ( isTiny ) {
1283 float_raise( float_flag_underflow, status );
1284 saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status );
1285 }
1286 shift64ExtraRightJamming( zSig0, zSig1, -zExp, &zSig0, &zSig1 );
1287 #else
1288 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
1289 #endif
1290 zExp = 0;
1291 #ifndef SOFTFLOAT_68K
1292 if ( isTiny && zSig1 ) float_raise( float_flag_underflow, status );
1293 #endif
1294 if (zSig1) float_raise(float_flag_inexact, status);
1295 switch (roundingMode) {
1296 case float_round_nearest_even:
1297 case float_round_ties_away:
1298 increment = ((int64_t)zSig1 < 0);
1299 break;
1300 case float_round_to_zero:
1301 increment = 0;
1302 break;
1303 case float_round_up:
1304 increment = !zSign && zSig1;
1305 break;
1306 case float_round_down:
1307 increment = zSign && zSig1;
1308 break;
1309 default:
1310 abort();
1311 }
1312 if ( increment ) {
1313 ++zSig0;
1314 zSig0 &=
1315 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
1316 #ifndef SOFTFLOAT_68K
1317 if ( (int64_t) zSig0 < 0 ) zExp = 1;
1318 #endif
1319 }
1320 return packFloatx80( zSign, zExp, zSig0 );
1321 }
1322 }
1323 if (zSig1) {
1324 status->float_exception_flags |= float_flag_inexact;
1325 }
1326 if ( increment ) {
1327 ++zSig0;
1328 if ( zSig0 == 0 ) {
1329 ++zExp;
1330 zSig0 = LIT64( 0x8000000000000000 );
1331 }
1332 else {
1333 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
1334 }
1335 }
1336 else {
1337 if ( zSig0 == 0 ) zExp = 0;
1338 }
1339 return packFloatx80( zSign, zExp, zSig0 );
1340
1341 }
1342
1343 #else // SOFTFLOAT_68K
1344
1345 floatx80 roundAndPackFloatx80( int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status )
1346 {
1347 int8_t roundingMode;
1348 flag roundNearestEven, increment;
1349 int64_t roundIncrement, roundMask, roundBits;
1350 int32_t expOffset;
1351
1352 roundingMode = status->float_rounding_mode;
1353 roundNearestEven = ( roundingMode == float_round_nearest_even );
1354 if ( roundingPrecision == 80 ) goto precision80;
1355 if ( roundingPrecision == 64 ) {
1356 roundIncrement = LIT64( 0x0000000000000400 );
1357 roundMask = LIT64( 0x00000000000007FF );
1358 expOffset = 0x3C00;
1359 } else if ( roundingPrecision == 32 ) {
1360 roundIncrement = LIT64( 0x0000008000000000 );
1361 roundMask = LIT64( 0x000000FFFFFFFFFF );
1362 expOffset = 0x3F80;
1363 } else {
1364 goto precision80;
1365 }
1366 zSig0 |= ( zSig1 != 0 );
1367 if ( ! roundNearestEven ) {
1368 if ( roundingMode == float_round_to_zero ) {
1369 roundIncrement = 0;
1370 } else {
1371 roundIncrement = roundMask;
1372 if ( zSign ) {
1373 if ( roundingMode == float_round_up ) roundIncrement = 0;
1374 } else {
1375 if ( roundingMode == float_round_down ) roundIncrement = 0;
1376 }
1377 }
1378 }
1379 roundBits = zSig0 & roundMask;
1380 if ( ( ( 0x7FFE - expOffset ) < zExp ) ||
1381 ( ( zExp == ( 0x7FFE - expOffset ) ) && ( zSig0 + roundIncrement < zSig0 ) ) ) {
1382 float_raise( float_flag_overflow, status );
1383 saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status );
1384 if ( zSig0 & roundMask ) float_raise( float_flag_inexact, status );
1385 if ( ( roundingMode == float_round_to_zero )
1386 || ( zSign && ( roundingMode == float_round_up ) )
1387 || ( ! zSign && ( roundingMode == float_round_down ) )
1388 ) {
1389 return packFloatx80( zSign, 0x7FFE - expOffset, ~ roundMask );
1390 }
1391 return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low );
1392 }
1393 if ( zExp < ( expOffset + 1 ) ) {
1394 float_raise( float_flag_underflow, status );
1395 saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status );
1396 shift64RightJamming( zSig0, -( zExp - ( expOffset + 1 ) ), &zSig0 );
1397 zExp = expOffset + 1;
1398 roundBits = zSig0 & roundMask;
1399 if ( roundBits ) float_raise( float_flag_inexact, status );
1400 zSig0 += roundIncrement;
1401 roundIncrement = roundMask + 1;
1402 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
1403 roundMask |= roundIncrement;
1404 }
1405 zSig0 &= ~ roundMask;
1406 return packFloatx80( zSign, zExp, zSig0 );
1407 }
1408 if ( roundBits ) {
1409 float_raise( float_flag_inexact, status );
1410 saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status);
1411 }
1412 zSig0 += roundIncrement;
1413 if ( zSig0 < roundIncrement ) {
1414 ++zExp;
1415 zSig0 = LIT64( 0x8000000000000000 );
1416 }
1417 roundIncrement = roundMask + 1;
1418 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
1419 roundMask |= roundIncrement;
1420 }
1421 zSig0 &= ~ roundMask;
1422 if ( zSig0 == 0 ) zExp = 0;
1423 return packFloatx80( zSign, zExp, zSig0 );
1424 precision80:
1425 increment = ( (int64_t) zSig1 < 0 );
1426 if ( ! roundNearestEven ) {
1427 if ( roundingMode == float_round_to_zero ) {
1428 increment = 0;
1429 } else {
1430 if ( zSign ) {
1431 increment = ( roundingMode == float_round_down ) && zSig1;
1432 } else {
1433 increment = ( roundingMode == float_round_up ) && zSig1;
1434 }
1435 }
1436 }
1437 if ( 0x7FFE <= (uint32_t) zExp ) {
1438 if ( ( 0x7FFE < zExp ) ||
1439 ( ( zExp == 0x7FFE ) && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) && increment )
1440 ) {
1441 roundMask = 0;
1442 float_raise( float_flag_overflow, status );
1443 saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status );
1444 if ( ( zSig0 & roundMask ) || zSig1 ) float_raise( float_flag_inexact, status );
1445 if ( ( roundingMode == float_round_to_zero )
1446 || ( zSign && ( roundingMode == float_round_up ) )
1447 || ( ! zSign && ( roundingMode == float_round_down ) )
1448 ) {
1449 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
1450 }
1451 return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low );
1452 }
1453 if ( zExp < 0 ) {
1454 float_raise( float_flag_underflow, status );
1455 saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status);
1456 shift64ExtraRightJamming( zSig0, zSig1, -zExp, &zSig0, &zSig1 );
1457 zExp = 0;
1458 if ( zSig1 ) float_raise( float_flag_inexact, status );
1459 if ( roundNearestEven ) {
1460 increment = ( (int64_t) zSig1 < 0 );
1461 } else {
1462 if ( zSign ) {
1463 increment = ( roundingMode == float_round_down ) && zSig1;
1464 } else {
1465 increment = ( roundingMode == float_round_up ) && zSig1;
1466 }
1467 }
1468 if ( increment ) {
1469 ++zSig0;
1470 zSig0 &=
1471 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
1472 }
1473 return packFloatx80( zSign, zExp, zSig0 );
1474 }
1475 }
1476 if ( zSig1 ) {
1477 float_raise( float_flag_inexact, status );
1478 saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status );
1479 }
1480 if ( increment ) {
1481 ++zSig0;
1482 if ( zSig0 == 0 ) {
1483 ++zExp;
1484 zSig0 = LIT64( 0x8000000000000000 );
1485 } else {
1486 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
1487 }
1488 } else {
1489 if ( zSig0 == 0 ) zExp = 0;
1490 }
1491 return packFloatx80( zSign, zExp, zSig0 );
1492
1493 }
1494
1495 #endif
1496
1497 #ifdef SOFTFLOAT_68K // 21-01-2017: Added for Previous
1498 floatx80 roundSigAndPackFloatx80( int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status )
1499 {
1500 int8_t roundingMode;
1501 flag roundNearestEven, isTiny;
1502 int64_t roundIncrement, roundMask, roundBits;
1503
1504 roundingMode = status->float_rounding_mode;
1505 roundNearestEven = ( roundingMode == float_round_nearest_even );
1506 if ( roundingPrecision == 32 ) {
1507 roundIncrement = LIT64( 0x0000008000000000 );
1508 roundMask = LIT64( 0x000000FFFFFFFFFF );
1509 } else if ( roundingPrecision == 64 ) {
1510 roundIncrement = LIT64( 0x0000000000000400 );
1511 roundMask = LIT64( 0x00000000000007FF );
1512 } else {
1513 return roundAndPackFloatx80( 80, zSign, zExp, zSig0, zSig1, status );
1514 }
1515 zSig0 |= ( zSig1 != 0 );
1516 if ( ! roundNearestEven ) {
1517 if ( roundingMode == float_round_to_zero ) {
1518 roundIncrement = 0;
1519 }
1520 else {
1521 roundIncrement = roundMask;
1522 if ( zSign ) {
1523 if ( roundingMode == float_round_up ) roundIncrement = 0;
1524 }
1525 else {
1526 if ( roundingMode == float_round_down ) roundIncrement = 0;
1527 }
1528 }
1529 }
1530 roundBits = zSig0 & roundMask;
1531
1532 if ( 0x7FFE <= (uint32_t) zExp ) {
1533 if ( ( 0x7FFE < zExp )
1534 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
1535 ) {
1536 float_raise( float_flag_overflow, status );
1537 saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status);
1538 if ( zSig0 & roundMask ) float_raise( float_flag_inexact, status );
1539 if ( ( roundingMode == float_round_to_zero )
1540 || ( zSign && ( roundingMode == float_round_up ) )
1541 || ( ! zSign && ( roundingMode == float_round_down ) )
1542 ) {
1543 return packFloatx80( zSign, 0x7FFE, LIT64( 0xFFFFFFFFFFFFFFFF ) );
1544 }
1545 return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low );
1546 }
1547
1548 if ( zExp < 0 ) {
1549 isTiny =
1550 ( status->float_detect_tininess == float_tininess_before_rounding )
1551 || ( zExp < -1 )
1552 || ( zSig0 <= zSig0 + roundIncrement );
1553 if ( isTiny ) {
1554 float_raise( float_flag_underflow, status );
1555 saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status );
1556 }
1557 shift64RightJamming( zSig0, -zExp, &zSig0 );
1558 zExp = 0;
1559 roundBits = zSig0 & roundMask;
1560 if ( roundBits ) float_raise ( float_flag_inexact, status );
1561 zSig0 += roundIncrement;
1562 if ( roundNearestEven && ( roundBits == roundIncrement ) ) {
1563 roundMask |= roundIncrement<<1;
1564 }
1565 zSig0 &= ~roundMask;
1566 return packFloatx80( zSign, zExp, zSig0 );
1567 }
1568 }
1569 if ( roundBits ) {
1570 float_raise( float_flag_inexact, status );
1571 saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status );
1572 }
1573 zSig0 += roundIncrement;
1574 if ( zSig0 < roundIncrement ) {
1575 ++zExp;
1576 zSig0 = LIT64( 0x8000000000000000 );
1577 }
1578 roundIncrement = roundMask + 1;
1579 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
1580 roundMask |= roundIncrement;
1581 }
1582 zSig0 &= ~ roundMask;
1583 if ( zSig0 == 0 ) zExp = 0;
1584 return packFloatx80( zSign, zExp, zSig0 );
1585
1586 }
1587 #endif // End of Addition for Previous
1588
1589
1590 /*----------------------------------------------------------------------------
1591 | Takes an abstract floating-point value having sign `zSign', exponent
1592 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
1593 | and returns the proper extended double-precision floating-point value
1594 | corresponding to the abstract input. This routine is just like
1595 | `roundAndPackFloatx80' except that the input significand does not have to be
1596 | normalized.
1597 *----------------------------------------------------------------------------*/
1598
1599 static floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
1600 flag zSign, int32_t zExp,
1601 uint64_t zSig0, uint64_t zSig1,
1602 float_status *status)
1603 {
1604 int8_t shiftCount;
1605
1606 if ( zSig0 == 0 ) {
1607 zSig0 = zSig1;
1608 zSig1 = 0;
1609 zExp -= 64;
1610 }
1611 shiftCount = countLeadingZeros64( zSig0 );
1612 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1613 zExp -= shiftCount;
1614 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
1615 zSig0, zSig1, status);
1616
1617 }
1618
1619 /*----------------------------------------------------------------------------
1620 | Returns the result of converting the 32-bit two's complement integer `a'
1621 | to the extended double-precision floating-point format. The conversion
1622 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
1623 | Arithmetic.
1624 *----------------------------------------------------------------------------*/
1625
1626 floatx80 int32_to_floatx80(int32_t a)
1627 {
1628 flag zSign;
1629 uint32_t absA;
1630 int8_t shiftCount;
1631 uint64_t zSig;
1632
1633 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1634 zSign = ( a < 0 );
1635 absA = zSign ? - a : a;
1636 shiftCount = countLeadingZeros32( absA ) + 32;
1637 zSig = absA;
1638 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1639
1640 }
1641
1642 /*----------------------------------------------------------------------------
1643 | Returns the result of converting the single-precision floating-point value
1644 | `a' to the extended double-precision floating-point format. The conversion
1645 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
1646 | Arithmetic.
1647 *----------------------------------------------------------------------------*/
1648
1649 floatx80 float32_to_floatx80(float32 a, float_status *status)
1650 {
1651 flag aSign;
1652 int aExp;
1653 uint32_t aSig;
1654
1655 aSig = extractFloat32Frac( a );
1656 aExp = extractFloat32Exp( a );
1657 aSign = extractFloat32Sign( a );
1658 if ( aExp == 0xFF ) {
1659 if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a, status ), status );
1660 return packFloatx80( aSign, 0x7FFF, floatx80_default_infinity_low );
1661 }
1662 if ( aExp == 0 ) {
1663 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1664 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1665 }
1666 aSig |= 0x00800000;
1667 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
1668
1669 }
1670
1671 #ifdef SOFTFLOAT_68K // 31-12-2016: Added for Previous
1672 floatx80 float32_to_floatx80_allowunnormal(float32 a , float_status *status)
1673 {
1674 flag aSign;
1675 int16_t aExp;
1676 uint32_t aSig;
1677
1678 aSig = extractFloat32Frac(a);
1679 aExp = extractFloat32Exp(a);
1680 aSign = extractFloat32Sign(a);
1681 if (aExp == 0xFF) {
1682 return packFloatx80( aSign, 0x7FFF, ( (uint64_t) aSig )<<40 );
1683 }
1684 if (aExp == 0) {
1685 if (aSig == 0) return packFloatx80(aSign, 0, 0);
1686 return packFloatx80(aSign, 0x3F81, ((uint64_t) aSig) << 40);
1687 }
1688 aSig |= 0x00800000;
1689 return packFloatx80(aSign, aExp + 0x3F80, ((uint64_t)aSig) << 40);
1690
1691 }
1692 #endif // end of addition for Previous
1693
1694 /*----------------------------------------------------------------------------
1695 | Returns the result of converting the double-precision floating-point value
1696 | `a' to the extended double-precision floating-point format. The conversion
1697 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
1698 | Arithmetic.
1699 *----------------------------------------------------------------------------*/
1700
1701 floatx80 float64_to_floatx80(float64 a, float_status *status)
1702 {
1703 flag aSign;
1704 int aExp;
1705 uint64_t aSig;
1706
1707 aSig = extractFloat64Frac( a );
1708 aExp = extractFloat64Exp( a );
1709 aSign = extractFloat64Sign( a );
1710 if ( aExp == 0x7FF ) {
1711 if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a, status ), status );
1712 return packFloatx80( aSign, 0x7FFF, floatx80_default_infinity_low );
1713 }
1714 if ( aExp == 0 ) {
1715 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1716 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
1717 }
1718 return
1719 packFloatx80(
1720 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
1721
1722 }
1723
1724 #ifdef SOFTFLOAT_68K // 31-12-2016: Added for Previous
1725 floatx80 float64_to_floatx80_allowunnormal( float64 a, float_status *status )
1726 {
1727 flag aSign;
1728 int16_t aExp;
1729 uint64_t aSig;
1730
1731 aSig = extractFloat64Frac( a );
1732 aExp = extractFloat64Exp( a );
1733 aSign = extractFloat64Sign( a );
1734 if ( aExp == 0x7FF ) {
1735 return packFloatx80( aSign, 0x7FFF, aSig<<11 );
1736 }
1737 if ( aExp == 0 ) {
1738 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1739 return packFloatx80( aSign, 0x3C01, aSig<<11 );
1740 }
1741 return
1742 packFloatx80(
1743 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
1744
1745 }
1746 #endif // end of addition for Previous
1747
1748 /*----------------------------------------------------------------------------
1749 | Returns the result of converting the extended double-precision floating-
1750 | point value `a' to the 32-bit two's complement integer format. The
1751 | conversion is performed according to the IEC/IEEE Standard for Binary
1752 | Floating-Point Arithmetic---which means in particular that the conversion
1753 | is rounded according to the current rounding mode. If `a' is a NaN, the
1754 | largest positive integer is returned. Otherwise, if the conversion
1755 | overflows, the largest integer with the same sign as `a' is returned.
1756 *----------------------------------------------------------------------------*/
1757
1758 int32_t floatx80_to_int32(floatx80 a, float_status *status)
1759 {
1760 flag aSign;
1761 int32_t aExp, shiftCount;
1762 uint64_t aSig;
1763
1764 aSig = extractFloatx80Frac( a );
1765 aExp = extractFloatx80Exp( a );
1766 aSign = extractFloatx80Sign( a );
1767 #ifdef SOFTFLOAT_68K
1768 if ( aExp == 0x7FFF ) {
1769 if ( (uint64_t) ( aSig<<1 ) ) {
1770 a = propagateFloatx80NaNOneArg( a, status );
1771 if ( a.low == aSig ) float_raise( float_flag_invalid, status );
1772 return (int32_t)(a.low>>32);
1773 }
1774 float_raise( float_flag_invalid, status );
1775 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
1776 }
1777 #else
1778 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
1779 #endif
1780 shiftCount = 0x4037 - aExp;
1781 if ( shiftCount <= 0 ) shiftCount = 1;
1782 shift64RightJamming( aSig, shiftCount, &aSig );
1783 return roundAndPackInt32(aSign, aSig, status);
1784
1785 }
1786
1787 #ifdef SOFTFLOAT_68K // 30-01-2017: Addition for Previous
1788 int16_t floatx80_to_int16( floatx80 a, float_status *status)
1789 {
1790 flag aSign;
1791 int32_t aExp, shiftCount;
1792 uint64_t aSig;
1793
1794 aSig = extractFloatx80Frac( a );
1795 aExp = extractFloatx80Exp( a );
1796 aSign = extractFloatx80Sign( a );
1797 if ( aExp == 0x7FFF ) {
1798 float_raise( float_flag_invalid, status );
1799 if ( (uint64_t) ( aSig<<1 ) ) {
1800 a = propagateFloatx80NaNOneArg( a, status );
1801 if ( a.low == aSig ) float_raise( float_flag_invalid, status );
1802 return (int16_t)(a.low>>48);
1803 }
1804 return aSign ? (int16_t) 0x8000 : 0x7FFF;
1805 }
1806 shiftCount = 0x4037 - aExp;
1807 if ( shiftCount <= 0 ) shiftCount = 1;
1808 shift64RightJamming( aSig, shiftCount, &aSig );
1809 return roundAndPackInt16( aSign, aSig, status );
1810
1811 }
1812 int8_t floatx80_to_int8( floatx80 a, float_status *status)
1813 {
1814 flag aSign;
1815 int32_t aExp, shiftCount;
1816 uint64_t aSig;
1817
1818 aSig = extractFloatx80Frac( a );
1819 aExp = extractFloatx80Exp( a );
1820 aSign = extractFloatx80Sign( a );
1821 if ( aExp == 0x7FFF ) {
1822 if ( (uint64_t) ( aSig<<1 ) ) {
1823 a = propagateFloatx80NaNOneArg( a, status );
1824 if ( a.low == aSig ) float_raise( float_flag_invalid, status );
1825 return (int8_t)(a.low>>56);
1826 }
1827 float_raise( float_flag_invalid, status );
1828 return aSign ? (int8_t) 0x80 : 0x7F;
1829 }
1830 shiftCount = 0x4037 - aExp;
1831 if ( shiftCount <= 0 ) shiftCount = 1;
1832 shift64RightJamming( aSig, shiftCount, &aSig );
1833 return roundAndPackInt8( aSign, aSig, status );
1834
1835 }
1836 #endif // End of addition for Previous
1837
1838
1839 /*----------------------------------------------------------------------------
1840 | Returns the result of converting the extended double-precision floating-
1841 | point value `a' to the 32-bit two's complement integer format. The
1842 | conversion is performed according to the IEC/IEEE Standard for Binary
1843 | Floating-Point Arithmetic, except that the conversion is always rounded
1844 | toward zero. If `a' is a NaN, the largest positive integer is returned.
1845 | Otherwise, if the conversion overflows, the largest integer with the same
1846 | sign as `a' is returned.
1847 *----------------------------------------------------------------------------*/
1848
1849 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
1850 {
1851 flag aSign;
1852 int32_t aExp, shiftCount;
1853 uint64_t aSig, savedASig;
1854 int32_t z;
1855
1856 if (floatx80_invalid_encoding(a)) {
1857 float_raise(float_flag_invalid, status);
1858 return 1 << 31;
1859 }
1860 aSig = extractFloatx80Frac( a );
1861 aExp = extractFloatx80Exp( a );
1862 aSign = extractFloatx80Sign( a );
1863 if ( 0x401E < aExp ) {
1864 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
1865 goto invalid;
1866 }
1867 else if ( aExp < 0x3FFF ) {
1868 if (aExp || aSig) {
1869 status->float_exception_flags |= float_flag_inexact;
1870 }
1871 return 0;
1872 }
1873 shiftCount = 0x403E - aExp;
1874 savedASig = aSig;
1875 aSig >>= shiftCount;
1876 z = aSig;
1877 if ( aSign ) z = - z;
1878 if ( ( z < 0 ) ^ aSign ) {
1879 invalid:
1880 float_raise(float_flag_invalid, status);
1881 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
1882 }
1883 if ( ( aSig<<shiftCount ) != savedASig ) {
1884 status->float_exception_flags |= float_flag_inexact;
1885 }
1886 return z;
1887
1888 }
1889
1890 /*----------------------------------------------------------------------------
1891 | Returns the result of converting the extended double-precision floating-
1892 | point value `a' to the 64-bit two's complement integer format. The
1893 | conversion is performed according to the IEC/IEEE Standard for Binary
1894 | Floating-Point Arithmetic---which means in particular that the conversion
1895 | is rounded according to the current rounding mode. If `a' is a NaN,
1896 | the largest positive integer is returned. Otherwise, if the conversion
1897 | overflows, the largest integer with the same sign as `a' is returned.
1898 *----------------------------------------------------------------------------*/
1899
1900 int64_t floatx80_to_int64(floatx80 a, float_status *status)
1901 {
1902 flag aSign;
1903 int32_t aExp, shiftCount;
1904 uint64_t aSig, aSigExtra;
1905
1906 if (floatx80_invalid_encoding(a)) {
1907 float_raise(float_flag_invalid, status);
1908 return 1ULL << 63;
1909 }
1910 aSig = extractFloatx80Frac( a );
1911 aExp = extractFloatx80Exp( a );
1912 aSign = extractFloatx80Sign( a );
1913 shiftCount = 0x403E - aExp;
1914 if ( shiftCount <= 0 ) {
1915 if ( shiftCount ) {
1916 float_raise(float_flag_invalid, status);
1917 if ( ! aSign
1918 || ( ( aExp == 0x7FFF )
1919 && ( aSig != LIT64( 0x8000000000000000 ) ) )
1920 ) {
1921 return LIT64( 0x7FFFFFFFFFFFFFFF );
1922 }
1923 return (int64_t) LIT64( 0x8000000000000000 );
1924 }
1925 aSigExtra = 0;
1926 }
1927 else {
1928 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
1929 }
1930 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
1931
1932 }
1933
1934 /*----------------------------------------------------------------------------
1935 | Returns the result of converting the extended double-precision floating-
1936 | point value `a' to the single-precision floating-point format. The
1937 | conversion is performed according to the IEC/IEEE Standard for Binary
1938 | Floating-Point Arithmetic.
1939 *----------------------------------------------------------------------------*/
1940
1941 float32 floatx80_to_float32(floatx80 a, float_status *status)
1942 {
1943 flag aSign;
1944 int32_t aExp;
1945 uint64_t aSig;
1946
1947 aSig = extractFloatx80Frac( a );
1948 aExp = extractFloatx80Exp( a );
1949 aSign = extractFloatx80Sign( a );
1950 if ( aExp == 0x7FFF ) {
1951 if ( (uint64_t) ( aSig<<1 ) ) {
1952 return commonNaNToFloat32(floatx80ToCommonNaN(a, status));
1953 }
1954 return packFloat32( aSign, 0xFF, 0 );
1955 }
1956 #ifdef SOFTFLOAT_68K
1957 if ( aExp == 0 ) {
1958 if ( aSig == 0) return packFloat32( aSign, 0, 0 );
1959 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
1960 }
1961 shift64RightJamming( aSig, 33, &aSig );
1962 aExp -= 0x3F81;
1963 #else
1964 shift64RightJamming( aSig, 33, &aSig );
1965 if ( aExp || aSig ) aExp -= 0x3F81;
1966 #endif
1967 return roundAndPackFloat32(aSign, aExp, aSig, status);
1968
1969 }
1970
1971 /*----------------------------------------------------------------------------
1972 | Returns the result of converting the extended double-precision floating-
1973 | point value `a' to the double-precision floating-point format. The
1974 | conversion is performed according to the IEC/IEEE Standard for Binary
1975 | Floating-Point Arithmetic.
1976 *----------------------------------------------------------------------------*/
1977
1978 float64 floatx80_to_float64(floatx80 a, float_status *status)
1979 {
1980 flag aSign;
1981 int32_t aExp;
1982 uint64_t aSig, zSig;
1983
1984 aSig = extractFloatx80Frac( a );
1985 aExp = extractFloatx80Exp( a );
1986 aSign = extractFloatx80Sign( a );
1987 if ( aExp == 0x7FFF ) {
1988 if ( (uint64_t) ( aSig<<1 ) ) {
1989 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
1990 }
1991 return packFloat64( aSign, 0x7FF, 0 );
1992 }
1993 #ifdef SOFTFLOAT_68K
1994 if ( aExp == 0 ) {
1995 if ( aSig == 0) return packFloat64( aSign, 0, 0 );
1996 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
1997 }
1998 shift64RightJamming( aSig, 1, &zSig );
1999 aExp -= 0x3C01;
2000 #else
2001 shift64RightJamming( aSig, 1, &zSig );
2002 if ( aExp || aSig ) aExp -= 0x3C01;
2003 #endif
2004 return roundAndPackFloat64(aSign, aExp, zSig, status);
2005
2006 }
2007
2008 #ifdef SOFTFLOAT_68K // 31-01-2017
2009 /*----------------------------------------------------------------------------
2010 | Returns the result of converting the extended double-precision floating-
2011 | point value `a' to the extended double-precision floating-point format.
2012 | The conversion is performed according to the IEC/IEEE Standard for Binary
2013 | Floating-Point Arithmetic.
2014 *----------------------------------------------------------------------------*/
2015
2016 floatx80 floatx80_to_floatx80( floatx80 a, float_status *status )
2017 {
2018 flag aSign;
2019 int32_t aExp;
2020 uint64_t aSig;
2021
2022 aSig = extractFloatx80Frac( a );
2023 aExp = extractFloatx80Exp( a );
2024 aSign = extractFloatx80Sign( a );
2025
2026 if ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) ) {
2027 return propagateFloatx80NaNOneArg( a, status );
2028 }
2029 if ( aExp == 0 && aSig != 0 ) {
2030 return normalizeRoundAndPackFloatx80( status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status );
2031 }
2032 return a;
2033
2034 }
2035 #endif
2036
2037 #ifdef SOFTFLOAT_68K // 30-01-2016: Added for Previous
2038 floatx80 floatx80_round32( floatx80 a, float_status *status )
2039 {
2040 flag aSign;
2041 int32_t aExp;
2042 uint64_t aSig;
2043
2044 aSig = extractFloatx80Frac( a );
2045 aExp = extractFloatx80Exp( a );
2046 aSign = extractFloatx80Sign( a );
2047
2048 if ( aExp == 0x7FFF || aSig == 0 ) {
2049 return a;
2050 }
2051 if ( aExp == 0 ) {
2052 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2053 }
2054
2055 return roundSigAndPackFloatx80( 32, aSign, aExp, aSig, 0, status );
2056 }
2057
2058 floatx80 floatx80_round64( floatx80 a, float_status *status )
2059 {
2060 flag aSign;
2061 int32_t aExp;
2062 uint64_t aSig;
2063
2064 aSig = extractFloatx80Frac( a );
2065 aExp = extractFloatx80Exp( a );
2066 aSign = extractFloatx80Sign( a );
2067
2068 if ( aExp == 0x7FFF || aSig == 0 ) {
2069 return a;
2070 }
2071 if ( aExp == 0 ) {
2072 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2073 }
2074
2075 return roundSigAndPackFloatx80( 64, aSign, aExp, aSig, 0, status );
2076 }
2077
2078 floatx80 floatx80_round_to_float32( floatx80 a, float_status *status )
2079 {
2080 flag aSign;
2081 int32_t aExp;
2082 uint64_t aSig;
2083
2084 aSign = extractFloatx80Sign( a );
2085 aSig = extractFloatx80Frac( a );
2086 aExp = extractFloatx80Exp( a );
2087
2088 if ( aExp == 0x7FFF ) {
2089 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status );
2090 return a;
2091 }
2092 if ( aExp == 0 ) {
2093 if ( aSig == 0 ) return a;
2094 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2095 }
2096
2097 return roundAndPackFloatx80( 32, aSign, aExp, aSig, 0, status );
2098 }
2099
2100 floatx80 floatx80_round_to_float64( floatx80 a, float_status *status )
2101 {
2102 flag aSign;
2103 int32_t aExp;
2104 uint64_t aSig;
2105
2106 aSign = extractFloatx80Sign( a );
2107 aSig = extractFloatx80Frac( a );
2108 aExp = extractFloatx80Exp( a );
2109
2110 if ( aExp == 0x7FFF ) {
2111 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status );
2112 return a;
2113 }
2114 if ( aExp == 0 ) {
2115 if ( aSig == 0 ) return a;
2116 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2117 }
2118
2119 return roundAndPackFloatx80( 64, aSign, aExp, aSig, 0, status );
2120 }
2121
2122
2123 floatx80 floatx80_normalize( floatx80 a )
2124 {
2125 flag aSign;
2126 int16_t aExp;
2127 uint64_t aSig;
2128 int8_t shiftCount;
2129
2130 aSig = extractFloatx80Frac( a );
2131 aExp = extractFloatx80Exp( a );
2132 aSign = extractFloatx80Sign( a );
2133
2134 if ( aExp == 0x7FFF || aExp == 0 ) return a;
2135 if ( aSig == 0 ) return packFloatx80(aSign, 0, 0);
2136
2137 shiftCount = countLeadingZeros64( aSig );
2138
2139 if ( shiftCount > aExp ) shiftCount = aExp;
2140
2141 aExp -= shiftCount;
2142 aSig <<= shiftCount;
2143
2144 return packFloatx80( aSign, aExp, aSig );
2145 }
2146 #endif // end of addition for Previous
2147
2148 /*----------------------------------------------------------------------------
2149 | Rounds the extended double-precision floating-point value `a' to an integer,
2150 | and returns the result as an extended quadruple-precision floating-point
2151 | value. The operation is performed according to the IEC/IEEE Standard for
2152 | Binary Floating-Point Arithmetic.
2153 *----------------------------------------------------------------------------*/
2154
2155 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
2156 {
2157 flag aSign;
2158 int32_t aExp;
2159 uint64_t lastBitMask, roundBitsMask;
2160 int8_t roundingMode;
2161 floatx80 z;
2162
2163 roundingMode = status->float_rounding_mode;
2164 aSign = extractFloatx80Sign(a);
2165 aExp = extractFloatx80Exp( a );
2166 if ( 0x403E <= aExp ) {
2167 if ( aExp == 0x7FFF ) {
2168 if ((uint64_t) ( extractFloatx80Frac( a )<<1 ) )
2169 return propagateFloatx80NaNOneArg(a, status);
2170 return inf_clear_intbit(status) ? packFloatx80(aSign, aExp, 0) : a;
2171 }
2172 return a;
2173 }
2174 if ( aExp < 0x3FFF ) {
2175 if ( ( aExp == 0 )
2176 #ifdef SOFTFLOAT_68K
2177 && ( (uint64_t) extractFloatx80Frac( a ) == 0 ) ) {
2178 #else
2179 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
2180 #endif
2181 return a;
2182 }
2183 status->float_exception_flags |= float_flag_inexact;
2184 switch (status->float_rounding_mode) {
2185 case float_round_nearest_even:
2186 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
2187 ) {
2188 return
2189 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
2190 }
2191 break;
2192 case float_round_ties_away:
2193 if (aExp == 0x3FFE) {
2194 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
2195 }
2196 break;
2197 case float_round_down:
2198 return
2199 aSign ?
2200 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
2201 : packFloatx80( 0, 0, 0 );
2202 case float_round_up:
2203 return
2204 aSign ? packFloatx80( 1, 0, 0 )
2205 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
2206 }
2207 return packFloatx80( aSign, 0, 0 );
2208 }
2209 lastBitMask = 1;
2210 lastBitMask <<= 0x403E - aExp;
2211 roundBitsMask = lastBitMask - 1;
2212 z = a;
2213 switch (status->float_rounding_mode) {
2214 case float_round_nearest_even:
2215 z.low += lastBitMask>>1;
2216 if ((z.low & roundBitsMask) == 0) {
2217 z.low &= ~lastBitMask;
2218 }
2219 break;
2220 case float_round_ties_away:
2221 z.low += lastBitMask >> 1;
2222 break;
2223 case float_round_to_zero:
2224 break;
2225 case float_round_up:
2226 if (!extractFloatx80Sign(z)) {
2227 z.low += roundBitsMask;
2228 }
2229 break;
2230 case float_round_down:
2231 if (extractFloatx80Sign(z)) {
2232 z.low += roundBitsMask;
2233 }
2234 break;
2235 default:
2236 abort();
2237 }
2238 z.low &= ~ roundBitsMask;
2239 if ( z.low == 0 ) {
2240 ++z.high;
2241 z.low = LIT64( 0x8000000000000000 );
2242 }
2243 if (z.low != a.low) {
2244 status->float_exception_flags |= float_flag_inexact;
2245 }
2246 return z;
2247
2248 }
2249
2250 #ifdef SOFTFLOAT_68K // 09-01-2017: Added for Previous
2251 floatx80 floatx80_round_to_int_toward_zero( floatx80 a, float_status *status)
2252 {
2253 flag aSign;
2254 int32_t aExp;
2255 uint64_t lastBitMask, roundBitsMask;
2256 floatx80 z;
2257
2258 aSign = extractFloatx80Sign(a);
2259 aExp = extractFloatx80Exp( a );
2260 if ( 0x403E <= aExp ) {
2261 if ( aExp == 0x7FFF ) {
2262 if ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
2263 return propagateFloatx80NaNOneArg( a, status );
2264 return inf_clear_intbit(status) ? packFloatx80(aSign, aExp, 0) : a;
2265 }
2266 return a;
2267 }
2268 if ( aExp < 0x3FFF ) {
2269 if ( ( aExp == 0 )
2270 #ifdef SOFTFLOAT_68K
2271 && ( (uint64_t) extractFloatx80Frac( a ) == 0 ) ) {
2272 #else
2273 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
2274 #endif
2275 return a;
2276 }
2277 status->float_exception_flags |= float_flag_inexact;
2278 return packFloatx80( aSign, 0, 0 );
2279 }
2280 lastBitMask = 1;
2281 lastBitMask <<= 0x403E - aExp;
2282 roundBitsMask = lastBitMask - 1;
2283 z = a;
2284 z.low &= ~ roundBitsMask;
2285 if ( z.low == 0 ) {
2286 ++z.high;
2287 z.low = LIT64( 0x8000000000000000 );
2288 }
2289 if ( z.low != a.low ) status->float_exception_flags |= float_flag_inexact;
2290 return z;
2291
2292 }
2293 #endif // End of addition for Previous
2294
2295 /*----------------------------------------------------------------------------
2296 | Returns the result of adding the absolute values of the extended double-
2297 | precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
2298 | negated before being returned. `zSign' is ignored if the result is a NaN.
2299 | The addition is performed according to the IEC/IEEE Standard for Binary
2300 | Floating-Point Arithmetic.
2301 *----------------------------------------------------------------------------*/
2302
2303 static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
2304 float_status *status)
2305 {
2306 int32_t aExp, bExp, zExp;
2307 uint64_t aSig, bSig, zSig0, zSig1;
2308 int32_t expDiff;
2309
2310 aSig = extractFloatx80Frac( a );
2311 aExp = extractFloatx80Exp( a );
2312 bSig = extractFloatx80Frac( b );
2313 bExp = extractFloatx80Exp( b );
2314 #ifdef SOFTFLOAT_68K
2315 if ( aExp == 0 ) {
2316 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2317 }
2318 if ( bExp == 0 ) {
2319 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2320 }
2321 #endif
2322 expDiff = aExp - bExp;
2323 if ( 0 < expDiff ) {
2324 if ( aExp == 0x7FFF ) {
2325 if ((uint64_t)(aSig << 1))
2326 return propagateFloatx80NaN(a, b, status);
2327 return inf_clear_intbit(status) ? packFloatx80(extractFloatx80Sign(a), aExp, 0) : a;
2328 }
2329 #ifndef SOFTFLOAT_68K
2330 if ( bExp == 0 ) --expDiff;
2331 #endif
2332 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
2333 zExp = aExp;
2334 }
2335 else if ( expDiff < 0 ) {
2336 if ( bExp == 0x7FFF ) {
2337 if ((uint64_t)(bSig << 1))
2338 return propagateFloatx80NaN(a, b, status);
2339 if (inf_clear_intbit(status)) bSig = 0;
2340 return packFloatx80( zSign, bExp, bSig );
2341 }
2342 #ifndef SOFTFLOAT_68K
2343 if ( aExp == 0 ) ++expDiff;
2344 #endif
2345 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
2346 zExp = bExp;
2347 }
2348 else {
2349 if ( aExp == 0x7FFF ) {
2350 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
2351 return propagateFloatx80NaN(a, b, status);
2352 }
2353 if (inf_clear_intbit(status)) return packFloatx80(extractFloatx80Sign(a), aExp, 0);
2354 return faddsub_swap_inf(status) ? b : a;
2355 }
2356 zSig1 = 0;
2357 zSig0 = aSig + bSig;
2358 #ifndef SOFTFLOAT_68K
2359 if ( aExp == 0 ) {
2360 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
2361 goto roundAndPack;
2362 }
2363 #endif
2364 zExp = aExp;
2365 #ifdef SOFTFLOAT_68K
2366 if ( aSig == 0 && bSig == 0 ) return packFloatx80( zSign, 0, 0 );
2367 if ( aSig == 0 || bSig == 0 ) goto roundAndPack;
2368 #endif
2369 goto shiftRight1;
2370 }
2371 zSig0 = aSig + bSig;
2372 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
2373 shiftRight1:
2374 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
2375 zSig0 |= LIT64( 0x8000000000000000 );
2376 ++zExp;
2377 roundAndPack:
2378 return roundAndPackFloatx80(status->floatx80_rounding_precision,
2379 zSign, zExp, zSig0, zSig1, status);
2380 }
2381
2382 /*----------------------------------------------------------------------------
2383 | Returns the result of subtracting the absolute values of the extended
2384 | double-precision floating-point values `a' and `b'. If `zSign' is 1, the
2385 | difference is negated before being returned. `zSign' is ignored if the
2386 | result is a NaN. The subtraction is performed according to the IEC/IEEE
2387 | Standard for Binary Floating-Point Arithmetic.
2388 *----------------------------------------------------------------------------*/
2389
2390 static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
2391 float_status *status)
2392 {
2393 int32_t aExp, bExp, zExp;
2394 uint64_t aSig, bSig, zSig0, zSig1;
2395 int32_t expDiff;
2396
2397 aSig = extractFloatx80Frac( a );
2398 aExp = extractFloatx80Exp( a );
2399 bSig = extractFloatx80Frac( b );
2400 bExp = extractFloatx80Exp( b );
2401 expDiff = aExp - bExp;
2402 if ( 0 < expDiff ) goto aExpBigger;
2403 if ( expDiff < 0 ) goto bExpBigger;
2404 if ( aExp == 0x7FFF ) {
2405 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
2406 return propagateFloatx80NaN(a, b, status);
2407 }
2408 float_raise(float_flag_invalid, status);
2409 return floatx80_default_nan(status);
2410 }
2411 #ifndef SOFTFLOAT_68K
2412 if ( aExp == 0 ) {
2413 aExp = 1;
2414 bExp = 1;
2415 }
2416 #endif
2417 zSig1 = 0;
2418 if ( bSig < aSig ) goto aBigger;
2419 if ( aSig < bSig ) goto bBigger;
2420 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
2421 bExpBigger:
2422 if ( bExp == 0x7FFF ) {
2423 if ((uint64_t)(bSig << 1)) return propagateFloatx80NaN(a, b, status);
2424 if (inf_clear_intbit(status)) bSig = 0;
2425 return packFloatx80(zSign ^ 1, bExp, bSig);
2426 }
2427 #ifndef SOFTFLOAT_68K
2428 if ( aExp == 0 ) ++expDiff;
2429 #endif
2430 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
2431 bBigger:
2432 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
2433 zExp = bExp;
2434 zSign ^= 1;
2435 goto normalizeRoundAndPack;
2436 aExpBigger:
2437 if ( aExp == 0x7FFF ) {
2438 if ((uint64_t)(aSig << 1)) return propagateFloatx80NaN(a, b, status);
2439 return inf_clear_intbit(status) ? packFloatx80(extractFloatx80Sign(a), aExp, 0) : a;
2440 }
2441 #ifndef SOFTFLOAT_68K
2442 if ( bExp == 0 ) --expDiff;
2443 #endif
2444 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
2445 aBigger:
2446 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
2447 zExp = aExp;
2448 normalizeRoundAndPack:
2449 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
2450 zSign, zExp, zSig0, zSig1, status);
2451 }
2452
2453 /*----------------------------------------------------------------------------
2454 | Returns the result of adding the extended double-precision floating-point
2455 | values `a' and `b'. The operation is performed according to the IEC/IEEE
2456 | Standard for Binary Floating-Point Arithmetic.
2457 *----------------------------------------------------------------------------*/
2458
2459 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2460 {
2461 flag aSign, bSign;
2462
2463 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
2464 float_raise(float_flag_invalid, status);
2465 return floatx80_default_nan(status);
2466 }
2467 aSign = extractFloatx80Sign( a );
2468 bSign = extractFloatx80Sign( b );
2469 if ( aSign == bSign ) {
2470 return addFloatx80Sigs(a, b, aSign, status);
2471 }
2472 else {
2473 return subFloatx80Sigs(a, b, aSign, status);
2474 }
2475
2476 }
2477
2478 /*----------------------------------------------------------------------------
2479 | Returns the result of subtracting the extended double-precision floating-
2480 | point values `a' and `b'. The operation is performed according to the
2481 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2482 *----------------------------------------------------------------------------*/
2483
2484 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2485 {
2486 flag aSign, bSign;
2487
2488 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
2489 float_raise(float_flag_invalid, status);
2490 return floatx80_default_nan(status);
2491 }
2492 aSign = extractFloatx80Sign( a );
2493 bSign = extractFloatx80Sign( b );
2494 if ( aSign == bSign ) {
2495 return subFloatx80Sigs(a, b, aSign, status);
2496 }
2497 else {
2498 return addFloatx80Sigs(a, b, aSign, status);
2499 }
2500
2501 }
2502
2503 /*----------------------------------------------------------------------------
2504 | Returns the result of multiplying the extended double-precision floating-
2505 | point values `a' and `b'. The operation is performed according to the
2506 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2507 *----------------------------------------------------------------------------*/
2508
2509 floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2510 {
2511 flag aSign, bSign, zSign;
2512 int32_t aExp, bExp, zExp;
2513 uint64_t aSig, bSig, zSig0, zSig1;
2514
2515 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
2516 float_raise(float_flag_invalid, status);
2517 return floatx80_default_nan(status);
2518 }
2519 aSig = extractFloatx80Frac( a );
2520 aExp = extractFloatx80Exp( a );
2521 aSign = extractFloatx80Sign( a );
2522 bSig = extractFloatx80Frac( b );
2523 bExp = extractFloatx80Exp( b );
2524 bSign = extractFloatx80Sign( b );
2525 zSign = aSign ^ bSign;
2526 if ( aExp == 0x7FFF ) {
2527 if ( (uint64_t) ( aSig<<1 )
2528 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
2529 return propagateFloatx80NaN(a, b, status);
2530 }
2531 if ( ( bExp | bSig ) == 0 ) goto invalid;
2532 if (inf_clear_intbit(status)) aSig = 0;
2533 return packFloatx80(zSign, aExp, aSig);
2534 }
2535 if ( bExp == 0x7FFF ) {
2536 if ((uint64_t)(bSig << 1)) {
2537 return propagateFloatx80NaN(a, b, status);
2538 }
2539 if ( ( aExp | aSig ) == 0 ) {
2540 invalid:
2541 float_raise(float_flag_invalid, status);
2542 return floatx80_default_nan(status);
2543 }
2544 if (inf_clear_intbit(status)) bSig = 0;
2545 return packFloatx80(zSign, bExp, bSig);
2546 }
2547 if ( aExp == 0 ) {
2548 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
2549 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2550 }
2551 if ( bExp == 0 ) {
2552 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
2553 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2554 }
2555 zExp = aExp + bExp - 0x3FFE;
2556 mul64To128( aSig, bSig, &zSig0, &zSig1 );
2557 if ( 0 < (int64_t) zSig0 ) {
2558 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
2559 --zExp;
2560 }
2561 return roundAndPackFloatx80(status->floatx80_rounding_precision,
2562 zSign, zExp, zSig0, zSig1, status);
2563 }
2564
2565 #ifdef SOFTFLOAT_68K // 21-01-2017: Added for Previous
2566 floatx80 floatx80_sglmul( floatx80 a, floatx80 b, float_status *status )
2567 {
2568 flag aSign, bSign, zSign;
2569 int32_t aExp, bExp, zExp;
2570 uint64_t aSig, bSig, zSig0, zSig1;
2571
2572 aSig = extractFloatx80Frac( a );
2573 aExp = extractFloatx80Exp( a );
2574 aSign = extractFloatx80Sign( a );
2575 bSig = extractFloatx80Frac( b );
2576 bExp = extractFloatx80Exp( b );
2577 bSign = extractFloatx80Sign( b );
2578 zSign = aSign ^ bSign;
2579 if ( aExp == 0x7FFF ) {
2580 if ( (uint64_t) ( aSig<<1 )
2581 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
2582 return propagateFloatx80NaN( a, b, status );
2583 }
2584 if ( ( bExp | bSig ) == 0 ) goto invalid;
2585 if (inf_clear_intbit(status)) aSig = 0;
2586 return packFloatx80(zSign, aExp, aSig);
2587 }
2588 if ( bExp == 0x7FFF ) {
2589 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status );
2590 if ( ( aExp | aSig ) == 0 ) {
2591 invalid:
2592 float_raise( float_flag_invalid, status );
2593 return floatx80_default_nan(status);
2594 }
2595 if (inf_clear_intbit(status)) bSig = 0;
2596 return packFloatx80(zSign, bExp, bSig);
2597 }
2598 if ( aExp == 0 ) {
2599 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
2600 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2601 }
2602 if ( bExp == 0 ) {
2603 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
2604 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2605 }
2606 aSig &= LIT64( 0xFFFFFF0000000000 );
2607 bSig &= LIT64( 0xFFFFFF0000000000 );
2608 zExp = aExp + bExp - 0x3FFE;
2609 mul64To128( aSig, bSig, &zSig0, &zSig1 );
2610 if ( 0 < (int64_t) zSig0 ) {
2611 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
2612 --zExp;
2613 }
2614 return roundSigAndPackFloatx80( 32, zSign, zExp, zSig0, zSig1, status);
2615
2616 }
2617 #endif // End of addition for Previous
2618
2619
2620 /*----------------------------------------------------------------------------
2621 | Returns the result of dividing the extended double-precision floating-point
2622 | value `a' by the corresponding value `b'. The operation is performed
2623 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2624 *----------------------------------------------------------------------------*/
2625
2626 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2627 {
2628 flag aSign, bSign, zSign;
2629 int32_t aExp, bExp, zExp;
2630 uint64_t aSig, bSig, zSig0, zSig1;
2631 uint64_t rem0, rem1, rem2, term0, term1, term2;
2632
2633 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
2634 float_raise(float_flag_invalid, status);
2635 return floatx80_default_nan(status);
2636 }
2637 aSig = extractFloatx80Frac( a );
2638 aExp = extractFloatx80Exp( a );
2639 aSign = extractFloatx80Sign( a );
2640 bSig = extractFloatx80Frac( b );
2641 bExp = extractFloatx80Exp( b );
2642 bSign = extractFloatx80Sign( b );
2643 zSign = aSign ^ bSign;
2644 if ( aExp == 0x7FFF ) {
2645 if ((uint64_t)(aSig << 1)) {
2646 return propagateFloatx80NaN(a, b, status);
2647 }
2648 if ( bExp == 0x7FFF ) {
2649 if ((uint64_t)(bSig << 1)) {
2650 return propagateFloatx80NaN(a, b, status);
2651 }
2652 goto invalid;
2653 }
2654 if (inf_clear_intbit(status)) aSig = 0;
2655 return packFloatx80(zSign, aExp, aSig);
2656 }
2657 if ( bExp == 0x7FFF ) {
2658 if ((uint64_t)(bSig << 1)) {
2659 return propagateFloatx80NaN(a, b, status);
2660 }
2661 return packFloatx80( zSign, 0, 0 );
2662 }
2663 if ( bExp == 0 ) {
2664 if ( bSig == 0 ) {
2665 if ( ( aExp | aSig ) == 0 ) {
2666 invalid:
2667 float_raise(float_flag_invalid, status);
2668 return floatx80_default_nan(status);
2669 }
2670 float_raise(float_flag_divbyzero, status);
2671 return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low );
2672 }
2673 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2674 }
2675 if ( aExp == 0 ) {
2676 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
2677 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2678 }
2679 zExp = aExp - bExp + 0x3FFE;
2680 rem1 = 0;
2681 if ( bSig <= aSig ) {
2682 shift128Right( aSig, 0, 1, &aSig, &rem1 );
2683 ++zExp;
2684 }
2685 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
2686 mul64To128( bSig, zSig0, &term0, &term1 );
2687 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
2688 while ( (int64_t) rem0 < 0 ) {
2689 --zSig0;
2690 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2691 }
2692 zSig1 = estimateDiv128To64( rem1, 0, bSig );
2693 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
2694 mul64To128( bSig, zSig1, &term1, &term2 );
2695 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
2696 while ( (int64_t) rem1 < 0 ) {
2697 --zSig1;
2698 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
2699 }
2700 zSig1 |= ( ( rem1 | rem2 ) != 0 );
2701 }
2702 return roundAndPackFloatx80(status->floatx80_rounding_precision,
2703 zSign, zExp, zSig0, zSig1, status);
2704 }
2705
2706 #ifdef SOFTFLOAT_68K // 21-01-2017: Addition for Previous
2707 floatx80 floatx80_sgldiv( floatx80 a, floatx80 b, float_status *status )
2708 {
2709 flag aSign, bSign, zSign;
2710 int32_t aExp, bExp, zExp;
2711 uint64_t aSig, bSig, zSig0, zSig1;
2712 uint64_t rem0, rem1, rem2, term0, term1, term2;
2713
2714 aSig = extractFloatx80Frac( a );
2715 aExp = extractFloatx80Exp( a );
2716 aSign = extractFloatx80Sign( a );
2717 bSig = extractFloatx80Frac( b );
2718 bExp = extractFloatx80Exp( b );
2719 bSign = extractFloatx80Sign( b );
2720 zSign = aSign ^ bSign;
2721 if ( aExp == 0x7FFF ) {
2722 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b, status );
2723 if ( bExp == 0x7FFF ) {
2724 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status );
2725 goto invalid;
2726 }
2727 if (inf_clear_intbit(status)) aSig = 0;
2728 return packFloatx80(zSign, aExp, aSig);
2729 }
2730 if ( bExp == 0x7FFF ) {
2731 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status );
2732 return packFloatx80( zSign, 0, 0 );
2733 }
2734 if ( bExp == 0 ) {
2735 if ( bSig == 0 ) {
2736 if ( ( aExp | aSig ) == 0 ) {
2737 invalid:
2738 float_raise( float_flag_invalid, status );
2739 return floatx80_default_nan(status);
2740 }
2741 float_raise( float_flag_divbyzero, status );
2742 return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low );
2743 }
2744 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2745 }
2746 if ( aExp == 0 ) {
2747 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
2748 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2749 }
2750
2751 zExp = aExp - bExp + 0x3FFE;
2752 rem1 = 0;
2753 if ( bSig <= aSig ) {
2754 shift128Right( aSig, 0, 1, &aSig, &rem1 );
2755 ++zExp;
2756 }
2757 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
2758 mul64To128( bSig, zSig0, &term0, &term1 );
2759 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
2760 while ( (int64_t) rem0 < 0 ) {
2761 --zSig0;
2762 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2763 }
2764 zSig1 = estimateDiv128To64( rem1, 0, bSig );
2765 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
2766 mul64To128( bSig, zSig1, &term1, &term2 );
2767 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
2768 while ( (int64_t) rem1 < 0 ) {
2769 --zSig1;
2770 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
2771 }
2772 zSig1 |= ( ( rem1 | rem2 ) != 0 );
2773 }
2774 return roundSigAndPackFloatx80( 32, zSign, zExp, zSig0, zSig1, status);
2775
2776 }
2777 #endif // End of addition for Previous
2778
2779
2780 /*----------------------------------------------------------------------------
2781 | Returns the remainder of the extended double-precision floating-point value
2782 | `a' with respect to the corresponding value `b'. The operation is performed
2783 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2784 *----------------------------------------------------------------------------*/
2785
2786 #ifndef SOFTFLOAT_68K
2787 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2788 {
2789 flag aSign, zSign;
2790 int32_t aExp, bExp, expDiff;
2791 uint64_t aSig0, aSig1, bSig;
2792 uint64_t q, term0, term1, alternateASig0, alternateASig1;
2793
2794 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
2795 float_raise(float_flag_invalid, status);
2796 return floatx80_default_nan(status);
2797 }
2798 aSig0 = extractFloatx80Frac( a );
2799 aExp = extractFloatx80Exp( a );
2800 aSign = extractFloatx80Sign( a );
2801 bSig = extractFloatx80Frac( b );
2802 bExp = extractFloatx80Exp( b );
2803 if ( aExp == 0x7FFF ) {
2804 if ( (uint64_t) ( aSig0<<1 )
2805 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
2806 return propagateFloatx80NaN(a, b, status);
2807 }
2808 goto invalid;
2809 }
2810 if ( bExp == 0x7FFF ) {
2811 if ((uint64_t)(bSig << 1)) {
2812 return propagateFloatx80NaN(a, b, status);
2813 }
2814 return a;
2815 }
2816 if ( bExp == 0 ) {
2817 if ( bSig == 0 ) {
2818 invalid:
2819 float_raise(float_flag_invalid, status);
2820 return floatx80_default_nan(status);
2821 }
2822 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2823 }
2824 if ( aExp == 0 ) {
2825 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
2826 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
2827 }
2828 bSig |= LIT64( 0x8000000000000000 );
2829 zSign = aSign;
2830 expDiff = aExp - bExp;
2831 aSig1 = 0;
2832 if ( expDiff < 0 ) {
2833 if ( expDiff < -1 ) return a;
2834 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
2835 expDiff = 0;
2836 }
2837 q = ( bSig <= aSig0 );
2838 if ( q ) aSig0 -= bSig;
2839 expDiff -= 64;
2840 while ( 0 < expDiff ) {
2841 q = estimateDiv128To64( aSig0, aSig1, bSig );
2842 q = ( 2 < q ) ? q - 2 : 0;
2843 mul64To128( bSig, q, &term0, &term1 );
2844 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
2845 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
2846 expDiff -= 62;
2847 }
2848 expDiff += 64;
2849 if ( 0 < expDiff ) {
2850 q = estimateDiv128To64( aSig0, aSig1, bSig );
2851 q = ( 2 < q ) ? q - 2 : 0;
2852 q >>= 64 - expDiff;
2853 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
2854 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
2855 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
2856 while ( le128( term0, term1, aSig0, aSig1 ) ) {
2857 ++q;
2858 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
2859 }
2860 }
2861 else {
2862 term1 = 0;
2863 term0 = bSig;
2864 }
2865 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
2866 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
2867 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
2868 && ( q & 1 ) )
2869 ) {
2870 aSig0 = alternateASig0;
2871 aSig1 = alternateASig1;
2872 zSign = ! zSign;
2873 }
2874 return
2875 normalizeRoundAndPackFloatx80(
2876 80, zSign, bExp + expDiff, aSig0, aSig1, status);
2877
2878 }
2879 #else // 09-01-2017: Modified version for Previous
2880 floatx80 floatx80_rem( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status )
2881 {
2882 flag aSign, bSign, zSign;
2883 int32_t aExp, bExp, expDiff;
2884 uint64_t aSig0, aSig1, bSig;
2885 uint64_t qTemp, term0, term1, alternateASig0, alternateASig1;
2886
2887 aSig0 = extractFloatx80Frac( a );
2888 aExp = extractFloatx80Exp( a );
2889 aSign = extractFloatx80Sign( a );
2890 bSig = extractFloatx80Frac( b );
2891 bExp = extractFloatx80Exp( b );
2892 bSign = extractFloatx80Sign( b );
2893
2894 if ( aExp == 0x7FFF ) {
2895 if ( (uint64_t) ( aSig0<<1 )
2896 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
2897 return propagateFloatx80NaN( a, b, status );
2898 }
2899 goto invalid;
2900 }
2901 if ( bExp == 0x7FFF ) {
2902 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status );
2903 *s = (aSign != bSign);
2904 *q = 0;
2905 return a;
2906 }
2907 if ( bExp == 0 ) {
2908 if ( bSig == 0 ) {
2909 invalid:
2910 float_raise( float_flag_invalid, status );
2911 return floatx80_default_nan(status);
2912 }
2913 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2914 }
2915 if ( aExp == 0 ) {
2916 #ifdef SOFTFLOAT_68K
2917 if ( aSig0 == 0 ) {
2918 *s = (aSign != bSign);
2919 *q = 0;
2920 return a;
2921 }
2922 #else
2923 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
2924 #endif
2925 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
2926 }
2927 bSig |= LIT64( 0x8000000000000000 );
2928 zSign = aSign;
2929 expDiff = aExp - bExp;
2930 *s = (aSign != bSign);
2931 aSig1 = 0;
2932 if ( expDiff < 0 ) {
2933 if ( expDiff < -1 ) return a;
2934 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
2935 expDiff = 0;
2936 }
2937 qTemp = ( bSig <= aSig0 );
2938 if ( qTemp ) aSig0 -= bSig;
2939 *q = ( expDiff > 63 ) ? 0 : ( qTemp<<expDiff );
2940 expDiff -= 64;
2941 while ( 0 < expDiff ) {
2942 qTemp = estimateDiv128To64( aSig0, aSig1, bSig );
2943 qTemp = ( 2 < qTemp ) ? qTemp - 2 : 0;
2944 mul64To128( bSig, qTemp, &term0, &term1 );
2945 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
2946 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
2947 *q = ( expDiff > 63 ) ? 0 : ( qTemp<<expDiff );
2948 expDiff -= 62;
2949 }
2950 expDiff += 64;
2951 if ( 0 < expDiff ) {
2952 qTemp = estimateDiv128To64( aSig0, aSig1, bSig );
2953 qTemp = ( 2 < qTemp ) ? qTemp - 2 : 0;
2954 qTemp >>= 64 - expDiff;
2955 mul64To128( bSig, qTemp<<( 64 - expDiff ), &term0, &term1 );
2956 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
2957 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
2958 while ( le128( term0, term1, aSig0, aSig1 ) ) {
2959 ++qTemp;
2960 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
2961 }
2962 *q += qTemp;
2963 }
2964 else {
2965 term1 = 0;
2966 term0 = bSig;
2967 }
2968 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
2969 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
2970 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
2971 && ( qTemp & 1 ) )
2972 ) {
2973 aSig0 = alternateASig0;
2974 aSig1 = alternateASig1;
2975 zSign = ! zSign;
2976 ++*q;
2977 }
2978 return
2979 normalizeRoundAndPackFloatx80(
2980 80, zSign, bExp + expDiff, aSig0, aSig1, status );
2981
2982 }
2983 #endif // End of modification
2984
2985
2986 #ifdef SOFTFLOAT_68K // 08-01-2017: Added for Previous
2987 /*----------------------------------------------------------------------------
2988 | Returns the modulo remainder of the extended double-precision floating-point
2989 | value `a' with respect to the corresponding value `b'.
2990 *----------------------------------------------------------------------------*/
2991
2992 floatx80 floatx80_mod( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status )
2993 {
2994 flag aSign, bSign, zSign;
2995 int32_t aExp, bExp, expDiff;
2996 uint64_t aSig0, aSig1, bSig;
2997 uint64_t qTemp, term0, term1;
2998
2999 aSig0 = extractFloatx80Frac( a );
3000 aExp = extractFloatx80Exp( a );
3001 aSign = extractFloatx80Sign( a );
3002 bSig = extractFloatx80Frac( b );
3003 bExp = extractFloatx80Exp( b );
3004 bSign = extractFloatx80Sign( b );
3005
3006 if ( aExp == 0x7FFF ) {
3007 if ( (uint64_t) ( aSig0<<1 )
3008 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
3009 return propagateFloatx80NaN( a, b, status );
3010 }
3011 goto invalid;
3012 }
3013 if ( bExp == 0x7FFF ) {
3014 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status );
3015 *s = (aSign != bSign);
3016 *q = 0;
3017 return a;
3018 }
3019 if ( bExp == 0 ) {
3020 if ( bSig == 0 ) {
3021 invalid:
3022 float_raise( float_flag_invalid, status );
3023 return floatx80_default_nan(status);
3024 }
3025 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3026 }
3027 if ( aExp == 0 ) {
3028 #ifdef SOFTFLOAT_68K
3029 if ( aSig0 == 0 ) {
3030 *s = (aSign != bSign);
3031 *q = 0;
3032 return a;
3033 }
3034 #else
3035 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
3036 #endif
3037 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3038 }
3039 bSig |= LIT64( 0x8000000000000000 );
3040 zSign = aSign;
3041 expDiff = aExp - bExp;
3042 *s = (aSign != bSign);
3043 aSig1 = 0;
3044 if ( expDiff < 0 ) return a;
3045 qTemp = ( bSig <= aSig0 );
3046 if ( qTemp ) aSig0 -= bSig;
3047 *q = ( expDiff > 63 ) ? 0 : ( qTemp<<expDiff );
3048 expDiff -= 64;
3049 while ( 0 < expDiff ) {
3050 qTemp = estimateDiv128To64( aSig0, aSig1, bSig );
3051 qTemp = ( 2 < qTemp ) ? qTemp - 2 : 0;
3052 mul64To128( bSig, qTemp, &term0, &term1 );
3053 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3054 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
3055 *q = ( expDiff > 63 ) ? 0 : ( qTemp<<expDiff );
3056 expDiff -= 62;
3057 }
3058 expDiff += 64;
3059 if ( 0 < expDiff ) {
3060 qTemp = estimateDiv128To64( aSig0, aSig1, bSig );
3061 qTemp = ( 2 < qTemp ) ? qTemp - 2 : 0;
3062 qTemp >>= 64 - expDiff;
3063 mul64To128( bSig, qTemp<<( 64 - expDiff ), &term0, &term1 );
3064 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3065 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
3066 while ( le128( term0, term1, aSig0, aSig1 ) ) {
3067 ++qTemp;
3068 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3069 }
3070 *q += qTemp;
3071 }
3072 return
3073 normalizeRoundAndPackFloatx80(
3074 80, zSign, bExp + expDiff, aSig0, aSig1, status );
3075
3076 }
3077 #endif // end of addition for Previous
3078
3079
3080 /*----------------------------------------------------------------------------
3081 | Returns the square root of the extended double-precision floating-point
3082 | value `a'. The operation is performed according to the IEC/IEEE Standard
3083 | for Binary Floating-Point Arithmetic.
3084 *----------------------------------------------------------------------------*/
3085
3086 floatx80 floatx80_sqrt(floatx80 a, float_status *status)
3087 {
3088 flag aSign;
3089 int32_t aExp, zExp;
3090 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
3091 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
3092
3093 if (floatx80_invalid_encoding(a)) {
3094 float_raise(float_flag_invalid, status);
3095 return floatx80_default_nan(status);
3096 }
3097 aSig0 = extractFloatx80Frac( a );
3098 aExp = extractFloatx80Exp( a );
3099 aSign = extractFloatx80Sign( a );
3100 if ( aExp == 0x7FFF ) {
3101 if ((uint64_t)(aSig0 << 1))
3102 return propagateFloatx80NaNOneArg(a, status);
3103 if (!aSign) return inf_clear_intbit(status) ? packFloatx80(aSign, aExp, 0) : a;
3104 goto invalid;
3105 }
3106 if ( aSign ) {
3107 if ( ( aExp | aSig0 ) == 0 ) return a;
3108 invalid:
3109 float_raise(float_flag_invalid, status);
3110 return floatx80_default_nan(status);
3111 }
3112 if ( aExp == 0 ) {
3113 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
3114 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3115 }
3116 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
3117 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
3118 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
3119 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
3120 doubleZSig0 = zSig0<<1;
3121 mul64To128( zSig0, zSig0, &term0, &term1 );
3122 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
3123 while ( (int64_t) rem0 < 0 ) {
3124 --zSig0;
3125 doubleZSig0 -= 2;
3126 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
3127 }
3128 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
3129 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
3130 if ( zSig1 == 0 ) zSig1 = 1;
3131 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
3132 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3133 mul64To128( zSig1, zSig1, &term2, &term3 );
3134 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
3135 while ( (int64_t) rem1 < 0 ) {
3136 --zSig1;
3137 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
3138 term3 |= 1;
3139 term2 |= doubleZSig0;
3140 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
3141 }
3142 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
3143 }
3144 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
3145 zSig0 |= doubleZSig0;
3146 return roundAndPackFloatx80(status->floatx80_rounding_precision,
3147 0, zExp, zSig0, zSig1, status);
3148 }
3149
3150
3151 #ifdef SOFTFLOAT_68K // 07-01-2017: Added for Previous
3152 /*----------------------------------------------------------------------------
3153 | Returns the mantissa of the extended double-precision floating-point
3154 | value `a'.
3155 *----------------------------------------------------------------------------*/
3156
3157 floatx80 floatx80_getman( floatx80 a, float_status *status)
3158 {
3159 flag aSign;
3160 int32_t aExp;
3161 uint64_t aSig;
3162
3163 aSig = extractFloatx80Frac( a );
3164 aExp = extractFloatx80Exp( a );
3165 aSign = extractFloatx80Sign( a );
3166
3167 if ( aExp == 0x7FFF ) {
3168 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status );
3169 float_raise( float_flag_invalid, status );
3170 return floatx80_default_nan(status);
3171 }
3172
3173 if ( aExp == 0 ) {
3174 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3175 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3176 }
3177
3178 return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, 0x3FFF, aSig, 0, status);
3179 }
3180
3181 /*----------------------------------------------------------------------------
3182 | Returns the exponent of the extended double-precision floating-point
3183 | value `a' as an extended double-precision value.
3184 *----------------------------------------------------------------------------*/
3185
3186 floatx80 floatx80_getexp( floatx80 a, float_status *status)
3187 {
3188 flag aSign;
3189 int32_t aExp;
3190 uint64_t aSig;
3191
3192 aSig = extractFloatx80Frac( a );
3193 aExp = extractFloatx80Exp( a );
3194 aSign = extractFloatx80Sign( a );
3195
3196 if ( aExp == 0x7FFF ) {
3197 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status );
3198 float_raise( float_flag_invalid, status );
3199 return floatx80_default_nan(status);
3200 }
3201
3202 if ( aExp == 0 ) {
3203 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3204 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3205 }
3206
3207 return int32_to_floatx80(aExp - 0x3FFF);
3208 }
3209
3210 /*----------------------------------------------------------------------------
3211 | Scales extended double-precision floating-point value in operand `a' by
3212 | value `b'. The function truncates the value in the second operand 'b' to
3213 | an integral value and adds that value to the exponent of the operand 'a'.
3214 | The operation performed according to the IEC/IEEE Standard for Binary
3215 | Floating-Point Arithmetic.
3216 *----------------------------------------------------------------------------*/
3217
3218 floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status)
3219 {
3220 flag aSign, bSign;
3221 int32_t aExp, bExp, shiftCount;
3222 uint64_t aSig, bSig;
3223
3224 aSig = extractFloatx80Frac(a);
3225 aExp = extractFloatx80Exp(a);
3226 aSign = extractFloatx80Sign(a);
3227 bSig = extractFloatx80Frac(b);
3228 bExp = extractFloatx80Exp(b);
3229 bSign = extractFloatx80Sign(b);
3230
3231 if ( bExp == 0x7FFF ) {
3232 if ( (uint64_t) ( bSig<<1 ) ||
3233 ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) ) {
3234 return propagateFloatx80NaN( a, b, status );
3235 }
3236 float_raise( float_flag_invalid, status );
3237 return floatx80_default_nan(status);
3238 }
3239 if ( aExp == 0x7FFF ) {
3240 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b, status );
3241 return a;
3242 }
3243 if ( aExp == 0 ) {
3244 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0);
3245 if ( bExp < 0x3FFF ) return a;
3246 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3247 }
3248
3249 if ( bExp < 0x3FFF ) return a;
3250
3251 if ( 0x400F < bExp ) {
3252 aExp = bSign ? -0x6001 : 0xE000;
3253 return roundAndPackFloatx80(
3254 status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status );
3255 }
3256
3257 shiftCount = 0x403E - bExp;
3258 bSig >>= shiftCount;
3259 aExp = bSign ? ( aExp - bSig ) : ( aExp + bSig );
3260
3261 return roundAndPackFloatx80(
3262 status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status);
3263
3264 }
3265
3266 /*-----------------------------------------------------------------------------
3267 | Calculates the absolute value of the extended double-precision floating-point
3268 | value `a'. The operation is performed according to the IEC/IEEE Standard
3269 | for Binary Floating-Point Arithmetic.
3270 *----------------------------------------------------------------------------*/
3271
3272 floatx80 floatx80_abs(floatx80 a, float_status *status)
3273 {
3274 int32_t aExp;
3275 uint64_t aSig;
3276
3277 aSig = extractFloatx80Frac(a);
3278 aExp = extractFloatx80Exp(a);
3279
3280 if ( aExp == 0x7FFF ) {
3281 if ( (uint64_t) ( aSig<<1 ) )
3282 return propagateFloatx80NaNOneArg( a, status );
3283 if (inf_clear_intbit(status)) aSig = 0;
3284 return packFloatx80(0, aExp, aSig);
3285 }
3286
3287 if ( aExp == 0 ) {
3288 if ( aSig == 0 ) return packFloatx80( 0, 0, 0 );
3289 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3290 }
3291
3292 return roundAndPackFloatx80(
3293 status->floatx80_rounding_precision, 0, aExp, aSig, 0, status );
3294
3295 }
3296
3297 /*-----------------------------------------------------------------------------
3298 | Changes the sign of the extended double-precision floating-point value 'a'.
3299 | The operation is performed according to the IEC/IEEE Standard for Binary
3300 | Floating-Point Arithmetic.
3301 *----------------------------------------------------------------------------*/
3302
3303 floatx80 floatx80_neg(floatx80 a, float_status *status)
3304 {
3305 flag aSign;
3306 int32_t aExp;
3307 uint64_t aSig;
3308
3309 aSig = extractFloatx80Frac(a);
3310 aExp = extractFloatx80Exp(a);
3311 aSign = extractFloatx80Sign(a);
3312
3313 if ( aExp == 0x7FFF ) {
3314 if ( (uint64_t) ( aSig<<1 ) )
3315 return propagateFloatx80NaNOneArg( a, status );
3316 if (inf_clear_intbit(status)) aSig = 0;
3317 return packFloatx80(!aSign, aExp, aSig);
3318 }
3319
3320 aSign = !aSign;
3321
3322 if ( aExp == 0 ) {
3323 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3324 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3325 }
3326
3327 return roundAndPackFloatx80(
3328 status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status );
3329
3330 }
3331
3332 /*----------------------------------------------------------------------------
3333 | Returns the result of comparing the extended double-precision floating-
3334 | point values `a' and `b'. The result is abstracted for matching the
3335 | corresponding condition codes.
3336 *----------------------------------------------------------------------------*/
3337
3338 floatx80 floatx80_cmp( floatx80 a, floatx80 b, float_status *status )
3339 {
3340 flag aSign, bSign;
3341 int32_t aExp, bExp;
3342 uint64_t aSig, bSig;
3343
3344 aSig = extractFloatx80Frac( a );
3345 aExp = extractFloatx80Exp( a );
3346 aSign = extractFloatx80Sign( a );
3347 bSig = extractFloatx80Frac( b );
3348 bExp = extractFloatx80Exp( b );
3349 bSign = extractFloatx80Sign( b );
3350
3351 if ( ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) ) ||
3352 ( bExp == 0x7FFF && (uint64_t) ( bSig<<1 ) ) ) {
3353 // 68040 FCMP -NaN return N flag set
3354 if (fcmp_signed_nan(status))
3355 return propagateFloatx80NaN(a, b, status );
3356 return propagateFloatx80NaN(packFloatx80(0, aExp, aSig),
3357 packFloatx80(0, bExp, bSig), status);
3358 }
3359
3360 if ( bExp < aExp ) return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3361 if ( aExp < bExp ) return packFloatx80( bSign ^ 1, 0x3FFF, LIT64( 0x8000000000000000 ) );
3362
3363 if ( aExp == 0x7FFF ) {
3364 if ( aSign == bSign ) return packFloatx80( aSign, 0, 0 );
3365 return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3366 }
3367
3368 if ( bSig < aSig ) return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3369 if ( aSig < bSig ) return packFloatx80( bSign ^ 1, 0x3FFF, LIT64( 0x8000000000000000 ) );
3370
3371 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3372
3373 if ( aSign == bSign ) return packFloatx80( 0, 0, 0 );
3374
3375 return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3376
3377 }
3378
3379 floatx80 floatx80_tst( floatx80 a, float_status *status )
3380 {
3381 int32_t aExp;
3382 uint64_t aSig;
3383
3384 aSig = extractFloatx80Frac( a );
3385 aExp = extractFloatx80Exp( a );
3386
3387 if ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) )
3388 return propagateFloatx80NaNOneArg( a, status );
3389 return a;
3390 }
3391
3392 floatx80 floatx80_move( floatx80 a, float_status *status )
3393 {
3394 flag aSign;
3395 int32_t aExp;
3396 uint64_t aSig;
3397
3398 aSig = extractFloatx80Frac( a );
3399 aExp = extractFloatx80Exp( a );
3400 aSign = extractFloatx80Sign( a );
3401
3402 if ( aExp == 0x7FFF ) {
3403 if ((uint64_t)(aSig << 1)) return propagateFloatx80NaNOneArg(a, status);
3404 return inf_clear_intbit(status) ? packFloatx80(aSign, aExp, 0) : a;
3405 }
3406 if ( aExp == 0 ) {
3407 if ( aSig == 0 ) return a;
3408 return normalizeRoundAndPackFloatx80( status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status );
3409 }
3410 return roundAndPackFloatx80( status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status );
3411 }
3412
3413 floatx80 floatx80_denormalize( floatx80 a, flag eSign)
3414 {
3415 flag aSign;
3416 int32_t aExp;
3417 uint64_t aSig;
3418 int32_t shiftCount;
3419
3420 aSig = extractFloatx80Frac( a );
3421 aExp = extractFloatx80Exp( a );
3422 aSign = extractFloatx80Sign( a );
3423
3424 if ( eSign ) {
3425 shiftCount = 0x8000 - aExp;
3426 aExp = 0;
3427 if (shiftCount > 63) {
3428 aSig = 0;
3429 } else {
3430 aSig >>= shiftCount;
3431 }
3432 }
3433 return packFloatx80(aSign, aExp, aSig);
3434 }
3435
3436 #endif // End of addition for Previous
3437
3438 /*----------------------------------------------------------------------------
3439 | Returns 1 if the extended double-precision floating-point value `a' is
3440 | equal to the corresponding value `b', and 0 otherwise. The comparison is
3441 | performed according to the IEC/IEEE Standard for Binary Floating-Point
3442 | Arithmetic.
3443 *----------------------------------------------------------------------------*/
3444
3445 flag floatx80_eq( floatx80 a, floatx80 b, float_status *status )
3446 {
3447 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3448 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
3449 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3450 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
3451 ) {
3452 if ( floatx80_is_signaling_nan( a )
3453 || floatx80_is_signaling_nan( b ) ) {
3454 float_raise( float_flag_invalid, status );
3455 }
3456 return 0;
3457 }
3458 return
3459 ( a.low == b.low )
3460 && ( ( a.high == b.high )
3461 || ( ( a.low == 0 )
3462 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
3463 );
3464
3465 }
3466
3467 /*----------------------------------------------------------------------------
3468 | Returns 1 if the extended double-precision floating-point value `a' is
3469 | less than or equal to the corresponding value `b', and 0 otherwise. The
3470 | comparison is performed according to the IEC/IEEE Standard for Binary
3471 | Floating-Point Arithmetic.
3472 *----------------------------------------------------------------------------*/
3473
3474 flag floatx80_le( floatx80 a, floatx80 b, float_status *status )
3475 {
3476 flag aSign, bSign;
3477
3478 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3479 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
3480 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3481 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
3482 ) {
3483 float_raise( float_flag_invalid, status );
3484 return 0;
3485 }
3486 aSign = extractFloatx80Sign( a );
3487 bSign = extractFloatx80Sign( b );
3488 if ( aSign != bSign ) {
3489 return
3490 aSign
3491 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3492 == 0 );
3493 }
3494 return
3495 aSign ? le128( b.high, b.low, a.high, a.low )
3496 : le128( a.high, a.low, b.high, b.low );
3497 }
3498
3499 /*----------------------------------------------------------------------------
3500 | Returns 1 if the extended double-precision floating-point value `a' is
3501 | less than the corresponding value `b', and 0 otherwise. The comparison
3502 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
3503 | Arithmetic.
3504 *----------------------------------------------------------------------------*/
3505
3506 flag floatx80_lt( floatx80 a, floatx80 b, float_status *status )
3507 {
3508 flag aSign, bSign;
3509
3510 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3511 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
3512 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3513 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
3514 ) {
3515 float_raise( float_flag_invalid, status );
3516 return 0;
3517 }
3518 aSign = extractFloatx80Sign( a );
3519 bSign = extractFloatx80Sign( b );
3520 if ( aSign != bSign ) {
3521 return
3522 aSign
3523 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3524 != 0 );
3525 }
3526 return
3527 aSign ? lt128( b.high, b.low, a.high, a.low )
3528 : lt128( a.high, a.low, b.high, b.low );
3529
3530 }
3531
3532
3533 /*----------------------------------------------------------------------------
3534 | Returns the result of converting the 64-bit two's complement integer `a'
3535 | to the extended double-precision floating-point format. The conversion
3536 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
3537 | Arithmetic.
3538 *----------------------------------------------------------------------------*/
3539
3540 floatx80 int64_to_floatx80( int64_t a )
3541 {
3542 flag zSign;
3543 uint64_t absA;
3544 int8_t shiftCount;
3545
3546 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3547 zSign = ( a < 0 );
3548 absA = zSign ? - a : a;
3549 shiftCount = countLeadingZeros64( absA );
3550 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
3551 }
3552